diff options
Diffstat (limited to 'include/linux')
119 files changed, 7838 insertions, 3601 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c187817471fb..c471dfc93b71 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -53,7 +53,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode) +#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) @@ -261,8 +261,13 @@ extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA +int acpi_map_pxm_to_online_node(int pxm); int acpi_get_node(acpi_handle handle); #else +static inline int acpi_map_pxm_to_online_node(int pxm) +{ + return 0; +} static inline int acpi_get_node(acpi_handle handle) { return 0; @@ -449,7 +454,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return false; } -static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode) +static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/ata.h b/include/linux/ata.h index b666b773e111..fed36418dd1c 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -704,9 +704,19 @@ static inline bool ata_id_wcache_enabled(const u16 *id) static inline bool ata_id_has_read_log_dma_ext(const u16 *id) { + /* Word 86 must have bit 15 set */ if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 3); + + /* READ LOG DMA EXT support can be signaled either from word 119 + * or from word 120. The format is the same for both words: Bit + * 15 must be cleared, bit 14 set and bit 3 set. 
+ */ + if ((id[ATA_ID_COMMAND_SET_3] & 0xC008) == 0x4008 || + (id[ATA_ID_COMMAND_SET_4] & 0xC008) == 0x4008) + return true; + + return false; } static inline bool ata_id_has_sense_reporting(const u16 *id) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h new file mode 100644 index 000000000000..a48d90e3bcbb --- /dev/null +++ b/include/linux/backing-dev-defs.h @@ -0,0 +1,255 @@ +#ifndef __LINUX_BACKING_DEV_DEFS_H +#define __LINUX_BACKING_DEV_DEFS_H + +#include <linux/list.h> +#include <linux/radix-tree.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/percpu_counter.h> +#include <linux/percpu-refcount.h> +#include <linux/flex_proportions.h> +#include <linux/timer.h> +#include <linux/workqueue.h> + +struct page; +struct device; +struct dentry; + +/* + * Bits in bdi_writeback.state + */ +enum wb_state { + WB_registered, /* bdi_register() was done */ + WB_writeback_running, /* Writeback is in progress */ + WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ +}; + +enum wb_congested_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ +}; + +typedef int (congested_fn)(void *, int); + +enum wb_stat_item { + WB_RECLAIMABLE, + WB_WRITEBACK, + WB_DIRTIED, + WB_WRITTEN, + NR_WB_STAT_ITEMS +}; + +#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) + +/* + * For cgroup writeback, multiple wb's may map to the same blkcg. Those + * wb's can operate mostly independently but should share the congested + * state. To facilitate such sharing, the congested state is tracked using + * the following struct which is created on demand, indexed by blkcg ID on + * its bdi, and refcounted. 
+ */ +struct bdi_writeback_congested { + unsigned long state; /* WB_[a]sync_congested flags */ + +#ifdef CONFIG_CGROUP_WRITEBACK + struct backing_dev_info *bdi; /* the associated bdi */ + atomic_t refcnt; /* nr of attached wb's and blkg */ + int blkcg_id; /* ID of the associated blkcg */ + struct rb_node rb_node; /* on bdi->cgwb_congested_tree */ +#endif +}; + +/* + * Each wb (bdi_writeback) can perform writeback operations, is measured + * and throttled, independently. Without cgroup writeback, each bdi + * (backing_dev_info) is served by its embedded bdi->wb. + * + * On the default hierarchy, blkcg implicitly enables memcg. This allows + * using memcg's page ownership for attributing writeback IOs, and every + * memcg - blkcg combination can be served by its own wb by assigning a + * dedicated wb to each memcg, which enables isolation across different + * cgroups and propagation of IO back pressure down from the IO layer up to + * the tasks which are generating the dirty pages to be written back. + * + * A cgroup wb is indexed on its bdi by the ID of the associated memcg, + * refcounted with the number of inodes attached to it, and pins the memcg + * and the corresponding blkcg. As the corresponding blkcg for a memcg may + * change as blkcg is disabled and enabled higher up in the hierarchy, a wb + * is tested for blkcg after lookup and removed from index on mismatch so + * that a new wb for the combination can be created. 
+ */ +struct bdi_writeback { + struct backing_dev_info *bdi; /* our parent bdi */ + + unsigned long state; /* Always use atomic bitops on this */ + unsigned long last_old_flush; /* last old data flush */ + + struct list_head b_dirty; /* dirty inodes */ + struct list_head b_io; /* parked for writeback */ + struct list_head b_more_io; /* parked for more writeback */ + struct list_head b_dirty_time; /* time stamps are dirty */ + spinlock_t list_lock; /* protects the b_* lists */ + + struct percpu_counter stat[NR_WB_STAT_ITEMS]; + + struct bdi_writeback_congested *congested; + + unsigned long bw_time_stamp; /* last time write bw is updated */ + unsigned long dirtied_stamp; + unsigned long written_stamp; /* pages written at bw_time_stamp */ + unsigned long write_bandwidth; /* the estimated write bandwidth */ + unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */ + + /* + * The base dirty throttle rate, re-calculated on every 200ms. + * All the bdi tasks' dirty rate will be curbed under it. + * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit + * in small steps and is much more smooth/stable than the latter. 
+ */ + unsigned long dirty_ratelimit; + unsigned long balanced_dirty_ratelimit; + + struct fprop_local_percpu completions; + int dirty_exceeded; + + spinlock_t work_lock; /* protects work_list & dwork scheduling */ + struct list_head work_list; + struct delayed_work dwork; /* work item used for writeback */ + +#ifdef CONFIG_CGROUP_WRITEBACK + struct percpu_ref refcnt; /* used only for !root wb's */ + struct fprop_local_percpu memcg_completions; + struct cgroup_subsys_state *memcg_css; /* the associated memcg */ + struct cgroup_subsys_state *blkcg_css; /* and blkcg */ + struct list_head memcg_node; /* anchored at memcg->cgwb_list */ + struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ + + union { + struct work_struct release_work; + struct rcu_head rcu; + }; +#endif +}; + +struct backing_dev_info { + struct list_head bdi_list; + unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned int capabilities; /* Device capabilities */ + congested_fn *congested_fn; /* Function pointer if device is md/dm */ + void *congested_data; /* Pointer to aux data for congested func */ + + char *name; + + unsigned int min_ratio; + unsigned int max_ratio, max_prop_frac; + + /* + * Sum of avg_write_bw of wbs with dirty inodes. > 0 if there are + * any dirty wbs, which is depended upon by bdi_has_dirty_io(). 
+ */ + atomic_long_t tot_write_bandwidth; + + struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct bdi_writeback_congested wb_congested; /* its congested state */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ + struct rb_root cgwb_congested_tree; /* their congested states */ + atomic_t usage_cnt; /* counts both cgwbs and cgwb_congested's */ +#endif + wait_queue_head_t wb_waitq; + + struct device *dev; + + struct timer_list laptop_mode_wb_timer; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debug_dir; + struct dentry *debug_stats; +#endif +}; + +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + +void clear_wb_congested(struct bdi_writeback_congested *congested, int sync); +void set_wb_congested(struct bdi_writeback_congested *congested, int sync); + +static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync) +{ + clear_wb_congested(bdi->wb.congested, sync); +} + +static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) +{ + set_wb_congested(bdi->wb.congested, sync); +} + +#ifdef CONFIG_CGROUP_WRITEBACK + +/** + * wb_tryget - try to increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + return percpu_ref_tryget(&wb->refcnt); + return true; +} + +/** + * wb_get - increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline void wb_get(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_get(&wb->refcnt); +} + +/** + * wb_put - decrement a wb's refcount + * @wb: bdi_writeback to put + */ +static inline void wb_put(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_put(&wb->refcnt); +} + +/** + * wb_dying - is a wb dying? + * @wb: bdi_writeback of interest + * + * Returns whether @wb is unlinked and being drained. 
+ */ +static inline bool wb_dying(struct bdi_writeback *wb) +{ + return percpu_ref_is_dying(&wb->refcnt); +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + return true; +} + +static inline void wb_get(struct bdi_writeback *wb) +{ +} + +static inline void wb_put(struct bdi_writeback *wb) +{ +} + +static inline bool wb_dying(struct bdi_writeback *wb) +{ + return false; +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + +#endif /* __LINUX_BACKING_DEV_DEFS_H */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d87d8eced064..0e6d4828a77a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -8,106 +8,13 @@ #ifndef _LINUX_BACKING_DEV_H #define _LINUX_BACKING_DEV_H -#include <linux/percpu_counter.h> -#include <linux/log2.h> -#include <linux/flex_proportions.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/sched.h> -#include <linux/timer.h> +#include <linux/blkdev.h> #include <linux/writeback.h> -#include <linux/atomic.h> -#include <linux/sysctl.h> -#include <linux/workqueue.h> - -struct page; -struct device; -struct dentry; - -/* - * Bits in backing_dev_info.state - */ -enum bdi_state { - BDI_async_congested, /* The async (write) queue is getting full */ - BDI_sync_congested, /* The sync queue is getting full */ - BDI_registered, /* bdi_register() was done */ - BDI_writeback_running, /* Writeback is in progress */ -}; - -typedef int (congested_fn)(void *, int); - -enum bdi_stat_item { - BDI_RECLAIMABLE, - BDI_WRITEBACK, - BDI_DIRTIED, - BDI_WRITTEN, - NR_BDI_STAT_ITEMS -}; - -#define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) - -struct bdi_writeback { - struct backing_dev_info *bdi; /* our parent bdi */ - - unsigned long last_old_flush; /* last old data flush */ - - struct delayed_work dwork; /* work item used for writeback */ - struct list_head b_dirty; /* dirty inodes */ - struct list_head b_io; /* parked for writeback */ - struct list_head b_more_io; /* 
parked for more writeback */ - struct list_head b_dirty_time; /* time stamps are dirty */ - spinlock_t list_lock; /* protects the b_* lists */ -}; - -struct backing_dev_info { - struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ - unsigned long state; /* Always use atomic bitops on this */ - unsigned int capabilities; /* Device capabilities */ - congested_fn *congested_fn; /* Function pointer if device is md/dm */ - void *congested_data; /* Pointer to aux data for congested func */ - - char *name; - - struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; - - unsigned long bw_time_stamp; /* last time write bw is updated */ - unsigned long dirtied_stamp; - unsigned long written_stamp; /* pages written at bw_time_stamp */ - unsigned long write_bandwidth; /* the estimated write bandwidth */ - unsigned long avg_write_bandwidth; /* further smoothed write bw */ - - /* - * The base dirty throttle rate, re-calculated on every 200ms. - * All the bdi tasks' dirty rate will be curbed under it. - * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit - * in small steps and is much more smooth/stable than the latter. 
- */ - unsigned long dirty_ratelimit; - unsigned long balanced_dirty_ratelimit; - - struct fprop_local_percpu completions; - int dirty_exceeded; - - unsigned int min_ratio; - unsigned int max_ratio, max_prop_frac; - - struct bdi_writeback wb; /* default writeback info for this bdi */ - spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */ - - struct list_head work_list; - - struct device *dev; - - struct timer_list laptop_mode_wb_timer; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debug_dir; - struct dentry *debug_stats; -#endif -}; - -struct backing_dev_info *inode_to_bdi(struct inode *inode); +#include <linux/blk-cgroup.h> +#include <linux/backing-dev-defs.h> int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -117,97 +24,99 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, - enum wb_reason reason); -void bdi_start_background_writeback(struct backing_dev_info *bdi); -void bdi_writeback_workfn(struct work_struct *work); -int bdi_has_dirty_io(struct backing_dev_info *bdi); -void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); +void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, + bool range_cyclic, enum wb_reason reason); +void wb_start_background_writeback(struct bdi_writeback *wb); +void wb_workfn(struct work_struct *work); +void wb_wakeup_delayed(struct bdi_writeback *wb); extern spinlock_t bdi_lock; extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; -static inline int wb_has_dirty_io(struct bdi_writeback *wb) +static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { - return !list_empty(&wb->b_dirty) || - !list_empty(&wb->b_io) || - !list_empty(&wb->b_more_io); + return 
test_bit(WB_has_dirty_io, &wb->state); +} + +static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) +{ + /* + * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are + * any dirty wbs. See wb_update_write_bandwidth(). + */ + return atomic_long_read(&bdi->tot_write_bandwidth); } -static inline void __add_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item, s64 amount) +static inline void __add_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item, s64 amount) { - __percpu_counter_add(&bdi->bdi_stat[item], amount, BDI_STAT_BATCH); + __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void __inc_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void __inc_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item) { - __add_bdi_stat(bdi, item, 1); + __add_wb_stat(wb, item, 1); } -static inline void inc_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { unsigned long flags; local_irq_save(flags); - __inc_bdi_stat(bdi, item); + __inc_wb_stat(wb, item); local_irq_restore(flags); } -static inline void __dec_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void __dec_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item) { - __add_bdi_stat(bdi, item, -1); + __add_wb_stat(wb, item, -1); } -static inline void dec_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { unsigned long flags; local_irq_save(flags); - __dec_bdi_stat(bdi, item); + __dec_wb_stat(wb, item); local_irq_restore(flags); } -static inline s64 bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - return percpu_counter_read_positive(&bdi->bdi_stat[item]); + return 
percpu_counter_read_positive(&wb->stat[item]); } -static inline s64 __bdi_stat_sum(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 __wb_stat_sum(struct bdi_writeback *wb, + enum wb_stat_item item) { - return percpu_counter_sum_positive(&bdi->bdi_stat[item]); + return percpu_counter_sum_positive(&wb->stat[item]); } -static inline s64 bdi_stat_sum(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { s64 sum; unsigned long flags; local_irq_save(flags); - sum = __bdi_stat_sum(bdi, item); + sum = __wb_stat_sum(wb, item); local_irq_restore(flags); return sum; } -extern void bdi_writeout_inc(struct backing_dev_info *bdi); +extern void wb_writeout_inc(struct bdi_writeback *wb); /* * maximal error of a stat counter. */ -static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) +static inline unsigned long wb_stat_error(struct bdi_writeback *wb) { #ifdef CONFIG_SMP - return nr_cpu_ids * BDI_STAT_BATCH; + return nr_cpu_ids * WB_STAT_BATCH; #else return 1; #endif @@ -231,50 +140,57 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_WRITEBACK: Don't write pages back * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. + * + * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. 
*/ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 +#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) extern struct backing_dev_info noop_backing_dev_info; -int writeback_in_progress(struct backing_dev_info *bdi); - -static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) +/** + * writeback_in_progress - determine whether there is writeback in progress + * @wb: bdi_writeback of interest + * + * Determine whether there is writeback waiting to be handled against a + * bdi_writeback. + */ +static inline bool writeback_in_progress(struct bdi_writeback *wb) { - if (bdi->congested_fn) - return bdi->congested_fn(bdi->congested_data, bdi_bits); - return (bdi->state & bdi_bits); + return test_bit(WB_writeback_running, &wb->state); } -static inline int bdi_read_congested(struct backing_dev_info *bdi) +static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { - return bdi_congested(bdi, 1 << BDI_sync_congested); -} + struct super_block *sb; -static inline int bdi_write_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << BDI_async_congested); + if (!inode) + return &noop_backing_dev_info; + + sb = inode->i_sb; +#ifdef CONFIG_BLOCK + if (sb_is_blkdev_sb(sb)) + return blk_get_backing_dev_info(I_BDEV(inode)); +#endif + return sb->s_bdi; } -static inline int bdi_rw_congested(struct backing_dev_info *bdi) +static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) { - return bdi_congested(bdi, (1 << BDI_sync_congested) | - (1 << BDI_async_congested)); -} + struct backing_dev_info *bdi = wb->bdi; -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; + if (bdi->congested_fn) + return bdi->congested_fn(bdi->congested_data, cong_bits); + return wb->congested->state & 
cong_bits; +} -void clear_bdi_congested(struct backing_dev_info *bdi, int sync); -void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); long wait_iff_congested(struct zone *zone, int sync, long timeout); int pdflush_proc_obsolete(struct ctl_table *table, int write, @@ -318,4 +234,333 @@ static inline int bdi_sched_wait(void *word) return 0; } -#endif /* _LINUX_BACKING_DEV_H */ +#ifdef CONFIG_CGROUP_WRITEBACK + +struct bdi_writeback_congested * +wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp); +void wb_congested_put(struct bdi_writeback_congested *congested); +struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, + struct cgroup_subsys_state *memcg_css, + gfp_t gfp); +void wb_memcg_offline(struct mem_cgroup *memcg); +void wb_blkcg_offline(struct blkcg *blkcg); +int inode_congested(struct inode *inode, int cong_bits); + +/** + * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode + * @inode: inode of interest + * + * cgroup writeback requires support from both the bdi and filesystem. + * Test whether @inode has both. + */ +static inline bool inode_cgwb_enabled(struct inode *inode) +{ + struct backing_dev_info *bdi = inode_to_bdi(inode); + + return bdi_cap_account_dirty(bdi) && + (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && + (inode->i_sb->s_iflags & SB_I_CGROUPWB); +} + +/** + * wb_find_current - find wb for %current on a bdi + * @bdi: bdi of interest + * + * Find the wb of @bdi which matches both the memcg and blkcg of %current. + * Must be called under rcu_read_lock() which protects the returned wb. + * NULL if not found. 
+ */ +static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) +{ + struct cgroup_subsys_state *memcg_css; + struct bdi_writeback *wb; + + memcg_css = task_css(current, memory_cgrp_id); + if (!memcg_css->parent) + return &bdi->wb; + + wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); + + /* + * %current's blkcg equals the effective blkcg of its memcg. No + * need to use the relatively expensive cgroup_get_e_css(). + */ + if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id))) + return wb; + return NULL; +} + +/** + * wb_get_create_current - get or create wb for %current on a bdi + * @bdi: bdi of interest + * @gfp: allocation mask + * + * Equivalent to wb_get_create() on %current's memcg. This function is + * called from a relatively hot path and optimizes the common cases using + * wb_find_current(). + */ +static inline struct bdi_writeback * +wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) +{ + struct bdi_writeback *wb; + + rcu_read_lock(); + wb = wb_find_current(bdi); + if (wb && unlikely(!wb_tryget(wb))) + wb = NULL; + rcu_read_unlock(); + + if (unlikely(!wb)) { + struct cgroup_subsys_state *memcg_css; + + memcg_css = task_get_css(current, memory_cgrp_id); + wb = wb_get_create(bdi, memcg_css, gfp); + css_put(memcg_css); + } + return wb; +} + +/** + * inode_to_wb_is_valid - test whether an inode has a wb associated + * @inode: inode of interest + * + * Returns %true if @inode has a wb associated. May be called without any + * locking. + */ +static inline bool inode_to_wb_is_valid(struct inode *inode) +{ + return inode->i_wb; +} + +/** + * inode_to_wb - determine the wb of an inode + * @inode: inode of interest + * + * Returns the wb @inode is currently associated with. The caller must be + * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the + * associated wb's list_lock. 
+ */ +static inline struct bdi_writeback *inode_to_wb(struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(debug_locks && + (!lockdep_is_held(&inode->i_lock) && + !lockdep_is_held(&inode->i_mapping->tree_lock) && + !lockdep_is_held(&inode->i_wb->list_lock))); +#endif + return inode->i_wb; +} + +/** + * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction + * @inode: target inode + * @lockedp: temp bool output param, to be passed to the end function + * + * The caller wants to access the wb associated with @inode but isn't + * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This + * function determines the wb associated with @inode and ensures that the + * association doesn't change until the transaction is finished with + * unlocked_inode_to_wb_end(). + * + * The caller must call unlocked_inode_to_wb_end() with *@lockedp + * afterwards and can't sleep during transaction. IRQ may or may not be + * disabled on return. + */ +static inline struct bdi_writeback * +unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +{ + rcu_read_lock(); + + /* + * Paired with store_release in inode_switch_wb_work_fn() and + * ensures that we see the new wb if we see cleared I_WB_SWITCH. + */ + *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + + if (unlikely(*lockedp)) + spin_lock_irq(&inode->i_mapping->tree_lock); + + /* + * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. + * inode_to_wb() will bark. Deref directly. 
+ */ + return inode->i_wb; +} + +/** + * unlocked_inode_to_wb_end - end inode wb access transaction + * @inode: target inode + * @locked: *@lockedp from unlocked_inode_to_wb_begin() + */ +static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +{ + if (unlikely(locked)) + spin_unlock_irq(&inode->i_mapping->tree_lock); + + rcu_read_unlock(); +} + +struct wb_iter { + int start_blkcg_id; + struct radix_tree_iter tree_iter; + void **slot; +}; + +static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, + struct backing_dev_info *bdi) +{ + struct radix_tree_iter *titer = &iter->tree_iter; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (iter->start_blkcg_id >= 0) { + iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); + iter->start_blkcg_id = -1; + } else { + iter->slot = radix_tree_next_slot(iter->slot, titer, 0); + } + + if (!iter->slot) + iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0); + if (iter->slot) + return *iter->slot; + return NULL; +} + +static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, + struct backing_dev_info *bdi, + int start_blkcg_id) +{ + iter->start_blkcg_id = start_blkcg_id; + + if (start_blkcg_id) + return __wb_iter_next(iter, bdi); + else + return &bdi->wb; +} + +/** + * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order + * @wb_cur: cursor struct bdi_writeback pointer + * @bdi: bdi to walk wb's of + * @iter: pointer to struct wb_iter to be used as iteration buffer + * @start_blkcg_id: blkcg ID to start iteration from + * + * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending + * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter + * to be used as temp storage during iteration. rcu_read_lock() must be + * held throughout iteration. 
+ */ +#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ + for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ + (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline bool inode_cgwb_enabled(struct inode *inode) +{ + return false; +} + +static inline struct bdi_writeback_congested * +wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) +{ + return bdi->wb.congested; +} + +static inline void wb_congested_put(struct bdi_writeback_congested *congested) +{ +} + +static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) +{ + return &bdi->wb; +} + +static inline struct bdi_writeback * +wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) +{ + return &bdi->wb; +} + +static inline bool inode_to_wb_is_valid(struct inode *inode) +{ + return true; +} + +static inline struct bdi_writeback *inode_to_wb(struct inode *inode) +{ + return &inode_to_bdi(inode)->wb; +} + +static inline struct bdi_writeback * +unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +{ + return inode_to_wb(inode); +} + +static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +{ +} + +static inline void wb_memcg_offline(struct mem_cgroup *memcg) +{ +} + +static inline void wb_blkcg_offline(struct blkcg *blkcg) +{ +} + +struct wb_iter { + int next_id; +}; + +#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ + for ((iter)->next_id = (start_blkcg_id); \ + ({ (wb_cur) = !(iter)->next_id++ ? 
&(bdi)->wb : NULL; }); ) + +static inline int inode_congested(struct inode *inode, int cong_bits) +{ + return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + +static inline int inode_read_congested(struct inode *inode) +{ + return inode_congested(inode, 1 << WB_sync_congested); +} + +static inline int inode_write_congested(struct inode *inode) +{ + return inode_congested(inode, 1 << WB_async_congested); +} + +static inline int inode_rw_congested(struct inode *inode) +{ + return inode_congested(inode, (1 << WB_sync_congested) | + (1 << WB_async_congested)); +} + +static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) +{ + return wb_congested(&bdi->wb, cong_bits); +} + +static inline int bdi_read_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, 1 << WB_sync_congested); +} + +static inline int bdi_write_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, 1 << WB_async_congested); +} + +static inline int bdi_rw_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, (1 << WB_sync_congested) | + (1 << WB_async_congested)); +} + +#endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index b12b07e75929..2793652fbf66 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -10,11 +10,17 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/vmalloc.h> -#ifdef CONFIG_BCM47XX +#ifdef CONFIG_BCM47XX_NVRAM int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); +char *bcm47xx_nvram_get_contents(size_t *val_len); +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ + vfree(nvram); +}; #else static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) { @@ -29,6 +35,15 @@ static inline int bcm47xx_nvram_gpio_pin(const char *name) { 
return -ENOTSUPP; }; + +static inline char *bcm47xx_nvram_get_contents(size_t *val_len) +{ + return NULL; +}; + +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ +}; #endif #endif /* __BCM47XX_NVRAM_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index da3a127c9958..5e963a6d7c14 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio) * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ -#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline void bio_cnt_set(struct bio *bio, unsigned int count) +{ + if (count != 1) { + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + } + atomic_set(&bio->__bi_cnt, count); +} enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ @@ -413,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); -extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -469,9 +482,12 @@ extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); #ifdef CONFIG_BLK_CGROUP +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_current(struct bio *bio); void bio_disassociate_task(struct bio *bio); #else /* CONFIG_BLK_CGROUP */ +static inline int bio_associate_blkcg(struct bio *bio, + struct cgroup_subsys_state *blkcg_css) { return 0; } static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } #endif /* CONFIG_BLK_CGROUP */ diff --git a/include/linux/blk-cgroup.h 
b/include/linux/blk-cgroup.h new file mode 100644 index 000000000000..58cfab80dd70 --- /dev/null +++ b/include/linux/blk-cgroup.h @@ -0,0 +1,655 @@ +#ifndef _BLK_CGROUP_H +#define _BLK_CGROUP_H +/* + * Common Block IO controller cgroup interface + * + * Based on ideas and code from CFQ, CFS and BFQ: + * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> + * + * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> + * Paolo Valente <paolo.valente@unimore.it> + * + * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> + * Nauman Rafique <nauman@google.com> + */ + +#include <linux/cgroup.h> +#include <linux/u64_stats_sync.h> +#include <linux/seq_file.h> +#include <linux/radix-tree.h> +#include <linux/blkdev.h> +#include <linux/atomic.h> + +/* Max limits for throttle policy */ +#define THROTL_IOPS_MAX UINT_MAX + +#ifdef CONFIG_BLK_CGROUP + +enum blkg_rwstat_type { + BLKG_RWSTAT_READ, + BLKG_RWSTAT_WRITE, + BLKG_RWSTAT_SYNC, + BLKG_RWSTAT_ASYNC, + + BLKG_RWSTAT_NR, + BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, +}; + +struct blkcg_gq; + +struct blkcg { + struct cgroup_subsys_state css; + spinlock_t lock; + + struct radix_tree_root blkg_tree; + struct blkcg_gq *blkg_hint; + struct hlist_head blkg_list; + + struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; +#endif +}; + +struct blkg_stat { + struct u64_stats_sync syncp; + uint64_t cnt; +}; + +struct blkg_rwstat { + struct u64_stats_sync syncp; + uint64_t cnt[BLKG_RWSTAT_NR]; +}; + +/* + * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a + * request_queue (q). This is used by blkcg policies which need to track + * information per blkcg - q pair. + * + * There can be multiple active blkcg policies and each has its private + * data on each blkg, the size of which is determined by + * blkcg_policy->pd_size. blkcg core allocates and frees such areas + * together with blkg and invokes pd_init/exit_fn() methods. 
+ * + * Such private data must embed struct blkg_policy_data (pd) at the + * beginning and pd_size can't be smaller than pd. + */ +struct blkg_policy_data { + /* the blkg and policy id this per-policy data belongs to */ + struct blkcg_gq *blkg; + int plid; + + /* used during policy activation */ + struct list_head alloc_node; +}; + +/* + * Policies that need to keep per-blkcg data which is independent + * from any request_queue associated to it must specify its size + * with the cpd_size field of the blkcg_policy structure and + * embed a blkcg_policy_data in it. blkcg core allocates + * policy-specific per-blkcg structures lazily the first time + * they are actually needed, so it handles them together with + * blkgs. cpd_init() is invoked to let each policy handle + * per-blkcg data. + */ +struct blkcg_policy_data { + /* the policy id this per-policy data belongs to */ + int plid; + + /* used during policy activation */ + struct list_head alloc_node; +}; + +/* association between a blk cgroup and a request queue */ +struct blkcg_gq { + /* Pointer to the associated request_queue */ + struct request_queue *q; + struct list_head q_node; + struct hlist_node blkcg_node; + struct blkcg *blkcg; + + /* + * Each blkg gets congested separately and the congestion state is + * propagated to the matching bdi_writeback_congested. + */ + struct bdi_writeback_congested *wb_congested; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + + /* request allocation list for this blkcg-q pair */ + struct request_list rl; + + /* reference count */ + atomic_t refcnt; + + /* is this blkg online? 
protected by both blkcg and q locks */ + bool online; + + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; + + struct rcu_head rcu_head; +}; + +typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); +typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); + +struct blkcg_policy { + int plid; + /* policy specific private data size */ + size_t pd_size; + /* policy specific per-blkcg data size */ + size_t cpd_size; + /* cgroup files for the policy */ + struct cftype *cftypes; + + /* operations */ + blkcg_pol_init_cpd_fn *cpd_init_fn; + blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; + blkcg_pol_exit_pd_fn *pd_exit_fn; + blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; +}; + +extern struct blkcg blkcg_root; +extern struct cgroup_subsys_state * const blkcg_root_css; + +struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); +int blkcg_init_queue(struct request_queue *q); +void blkcg_drain_queue(struct request_queue *q); +void blkcg_exit_queue(struct request_queue *q); + +/* Blkio controller policy registration */ +int blkcg_policy_register(struct blkcg_policy *pol); +void blkcg_policy_unregister(struct blkcg_policy *pol); +int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol); +void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol); + +void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), + const struct blkcg_policy *pol, int data, + bool show_total); +u64 __blkg_prfill_u64(struct seq_file *sf, 
struct blkg_policy_data *pd, u64 v); +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat *rwstat); +u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off); + +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off); + +struct blkg_conf_ctx { + struct gendisk *disk; + struct blkcg_gq *blkg; + u64 v; +}; + +int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + const char *input, struct blkg_conf_ctx *ctx); +void blkg_conf_finish(struct blkg_conf_ctx *ctx); + + +static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct blkcg, css) : NULL; +} + +static inline struct blkcg *task_blkcg(struct task_struct *tsk) +{ + return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); +} + +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return task_blkcg(current); +} + +static inline struct cgroup_subsys_state * +task_get_blkcg_css(struct task_struct *task) +{ + return task_get_css(task, blkio_cgrp_id); +} + +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + return css_to_blkcg(blkcg->css.parent); +} + +/** + * blkg_to_pd - get policy private data + * @blkg: blkg of interest + * @pol: policy of interest + * + * Return pointer to private data associated with the @blkg-@pol pair. + */ +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) +{ + return blkg ? 
blkg->pd[pol->plid] : NULL; +} + +static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, + struct blkcg_policy *pol) +{ + return blkcg ? blkcg->pd[pol->plid] : NULL; +} + +/** + * pd_to_blkg - get blkg associated with policy private data + * @pd: policy private data of interest + * + * @pd is policy private data. Determine the blkg it's associated with. + */ +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) +{ + return pd ? pd->blkg : NULL; +} + +/** + * blkg_path - format cgroup path of blkg + * @blkg: blkg of interest + * @buf: target buffer + * @buflen: target buffer length + * + * Format the path of the cgroup of @blkg into @buf. + */ +static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) +{ + char *p; + + p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); + if (!p) { + strncpy(buf, "<unavailable>", buflen); + return -ENAMETOOLONG; + } + + memmove(buf, p, buf + buflen - p); + return 0; +} + +/** + * blkg_get - get a blkg reference + * @blkg: blkg to get + * + * The caller should be holding an existing reference. + */ +static inline void blkg_get(struct blkcg_gq *blkg) +{ + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + atomic_inc(&blkg->refcnt); +} + +void __blkg_release_rcu(struct rcu_head *rcu); + +/** + * blkg_put - put a blkg reference + * @blkg: blkg to put + */ +static inline void blkg_put(struct blkcg_gq *blkg) +{ + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + if (atomic_dec_and_test(&blkg->refcnt)) + call_rcu(&blkg->rcu_head, __blkg_release_rcu); +} + +struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, + bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. 
If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_css by calling css_rightmost_descendant() to skip subtree. + * @p_blkg is included in the iteration and the first node to be visited. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +/** + * blkg_for_each_descendant_post - post-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Similar to blkg_for_each_descendant_pre() but performs post-order + * traversal instead. Synchronization rules are the same. @p_blkg is + * included in the iteration and the last node to be visited. + */ +#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +/** + * blk_get_rl - get request_list to use + * @q: request_queue of interest + * @bio: bio which will be attached to the allocated request (may be %NULL) + * + * The caller wants to allocate a request from @q to use for @bio. Find + * the request_list to use and obtain a reference on it. Should be called + * under queue_lock. This function is guaranteed to return non-%NULL + * request_list. + */ +static inline struct request_list *blk_get_rl(struct request_queue *q, + struct bio *bio) +{ + struct blkcg *blkcg; + struct blkcg_gq *blkg; + + rcu_read_lock(); + + blkcg = bio_blkcg(bio); + + /* bypass blkg lookup and use @q->root_rl directly for root */ + if (blkcg == &blkcg_root) + goto root_rl; + + /* + * Try to use blkg->rl. blkg lookup may fail under memory pressure + * or if either the blkcg or queue is going away. 
Fall back to + * root_rl in such cases. + */ + blkg = blkg_lookup_create(blkcg, q); + if (unlikely(IS_ERR(blkg))) + goto root_rl; + + blkg_get(blkg); + rcu_read_unlock(); + return &blkg->rl; +root_rl: + rcu_read_unlock(); + return &q->root_rl; +} + +/** + * blk_put_rl - put request_list + * @rl: request_list to put + * + * Put the reference acquired by blk_get_rl(). Should be called under + * queue_lock. + */ +static inline void blk_put_rl(struct request_list *rl) +{ + /* root_rl may not have blkg set */ + if (rl->blkg && rl->blkg->blkcg != &blkcg_root) + blkg_put(rl->blkg); +} + +/** + * blk_rq_set_rl - associate a request with a request_list + * @rq: request of interest + * @rl: target request_list + * + * Associate @rq with @rl so that accounting and freeing can know the + * request_list @rq came from. + */ +static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) +{ + rq->rl = rl; +} + +/** + * blk_rq_rl - return the request_list a request came from + * @rq: request of interest + * + * Return the request_list @rq is allocated from. + */ +static inline struct request_list *blk_rq_rl(struct request *rq) +{ + return rq->rl; +} + +struct request_list *__blk_queue_next_rl(struct request_list *rl, + struct request_queue *q); +/** + * blk_queue_for_each_rl - iterate through all request_lists of a request_queue + * + * Should be used under queue_lock. + */ +#define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) + +static inline void blkg_stat_init(struct blkg_stat *stat) +{ + u64_stats_init(&stat->syncp); +} + +/** + * blkg_stat_add - add a value to a blkg_stat + * @stat: target blkg_stat + * @val: value to add + * + * Add @val to @stat. The caller is responsible for synchronizing calls to + * this function. 
+ */ +static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) +{ + u64_stats_update_begin(&stat->syncp); + stat->cnt += val; + u64_stats_update_end(&stat->syncp); +} + +/** + * blkg_stat_read - read the current value of a blkg_stat + * @stat: blkg_stat to read + * + * Read the current value of @stat. This function can be called without + * synchronization and takes care of u64 atomicity. + */ +static inline uint64_t blkg_stat_read(struct blkg_stat *stat) +{ + unsigned int start; + uint64_t v; + + do { + start = u64_stats_fetch_begin_irq(&stat->syncp); + v = stat->cnt; + } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); + + return v; +} + +/** + * blkg_stat_reset - reset a blkg_stat + * @stat: blkg_stat to reset + */ +static inline void blkg_stat_reset(struct blkg_stat *stat) +{ + stat->cnt = 0; +} + +/** + * blkg_stat_merge - merge a blkg_stat into another + * @to: the destination blkg_stat + * @from: the source + * + * Add @from's count to @to. + */ +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +{ + blkg_stat_add(to, blkg_stat_read(from)); +} + +static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) +{ + u64_stats_init(&rwstat->syncp); +} + +/** + * blkg_rwstat_add - add a value to a blkg_rwstat + * @rwstat: target blkg_rwstat + * @rw: mask of REQ_{WRITE|SYNC} + * @val: value to add + * + * Add @val to @rwstat. The counters are chosen according to @rw. The + * caller is responsible for synchronizing calls to this function. 
+ */ +static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, + int rw, uint64_t val) +{ + u64_stats_update_begin(&rwstat->syncp); + + if (rw & REQ_WRITE) + rwstat->cnt[BLKG_RWSTAT_WRITE] += val; + else + rwstat->cnt[BLKG_RWSTAT_READ] += val; + if (rw & REQ_SYNC) + rwstat->cnt[BLKG_RWSTAT_SYNC] += val; + else + rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; + + u64_stats_update_end(&rwstat->syncp); +} + +/** + * blkg_rwstat_read - read the current values of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Read the current snapshot of @rwstat and return it as the return value. + * This function can be called without synchronization and takes care of + * u64 atomicity. + */ +static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +{ + unsigned int start; + struct blkg_rwstat tmp; + + do { + start = u64_stats_fetch_begin_irq(&rwstat->syncp); + tmp = *rwstat; + } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); + + return tmp; +} + +/** + * blkg_rwstat_total - read the total count of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Return the total count of @rwstat regardless of the IO direction. This + * function can be called without synchronization and takes care of u64 + * atomicity. + */ +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) +{ + struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; +} + +/** + * blkg_rwstat_reset - reset a blkg_rwstat + * @rwstat: blkg_rwstat to reset + */ +static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) +{ + memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); +} + +/** + * blkg_rwstat_merge - merge a blkg_rwstat into another + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's counts to @to. 
+ */ +static inline void blkg_rwstat_merge(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + struct blkg_rwstat v = blkg_rwstat_read(from); + int i; + + u64_stats_update_begin(&to->syncp); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + to->cnt[i] += v.cnt[i]; + u64_stats_update_end(&to->syncp); +} + +#else /* CONFIG_BLK_CGROUP */ + +struct blkcg { +}; + +struct blkg_policy_data { +}; + +struct blkcg_policy_data { +}; + +struct blkcg_gq { +}; + +struct blkcg_policy { +}; + +#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) + +static inline struct cgroup_subsys_state * +task_get_blkcg_css(struct task_struct *task) +{ + return NULL; +} + +#ifdef CONFIG_BLOCK + +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline int blkcg_init_queue(struct request_queue *q) { return 0; } +static inline void blkcg_drain_queue(struct request_queue *q) { } +static inline void blkcg_exit_queue(struct request_queue *q) { } +static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } +static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } +static inline int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { return 0; } +static inline void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { } + +static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } + +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return NULL; } +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } +static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } +static inline void blkg_get(struct blkcg_gq *blkg) { } +static inline void blkg_put(struct blkcg_gq *blkg) { } + +static inline struct request_list *blk_get_rl(struct request_queue *q, + struct bio *bio) { return &q->root_rl; } +static inline void blk_put_rl(struct 
request_list *rl) { } +static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } +static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } + +#define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) + +#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BLK_CGROUP */ +#endif /* _BLK_CGROUP_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2056a99b92f8..37d1602c4f7a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); +typedef void (busy_tag_iter_fn)(struct request *, void *, bool); struct blk_mq_ops { /* @@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); +struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, @@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv); +void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b7299febc4b4..7303b3405520 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,7 +65,7 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t bi_remaining; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; @@ -92,7 +92,7 @@ 
struct bio { unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ - atomic_t bi_cnt; /* pin count */ + atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -112,16 +112,15 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ -#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_QUIET 9 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5d93a6645e88..d4068c17d0df 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -12,7 +12,7 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/pagemap.h> -#include <linux/backing-dev.h> +#include <linux/backing-dev-defs.h> #include <linux/wait.h> #include <linux/mempool.h> #include <linux/bio.h> @@ -22,15 +22,13 @@ #include <linux/smp.h> #include <linux/rcupdate.h> #include <linux/percpu-refcount.h> - -#include <asm/scatterlist.h> +#include <linux/scatterlist.h> struct module; 
struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; @@ -75,18 +73,7 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_SENSE, /* sense request */ - REQ_TYPE_PM_SUSPEND, /* suspend request */ - REQ_TYPE_PM_RESUME, /* resume request */ - REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_SPECIAL, /* driver defined type */ - /* - * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver - * private REQ_LB opcodes to differentiate what type of request this is - */ - REQ_TYPE_ATA_TASKFILE, - REQ_TYPE_ATA_PC, + REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; #define BLK_MAX_CDB 16 @@ -108,7 +95,7 @@ struct request { struct blk_mq_ctx *mq_ctx; u64 cmd_flags; - enum rq_cmd_type_bits cmd_type; + unsigned cmd_type; unsigned long atomic_flags; int cpu; @@ -216,19 +203,6 @@ static inline unsigned short req_get_ioprio(struct request *req) return req->ioprio; } -/* - * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME - * requests. Some step values could eventually be made generic. - */ -struct request_pm_state -{ - /* PM state machine step value, currently driver specific */ - int pm_step; - /* requested PM state value (S1, S2, S3, S4, ...) 
*/ - u32 pm_state; - void* data; /* for driver use */ -}; - #include <linux/elevator.h> struct blk_queue_ctx; @@ -469,7 +443,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; - int mq_freeze_depth; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; @@ -610,10 +584,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) (((rq)->cmd_flags & REQ_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) -#define blk_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_PM_RESUME) - #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ @@ -821,30 +791,12 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -/* - * A queue has just exitted congestion. Note this in the global counter of - * congested queues, and wake up anyone who was waiting for requests to be - * put back. - */ -static inline void blk_clear_queue_congested(struct request_queue *q, int sync) -{ - clear_bdi_congested(&q->backing_dev_info, sync); -} - -/* - * A queue has just entered congestion. Flag that in the queue's VM-visible - * state flags and increment the global gounter of congested queues. 
- */ -static inline void blk_set_queue_congested(struct request_queue *q, int sync) -{ - set_bdi_congested(&q->backing_dev_info, sync); -} - extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); +extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, @@ -933,7 +885,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors) + if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) return blk_queue_get_max_sectors(q, rq->cmd_flags); return min(blk_max_size_offset(q, blk_rq_pos(rq)), @@ -1054,6 +1006,7 @@ bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +extern void blk_set_queue_dying(struct request_queue *); /* * block layer runtime pm functions diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 3daf5ed392c9..2189935075b4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -19,7 +19,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node * @type: type of the cache - data, inst or unified - * @level: represents the hierarcy in the multi-level cache + * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing * the minimum amount of data that gets transferred from memory * @number_of_sets: total number of sets, a set is a collection of cache diff --git 
a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h new file mode 100644 index 000000000000..93755a629299 --- /dev/null +++ b/include/linux/cgroup-defs.h @@ -0,0 +1,501 @@ +/* + * linux/cgroup-defs.h - basic definitions for cgroup + * + * This file provides basic type and interface. Include this file directly + * only if necessary to avoid cyclic dependencies. + */ +#ifndef _LINUX_CGROUP_DEFS_H +#define _LINUX_CGROUP_DEFS_H + +#include <linux/limits.h> +#include <linux/list.h> +#include <linux/idr.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <linux/rcupdate.h> +#include <linux/percpu-refcount.h> +#include <linux/percpu-rwsem.h> +#include <linux/workqueue.h> + +#ifdef CONFIG_CGROUPS + +struct cgroup; +struct cgroup_root; +struct cgroup_subsys; +struct cgroup_taskset; +struct kernfs_node; +struct kernfs_ops; +struct kernfs_open_file; +struct seq_file; + +#define MAX_CGROUP_TYPE_NAMELEN 32 +#define MAX_CGROUP_ROOT_NAMELEN 64 +#define MAX_CFTYPE_NAME 64 + +/* define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _cgrp_id, +enum cgroup_subsys_id { +#include <linux/cgroup_subsys.h> + CGROUP_SUBSYS_COUNT, +}; +#undef SUBSYS + +/* bits in struct cgroup_subsys_state flags field */ +enum { + CSS_NO_REF = (1 << 0), /* no reference counting for this css */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ + CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ +}; + +/* bits in struct cgroup flags field */ +enum { + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, + /* + * Clone the parent's configuration when creating a new child + * cpuset cgroup. For historical reasons, this option can be + * specified at mount time and thus is implemented here. 
+ */ + CGRP_CPUSET_CLONE_CHILDREN, +}; + +/* cgroup_root->flags */ +enum { + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ + CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ +}; + +/* cftype->flags */ +enum { + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ +}; + +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ +struct cgroup_subsys_state { + /* PI: the cgroup that this css is attached to */ + struct cgroup *cgroup; + + /* PI: the cgroup subsystem that this css is attached to */ + struct cgroup_subsys *ss; + + /* reference count - access via css_[try]get() and css_put() */ + struct percpu_ref refcnt; + + /* PI: the parent css */ + struct cgroup_subsys_state *parent; + + /* siblings list anchored at the parent's ->children */ + struct list_head sibling; + struct list_head children; + + /* + * PI: Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + + unsigned int flags; + + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all csses. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr and + * used to allow interrupting and resuming iterations. 
+ */ + u64 serial_nr; + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; + struct work_struct destroy_work; +}; + +/* + * A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire cgroup + * set for a task. + */ +struct css_set { + /* Reference count */ + atomic_t refcount; + + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; + + /* + * Lists running through all tasks using this cgroup group. + * mg_tasks lists tasks which belong to this cset but are in the + * process of being migrated out or in. Protected by + * css_set_rwsem, but, during migration, once tasks are moved to + * mg_tasks, it can be read safely while holding cgroup_mutex. + */ + struct list_head tasks; + struct list_head mg_tasks; + + /* + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. + */ + struct list_head cgrp_links; + + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; + + /* + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. + */ + struct list_head mg_preload_node; + struct list_head mg_node; + + /* + * If this cset is acting as the source of migration the following + * two fields are set. mg_src_cgrp is the source cgroup of the + * on-going migration and mg_dst_cset is the destination cset the + * target tasks on this cset should be migrated to. Protected by + * cgroup_mutex. 
+ */ + struct cgroup *mg_src_cgrp; + struct css_set *mg_dst_cset; + + /* + * On the default hierarchy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. + */ + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; +}; + +struct cgroup { + /* self css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state self; + + unsigned long flags; /* "unsigned long" so bitops work */ + + /* + * idr allocated in-hierarchy ID. + * + * ID 0 is not used, the ID of the root cgroup is always 1, and a + * new cgroup will be assigned with a smallest available ID. + * + * Allocating/Removing ID must be protected by cgroup_mutex. + */ + int id; + + /* + * If this cgroup contains any tasks, it contributes one to + * populated_cnt. All children with non-zero populated_cnt of + * their own contribute one. The count is zero iff there's no task + * in this cgroup or its subtree. + */ + int populated_cnt; + + struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ + struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. + * Controller knobs are made available iff it's enabled in + * ->subtree_control. 
+ */ + unsigned int subtree_control; + unsigned int child_subsys_mask; + + /* Private pointers for each registered subsystem */ + struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + struct cgroup_root *root; + + /* + * List of cgrp_cset_links pointing at css_sets with tasks in this + * cgroup. Protected by css_set_lock. + */ + struct list_head cset_links; + + /* + * On the default hierarchy, a css_set for a cgroup with some + * subsys disabled will point to css's which are associated with + * the closest ancestor which has the subsys enabled. The + * following lists all css_sets which point to this cgroup's css + * for the given subsystem. + */ + struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; + + /* used to wait for offlining of csses */ + wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; +}; + +/* + * A cgroup_root represents the root of a cgroup hierarchy, and may be + * associated with a kernfs_root to form an active hierarchy. This is + * internal to cgroup core. Don't access directly from controllers. + */ +struct cgroup_root { + struct kernfs_root *kf_root; + + /* The bitmask of subsystems attached to this hierarchy */ + unsigned int subsys_mask; + + /* Unique id for this hierarchy. */ + int hierarchy_id; + + /* The root cgroup. Root is destroyed on its release. */ + struct cgroup cgrp; + + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; + + /* A list running through the active hierarchies */ + struct list_head root_list; + + /* Hierarchy-specific flags */ + unsigned int flags; + + /* IDs for cgroups in this hierarchy */ + struct idr cgroup_idr; + + /* The path to use for release notifications. 
*/ + char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; +}; + +/* + * struct cftype: handler definitions for cgroup control files + * + * When reading/writing to a file: + * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_path.dentry->d_fsdata + */ +struct cftype { + /* + * By convention, the name should begin with the name of the + * subsystem, followed by a period. Zero length string indicates + * end of cftype array. + */ + char name[MAX_CFTYPE_NAME]; + int private; + /* + * If not 0, file mode is set to this value, otherwise it will + * be figured out automatically + */ + umode_t mode; + + /* + * The maximum length of string, excluding trailing nul, that can + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. + */ + size_t max_write_len; + + /* CFTYPE_* flags */ + unsigned int flags; + + /* + * Fields used for internal bookkeeping. Initialized automatically + * during registration. + */ + struct cgroup_subsys *ss; /* NULL for cgroup core files */ + struct list_head node; /* anchored at ss->cfts */ + struct kernfs_ops *kf_ops; + + /* + * read_u64() is a shortcut for the common case of returning a + * single integer. Use it in place of read() + */ + u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); + + /* generic seq_file read interface */ + int (*seq_show)(struct seq_file *sf, void *v); + + /* optional ops, implement all or none */ + void *(*seq_start)(struct seq_file *sf, loff_t *ppos); + void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); + void (*seq_stop)(struct seq_file *sf, void *v); + + /* + * write_u64() is a shortcut for the common case of accepting + * a single integer (as parsed by simple_strtoull) from + * userspace. 
Use in place of write(); return 0 or error. + */ + int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val); + + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. Use + * of_css/cft() to access the associated css and cft. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lockdep_key; +#endif +}; + +/* + * Control Group subsystem type. + * See Documentation/cgroups/cgroups.txt for details + */ +struct cgroup_subsys { + struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); + int (*css_online)(struct cgroup_subsys_state *css); + void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_released)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_e_css_changed)(struct cgroup_subsys_state *css); + + int (*can_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, + struct task_struct *task); + void (*bind)(struct cgroup_subsys_state *root_css); + + int disabled; + int early_init; + + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. 
If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now disallowed to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. + */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + + /* the following two fields are initialized automatically during boot */ + int id; + const char *name; + + /* link to parent, protected by cgroup_lock() */ + struct cgroup_root *root; + + /* idr for css->id */ + struct idr css_idr; + + /* + * List of cftypes. Each entry is the first entry of an array + * terminated by zero length name. + */ + struct list_head cfts; + + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. + */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. + */ + unsigned int depends_on; +}; + +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. 
+ */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadgroup_change_begin(). + */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} + +#else /* CONFIG_CGROUPS */ + +#define CGROUP_SUBSYS_COUNT 0 + +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} + +#endif /* CONFIG_CGROUPS */ + +#endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b9cb94c3102a..a593e299162e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,94 +11,200 @@ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> -#include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/rwsem.h> -#include <linux/idr.h> -#include <linux/workqueue.h> #include <linux/fs.h> -#include <linux/percpu-refcount.h> #include <linux/seq_file.h> #include <linux/kernfs.h> -#include <linux/wait.h> + +#include <linux/cgroup-defs.h> #ifdef CONFIG_CGROUPS -struct cgroup_root; -struct cgroup_subsys; -struct cgroup; +/* a css_task_iter should be treated as an opaque object */ +struct css_task_iter { + struct cgroup_subsys *ss; -extern int cgroup_init_early(void); -extern int cgroup_init(void); -extern void cgroup_fork(struct task_struct *p); -extern void cgroup_post_fork(struct task_struct *p); -extern void cgroup_exit(struct task_struct *p); -extern int cgroupstats_build(struct cgroupstats *stats, - struct dentry *dentry); + struct list_head *cset_pos; + struct list_head *cset_head; -extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, - struct 
pid *pid, struct task_struct *tsk); + struct list_head *task_pos; + struct list_head *tasks_head; + struct list_head *mg_tasks_head; +}; -/* define the enumeration of all cgroup subsystems */ -#define SUBSYS(_x) _x ## _cgrp_id, -enum cgroup_subsys_id { +extern struct cgroup_root cgrp_dfl_root; +extern struct css_set init_css_set; + +#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> - CGROUP_SUBSYS_COUNT, -}; #undef SUBSYS +bool css_has_online_children(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); + +bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); + +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cftype *cfts); + +char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); +int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); + +void cgroup_fork(struct task_struct *p); +void cgroup_post_fork(struct task_struct *p); +void cgroup_exit(struct task_struct *p); + +int cgroup_init_early(void); +int cgroup_init(void); + /* - * Per-subsystem/per-cgroup state maintained by the system. This is the - * fundamental structural building block that controllers deal with. + * Iteration helpers and macros. 
+ */ + +struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *parent); +struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); +struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); +struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); + +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); +struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); + +void css_task_iter_start(struct cgroup_subsys_state *css, + struct css_task_iter *it); +struct task_struct *css_task_iter_next(struct css_task_iter *it); +void css_task_iter_end(struct css_task_iter *it); + +/** + * css_for_each_child - iterate through children of a css + * @pos: the css * to use as the loop cursor + * @parent: css whose children to walk * - * Fields marked with "PI:" are public and immutable and may be accessed - * directly without synchronization. + * Walk @parent's children. Must be called under rcu_read_lock(). + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. 
*/ -struct cgroup_subsys_state { - /* PI: the cgroup that this css is attached to */ - struct cgroup *cgroup; - - /* PI: the cgroup subsystem that this css is attached to */ - struct cgroup_subsys *ss; - - /* reference count - access via css_[try]get() and css_put() */ - struct percpu_ref refcnt; - - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - - /* siblings list anchored at the parent's ->children */ - struct list_head sibling; - struct list_head children; - - /* - * PI: Subsys-unique ID. 0 is unused and root is always 1. The - * matching css can be looked up using css_from_id(). - */ - int id; - - unsigned int flags; - - /* - * Monotonically increasing unique serial number which defines a - * uniform order among all csses. It's guaranteed that all - * ->children lists are in the ascending order of ->serial_nr and - * used to allow interrupting and resuming iterations. - */ - u64 serial_nr; - - /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; - struct work_struct destroy_work; -}; +#define css_for_each_child(pos, parent) \ + for ((pos) = css_next_child(NULL, (parent)); (pos); \ + (pos) = css_next_child((pos), (parent))) -/* bits in struct cgroup_subsys_state flags field */ -enum { - CSS_NO_REF = (1 << 0), /* no reference counting for this css */ - CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ - CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ -}; +/** + * css_for_each_descendant_pre - pre-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @css: css whose descendants to walk + * + * Walk @css's descendants. @css is included in the iteration and the + * first node to be visited. Must be called under rcu_read_lock(). + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. 
+ * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * For example, the following guarantees that a descendant can't escape + * state updates of its ancestors. + * + * my_online(@css) + * { + * Lock @css's parent and @css; + * Inherit state from the parent; + * Unlock both. + * } + * + * my_update_state(@css) + * { + * css_for_each_descendant_pre(@pos, @css) { + * Lock @pos; + * if (@pos == @css) + * Update @css's state; + * else + * Verify @pos is alive and inherit state from its parent; + * Unlock @pos; + * } + * } + * + * As long as the inheriting step, including checking the parent state, is + * enclosed inside @pos locking, double-locking the parent isn't necessary + * while inheriting. The state update to the parent is guaranteed to be + * visible by walking order and, as long as inheriting operations to the + * same @pos are atomic to each other, multiple updates racing each other + * still result in the correct state. It's guaranteed that at least one + * inheritance happens for any css after the latest update to its parent. + * + * If checking parent's state requires locking the parent, each inheriting + * iteration should lock and unlock both @pos->parent and @pos. + * + * Alternatively, a subsystem may choose to use a single global lock to + * synchronize ->css_online() and ->css_offline() against tree-walking + * operations. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. 
+ */ +#define css_for_each_descendant_pre(pos, css) \ + for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ + (pos) = css_next_descendant_pre((pos), (css))) + +/** + * css_for_each_descendant_post - post-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @css: css whose descendants to walk + * + * Similar to css_for_each_descendant_pre() but performs post-order + * traversal instead. @css is included in the iteration and the last + * node to be visited. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * Note that the walk visibility guarantee example described in pre-order + * walk doesn't apply the same to post-order walks. + */ +#define css_for_each_descendant_post(pos, css) \ + for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ + (pos) = css_next_descendant_post((pos), (css))) + +/** + * cgroup_taskset_for_each - iterate cgroup_taskset + * @task: the loop cursor + * @tset: taskset to iterate + */ +#define cgroup_taskset_for_each(task, tset) \ + for ((task) = cgroup_taskset_first((tset)); (task); \ + (task) = cgroup_taskset_next((tset))) + +/* + * Inline functions. + */ /** * css_get - obtain a reference on the specified css @@ -185,309 +291,112 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } -/* bits in struct cgroup flags field */ -enum { - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, - /* - * Clone the parent's configuration when creating a new child - * cpuset cgroup. 
For historical reasons, this option can be - * specified at mount time and thus is implemented here. - */ - CGRP_CPUSET_CLONE_CHILDREN, -}; - -struct cgroup { - /* self css with NULL ->ss, points back to this cgroup */ - struct cgroup_subsys_state self; - - unsigned long flags; /* "unsigned long" so bitops work */ - - /* - * idr allocated in-hierarchy ID. - * - * ID 0 is not used, the ID of the root cgroup is always 1, and a - * new cgroup will be assigned with a smallest available ID. - * - * Allocating/Removing ID must be protected by cgroup_mutex. - */ - int id; - - /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. - */ - int populated_cnt; - - struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - - /* - * The bitmask of subsystems enabled on the child cgroups. - * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. - */ - unsigned int subtree_control; - unsigned int child_subsys_mask; - - /* Private pointers for each registered subsystem */ - struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; - - struct cgroup_root *root; - - /* - * List of cgrp_cset_links pointing at css_sets with tasks in this - * cgroup. Protected by css_set_lock. - */ - struct list_head cset_links; - - /* - * On the default hierarchy, a css_set for a cgroup with some - * susbsys disabled will point to css's which are associated with - * the closest ancestor which has the subsys enabled. The - * following lists all css_sets which point to this cgroup's css - * for the given subsystem. 
- */ - struct list_head e_csets[CGROUP_SUBSYS_COUNT]; - - /* - * list of pidlists, up to two for each namespace (one for procs, one - * for tasks); created on demand. - */ - struct list_head pidlists; - struct mutex pidlist_mutex; - - /* used to wait for offlining of csses */ - wait_queue_head_t offline_waitq; - - /* used to schedule release agent */ - struct work_struct release_agent_work; -}; - -#define MAX_CGROUP_ROOT_NAMELEN 64 - -/* cgroup_root->flags */ -enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ - CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ - CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ -}; - -/* - * A cgroup_root represents the root of a cgroup hierarchy, and may be - * associated with a kernfs_root to form an active hierarchy. This is - * internal to cgroup core. Don't access directly from controllers. +/** + * task_css_set_check - obtain a task's css_set with extra access conditions + * @task: the task to obtain css_set for + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * A task's css_set is RCU protected, initialized and exited while holding + * task_lock(), and can only be modified while holding both cgroup_mutex + * and task_lock() while the task is alive. This macro verifies that the + * caller is inside proper critical section and returns @task's css_set. + * + * The caller can also specify additional allowed conditions via @__c, such + * as locks used during the cgroup_subsys::attach() methods. */ -struct cgroup_root { - struct kernfs_root *kf_root; - - /* The bitmask of subsystems attached to this hierarchy */ - unsigned int subsys_mask; - - /* Unique id for this hierarchy. */ - int hierarchy_id; - - /* The root cgroup. Root is destroyed on its release. 
*/ - struct cgroup cgrp; - - /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ - atomic_t nr_cgrps; - - /* A list running through the active hierarchies */ - struct list_head root_list; - - /* Hierarchy-specific flags */ - unsigned int flags; - - /* IDs for cgroups in this hierarchy */ - struct idr cgroup_idr; - - /* The path to use for release notifications. */ - char release_agent_path[PATH_MAX]; - - /* The name for this hierarchy - may be empty */ - char name[MAX_CGROUP_ROOT_NAMELEN]; -}; +#ifdef CONFIG_PROVE_RCU +extern struct mutex cgroup_mutex; +extern struct rw_semaphore css_set_rwsem; +#define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ + lockdep_is_held(&cgroup_mutex) || \ + lockdep_is_held(&css_set_rwsem) || \ + ((task)->flags & PF_EXITING) || (__c)) +#else +#define task_css_set_check(task, __c) \ + rcu_dereference((task)->cgroups) +#endif -/* - * A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. This saves space in the task struct - * object and speeds up fork()/exit(), since a single inc/dec and a - * list_add()/del() can bump the reference count on the entire cgroup - * set for a task. +/** + * task_css_check - obtain css for (task, subsys) w/ extra access conds + * @task: the target task + * @subsys_id: the target subsystem ID + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The + * synchronization rules are the same as task_css_set_check(). */ +#define task_css_check(task, subsys_id, __c) \ + task_css_set_check((task), (__c))->subsys[(subsys_id)] -struct css_set { - - /* Reference count */ - atomic_t refcount; - - /* - * List running through all cgroup groups in the same hash - * slot. Protected by css_set_lock - */ - struct hlist_node hlist; - - /* - * Lists running through all tasks using this cgroup group. 
- * mg_tasks lists tasks which belong to this cset but are in the - * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to - * mg_tasks, it can be read safely while holding cgroup_mutex. - */ - struct list_head tasks; - struct list_head mg_tasks; - - /* - * List of cgrp_cset_links pointing at cgroups referenced from this - * css_set. Protected by css_set_lock. - */ - struct list_head cgrp_links; - - /* the default cgroup associated with this css_set */ - struct cgroup *dfl_cgrp; - - /* - * Set of subsystem states, one for each subsystem. This array is - * immutable after creation apart from the init_css_set during - * subsystem registration (at boot time). - */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - - /* - * List of csets participating in the on-going migration either as - * source or destination. Protected by cgroup_mutex. - */ - struct list_head mg_preload_node; - struct list_head mg_node; - - /* - * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. - */ - struct cgroup *mg_src_cgrp; - struct css_set *mg_dst_cset; - - /* - * On the default hierarhcy, ->subsys[ssid] may point to a css - * attached to an ancestor instead of the cgroup this css_set is - * associated with. The following node is anchored at - * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to - * iterate through all css's attached to a given cgroup. 
- */ - struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - - /* For RCU-protected deletion */ - struct rcu_head rcu_head; -}; - -/* - * struct cftype: handler definitions for cgroup control files +/** + * task_css_set - obtain a task's css_set + * @task: the task to obtain css_set for * - * When reading/writing to a file: - * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_path.dentry->d_fsdata + * See task_css_set_check(). */ +static inline struct css_set *task_css_set(struct task_struct *task) +{ + return task_css_set_check(task, false); +} -/* cftype->flags */ -enum { - CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ - CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ +/** + * task_css - obtain css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * See task_css_check(). + */ +static inline struct cgroup_subsys_state *task_css(struct task_struct *task, + int subsys_id) +{ + return task_css_check(task, subsys_id, false); +} - /* internal flags, do not use outside cgroup core proper */ - __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ - __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ -}; +/** + * task_get_css - find and get the css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * Find the css for the (@task, @subsys_id) combination, increment a + * reference on and return it. This function is guaranteed to return a + * valid css. 
+ */ +static inline struct cgroup_subsys_state * +task_get_css(struct task_struct *task, int subsys_id) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + while (true) { + css = task_css(task, subsys_id); + if (likely(css_tryget_online(css))) + break; + cpu_relax(); + } + rcu_read_unlock(); + return css; +} -#define MAX_CFTYPE_NAME 64 - -struct cftype { - /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. - */ - char name[MAX_CFTYPE_NAME]; - int private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; - - /* - * The maximum length of string, excluding trailing nul, that can - * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. - */ - size_t max_write_len; - - /* CFTYPE_* flags */ - unsigned int flags; - - /* - * Fields used for internal bookkeeping. Initialized automatically - * during registration. - */ - struct cgroup_subsys *ss; /* NULL for cgroup core files */ - struct list_head node; /* anchored at ss->cfts */ - struct kernfs_ops *kf_ops; - - /* - * read_u64() is a shortcut for the common case of returning a - * single integer. Use it in place of read() - */ - u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); - /* - * read_s64() is a signed version of read_u64() - */ - s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); - - /* generic seq_file read interface */ - int (*seq_show)(struct seq_file *sf, void *v); - - /* optional ops, implement all or none */ - void *(*seq_start)(struct seq_file *sf, loff_t *ppos); - void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); - void (*seq_stop)(struct seq_file *sf, void *v); - - /* - * write_u64() is a shortcut for the common case of accepting - * a single integer (as parsed by simple_strtoull) from - * userspace. Use in place of write(); return 0 or error. 
- */ - int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, - u64 val); - /* - * write_s64() is a signed version of write_u64() - */ - int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, - s64 val); - - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. Use - * of_css/cft() to access the associated css and cft. - */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lockdep_key; -#endif -}; +/** + * task_css_is_root - test whether a task belongs to the root css + * @task: the target task + * @subsys_id: the target subsystem ID + * + * Test whether @task belongs to the root css on the specified subsystem. + * May be invoked in any context. + */ +static inline bool task_css_is_root(struct task_struct *task, int subsys_id) +{ + return task_css_check(task, subsys_id, true) == + init_css_set.subsys[subsys_id]; +} -extern struct cgroup_root cgrp_dfl_root; -extern struct css_set init_css_set; +static inline struct cgroup *task_cgroup(struct task_struct *task, + int subsys_id) +{ + return task_css(task, subsys_id)->cgroup; +} /** * cgroup_on_dfl - test whether a cgroup is on the default hierarchy @@ -604,367 +513,22 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); - -int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_rm_cftypes(struct cftype *cfts); - -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); - -/* - * Control Group taskset, used to pass around set of tasks to cgroup_subsys - * methods. 
- */ -struct cgroup_taskset; -struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); -struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); - -/** - * cgroup_taskset_for_each - iterate cgroup_taskset - * @task: the loop cursor - * @tset: taskset to iterate - */ -#define cgroup_taskset_for_each(task, tset) \ - for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) - -/* - * Control Group subsystem type. - * See Documentation/cgroups/cgroups.txt for details - */ - -struct cgroup_subsys { - struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); - int (*css_online)(struct cgroup_subsys_state *css); - void (*css_offline)(struct cgroup_subsys_state *css); - void (*css_released)(struct cgroup_subsys_state *css); - void (*css_free)(struct cgroup_subsys_state *css); - void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); - - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); - void (*bind)(struct cgroup_subsys_state *root_css); - - int disabled; - int early_init; - - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. - * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. 
Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy; - bool warned_broken_hierarchy; - - /* the following two fields are initialized automtically during boot */ - int id; -#define MAX_CGROUP_TYPE_NAMELEN 32 - const char *name; - - /* link to parent, protected by cgroup_lock() */ - struct cgroup_root *root; - - /* idr for css->id */ - struct idr css_idr; - - /* - * List of cftypes. Each entry is the first entry of an array - * terminated by zero length name. - */ - struct list_head cfts; - - /* - * Base cftypes which are automatically registered. The two can - * point to the same array. - */ - struct cftype *dfl_cftypes; /* for the default hierarchy */ - struct cftype *legacy_cftypes; /* for the legacy hierarchies */ - - /* - * A subsystem may depend on other subsystems. When such subsystem - * is enabled on a cgroup, the depended-upon subsystems are enabled - * together if available. Subsystems enabled due to dependency are - * not visible to userland until explicitly enabled. The following - * specifies the mask of subsystems that this one depends on. - */ - unsigned int depends_on; -}; - -#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; -#include <linux/cgroup_subsys.h> -#undef SUBSYS - -/** - * task_css_set_check - obtain a task's css_set with extra access conditions - * @task: the task to obtain css_set for - * @__c: extra condition expression to be passed to rcu_dereference_check() - * - * A task's css_set is RCU protected, initialized and exited while holding - * task_lock(), and can only be modified while holding both cgroup_mutex - * and task_lock() while the task is alive. This macro verifies that the - * caller is inside proper critical section and returns @task's css_set. - * - * The caller can also specify additional allowed conditions via @__c, such - * as locks used during the cgroup_subsys::attach() methods. 
- */ -#ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; -extern struct rw_semaphore css_set_rwsem; -#define task_css_set_check(task, __c) \ - rcu_dereference_check((task)->cgroups, \ - lockdep_is_held(&cgroup_mutex) || \ - lockdep_is_held(&css_set_rwsem) || \ - ((task)->flags & PF_EXITING) || (__c)) -#else -#define task_css_set_check(task, __c) \ - rcu_dereference((task)->cgroups) -#endif - -/** - * task_css_check - obtain css for (task, subsys) w/ extra access conds - * @task: the target task - * @subsys_id: the target subsystem ID - * @__c: extra condition expression to be passed to rcu_dereference_check() - * - * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The - * synchronization rules are the same as task_css_set_check(). - */ -#define task_css_check(task, subsys_id, __c) \ - task_css_set_check((task), (__c))->subsys[(subsys_id)] - -/** - * task_css_set - obtain a task's css_set - * @task: the task to obtain css_set for - * - * See task_css_set_check(). - */ -static inline struct css_set *task_css_set(struct task_struct *task) -{ - return task_css_set_check(task, false); -} - -/** - * task_css - obtain css for (task, subsys) - * @task: the target task - * @subsys_id: the target subsystem ID - * - * See task_css_check(). - */ -static inline struct cgroup_subsys_state *task_css(struct task_struct *task, - int subsys_id) -{ - return task_css_check(task, subsys_id, false); -} - -/** - * task_css_is_root - test whether a task belongs to the root css - * @task: the target task - * @subsys_id: the target subsystem ID - * - * Test whether @task belongs to the root css on the specified subsystem. - * May be invoked in any context. 
- */ -static inline bool task_css_is_root(struct task_struct *task, int subsys_id) -{ - return task_css_check(task, subsys_id, true) == - init_css_set.subsys[subsys_id]; -} - -static inline struct cgroup *task_cgroup(struct task_struct *task, - int subsys_id) -{ - return task_css(task, subsys_id)->cgroup; -} - -struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *parent); - -struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); - -/** - * css_for_each_child - iterate through children of a css - * @pos: the css * to use as the loop cursor - * @parent: css whose children to walk - * - * Walk @parent's children. Must be called under rcu_read_lock(). - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * It is allowed to temporarily drop RCU read lock during iteration. The - * caller is responsible for ensuring that @pos remains accessible until - * the start of the next iteration by, for example, bumping the css refcnt. - */ -#define css_for_each_child(pos, parent) \ - for ((pos) = css_next_child(NULL, (parent)); (pos); \ - (pos) = css_next_child((pos), (parent))) - -struct cgroup_subsys_state * -css_next_descendant_pre(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *css); - -struct cgroup_subsys_state * -css_rightmost_descendant(struct cgroup_subsys_state *pos); - -/** - * css_for_each_descendant_pre - pre-order walk of a css's descendants - * @pos: the css * to use as the loop cursor - * @root: css whose descendants to walk - * - * Walk @root's descendants. 
@root is included in the iteration and the - * first node to be visited. Must be called under rcu_read_lock(). - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * For example, the following guarantees that a descendant can't escape - * state updates of its ancestors. - * - * my_online(@css) - * { - * Lock @css's parent and @css; - * Inherit state from the parent; - * Unlock both. - * } - * - * my_update_state(@css) - * { - * css_for_each_descendant_pre(@pos, @css) { - * Lock @pos; - * if (@pos == @css) - * Update @css's state; - * else - * Verify @pos is alive and inherit state from its parent; - * Unlock @pos; - * } - * } - * - * As long as the inheriting step, including checking the parent state, is - * enclosed inside @pos locking, double-locking the parent isn't necessary - * while inheriting. The state update to the parent is guaranteed to be - * visible by walking order and, as long as inheriting operations to the - * same @pos are atomic to each other, multiple updates racing each other - * still result in the correct state. It's guaranateed that at least one - * inheritance happens for any css after the latest update to its parent. - * - * If checking parent's state requires locking the parent, each inheriting - * iteration should lock and unlock both @pos->parent and @pos. - * - * Alternatively, a subsystem may choose to use a single global lock to - * synchronize ->css_online() and ->css_offline() against tree-walking - * operations. - * - * It is allowed to temporarily drop RCU read lock during iteration. 
The - * caller is responsible for ensuring that @pos remains accessible until - * the start of the next iteration by, for example, bumping the css refcnt. - */ -#define css_for_each_descendant_pre(pos, css) \ - for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ - (pos) = css_next_descendant_pre((pos), (css))) - -struct cgroup_subsys_state * -css_next_descendant_post(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *css); - -/** - * css_for_each_descendant_post - post-order walk of a css's descendants - * @pos: the css * to use as the loop cursor - * @css: css whose descendants to walk - * - * Similar to css_for_each_descendant_pre() but performs post-order - * traversal instead. @root is included in the iteration and the last - * node to be visited. - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * Note that the walk visibility guarantee example described in pre-order - * walk doesn't apply the same to post-order walks. 
- */ -#define css_for_each_descendant_post(pos, css) \ - for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ - (pos) = css_next_descendant_post((pos), (css))) - -bool css_has_online_children(struct cgroup_subsys_state *css); - -/* A css_task_iter should be treated as an opaque object */ -struct css_task_iter { - struct cgroup_subsys *ss; - - struct list_head *cset_pos; - struct list_head *cset_head; - - struct list_head *task_pos; - struct list_head *tasks_head; - struct list_head *mg_tasks_head; -}; - -void css_task_iter_start(struct cgroup_subsys_state *css, - struct css_task_iter *it); -struct task_struct *css_task_iter_next(struct css_task_iter *it); -void css_task_iter_end(struct css_task_iter *it); - -int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); -int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); - -struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, - struct cgroup_subsys *ss); -struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss); - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -static inline int cgroup_init_early(void) { return 0; } -static inline int cgroup_init(void) { return 0; } +static inline void css_put(struct cgroup_subsys_state *css) {} +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) { return 0; } +static inline int cgroupstats_build(struct cgroupstats *stats, + struct dentry *dentry) { return -EINVAL; } + static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} -static inline int cgroupstats_build(struct cgroupstats *stats, - struct dentry *dentry) -{ - return -EINVAL; -} - -static inline void css_put(struct cgroup_subsys_state *css) {} - -/* No cgroups - nothing to do */ -static inline int cgroup_attach_task_all(struct task_struct *from, - struct 
task_struct *t) -{ - return 0; -} +static inline int cgroup_init_early(void) { return 0; } +static inline int cgroup_init(void) { return 0; } #endif /* !CONFIG_CGROUPS */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 371e560d13cf..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -5,9 +5,9 @@ /* * Common definitions for all gcc versions go here. */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) /* Optimization barrier */ @@ -46,55 +46,63 @@ * the inline assembly constraint from =g to =r, in this particular * case either is valid. */ -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ - (typeof(ptr)) (__ptr + (off)); }) +#define RELOC_HIDE(ptr, off) \ +({ \ + unsigned long __ptr; \ + __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ + (typeof(ptr)) (__ptr + (off)); \ +}) /* Make the optimizer believe the variable can be manipulated arbitrarily. 
*/ -#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var)) +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) #ifdef __CHECKER__ -#define __must_be_array(arr) 0 +#define __must_be_array(a) 0 #else /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old: */ -#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ +#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) notrace -# define __inline__ __inline__ __attribute__((always_inline)) notrace -# define __inline __inline __attribute__((always_inline)) notrace +#define inline inline __attribute__((always_inline)) notrace +#define __inline__ __inline__ __attribute__((always_inline)) notrace +#define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ -# define inline inline notrace -# define __inline__ __inline__ notrace -# define __inline __inline notrace +#define inline inline notrace +#define __inline__ __inline__ notrace +#define __inline __inline notrace #endif -#define __deprecated __attribute__((deprecated)) -#define __packed __attribute__((packed)) -#define __weak __attribute__((weak)) -#define __alias(symbol) __attribute__((alias(#symbol))) +#define __always_inline inline __attribute__((always_inline)) +#define noinline __attribute__((noinline)) + +#define __deprecated __attribute__((deprecated)) +#define __packed __attribute__((packed)) +#define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* - * it doesn't make sense on ARM (currently the only user of __naked) to trace - * naked 
functions because then mcount is called without stack and frame pointer - * being set up and there is no chance to restore the lr register to the value - * before mcount was called. + * it doesn't make sense on ARM (currently the only user of __naked) + * to trace naked functions because then mcount is called without + * stack and frame pointer being set up and there is no chance to + * restore the lr register to the value before mcount was called. + * + * The asm() bodies of naked functions often depend on standard calling + * conventions, therefore they must be noinline and noclone. * - * The asm() bodies of naked functions often depend on standard calling conventions, - * therefore they must be noinline and noclone. GCC 4.[56] currently fail to enforce - * this, so we must do so ourselves. See GCC PR44290. + * GCC 4.[56] currently fail to enforce this, so we must do so ourselves. + * See GCC PR44290. */ -#define __naked __attribute__((naked)) noinline __noclone notrace +#define __naked __attribute__((naked)) noinline __noclone notrace -#define __noreturn __attribute__((noreturn)) +#define __noreturn __attribute__((noreturn)) /* * From the GCC manual: @@ -106,19 +114,130 @@ * would be. * [...] 
*/ -#define __pure __attribute__((pure)) -#define __aligned(x) __attribute__((aligned(x))) -#define __printf(a, b) __attribute__((format(printf, a, b))) -#define __scanf(a, b) __attribute__((format(scanf, a, b))) -#define noinline __attribute__((noinline)) -#define __attribute_const__ __attribute__((__const__)) -#define __maybe_unused __attribute__((unused)) -#define __always_unused __attribute__((unused)) - -#define __gcc_header(x) #x -#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) -#define gcc_header(x) _gcc_header(x) -#include gcc_header(__GNUC__) +#define __pure __attribute__((pure)) +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) +#define __attribute_const__ __attribute__((__const__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) + +/* gcc version specific checks */ + +#if GCC_VERSION < 30200 +# error Sorry, your compiler is too old - please upgrade it. +#endif + +#if GCC_VERSION < 30300 +# define __used __attribute__((__unused__)) +#else +# define __used __attribute__((__used__)) +#endif + +#ifdef CONFIG_GCOV_KERNEL +# if GCC_VERSION < 30400 +# error "GCOV profiling support for gcc versions below 3.4 not included" +# endif /* __GNUC_MINOR__ */ +#endif /* CONFIG_GCOV_KERNEL */ + +#if GCC_VERSION >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#endif + +#if GCC_VERSION >= 40000 + +/* GCC 4.1.[01] miscompiles __weak */ +#ifdef __KERNEL__ +# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 +# error Your version of gcc miscompiles the __weak directive +# endif +#endif + +#define __used __attribute__((__used__)) +#define __compiler_offsetof(a, b) \ + __builtin_offsetof(a, b) + +#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 +# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif + +#if GCC_VERSION >= 40300 +/* Mark functions as cold. 
gcc will assume any path leading to a call + * to them will be unlikely. This means a lot of manual unlikely()s + * are unnecessary now for any paths leading to the usual suspects + * like BUG(), printk(), panic() etc. [but let's keep them for now for + * older compilers] + * + * Early snapshots of gcc 4.3 don't support this and we can't detect this + * in the preprocessor, but we can live with this because they're unreleased. + * Maketime probing would be overkill here. + * + * gcc also has a __attribute__((__hot__)) to move hot functions into + * a special section, but I don't see any sense in this right now in + * the kernel context + */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ +#endif /* GCC_VERSION >= 40300 */ + +#if GCC_VERSION >= 40500 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +#endif /* GCC_VERSION >= 40500 */ + +#if GCC_VERSION >= 40600 +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) +#endif + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. 
+ * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if GCC_VERSION >= 40400 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 50000 +#define KASAN_ABI_VERSION 4 +#elif GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif + +#endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) #define __noclone /* not needed */ @@ -129,5 +248,3 @@ * code */ #define uninitialized_var(x) x = x - -#define __always_inline inline __attribute__((always_inline)) diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h deleted file mode 100644 index 7d89febe4d79..000000000000 --- a/include/linux/compiler-gcc3.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead." -#endif - -#if GCC_VERSION < 30200 -# error Sorry, your compiler is too old - please upgrade it. -#endif - -#if GCC_VERSION >= 30300 -# define __used __attribute__((__used__)) -#else -# define __used __attribute__((__unused__)) -#endif - -#if GCC_VERSION >= 30400 -#define __must_check __attribute__((warn_unused_result)) -#endif - -#ifdef CONFIG_GCOV_KERNEL -# if GCC_VERSION < 30400 -# error "GCOV profiling support for gcc versions below 3.4 not included" -# endif /* __GNUC_MINOR__ */ -#endif /* CONFIG_GCOV_KERNEL */ diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h deleted file mode 100644 index 769e19864632..000000000000 --- a/include/linux/compiler-gcc4.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead." 
-#endif - -/* GCC 4.1.[01] miscompiles __weak */ -#ifdef __KERNEL__ -# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 -# error Your version of gcc miscompiles the __weak directive -# endif -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a,b) __builtin_offsetof(a,b) - -#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 -# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#endif - -#if GCC_VERSION >= 40300 -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. [but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ -#endif /* GCC_VERSION >= 40300 */ - -#if GCC_VERSION >= 40500 -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. 
- */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -#endif /* GCC_VERSION >= 40500 */ - -#if GCC_VERSION >= 40600 -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) -#endif - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#if GCC_VERSION >= 40400 -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#endif -#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) -#define __HAVE_BUILTIN_BSWAP16__ -#endif -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#if GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 -#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h deleted file mode 100644 index efee493714eb..000000000000 --- a/include/linux/compiler-gcc5.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include <linux/compiler-gcc5.h> directly, include <linux/compiler.h> instead." -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. 
[but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ - -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) 
do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#define __HAVE_BUILTIN_BSWAP16__ -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#define KASAN_ABI_VERSION 4 diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 0c9a2f2c2802..d4c71132d07f 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,10 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier #undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier() __memory_barrier() #define barrier_data(ptr) barrier() #define RELOC_HIDE(ptr, off) \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 05be2352fef8..7f8ad9593da7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -21,6 +21,7 @@ # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu +# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); @@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __cond_lock(x,c) (c) # define __percpu # define __rcu +# define __pmem #endif /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ @@ -473,6 +475,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (volatile typeof(x) *)&(x); }) #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. 
+ */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = READ_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) diff --git a/include/linux/console.h b/include/linux/console.h index 9f50fb413c11..bd194343c346 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -115,6 +115,7 @@ static inline int con_debug_leave(void) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ struct console { char name[16]; diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index e859c98d1767..e329ee2667e1 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -104,6 +104,7 @@ struct vc_data { unsigned int vc_resize_user; /* resize request from user */ unsigned int vc_bell_pitch; /* Console bell pitch */ unsigned int vc_bell_duration; /* Console bell duration */ + unsigned short vc_cur_blink_ms; /* Cursor blink duration */ struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ struct uni_pagedir *vc_uni_pagedir; struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */ diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index bd955270d5aa..c156f5082758 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,6 +28,9 @@ #include <linux/thermal.h> #include <linux/cpumask.h> +typedef int (*get_static_t)(cpumask_t *cpumask, int interval, + unsigned long voltage, u32 *power); + #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. 
@@ -36,6 +39,10 @@ struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus); +struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func); + /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. @@ -45,6 +52,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus); + +struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, @@ -52,6 +65,15 @@ of_cpufreq_cooling_register(struct device_node *np, { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} #endif /** @@ -68,11 +90,28 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus) return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func) +{ + return NULL; +} + +static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus) { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} + static inline void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { diff --git 
a/include/linux/device.h b/include/linux/device.h index 6558af90c8fe..00ac57c26615 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -196,12 +196,41 @@ extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); /** + * enum probe_type - device driver probe type to try + * Device drivers may opt in for special handling of their + * respective probe routines. This tells the core what to + * expect and prefer. + * + * @PROBE_DEFAULT_STRATEGY: Used by drivers that work equally well + * whether probed synchronously or asynchronously. + * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which + * probing order is not essential for booting the system may + * opt into executing their probes asynchronously. + * @PROBE_FORCE_SYNCHRONOUS: Use this to annotate drivers that need + * their probe routines to run synchronously with driver and + * device registration (with the exception of -EPROBE_DEFER + * handling - re-probing always ends up being done asynchronously). + * + * Note that the end goal is to switch the kernel to use asynchronous + * probing by default, so annotating drivers with + * %PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us + * to speed up boot process while we are validating the rest of the + * drivers. + */ +enum probe_type { + PROBE_DEFAULT_STRATEGY, + PROBE_PREFER_ASYNCHRONOUS, + PROBE_FORCE_SYNCHRONOUS, +}; + +/** * struct device_driver - The basic device driver structure * @name: Name of the device driver. * @bus: The bus which the device of this driver belongs to. * @owner: The module owner. * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @probe_type: Type of the probe (synchronous or asynchronous) to use. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. 
* @probe: Called to query the existence of a specific device, @@ -235,6 +264,7 @@ struct device_driver { const char *mod_name; /* used for built-in modules */ bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ + enum probe_type probe_type; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; @@ -975,6 +1005,7 @@ extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device *dev); extern int __must_check device_attach(struct device *dev); extern int __must_check driver_attach(struct device_driver *drv); +extern void device_initial_probe(struct device *dev); extern int __must_check device_reprobe(struct device *dev); /* diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 2f0b431b73e0..f98bd7068d55 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -115,6 +115,8 @@ struct dma_buf_ops { * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. * @exp_name: name of the exporter; useful for debugging. + * @owner: pointer to exporter module; used for refcounting when exporter is a + * kernel module. * @list_node: node for dma_buf accounting and debugging. * @priv: exporter specific private data for this buffer object. * @resv: reservation object linked to this dma-buf @@ -129,6 +131,7 @@ struct dma_buf { unsigned vmapping_counter; void *vmap_ptr; const char *exp_name; + struct module *owner; struct list_head list_node; void *priv; struct reservation_object *resv; @@ -164,7 +167,8 @@ struct dma_buf_attachment { /** * struct dma_buf_export_info - holds information needed to export a dma_buf - * @exp_name: name of the exporting module - useful for debugging. + * @exp_name: name of the exporter - useful for debugging. 
+ * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer * @size: Size of the buffer * @flags: mode flags for the file @@ -176,6 +180,7 @@ struct dma_buf_attachment { */ struct dma_buf_export_info { const char *exp_name; + struct module *owner; const struct dma_buf_ops *ops; size_t size; int flags; @@ -187,7 +192,8 @@ struct dma_buf_export_info { * helper macro for exporters; zeros and fills in most common values */ #define DEFINE_DMA_BUF_EXPORT_INFO(a) \ - struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME } + struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME, \ + .owner = THIS_MODULE } /** * get_dma_buf - convenience wrapper for get_file. diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h new file mode 100644 index 000000000000..3edc99294bf6 --- /dev/null +++ b/include/linux/dma/pxa-dma.h @@ -0,0 +1,27 @@ +#ifndef _PXA_DMA_H_ +#define _PXA_DMA_H_ + +enum pxad_chan_prio { + PXAD_PRIO_HIGHEST = 0, + PXAD_PRIO_NORMAL, + PXAD_PRIO_LOW, + PXAD_PRIO_LOWEST, +}; + +struct pxad_param { + unsigned int drcmr; + enum pxad_chan_prio prio; +}; + +struct dma_chan; + +#ifdef CONFIG_PXA_DMA +bool pxad_filter_fn(struct dma_chan *chan, void *param); +#else +static inline bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + return false; +} +#endif + +#endif /* _PXA_DMA_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..e2f5eb419976 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -65,6 +65,7 @@ enum dma_transaction_type { DMA_PQ, DMA_XOR_VAL, DMA_PQ_VAL, + DMA_MEMSET, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -122,10 +123,18 @@ enum dma_transfer_direction { * chunk and before first src/dst address for next chunk. * Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false. * Ignored for src(assumed 0), if src_inc is true and src_sgl is false. 
+ * @dst_icg: Number of bytes to jump after last dst address of this + * chunk and before the first dst address for next chunk. + * Ignored if dst_inc is true and dst_sgl is false. + * @src_icg: Number of bytes to jump after last src address of this + * chunk and before the first src address for next chunk. + * Ignored if src_inc is true and src_sgl is false. */ struct data_chunk { size_t size; size_t icg; + size_t dst_icg; + size_t src_icg; }; /** @@ -222,6 +231,16 @@ struct dma_chan_percpu { }; /** + * struct dma_router - DMA router structure + * @dev: pointer to the DMA router device + * @route_free: function to be called when the route can be disconnected + */ +struct dma_router { + struct device *dev; + void (*route_free)(struct device *dev, void *route_data); +}; + +/** * struct dma_chan - devices supply DMA channels, clients use them * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client @@ -232,6 +251,8 @@ struct dma_chan_percpu { * @local: per-cpu pointer to a struct dma_chan_percpu * @client_count: how many clients are using this channel * @table_count: number of appearances in the mem-to-mem allocation table + * @router: pointer to the DMA router structure + * @route_data: channel specific data for the router * @private: private data for certain client-channel associations */ struct dma_chan { @@ -247,6 +268,11 @@ struct dma_chan { struct dma_chan_percpu __percpu *local; int client_count; int table_count; + + /* DMA router */ + struct dma_router *router; + void *route_data; + void *private; }; @@ -570,6 +596,7 @@ struct dma_tx_state { * @copy_align: alignment shift for memcpy operations * @xor_align: alignment shift for xor operations * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @src_addr_widths: bit mask of src addr widths the device supports @@ 
-588,6 +615,7 @@ struct dma_tx_state { * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation + * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -620,6 +648,7 @@ struct dma_device { u8 copy_align; u8 xor_align; u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -650,6 +679,9 @@ struct dma_device { struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( @@ -745,6 +777,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( return chan->device->device_prep_interleaved_dma(chan, xt, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags) +{ + if (!chan || !chan->device) + return NULL; + + return chan->device->device_prep_dma_memset(chan, dest, value, + len, flags); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct dma_chan *chan, struct scatterlist *dst_sg, unsigned int dst_nents, @@ -820,6 +863,12 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, return dmaengine_check_align(dev->pq_align, off1, off2, len); } +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t 
off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { @@ -874,6 +923,33 @@ static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) BUG(); } +static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg, + size_t dir_icg) +{ + if (inc) { + if (dir_icg) + return dir_icg; + else if (sgl) + return icg; + } + + return 0; +} + +static inline size_t dmaengine_get_dst_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->dst_inc, xt->dst_sgl, + chunk->icg, chunk->dst_icg); +} + +static inline size_t dmaengine_get_src_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->src_inc, xt->src_sgl, + chunk->icg, chunk->src_icg); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 52456aa566a0..e1043f79122f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,8 +11,8 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include <linux/scatterlist.h> #include <asm/io.h> -#include <asm/scatterlist.h> struct device; diff --git a/include/linux/dmi.h b/include/linux/dmi.h index f820f0a336c9..5055ac34142d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -2,6 +2,7 @@ #define __DMI_H__ #include <linux/list.h> +#include <linux/kobject.h> #include <linux/mod_devicetable.h> /* enum dmi_field is in mod_devicetable.h */ @@ -74,7 +75,7 @@ struct dmi_header { u8 type; u8 length; u16 handle; -}; +} __packed; struct dmi_device { struct list_head list; @@ -93,6 +94,7 @@ struct dmi_dev_onboard { int devfn; }; +extern struct kobject *dmi_kobj; extern int dmi_check_system(const struct dmi_system_id *list); const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list); extern const char * dmi_get_system_info(int 
field); diff --git a/include/linux/efi.h b/include/linux/efi.h index 5f19efe4eb3f..85ef051ac6fb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -85,7 +85,8 @@ typedef struct { #define EFI_MEMORY_MAPPED_IO 11 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE 12 #define EFI_PAL_CODE 13 -#define EFI_MAX_MEMORY_TYPE 14 +#define EFI_PERSISTENT_MEMORY 14 +#define EFI_MAX_MEMORY_TYPE 15 /* Attribute values: */ #define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 45a91474487d..638b324f0291 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques typedef int (elevator_init_fn) (struct request_queue *, struct elevator_type *e); typedef void (elevator_exit_fn) (struct elevator_queue *); +typedef void (elevator_registered_fn) (struct request_queue *); struct elevator_ops { @@ -68,6 +69,7 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; + elevator_registered_fn *elevator_registered_fn; }; #define ELV_NAME_MAX (16) diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 36f49c405dfb..b16d929fa75f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -1,6 +1,9 @@ /* * External connector (extcon) class driver * + * Copyright (C) 2015 Samsung Electronics + * Author: Chanwoo Choi <cw00.choi@samsung.com> + * * Copyright (C) 2012 Samsung Electronics * Author: Donggeun Kim <dg77.kim@samsung.com> * Author: MyungJoo Ham <myungjoo.ham@samsung.com> @@ -27,50 +30,35 @@ #include <linux/notifier.h> #include <linux/sysfs.h> -#define SUPPORTED_CABLE_MAX 32 -#define CABLE_NAME_MAX 30 - /* - * The standard cable name is to help support general notifier - * and notifiee device drivers to share the common names. 
- * Please use standard cable names unless your notifier device has - * a very unique and abnormal cable or - * the cable type is supposed to be used with only one unique - * pair of notifier/notifiee devices. - * - * Please add any other "standard" cables used with extcon dev. - * - * You may add a dot and number to specify version or specification - * of the specific cable if it is required. (e.g., "Fast-charger.18" - * and "Fast-charger.10" for 1.8A and 1.0A chargers) - * However, the notifiee and notifier should be able to handle such - * string and if the notifiee can negotiate the protocol or identify, - * you don't need such convention. This convention is helpful when - * notifier can distinguish but notifiee cannot. + * Define the unique id of supported external connectors */ -enum extcon_cable_name { - EXTCON_USB = 0, - EXTCON_USB_HOST, - EXTCON_TA, /* Travel Adaptor */ - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, /* Charging an external device */ - EXTCON_HDMI, - EXTCON_MHL, - EXTCON_DVI, - EXTCON_VGA, - EXTCON_DOCK, - EXTCON_LINE_IN, - EXTCON_LINE_OUT, - EXTCON_MIC_IN, - EXTCON_HEADPHONE_OUT, - EXTCON_SPDIF_IN, - EXTCON_SPDIF_OUT, - EXTCON_VIDEO_IN, - EXTCON_VIDEO_OUT, - EXTCON_MECHANICAL, -}; -extern const char extcon_cable_name[][CABLE_NAME_MAX + 1]; +#define EXTCON_NONE 0 + +#define EXTCON_USB 1 /* USB connector */ +#define EXTCON_USB_HOST 2 + +#define EXTCON_TA 3 /* Charger connector */ +#define EXTCON_FAST_CHARGER 4 +#define EXTCON_SLOW_CHARGER 5 +#define EXTCON_CHARGE_DOWNSTREAM 6 + +#define EXTCON_LINE_IN 7 /* Audio/Video connector */ +#define EXTCON_LINE_OUT 8 +#define EXTCON_MICROPHONE 9 +#define EXTCON_HEADPHONE 10 +#define EXTCON_HDMI 11 +#define EXTCON_MHL 12 +#define EXTCON_DVI 13 +#define EXTCON_VGA 14 +#define EXTCON_SPDIF_IN 15 +#define EXTCON_SPDIF_OUT 16 +#define EXTCON_VIDEO_IN 17 +#define EXTCON_VIDEO_OUT 18 + +#define EXTCON_DOCK 19 /* Misc connector */ +#define EXTCON_JIG 20 +#define EXTCON_MECHANICAL 21 
struct extcon_cable; @@ -78,7 +66,7 @@ struct extcon_cable; * struct extcon_dev - An extcon device represents one external connector. * @name: The name of this extcon device. Parent device name is * used if NULL. - * @supported_cable: Array of supported cable names ending with NULL. + * @supported_cable: Array of supported cable names ending with EXTCON_NONE. * If supported_cable is NULL, cable name related APIs * are disabled. * @mutually_exclusive: Array of mutually exclusive set of cables that cannot @@ -89,16 +77,14 @@ struct extcon_cable; * be attached simulataneously. {0x7, 0} is equivalent to * {0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there * can be no simultaneous connections. - * @print_name: An optional callback to override the method to print the - * name of the extcon device. * @print_state: An optional callback to override the method to print the * status of the extcon device. * @dev: Device of this extcon. * @state: Attach/detach state of this extcon. Do not provide at * register-time. * @nh: Notifier for the state change events from this extcon - * @entry: To support list of extcon devices so that users can search - * for extcon devices based on the extcon name. + * @entry: To support list of extcon devices so that users can + * search for extcon devices based on the extcon name. * @lock: * @max_supported: Internal value to store the number of cables. * @extcon_dev_type: Device_type struct to provide attribute_groups @@ -113,16 +99,15 @@ struct extcon_cable; struct extcon_dev { /* Optional user initializing data */ const char *name; - const char **supported_cable; + const unsigned int *supported_cable; const u32 *mutually_exclusive; /* Optional callbacks to override class functions */ - ssize_t (*print_name)(struct extcon_dev *edev, char *buf); ssize_t (*print_state)(struct extcon_dev *edev, char *buf); /* Internal data. Please do not set. 
*/ struct device dev; - struct raw_notifier_head nh; + struct raw_notifier_head *nh; struct list_head entry; int max_supported; spinlock_t lock; /* could be called by irq handler */ @@ -161,8 +146,6 @@ struct extcon_cable { /** * struct extcon_specific_cable_nb - An internal data for * extcon_register_interest(). - * @internal_nb: A notifier block bridging extcon notifier - * and cable notifier. * @user_nb: user provided notifier block for events from * a specific cable. * @cable_index: the target cable. @@ -170,7 +153,6 @@ struct extcon_cable { * @previous_value: the saved previous event value. */ struct extcon_specific_cable_nb { - struct notifier_block internal_nb; struct notifier_block *user_nb; int cable_index; struct extcon_dev *edev; @@ -194,10 +176,10 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); /* * Following APIs control the memory of extcon device. */ -extern struct extcon_dev *extcon_dev_allocate(const char **cables); +extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); extern void extcon_dev_free(struct extcon_dev *edev); extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const char **cables); + const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* @@ -216,13 +198,10 @@ extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state); /* * get/set_cable_state access each bit of the 32b encoded state value. - * They are used to access the status of each cable based on the cable_name - * or cable_index, which is retrieved by extcon_find_cable_index + * They are used to access the status of each cable based on the cable_name. 
*/ -extern int extcon_find_cable_index(struct extcon_dev *sdev, - const char *cable_name); -extern int extcon_get_cable_state_(struct extcon_dev *edev, int cable_index); -extern int extcon_set_cable_state_(struct extcon_dev *edev, int cable_index, +extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); +extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state); extern int extcon_get_cable_state(struct extcon_dev *edev, @@ -249,16 +228,21 @@ extern int extcon_unregister_interest(struct extcon_specific_cable_nb *nb); * we do not recommend to use this for normal 'notifiee' device drivers who * want to be notified by a specific external port of the notifier. */ -extern int extcon_register_notifier(struct extcon_dev *edev, +extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, + struct notifier_block *nb); +extern int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); -extern int extcon_unregister_notifier(struct extcon_dev *edev, - struct notifier_block *nb); /* * Following API get the extcon device from devicetree. * This function use phandle of devicetree to get extcon device directly. 
*/ -extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index); +extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, + int index); + +/* Following API to get information of extcon device */ +extern const char *extcon_get_edev_name(struct extcon_dev *edev); + #else /* CONFIG_EXTCON */ static inline int extcon_dev_register(struct extcon_dev *edev) { @@ -276,7 +260,7 @@ static inline int devm_extcon_dev_register(struct device *dev, static inline void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev) { } -static inline struct extcon_dev *extcon_dev_allocate(const char **cables) +static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) { return ERR_PTR(-ENOSYS); } @@ -284,7 +268,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const char **cables) static inline void extcon_dev_free(struct extcon_dev *edev) { } static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const char **cables) + const unsigned int *cable) { return ERR_PTR(-ENOSYS); } @@ -307,20 +291,14 @@ static inline int extcon_update_state(struct extcon_dev *edev, u32 mask, return 0; } -static inline int extcon_find_cable_index(struct extcon_dev *edev, - const char *cable_name) -{ - return 0; -} - static inline int extcon_get_cable_state_(struct extcon_dev *edev, - int cable_index) + unsigned int id) { return 0; } static inline int extcon_set_cable_state_(struct extcon_dev *edev, - int cable_index, bool cable_state) + unsigned int id, bool cable_state) { return 0; } @@ -343,13 +321,15 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) } static inline int extcon_register_notifier(struct extcon_dev *edev, - struct notifier_block *nb) + unsigned int id, + struct notifier_block *nb) { return 0; } static inline int extcon_unregister_notifier(struct extcon_dev *edev, - struct notifier_block *nb) + unsigned int id, + struct notifier_block *nb) { return 0; } diff 
--git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h index 9ca958c4e94c..53c60806bcfb 100644 --- a/include/linux/extcon/extcon-adc-jack.h +++ b/include/linux/extcon/extcon-adc-jack.h @@ -44,7 +44,7 @@ struct adc_jack_cond { * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels used within * the consumer. E.g. 'battery_voltage' - * @cable_names: array of cable names ending with null. + * @cable_names: array of extcon id for supported cables. * @adc_contitions: array of struct adc_jack_cond conditions ending * with .state = 0 entry. This describes how to decode * adc values into extcon state. @@ -58,8 +58,7 @@ struct adc_jack_pdata { const char *name; const char *consumer_channel; - /* The last entry should be NULL */ - const char **cable_names; + const enum extcon *cable_names; /* The last entry's state should be 0 */ struct adc_jack_cond *adc_conditions; diff --git a/include/linux/fs.h b/include/linux/fs.h index b577e801b4af..3f1a84635da8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ #include <uapi/linux/fs.h> struct backing_dev_info; +struct bdi_writeback; struct export_operations; struct hd_geometry; struct iovec; @@ -69,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); +typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 @@ -634,6 +636,14 @@ struct inode { struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct bdi_writeback *i_wb; /* the associated cgroup wb */ + + /* foreign inode detection, see wbc_detach_inode() */ + int i_wb_frn_winner; + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; +#endif struct list_head i_lru; /* inode LRU 
list */ struct list_head i_sb_list; union { @@ -1232,6 +1242,8 @@ struct mm_struct; #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ +/* sb->s_iflags */ +#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ /* Possible states of 'frozen' field */ enum { @@ -1270,6 +1282,7 @@ struct super_block { const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; + unsigned long s_iflags; /* internal SB_I_* flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; @@ -1806,6 +1819,11 @@ struct super_operations { * * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). * + * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to + * synchronize competing switching instances and to tell + * wb stat updates to grab mapping->tree_lock. See + * inode_switch_wb_work_fn() for details. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? 
*/ #define I_DIRTY_SYNC (1 << 0) @@ -1825,6 +1843,7 @@ struct super_operations { #define I_DIRTY_TIME (1 << 11) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_WB_SWITCH (1 << 13) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) @@ -2241,7 +2260,13 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int sb_is_blkdev_sb(struct super_block *sb); + +extern struct super_block *blockdev_superblock; + +static inline bool sb_is_blkdev_sb(struct super_block *sb) +{ + return sb == blockdev_superblock; +} #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -2280,6 +2305,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); +extern int __blkdev_reread_part(struct block_device *bdev); +extern int blkdev_reread_part(struct block_device *bdev); + #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); extern void bd_unlink_disk_holder(struct block_device *bdev, @@ -2628,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, 
get_block_t, + dax_iodone_t); int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index a82296af413f..2a2f56b292c1 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -24,6 +24,7 @@ #define FSL_USB_VER_1_6 1 #define FSL_USB_VER_2_2 2 #define FSL_USB_VER_2_4 3 +#define FSL_USB_VER_2_5 4 #include <linux/types.h> diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h index 569236e6b2bc..93e080b39cf6 100644 --- a/include/linux/goldfish.h +++ b/include/linux/goldfish.h @@ -3,13 +3,24 @@ /* Helpers for Goldfish virtual platform */ -static inline void gf_write64(unsigned long data, - void __iomem *portl, void __iomem *porth) +static inline void gf_write_ptr(const void *ptr, void __iomem *portl, + void __iomem *porth) { - writel((u32)data, portl); + writel((u32)(unsigned long)ptr, portl); #ifdef CONFIG_64BIT - writel(data>>32, porth); + writel((unsigned long)ptr >> 32, porth); #endif } +static inline void gf_write_dma_addr(const dma_addr_t addr, + void __iomem *portl, + void __iomem *porth) +{ + writel((u32)addr, portl); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + writel(addr >> 32, porth); +#endif +} + + #endif /* __LINUX_GOLDFISH_H */ diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h deleted file mode 100644 index c25e9477f7c3..000000000000 --- a/include/linux/gsmmux.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _LINUX_GSMMUX_H -#define _LINUX_GSMMUX_H - -struct gsm_config -{ - unsigned int adaption; - unsigned int encapsulation; - unsigned int initiator; - unsigned int t1; - unsigned int t2; - unsigned int t3; - unsigned int n2; - unsigned int mru; - unsigned 
int mtu; - unsigned int k; - unsigned int i; - unsigned int unused[8]; /* Padding for expansion without - breaking stuff */ -}; - -#define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config) -#define GSMIOC_SETCONF _IOW('G', 1, struct gsm_config) - -struct gsm_netconfig { - unsigned int adaption; /* Adaption to use in network mode */ - unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */ - unsigned short unused2; - char if_name[IFNAMSIZ]; /* interface name format string */ - __u8 unused[28]; /* For future use */ -}; - -#define GSMIOC_ENABLE_NET _IOW('G', 2, struct gsm_netconfig) -#define GSMIOC_DISABLE_NET _IO('G', 3) - - -#endif diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 902c37aef67e..30d3a1f79450 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -160,16 +160,18 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, * 1 . 1 (Windows 7) * 2 . 4 (Windows 8) * 3 . 0 (Windows 8 R2) + * 4 . 0 (Windows 10) */ #define VERSION_WS2008 ((0 << 16) | (13)) #define VERSION_WIN7 ((1 << 16) | (1)) #define VERSION_WIN8 ((2 << 16) | (4)) #define VERSION_WIN8_1 ((3 << 16) | (0)) +#define VERSION_WIN10 ((4 << 16) | (0)) #define VERSION_INVAL -1 -#define VERSION_CURRENT VERSION_WIN8_1 +#define VERSION_CURRENT VERSION_WIN10 /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) @@ -389,10 +391,7 @@ enum vmbus_channel_message_type { CHANNELMSG_INITIATE_CONTACT = 14, CHANNELMSG_VERSION_RESPONSE = 15, CHANNELMSG_UNLOAD = 16, -#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD - CHANNELMSG_VIEWRANGE_ADD = 17, - CHANNELMSG_VIEWRANGE_REMOVE = 18, -#endif + CHANNELMSG_UNLOAD_RESPONSE = 17, CHANNELMSG_COUNT }; @@ -549,21 +548,6 @@ struct vmbus_channel_gpadl_torndown { u32 gpadl; } __packed; -#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD -struct vmbus_channel_view_range_add { - struct vmbus_channel_message_header header; - PHYSICAL_ADDRESS viewrange_base; - 
u64 viewrange_length; - u32 child_relid; -} __packed; - -struct vmbus_channel_view_range_remove { - struct vmbus_channel_message_header header; - PHYSICAL_ADDRESS viewrange_base; - u32 child_relid; -} __packed; -#endif - struct vmbus_channel_relid_released { struct vmbus_channel_message_header header; u32 child_relid; @@ -713,6 +697,11 @@ struct vmbus_channel { /* The corresponding CPUID in the guest */ u32 target_cpu; /* + * State to manage the CPU affiliation of channels. + */ + struct cpumask alloced_cpus_in_node; + int numa_node; + /* * Support for sub-channels. For high performance devices, * it will be useful to have multiple sub-channels to support * a scalable communication infrastructure with the host. @@ -745,6 +734,15 @@ struct vmbus_channel { */ struct list_head sc_list; /* + * Current number of sub-channels. + */ + int num_sc; + /* + * Number of a sub-channel (position within sc_list) which is supposed + * to be used as the next outgoing channel. + */ + int next_oc; + /* * The primary channel this sub-channel belongs to. * This will be NULL for the primary channel. */ @@ -758,9 +756,6 @@ struct vmbus_channel { * link up channels based on their CPU affinity. 
*/ struct list_head percpu_list; - - int num_sc; - int next_oc; }; static inline void set_channel_read_state(struct vmbus_channel *c, bool state) @@ -1236,13 +1231,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, struct icmsg_negotiate *, u8 *, int, int); -int hv_kvp_init(struct hv_util_service *); -void hv_kvp_deinit(void); -void hv_kvp_onchannelcallback(void *); - -int hv_vss_init(struct hv_util_service *); -void hv_vss_deinit(void); -void hv_vss_onchannelcallback(void *); void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); extern struct resource hyperv_mmio; diff --git a/include/linux/ide.h b/include/linux/ide.h index 93b5ca754b5b..a633898f36ac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -39,6 +39,19 @@ struct device; +/* IDE-specific values for req->cmd_type */ +enum ata_cmd_type_bits { + REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, + REQ_TYPE_ATA_PC, + REQ_TYPE_ATA_SENSE, /* sense request */ + REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ + REQ_TYPE_ATA_PM_RESUME, /* resume request */ +}; + +#define ata_pm_request(rq) \ + ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1314,6 +1327,19 @@ struct ide_port_info { u8 udma_mask; }; +/* + * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME + * requests. + */ +struct ide_pm_state { + /* PM state machine step value, currently driver specific */ + int pm_step; + /* requested PM state value (S1, S2, S3, S4, ...) 
*/ + u32 pm_state; + void* data; /* for driver use */ +}; + + int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); @@ -1551,4 +1577,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data) #define ide_host_for_each_port(i, port, host) \ for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index eb8622b78ec9..1600c55828e0 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -29,6 +29,7 @@ struct iio_buffer; * @set_length: set number of datums in buffer * @release: called when the last reference to the buffer is dropped, * should free all resources allocated by the buffer. + * @modes: Supported operating modes by this buffer type * * The purpose of this structure is to make the buffer element * modular as event for a given driver, different usecases may require @@ -51,6 +52,8 @@ struct iio_buffer_access_funcs { int (*set_length)(struct iio_buffer *buffer, int length); void (*release)(struct iio_buffer *buffer); + + unsigned int modes; }; /** diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d86b753e9b30..f79148261d16 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -32,6 +32,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW, IIO_CHAN_INFO_AVERAGE_RAW, IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY, + IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY, IIO_CHAN_INFO_SAMP_FREQ, IIO_CHAN_INFO_FREQUENCY, IIO_CHAN_INFO_PHASE, @@ -43,6 +44,8 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_CALIBWEIGHT, IIO_CHAN_INFO_DEBOUNCE_COUNT, IIO_CHAN_INFO_DEBOUNCE_TIME, + IIO_CHAN_INFO_CALIBEMISSIVITY, + IIO_CHAN_INFO_OVERSAMPLING_RATIO, }; enum iio_shared_by { diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 942b6de68e2f..32b579525004 100644 --- 
a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -17,6 +17,8 @@ enum iio_event_info { IIO_EV_INFO_VALUE, IIO_EV_INFO_HYSTERESIS, IIO_EV_INFO_PERIOD, + IIO_EV_INFO_HIGH_PASS_FILTER_3DB, + IIO_EV_INFO_LOW_PASS_FILTER_3DB, }; #define IIO_VAL_INT 1 diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bb9b075f0eb0..e8493fee8160 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -55,7 +48,6 @@ extern struct fs_struct init_fs; }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/irqchip/ingenic.h b/include/linux/irqchip/ingenic.h new file mode 100644 index 000000000000..0ee319a4029d --- /dev/null +++ b/include/linux/irqchip/ingenic.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#ifndef __LINUX_IRQCHIP_INGENIC_H__ +#define __LINUX_IRQCHIP_INGENIC_H__ + +#include <linux/irq.h> + +extern void ingenic_intc_irq_suspend(struct irq_data *data); +extern void ingenic_intc_irq_resume(struct irq_data *data); + +#endif diff --git a/include/linux/irqchip/irq-sa11x0.h b/include/linux/irqchip/irq-sa11x0.h new file mode 100644 index 000000000000..15db6829c1e4 --- /dev/null +++ b/include/linux/irqchip/irq-sa11x0.h @@ -0,0 +1,17 @@ +/* + * Generic IRQ handling for the SA11x0. + * + * Copyright (C) 2015 Dmitry Eremin-Solenikov + * Copyright (C) 1999-2001 Nicolas Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H +#define __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H + +void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start); + +#endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 20e7f78041c8..edb640ae9a94 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ @@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); -extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, +extern int jbd2_journal_update_sb_log_tail (journal_t 
*, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); extern void jbd2_journal_abort (journal_t *, int); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 060dd7b61c6d..cfa9351c7536 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -533,12 +533,6 @@ bool mac_pton(const char *s, u8 *mac); * * Most likely, you want to use tracing_on/tracing_off. */ -#ifdef CONFIG_RING_BUFFER -/* trace_off_permanent stops recording with no way to bring it back */ -void tracing_off_permanent(void); -#else -static inline void tracing_off_permanent(void) { } -#endif enum ftrace_dump_mode { DUMP_NONE, @@ -819,13 +813,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ -#define VERIFY_OCTAL_PERMISSIONS(perms) \ - (BUILD_BUG_ON_ZERO((perms) < 0) + \ - BUILD_BUG_ON_ZERO((perms) > 0777) + \ - /* User perms >= group perms >= other perms */ \ - BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ - BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ - /* Other writable? Generally considered a bad idea. */ \ - BUILD_BUG_ON_ZERO((perms) & 2) + \ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? Generally considered a bad idea. 
*/ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..e6b2f7db9c0c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -277,6 +277,7 @@ void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); +struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); @@ -352,6 +353,10 @@ static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } +static inline struct inode * +kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) +{ return NULL; } + static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) diff --git a/include/linux/libata.h b/include/linux/libata.h index 51cb312d9bb9..36ce37bcc963 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -430,6 +430,7 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ + ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h new file mode 100644 index 000000000000..75e3af01ee32 --- /dev/null +++ b/include/linux/libnvdimm.h @@ -0,0 +1,151 @@ +/* + * libnvdimm - Non-volatile-memory Devices Subsystem + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LIBNVDIMM_H__ +#define __LIBNVDIMM_H__ +#include <linux/kernel.h> +#include <linux/sizes.h> +#include <linux/types.h> + +enum { + /* when a dimm supports both PMEM and BLK access a label is required */ + NDD_ALIASING = 1 << 0, + /* unarmed memory devices may not persist writes */ + NDD_UNARMED = 1 << 1, + + /* need to set a limit somewhere, but yes, this is likely overkill */ + ND_IOCTL_MAX_BUFLEN = SZ_4M, + ND_CMD_MAX_ELEM = 4, + ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_ARS_STATUS_MAX = SZ_4K, + ND_MAX_MAPPINGS = 32, + + /* mark newly adjusted resources as requiring a label update */ + DPA_RESOURCE_ADJUSTED = 1 << 0, +}; + +extern struct attribute_group nvdimm_bus_attribute_group; +extern struct attribute_group nvdimm_attribute_group; +extern struct attribute_group nd_device_attribute_group; +extern struct attribute_group nd_numa_attribute_group; +extern struct attribute_group nd_region_attribute_group; +extern struct attribute_group nd_mapping_attribute_group; + +struct nvdimm; +struct nvdimm_bus_descriptor; +typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len); + +struct nd_namespace_label; +struct nvdimm_drvdata; +struct nd_mapping { + struct nvdimm *nvdimm; + struct nd_namespace_label **labels; + u64 start; + u64 size; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. 
+ */ + struct nvdimm_drvdata *ndd; +}; + +struct nvdimm_bus_descriptor { + const struct attribute_group **attr_groups; + unsigned long dsm_mask; + char *provider_name; + ndctl_fn ndctl; +}; + +struct nd_cmd_desc { + int in_num; + int out_num; + u32 in_sizes[ND_CMD_MAX_ELEM]; + int out_sizes[ND_CMD_MAX_ELEM]; +}; + +struct nd_interleave_set { + u64 cookie; +}; + +struct nd_region_desc { + struct resource *res; + struct nd_mapping *nd_mapping; + u16 num_mappings; + const struct attribute_group **attr_groups; + struct nd_interleave_set *nd_set; + void *provider_data; + int num_lanes; + int numa_node; +}; + +struct nvdimm_bus; +struct module; +struct device; +struct nd_blk_region; +struct nd_blk_region_desc { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + struct nd_region_desc ndr_desc; +}; + +static inline struct nd_blk_region_desc *to_blk_region_desc( + struct nd_region_desc *ndr_desc) +{ + return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc); + +} + +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc, struct module *module); +#define nvdimm_bus_register(parent, desc) \ + __nvdimm_bus_register(parent, desc, THIS_MODULE) +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); +struct nvdimm_bus *to_nvdimm_bus(struct device *dev); +struct nvdimm *to_nvdimm(struct device *dev); +struct nd_region *to_nd_region(struct device *dev); +struct nd_blk_region *to_nd_blk_region(struct device *dev); +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); +const char *nvdimm_name(struct nvdimm *nvdimm); +void *nvdimm_provider_data(struct nvdimm *nvdimm); +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + 
unsigned long *dsm_mask); +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf); +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field); +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +void *nd_region_provider_data(struct nd_region *nd_region); +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned int nd_region_acquire_lane(struct nd_region *nd_region); +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); +u64 nd_fletcher64(void *addr, size_t len, bool le); +#endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h new file mode 100644 index 000000000000..9429f054c323 --- /dev/null +++ b/include/linux/lsm_hooks.h @@ -0,0 +1,1888 @@ +/* + * Linux Security Module interfaces + * + * Copyright (C) 2001 WireX Communications, Inc <chris@wirex.com> + * Copyright (C) 2001 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2001 Networks Associates Technology, Inc <ssmalley@nai.com> + * Copyright (C) 2001 James Morris <jmorris@intercode.com.au> + * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (C) 2015 Intel Corporation. 
+ * Copyright (C) 2015 Casey Schaufler <casey@schaufler-ca.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Due to this file being licensed under the GPL there is controversy over + * whether this permits you to write a module that #includes this file + * without placing your module under the GPL. Please consult a lawyer for + * advice before doing this. + * + */ + +#ifndef __LINUX_LSM_HOOKS_H +#define __LINUX_LSM_HOOKS_H + +#include <linux/security.h> +#include <linux/init.h> +#include <linux/rculist.h> + +/** + * Security hooks for program execution operations. + * + * @bprm_set_creds: + * Save security information in the bprm->security field, typically based + * on information about the bprm->file, for later use by the apply_creds + * hook. This hook may also optionally check permissions (e.g. for + * transitions between security domains). + * This hook may be called multiple times during a single execve, e.g. for + * interpreters. The hook can tell whether it has already been called by + * checking to see if @bprm->security is non-NULL. If so, then the hook + * may decide either to retain the security information saved earlier or + * to replace it. + * @bprm contains the linux_binprm structure. + * Return 0 if the hook is successful and permission is granted. + * @bprm_check_security: + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check of the @bprm->security value which is set in the + * preceding set_creds call. The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. + * @bprm contains the linux_binprm structure.
+ * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). + * @bprm_secureexec: + * Return a boolean value (0 or 1) indicating whether a "secure exec" + * is required. The flag is passed in the auxiliary table + * on the initial stack to the ELF interpreter to indicate whether libc + * should enable secure mode. + * @bprm contains the linux_binprm structure. + * + * Security hooks for filesystem operations. + * + * @sb_alloc_security: + * Allocate and attach a security structure to the sb->s_security field. + * The s_security field is initialized to NULL when the structure is + * allocated. + * @sb contains the super_block structure to be modified. + * Return 0 if operation was successful. + * @sb_free_security: + * Deallocate and clear the sb->s_security field. + * @sb contains the super_block structure to be modified. + * @sb_statfs: + * Check permission before obtaining filesystem statistics for the @mnt + * mountpoint. 
+ * @dentry is a handle on the superblock for the filesystem. + * Return 0 if permission is granted. + * @sb_mount: + * Check permission before an object specified by @dev_name is mounted on + * the mount point named by @nd. For an ordinary mount, @dev_name + * identifies a device if the file system type requires a device. For a + * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a + * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the + * pathname of the object being mounted. + * @dev_name contains the name for object being mounted. + * @path contains the path for mount point object. + * @type contains the filesystem type. + * @flags contains the mount flags. + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_copy_data: + * Allow mount option data to be copied prior to parsing by the filesystem, + * so that the security module can extract security-specific mount + * options cleanly (a filesystem may modify the data e.g. with strsep()). + * This also allows the original mount data to be stripped of security- + * specific options to avoid having to make filesystems aware of them. + * @type the type of filesystem being mounted. + * @orig the original mount data copied from userspace. + * @copy copied data which will be passed to the security module. + * Returns 0 if the copy was successful. + * @sb_remount: + * Extracts security system specific mount options and verifies no changes + * are being made to those options. + * @sb superblock being remounted + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_umount: + * Check permission before the @mnt file system is unmounted. + * @mnt contains the mounted file system. + * @flags contains the unmount flags, e.g. MNT_FORCE. + * Return 0 if permission is granted. + * @sb_pivotroot: + * Check permission before pivoting the root filesystem. 
+ * @old_path contains the path for the new location of the + * current root (put_old). + * @new_path contains the path for the new root (new_root). + * Return 0 if permission is granted. + * @sb_set_mnt_opts: + * Set the security relevant mount options used for a superblock + * @sb the superblock to set security mount options for + * @opts binary data structure containing all lsm mount data + * @sb_clone_mnt_opts: + * Copy all security options from a given superblock to another + * @oldsb old superblock which contains information to clone + * @newsb new superblock which needs to be filled in + * @sb_parse_opts_str: + * Parse a string of security data filling in the opts structure + * @options string containing all mount options known by the LSM + * @opts binary data structure usable by the LSM + * @dentry_init_security: + * Compute a context for a dentry as the inode is not yet available + * since NFSv4 has no label backed by an EA anyway. + * @dentry dentry to use in calculating the context. + * @mode mode used to determine resource type. + * @name name of the last path component used to create file + * @ctx pointer to place the pointer to the resulting context in. + * @ctxlen pointer to place the length of the resulting context. + * + * + * Security hooks for inode operations. + * + * @inode_alloc_security: + * Allocate and attach a security structure to @inode->i_security. The + * i_security field is initialized to NULL when the inode structure is + * allocated. + * @inode contains the inode structure. + * Return 0 if operation was successful. + * @inode_free_security: + * @inode contains the inode structure. + * Deallocate the inode security structure and set @inode->i_security to + * NULL. + * @inode_init_security: + * Obtain the security attribute name suffix and value to set on a newly + * created inode and set up the incore security field for the new inode.
+ * This hook is called by the fs code as part of the inode creation + * transaction and provides for atomic labeling of the inode, unlike + * the post_create/mkdir/... hooks called by the VFS. The hook function + * is expected to allocate the name and value via kmalloc, with the caller + * being responsible for calling kfree after using them. + * If the security module does not use security attributes or does + * not wish to put a security attribute on this particular inode, + * then it should return -EOPNOTSUPP to skip this processing. + * @inode contains the inode structure of the newly created inode. + * @dir contains the inode structure of the parent directory. + * @qstr contains the last path component of the new object + * @name will be set to the allocated name suffix (e.g. selinux). + * @value will be set to the allocated attribute value. + * @len will be set to the length of the value. + * Returns 0 if @name and @value have been successfully set, + * -EOPNOTSUPP if no security attribute is needed, or + * -ENOMEM on memory allocation failure. + * @inode_create: + * Check permission to create a regular file. + * @dir contains inode structure of the parent of the new file. + * @dentry contains the dentry structure for the file to be created. + * @mode contains the file mode of the file to be created. + * Return 0 if permission is granted. + * @inode_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing + * link to the file. + * @dir contains the inode structure of the parent directory + * of the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @path_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link + * to the file. + * @new_dir contains the path structure of the parent directory of + * the new link. 
+ * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @inode_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the inode structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @path_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the path structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @inode_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the inode structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @path_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the path structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @inode_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with inode structure @dir. + * @dir contains the inode structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @path_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with path structure @path. + * @dir contains the path structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. 
+ * @inode_rmdir: + * Check the permission to remove a directory. + * @dir contains the inode structure of parent of the directory + * to be removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @path_rmdir: + * Check the permission to remove a directory. + * @dir contains the path structure of parent of the directory to be + * removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @inode_mknod: + * Check permissions when creating a special file (or a socket or a fifo + * file created via the mknod system call). Note that if mknod operation + * is being done for a regular file, then the create hook will be called + * and not this hook. + * @dir contains the inode structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the device number. + * Return 0 if permission is granted. + * @path_mknod: + * Check permissions when creating a file. Note that this hook is called + * even if mknod operation is being done for a regular file. + * @dir contains the path structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the undecoded device number. Use new_decode_dev() to get + * the decoded device number. + * Return 0 if permission is granted. + * @inode_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the inode structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the inode structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_rename: + * Check for permission to rename a file or directory. 
+ * @old_dir contains the path structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the path structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. + * @inode_readlink: + * Check the permission to read the symbolic link. + * @dentry contains the dentry structure for the file link. + * Return 0 if permission is granted. + * @inode_follow_link: + * Check permission to follow a symbolic link when looking up a pathname. + * @dentry contains the dentry structure for the link. + * @inode contains the inode, which itself is not stable in RCU-walk + * @rcu indicates whether we are in RCU-walk mode. + * Return 0 if permission is granted. + * @inode_permission: + * Check permission before accessing an inode. This hook is called by the + * existing Linux permission function, so a security module can use it to + * provide additional checking for existing Linux permission checks. + * Notice that this hook is called when a file is opened (as well as many + * other operations), whereas the file_security_ops permission hook is + * called when the actual read/write operations are performed. + * @inode contains the inode structure to check. + * @mask contains the permission mask. 
+ * Return 0 if permission is granted. + * @inode_setattr: + * Check permission before setting file attributes. Note that the kernel + * call to notify_change is performed from several locations, whenever + * file attributes change (such as when a file is truncated, chown/chmod + * operations, transferring disk quotas, etc). + * @dentry contains the dentry structure for the file. + * @attr is the iattr structure containing the new file attributes. + * Return 0 if permission is granted. + * @path_truncate: + * Check permission before truncating a file. + * @path contains the path structure for the file. + * Return 0 if permission is granted. + * @inode_getattr: + * Check permission before obtaining file attributes. + * @mnt is the vfsmount where the dentry was looked up + * @dentry contains the dentry structure for the file. + * Return 0 if permission is granted. + * @inode_setxattr: + * Check permission before setting the extended attributes + * @value identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_post_setxattr: + * Update inode security field after successful setxattr operation. + * @value identified by @name for @dentry. + * @inode_getxattr: + * Check permission before obtaining the extended attributes + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_listxattr: + * Check permission before obtaining the list of extended attribute + * names for @dentry. + * Return 0 if permission is granted. + * @inode_removexattr: + * Check permission before removing the extended attribute + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_getsecurity: + * Retrieve a copy of the extended attribute representation of the + * security label associated with @name for @inode via @buffer. Note that + * @name is the remainder of the attribute name after the security prefix + * has been removed. 
@alloc is used to specify if the call should return a + * value via the buffer or just the value length. Return size of buffer on + * success. + * @inode_setsecurity: + * Set the security label associated with @name for @inode from the + * extended attribute value @value. @size indicates the size of the + * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. + * Note that @name is the remainder of the attribute name after the + * security. prefix has been removed. + * Return 0 on success. + * @inode_listsecurity: + * Copy the extended attribute names for the security labels + * associated with @inode into @buffer. The maximum size of @buffer + * is specified by @buffer_size. @buffer may be NULL to request + * the size of the buffer required. + * Returns number of bytes used/required on success. + * @inode_need_killpriv: + * Called when an inode has been changed. + * @dentry is the dentry being changed. + * Return <0 on error to abort the inode change operation. + * Return 0 if inode_killpriv does not need to be called. + * Return >0 if inode_killpriv does need to be called. + * @inode_killpriv: + * The setuid bit is being removed. Remove similar security labels. + * Called with the dentry->d_inode->i_mutex held. + * @dentry is the dentry being changed. + * Return 0 on success. If error is returned, then the operation + * causing setuid bit removal is failed. + * @inode_getsecid: + * Get the secid associated with the inode. + * @inode contains a pointer to the inode. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for file operations + * + * @file_permission: + * Check file permissions before accessing an open file. This hook is + * called by various operations that read or write files. A security + * module can use this hook to perform additional checking on these + * operations, e.g.
to revalidate permissions on use to support privilege + * bracketing or policy changes. Notice that this hook is used when the + * actual read/write operations are performed, whereas the + * inode_security_ops hook is called when a file is opened (as well as + * many other operations). + * Caveat: Although this hook can be used to revalidate permissions for + * various system call operations that read or write files, it does not + * address the revalidation of permissions for memory-mapped files. + * Security modules must handle this separately if they need such + * revalidation. + * @file contains the file structure being accessed. + * @mask contains the requested permissions. + * Return 0 if permission is granted. + * @file_alloc_security: + * Allocate and attach a security structure to the file->f_security field. + * The security field is initialized to NULL when the structure is first + * created. + * @file contains the file structure to secure. + * Return 0 if the hook is successful and permission is granted. + * @file_free_security: + * Deallocate and free any security structures stored in file->f_security. + * @file contains the file structure being modified. + * @file_ioctl: + * @file contains the file structure. + * @cmd contains the operation to perform. + * @arg contains the operational arguments. + * Check permission for an ioctl operation on @file. Note that @arg + * sometimes represents a user space pointer; in other cases, it may be a + * simple integer value. When @arg represents a user space pointer, it + * should never be used by the security module. + * Return 0 if permission is granted. + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : + * Check permissions for a mmap operation. The @file may be NULL, e.g. + * if mapping anonymous memory. 
+ * @file contains the file structure for file to map (may be NULL). + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @file_mprotect: + * Check permissions before changing memory access permissions. + * @vma contains the memory region to modify. + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * Return 0 if permission is granted. + * @file_lock: + * Check permission before performing file locking operations. + * Note: this hook mediates both flock and fcntl style locks. + * @file contains the file structure. + * @cmd contains the posix-translated lock operation to perform + * (e.g. F_RDLCK, F_WRLCK). + * Return 0 if permission is granted. + * @file_fcntl: + * Check permission before allowing the file operation specified by @cmd + * from being performed on the file @file. Note that @arg sometimes + * represents a user space pointer; in other cases, it may be a simple + * integer value. When @arg represents a user space pointer, it should + * never be used by the security module. + * @file contains the file structure. + * @cmd contains the operation to be performed. + * @arg contains the operational arguments. + * Return 0 if permission is granted. + * @file_set_fowner: + * Save owner security information (typically from current->security) in + * file->f_security for later use by the send_sigiotask hook. + * @file contains the file structure to update. + * Return 0 on success. + * @file_send_sigiotask: + * Check permission for the file owner @fown to send SIGIO or SIGURG to the + * process @tsk. Note that this hook is sometimes called from interrupt. 
+ * Note that the fown_struct, @fown, is never outside the context of a + * struct file, so the file structure (and associated security information) + * can always be obtained: + * container_of(fown, struct file, f_owner) + * @tsk contains the structure of task receiving signal. + * @fown contains the file owner information. + * @sig is the signal that will be sent. When 0, kernel sends SIGIO. + * Return 0 if permission is granted. + * @file_receive: + * This hook allows security modules to control the ability of a process + * to receive an open file descriptor via socket IPC. + * @file contains the file structure being received. + * Return 0 if permission is granted. + * @file_open + * Save open-time permission checking state for later use upon + * file_permission, and recheck access if anything has changed + * since inode_permission. + * + * Security hooks for task operations. + * + * @task_create: + * Check permission before creating a child process. See the clone(2) + * manual page for definitions of the @clone_flags. + * @clone_flags contains the flags indicating what should be shared. + * Return 0 if permission is granted. + * @task_free: + * @task task being freed + * Handle release of task-related resources. (Note that this can be called + * from interrupt context.) + * @cred_alloc_blank: + * @cred points to the credentials. + * @gfp indicates the atomicity of any memory allocations. + * Only allocate sufficient memory and attach to @cred such that + * cred_transfer() will not get ENOMEM. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_transfer: + * @new points to the new credentials. + * @old points to the original credentials. 
+ * Transfer data from original creds to new creds + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. + * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. + * @kernel_fw_from_file: + * Load firmware from userspace (not called for built-in firmware). + * @file contains the file structure pointing to the file containing + * the firmware to load. This argument will be NULL if the firmware + * was loaded via the uevent-triggered blob-based interface exposed + * by CONFIG_FW_LOADER_USER_HELPER. + * @buf pointer to buffer containing firmware contents. + * @size length of the firmware contents. + * Return 0 if permission is granted. + * @kernel_module_request: + * Ability to trigger the kernel to automatically upcall to userspace for + * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel + * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. + * @task_fix_setuid: + * Update the module's state after setting one or more of the user + * identity attributes of the current process. The @flags parameter + * indicates which of the set*uid system calls invoked this hook. + * @new is the set of credentials that will be installed.
Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaced. + * @flags contains one of the LSM_SETID_* values. + * Return 0 on success. + * @task_setpgid: + * Check permission before setting the process group identifier of the + * process @p to @pgid. + * @p contains the task_struct for process being modified. + * @pgid contains the new pgid. + * Return 0 if permission is granted. + * @task_getpgid: + * Check permission before getting the process group identifier of the + * process @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsid: + * Check permission before getting the session identifier of the process + * @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsecid: + * Retrieve the security identifier of the process @p into @secid. + * @p contains the task_struct for the process. + * In case of failure, @secid will be set to zero. + * + * @task_setnice: + * Check permission before setting the nice value of @p to @nice. + * @p contains the task_struct of process. + * @nice contains the new nice value. + * Return 0 if permission is granted. + * @task_setioprio: + * Check permission before setting the ioprio value of @p to @ioprio. + * @p contains the task_struct of process. + * @ioprio contains the new ioprio value. + * Return 0 if permission is granted. + * @task_getioprio: + * Check permission before getting the ioprio value of @p. + * @p contains the task_struct of process. + * Return 0 if permission is granted. + * @task_setrlimit: + * Check permission before setting the resource limits of the current + * process for @resource to @new_rlim. The old resource limit values can + * be examined by dereferencing (current->signal->rlim + resource). + * @resource contains the resource whose limit is being set. + * @new_rlim contains the new limits for @resource.
+ * Return 0 if permission is granted. + * @task_setscheduler: + * Check permission before setting scheduling policy and/or parameters of + * process @p based on @policy and @lp. + * @p contains the task_struct for process. + * @policy contains the scheduling policy. + * @lp contains the scheduling parameters. + * Return 0 if permission is granted. + * @task_getscheduler: + * Check permission before obtaining scheduling information for process + * @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_movememory: + * Check permission before moving memory owned by process @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_kill: + * Check permission before sending signal @sig to @p. @info can be NULL, + * the constant 1, or a pointer to a siginfo structure. If @info is 1 or + * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming + * from the kernel and should typically be permitted. + * SIGIO signals are handled separately by the send_sigiotask hook in + * file_security_ops. + * @p contains the task_struct for process. + * @info contains the signal information. + * @sig contains the signal value. + * @secid contains the sid of the process where the signal originated. + * Return 0 if permission is granted. + * @task_wait: + * Check permission before allowing a process to reap a child process @p + * and collect its status information. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_prctl: + * Check permission before performing a process control operation on the + * current process. + * @option contains the operation. + * @arg2 contains an argument. + * @arg3 contains an argument. + * @arg4 contains an argument. + * @arg5 contains an argument. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value.
+ * @task_to_inode: + * Set the security attributes for an inode based on an associated task's + * security attributes, e.g. for /proc/pid inodes. + * @p contains the task_struct for the task. + * @inode contains the inode structure for the inode. + * + * Security hooks for Netlink messaging. + * + * @netlink_send: + * Save security information for a netlink message so that permission + * checking can be performed when the message is processed. The security + * information can be saved using the eff_cap field of the + * netlink_skb_parms structure. Also may be used to provide fine + * grained control over message transmission. + * @sk associated sock of task sending the message. + * @skb contains the sk_buff structure for the netlink message. + * Return 0 if the information was successfully saved and message + * is allowed to be transmitted. + * + * Security hooks for Unix domain networking. + * + * @unix_stream_connect: + * Check permissions before establishing a Unix domain stream connection + * between @sock and @other. + * @sock contains the sock structure. + * @other contains the peer sock structure. + * @newsk contains the new sock structure. + * Return 0 if permission is granted. + * @unix_may_send: + * Check permissions before connecting or sending datagrams from @sock to + * @other. + * @sock contains the socket structure. + * @other contains the peer socket structure. + * Return 0 if permission is granted. + * + * The @unix_stream_connect and @unix_may_send hooks were necessary because + * Linux provides an alternative to the conventional file name space for Unix + * domain sockets. Whereas binding and connecting to sockets in the file name + * space is mediated by the typical file permissions (and caught by the mknod + * and permission hooks in inode_security_ops), binding and connecting to + * sockets in the abstract name space is completely unmediated. 
Sufficient + * control of Unix domain sockets in the abstract name space isn't possible + * using only the socket layer hooks, since we need to know the actual target + * socket, which is not looked up until we are inside the af_unix code. + * + * Security hooks for socket operations. + * + * @socket_create: + * Check permissions prior to creating a new socket. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * Return 0 if permission is granted. + * @socket_post_create: + * This hook allows a module to update or allocate a per-socket security + * structure. Note that the security field was not added directly to the + * socket structure, but rather, the socket security information is stored + * in the associated inode. Typically, the inode alloc_security hook will + * allocate and attach security information to + * sock->inode->i_security. This hook may be used to update the + * sock->inode->i_security field with additional information that wasn't + * available when the inode was allocated. + * @sock contains the newly created socket structure. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * @socket_bind: + * Check permission before socket protocol layer bind operation is + * performed and the socket @sock is bound to the address specified in the + * @address parameter. + * @sock contains the socket structure. + * @address contains the address to bind to. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_connect: + * Check permission before socket protocol layer connect operation + * attempts to connect socket @sock to a remote address, @address. + * @sock contains the socket structure.
+ * @address contains the address of remote endpoint. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_listen: + * Check permission before socket protocol layer listen operation. + * @sock contains the socket structure. + * @backlog contains the maximum length for the pending connection queue. + * Return 0 if permission is granted. + * @socket_accept: + * Check permission before accepting a new connection. Note that the new + * socket, @newsock, has been created and some information copied to it, + * but the accept operation has not actually been performed. + * @sock contains the listening socket structure. + * @newsock contains the newly created server socket for connection. + * Return 0 if permission is granted. + * @socket_sendmsg: + * Check permission before transmitting a message to another socket. + * @sock contains the socket structure. + * @msg contains the message to be transmitted. + * @size contains the size of message. + * Return 0 if permission is granted. + * @socket_recvmsg: + * Check permission before receiving a message from a socket. + * @sock contains the socket structure. + * @msg contains the message structure. + * @size contains the size of message structure. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @socket_getsockname: + * Check permission before the local address (name) of the socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getpeername: + * Check permission before the remote address (name) of a socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getsockopt: + * Check permissions before retrieving the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to retrieve option from. 
+ * @optname contains the name of option to retrieve. + * Return 0 if permission is granted. + * @socket_setsockopt: + * Check permissions before setting the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to set options for. + * @optname contains the name of the option to set. + * Return 0 if permission is granted. + * @socket_shutdown: + * Checks permission before all or part of a connection on the socket + * @sock is shut down. + * @sock contains the socket structure. + * @how contains the flag indicating how future sends and receives + * are handled. + * Return 0 if permission is granted. + * @socket_sock_rcv_skb: + * Check permissions on incoming network packets. This hook is distinct + * from Netfilter's IP input hooks since it is the first time that the + * incoming sk_buff @skb has been associated with a particular socket, @sk. + * Must not sleep inside this hook because some callers hold spinlocks. + * @sk contains the sock (not socket) associated with the incoming sk_buff. + * @skb contains the incoming network data. + * @socket_getpeersec_stream: + * This hook allows the security module to provide peer socket security + * state for unix or connected tcp sockets to userspace via getsockopt + * SO_GETPEERSEC. For tcp sockets this can be meaningful if the + * socket is associated with an ipsec SA. + * @sock is the local socket. + * @optval userspace memory where the security state is to be copied. + * @optlen userspace int where the module should copy the actual length + * of the security state. + * @len as input is the maximum length to copy to userspace provided + * by the caller. + * Return 0 if all is well, otherwise, typical getsockopt return + * values. + * @socket_getpeersec_dgram: + * This hook allows the security module to provide peer socket security + * state for udp sockets on a per-packet basis to userspace via + * getsockopt SO_GETPEERSEC. 
The application must first have indicated + * the IP_PASSSEC option via getsockopt. It can then retrieve the + * security state returned by this hook for a packet via the SCM_SECURITY + * ancillary message type. + * @skb is the skbuff for the packet being queried + * @secdata is a pointer to a buffer in which to copy the security data + * @seclen is the maximum length for @secdata + * Return 0 on success, error on failure. + * @sk_alloc_security: + * Allocate and attach a security structure to the sk->sk_security field, + * which is used to copy security attributes between local stream sockets. + * @sk_free_security: + * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching + * of network authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken + * from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @inet_conn_established: + * Sets the connection's peersid to the secmark on skb. + * @secmark_relabel_packet: + * check if the process should be allowed to relabel packets to + * the given secid + * @security_secmark_refcount_inc + * tells the LSM to increment the number of secmark labeling rules loaded + * @security_secmark_refcount_dec + * tells the LSM to decrement the number of secmark labeling rules loaded + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. + * @tun_dev_alloc_security: + * This hook allows a module to allocate a security structure for a TUN + * device. + * @security pointer to a security structure pointer. + * Returns a zero on success, negative values on failure. + * @tun_dev_free_security: + * This hook allows a module to free the security structure for a TUN + * device. 
+ * @security pointer to the TUN device's security structure + * @tun_dev_create: + * Check permissions prior to creating a new TUN device. + * @tun_dev_attach_queue: + * Check permissions prior to attaching to a TUN device queue. + * @security pointer to the TUN device's security structure. + * @tun_dev_attach: + * This hook can be used by the module to update any security state + * associated with the TUN device's sock structure. + * @sk contains the existing sock structure. + * @security pointer to the TUN device's security structure. + * @tun_dev_open: + * This hook can be used by the module to update any security state + * associated with the TUN device's security structure. + * @security pointer to the TUN device's security structure. + * + * Security hooks for XFRM operations. + * + * @xfrm_policy_alloc_security: + * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy + * Database used by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level policy update program (e.g., setkey). + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. + * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation + * @xfrm_policy_clone_security: + * @old_ctx contains an existing xfrm_sec_ctx. + * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. + * Allocate a security structure in new_ctxp that contains the + * information from the old_ctx structure. + * Return 0 if operation was successful (memory to allocate). + * @xfrm_policy_free_security: + * @ctx contains the xfrm_sec_ctx + * Deallocate xp->security. + * @xfrm_policy_delete_security: + * @ctx contains the xfrm_sec_ctx. + * Authorize deletion of xp->security. + * @xfrm_state_alloc: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system.
+ * @sec_ctx contains the security context information being provided by + * the user-level SA generation program (e.g., setkey or racoon). + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to sec_ctx. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_alloc_acquire: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @polsec contains the policy's security context. + * @secid contains the secid from which to take the mls portion of the + * context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to secid. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_free_security: + * @x contains the xfrm_state. + * Deallocate x->security. + * @xfrm_state_delete_security: + * @x contains the xfrm_state. + * Authorize deletion of x->security. + * @xfrm_policy_lookup: + * @ctx contains the xfrm_sec_ctx for which the access control is being + * checked. + * @fl_secid contains the flow security label that is used to authorize + * access to the policy xp. + * @dir contains the direction of the flow (input or output). + * Check permission when a flow selects a xfrm_policy for processing + * XFRMs on a packet. The hook is called when selecting either a + * per-socket policy or a generic xfrm policy. + * Return 0 if permission is granted, -ESRCH otherwise, or -errno + * on other errors. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @secid points to the flow key secid to set. 
+ * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. + * + * Security hooks affecting all Key Management operations + * + * @key_alloc: + * Permit allocation of a key and assign security data. Note that key does + * not have a serial number assigned at this point. + * @key points to the key. + * @flags is the allocation flags + * Return 0 if permission is granted, -ve error otherwise. + * @key_free: + * Notification of destruction; free security data. + * @key points to the key. + * No return value. + * @key_permission: + * See whether a specific operational right is granted to a process on a + * key. + * @key_ref refers to the key (key pointer + possession attribute bit). + * @cred points to the credentials to provide the context against which to + * evaluate the security data on the key. + * @perm describes the combination of permissions required of this key. + * Return 0 if permission is granted, -ve error otherwise. + * @key_getsecurity: + * Get a textual representation of the security context attached to a key + * for the purposes of honouring KEYCTL_GETSECURITY. This function + * allocates the storage for the NUL-terminated string and the caller + * should free it. + * @key points to the key to be queried. + * @_buffer points to a pointer that should be set to point to the + * resulting string (if no label or an error occurs). + * Return the length of the string (including terminating NUL) or -ve if + * an error. + * May also return 0 (and a NULL buffer pointer) if there is no label. + * + * Security hooks affecting all System V IPC operations. + * + * @ipc_permission: + * Check permissions for access to IPC + * @ipcp contains the kernel IPC permission structure + * @flag contains the desired (requested) permission set + * Return 0 if permission is granted. + * @ipc_getsecid: + * Get the secid associated with the ipc object. 
+ * @ipcp contains the kernel IPC permission structure. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for individual messages held in System V IPC message queues + * @msg_msg_alloc_security: + * Allocate and attach a security structure to the msg->security field. + * The security field is initialized to NULL when the structure is first + * created. + * @msg contains the message structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_msg_free_security: + * Deallocate the security structure for this message. + * @msg contains the message structure to be modified. + * + * Security hooks for System V IPC Message Queues + * + * @msg_queue_alloc_security: + * Allocate and attach a security structure to the + * msq->q_perm.security field. The security field is initialized to + * NULL when the structure is first created. + * @msq contains the message queue structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_queue_free_security: + * Deallocate security structure for this message queue. + * @msq contains the message queue structure to be modified. + * @msg_queue_associate: + * Check permission when a message queue is requested through the + * msgget system call. This hook is only called when returning the + * message queue identifier for an existing message queue, not when a + * new message queue is created. + * @msq contains the message queue to act upon. + * @msqflg contains the operation control flags. + * Return 0 if permission is granted. + * @msg_queue_msgctl: + * Check permission when a message control operation specified by @cmd + * is to be performed on the message queue @msq. + * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. + * @msq contains the message queue to act upon. May be NULL. + * @cmd contains the operation to be performed. 
+ * Return 0 if permission is granted. + * @msg_queue_msgsnd: + * Check permission before a message, @msg, is enqueued on the message + * queue, @msq. + * @msq contains the message queue to send message to. + * @msg contains the message to be enqueued. + * @msqflg contains operational flags. + * Return 0 if permission is granted. + * @msg_queue_msgrcv: + * Check permission before a message, @msg, is removed from the message + * queue, @msq. The @target task structure contains a pointer to the + * process that will be receiving the message (not equal to the current + * process when inline receives are being performed). + * @msq contains the message queue to retrieve message from. + * @msg contains the message destination. + * @target contains the task structure for recipient process. + * @type contains the type of message requested. + * @mode contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Shared Memory Segments + * + * @shm_alloc_security: + * Allocate and attach a security structure to the shp->shm_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @shp contains the shared memory structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @shm_free_security: + * Deallocate the security struct for this memory segment. + * @shp contains the shared memory structure to be modified. + * @shm_associate: + * Check permission when a shared memory region is requested through the + * shmget system call. This hook is only called when returning the shared + * memory region identifier for an existing region, not when a new shared + * memory region is created. + * @shp contains the shared memory structure to be modified. + * @shmflg contains the operation control flags. + * Return 0 if permission is granted. 
+ * @shm_shmctl: + * Check permission when a shared memory control operation specified by + * @cmd is to be performed on the shared memory region @shp. + * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. + * @shp contains shared memory structure to be modified. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @shm_shmat: + * Check permissions prior to allowing the shmat system call to attach the + * shared memory segment @shp to the data segment of the calling process. + * The attaching address is specified by @shmaddr. + * @shp contains the shared memory structure to be modified. + * @shmaddr contains the address to attach memory region to. + * @shmflg contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Semaphores + * + * @sem_alloc_security: + * Allocate and attach a security structure to the sma->sem_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @sma contains the semaphore structure + * Return 0 if operation was successful and permission is granted. + * @sem_free_security: + * deallocate security struct for this semaphore + * @sma contains the semaphore structure. + * @sem_associate: + * Check permission when a semaphore is requested through the semget + * system call. This hook is only called when returning the semaphore + * identifier for an existing semaphore, not when a new one must be + * created. + * @sma contains the semaphore structure. + * @semflg contains the operation control flags. + * Return 0 if permission is granted. + * @sem_semctl: + * Check permission when a semaphore operation specified by @cmd is to be + * performed on the semaphore @sma. The @sma may be NULL, e.g. for + * IPC_INFO or SEM_INFO. + * @sma contains the semaphore structure. May be NULL. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. 
+ * @sem_semop: + * Check permissions before performing operations on members of the + * semaphore set @sma. If the @alter flag is nonzero, the semaphore set + * may be modified. + * @sma contains the semaphore structure. + * @sops contains the operations to perform. + * @nsops contains the number of operations to perform. + * @alter contains the flag indicating whether changes are to be made. + * Return 0 if permission is granted. + * + * @binder_set_context_mgr: + * Check whether @mgr is allowed to be the binder context manager. + * @mgr contains the task_struct for the task being registered. + * Return 0 if permission is granted. + * @binder_transaction: + * Check whether @from is allowed to invoke a binder transaction call + * to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_binder: + * Check whether @from is allowed to transfer a binder reference to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_file: + * Check whether @from is allowed to transfer @file to @to. + * @from contains the task_struct for the sending task. + * @file contains the struct file being transferred. + * @to contains the task_struct for the receiving task. + * + * @ptrace_access_check: + * Check permission before allowing the current process to trace the + * @child process. + * Security modules may also want to perform a process tracing check + * during an execve in the bprm_set_creds hook of + * binprm_security_ops if the process is being traced and its security + * attributes would be changed by the execve. + * @child contains the task_struct structure for the target process. + * @mode contains the PTRACE_MODE flags indicating the form of access. + * Return 0 if permission is granted.
+ * @ptrace_traceme: + * Check that the @parent process has sufficient permission to trace the + * current process before allowing the current process to present itself + * to the @parent process for tracing. + * @parent contains the task_struct structure for debugger process. + * Return 0 if permission is granted. + * @capget: + * Get the @effective, @inheritable, and @permitted capability sets for + * the @target process. The hook may also perform permission checking to + * determine if the current process is allowed to see the capability sets + * of the @target process. + * @target contains the task_struct structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 if the capability sets were successfully obtained. + * @capset: + * Set the @effective, @inheritable, and @permitted capability sets for + * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. + * @capable: + * Check whether the credentials @cred have the @cap capability in the + * user namespace @ns. + * @cred contains the credentials to use. + * @ns contains the user namespace we want the capability in. + * @cap contains the capability <include/linux/capability.h>. + * @audit indicates whether to write an audit message or not. + * Return 0 if the capability is granted for @cred. + * @syslog: + * Check permission before accessing the kernel message ring or changing + * logging to the console. + * See the syslog(2) manual page for an explanation of the @type values. + * @type contains the type of action.
+ * Return 0 if permission is granted. + * @settime: + * Check permission to change the system time. + * struct timespec and timezone are defined in include/linux/time.h + * @ts contains new time + * @tz contains new timezone + * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. + * @pages contains the number of pages. + * Return 0 if permission is granted. + * + * @ismaclabel: + * Check if the extended attribute specified by @name + * represents a MAC label. Returns 1 if name is a MAC + * attribute, otherwise returns 0. + * @name full extended attribute name to check against + * LSM as a MAC label. + * + * @secid_to_secctx: + * Convert secid to security context. If secdata is NULL the length of + * the result will be returned in seclen, but no secdata will be returned. + * This does mean that the length could change between calls to check the + * length and the next call which actually allocates and returns the + * secdata. + * @secid contains the security ID. + * @secdata contains the pointer that stores the converted security + * context. + * @seclen points to the location that contains the length of the data. + * @secctx_to_secid: + * Convert security context to secid. + * @secid contains the pointer to the generated security ID. + * @secdata contains the security context. + * + * @release_secctx: + * Release the security context. + * @secdata contains the security context. + * @seclen contains the length of the security context. + * + * Security hooks for Audit + * + * @audit_rule_init: + * Allocate and initialize an LSM audit rule structure. + * @field contains the required Audit action. + * Field flags are defined in include/linux/audit.h + * @op contains the operator the rule uses. + * @rulestr contains the context to which the rule will be applied.
+ * @lsmrule contains a pointer to receive the result. + * Return 0 if @lsmrule has been successfully set, + * -EINVAL in case of an invalid rule. + * + * @audit_rule_known: + * Specifies whether given @krule contains any fields related to + * current LSM. + * @krule contains the audit rule of interest. + * Return 1 if a relation is found, 0 otherwise. + * + * @audit_rule_match: + * Determine if given @secid matches a rule previously approved + * by the audit_rule_known hook. + * @secid contains the security id in question. + * @field contains the field which relates to current LSM. + * @op contains the operator that will be used for matching. + * @lsmrule points to the audit rule that will be checked against. + * @actx points to the audit context associated with the check. + * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. + * + * @audit_rule_free: + * Deallocate the LSM audit rule structure previously allocated by + * audit_rule_init. + * @lsmrule contains the allocated rule. + * + * @inode_notifysecctx: + * Notify the security module of what the security context of an inode + * should be. Initializes the incore security context managed by the + * security module for this inode. Example usage: NFS client invokes + * this hook to initialize the security context in its incore inode to the + * value provided by the server for the file when the server returned the + * file's attributes to the client. + * + * Must be called with inode->i_mutex locked. + * + * @inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_setsecctx: + * Change the security context of an inode. Updates the + * incore security context managed by the security module and invokes the + * fs code as needed (via __vfs_setxattr_noperm) to update any backing + * xattrs that represent the context.
Example usage: NFS server invokes + * this hook to change the security context in its incore inode and on the + * backing filesystem to a value provided by the client on a SETATTR + * operation. + * + * Must be called with inode->i_mutex locked. + * + * @dentry contains the inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_getsecctx: + * On success, returns 0 and fills out @ctx and @ctxlen with the security + * context for the given @inode. + * + * @inode we wish to get the security context of. + * @ctx is a pointer in which to place the allocated security context. + * @ctxlen points to the place to put the length of @ctx. + * This is the main security structure. + */ + +union security_list_options { + int (*binder_set_context_mgr)(struct task_struct *mgr); + int (*binder_transaction)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_binder)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_file)(struct task_struct *from, + struct task_struct *to, + struct file *file); + + int (*ptrace_access_check)(struct task_struct *child, + unsigned int mode); + int (*ptrace_traceme)(struct task_struct *parent); + int (*capget)(struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted); + int (*capset)(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); + int (*capable)(const struct cred *cred, struct user_namespace *ns, + int cap, int audit); + int (*quotactl)(int cmds, int type, int id, struct super_block *sb); + int (*quota_on)(struct dentry *dentry); + int (*syslog)(int type); + int (*settime)(const struct timespec *ts, const struct timezone *tz); + int (*vm_enough_memory)(struct mm_struct *mm, long pages); + + int (*bprm_set_creds)(struct linux_binprm *bprm); + int 
(*bprm_check_security)(struct linux_binprm *bprm); + int (*bprm_secureexec)(struct linux_binprm *bprm); + void (*bprm_committing_creds)(struct linux_binprm *bprm); + void (*bprm_committed_creds)(struct linux_binprm *bprm); + + int (*sb_alloc_security)(struct super_block *sb); + void (*sb_free_security)(struct super_block *sb); + int (*sb_copy_data)(char *orig, char *copy); + int (*sb_remount)(struct super_block *sb, void *data); + int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); + int (*sb_show_options)(struct seq_file *m, struct super_block *sb); + int (*sb_statfs)(struct dentry *dentry); + int (*sb_mount)(const char *dev_name, struct path *path, + const char *type, unsigned long flags, void *data); + int (*sb_umount)(struct vfsmount *mnt, int flags); + int (*sb_pivotroot)(struct path *old_path, struct path *new_path); + int (*sb_set_mnt_opts)(struct super_block *sb, + struct security_mnt_opts *opts, + unsigned long kern_flags, + unsigned long *set_kern_flags); + int (*sb_clone_mnt_opts)(const struct super_block *oldsb, + struct super_block *newsb); + int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts); + int (*dentry_init_security)(struct dentry *dentry, int mode, + struct qstr *name, void **ctx, + u32 *ctxlen); + + +#ifdef CONFIG_SECURITY_PATH + int (*path_unlink)(struct path *dir, struct dentry *dentry); + int (*path_mkdir)(struct path *dir, struct dentry *dentry, + umode_t mode); + int (*path_rmdir)(struct path *dir, struct dentry *dentry); + int (*path_mknod)(struct path *dir, struct dentry *dentry, + umode_t mode, unsigned int dev); + int (*path_truncate)(struct path *path); + int (*path_symlink)(struct path *dir, struct dentry *dentry, + const char *old_name); + int (*path_link)(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry); + int (*path_chmod)(struct path *path, 
umode_t mode); + int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid); + int (*path_chroot)(struct path *path); +#endif + + int (*inode_alloc_security)(struct inode *inode); + void (*inode_free_security)(struct inode *inode); + int (*inode_init_security)(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const char **name, void **value, + size_t *len); + int (*inode_create)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_link)(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); + int (*inode_unlink)(struct inode *dir, struct dentry *dentry); + int (*inode_symlink)(struct inode *dir, struct dentry *dentry, + const char *old_name); + int (*inode_mkdir)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_rmdir)(struct inode *dir, struct dentry *dentry); + int (*inode_mknod)(struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev); + int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry); + int (*inode_readlink)(struct dentry *dentry); + int (*inode_follow_link)(struct dentry *dentry, struct inode *inode, + bool rcu); + int (*inode_permission)(struct inode *inode, int mask); + int (*inode_setattr)(struct dentry *dentry, struct iattr *attr); + int (*inode_getattr)(const struct path *path); + int (*inode_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + void (*inode_post_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags); + int (*inode_getxattr)(struct dentry *dentry, const char *name); + int (*inode_listxattr)(struct dentry *dentry); + int (*inode_removexattr)(struct dentry *dentry, const char *name); + int (*inode_need_killpriv)(struct dentry *dentry); + int (*inode_killpriv)(struct dentry *dentry); + int (*inode_getsecurity)(const struct inode *inode, const char *name, + void **buffer, bool alloc); + 
int (*inode_setsecurity)(struct inode *inode, const char *name, + const void *value, size_t size, + int flags); + int (*inode_listsecurity)(struct inode *inode, char *buffer, + size_t buffer_size); + void (*inode_getsecid)(const struct inode *inode, u32 *secid); + + int (*file_permission)(struct file *file, int mask); + int (*file_alloc_security)(struct file *file); + void (*file_free_security)(struct file *file); + int (*file_ioctl)(struct file *file, unsigned int cmd, + unsigned long arg); + int (*mmap_addr)(unsigned long addr); + int (*mmap_file)(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); + int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot, + unsigned long prot); + int (*file_lock)(struct file *file, unsigned int cmd); + int (*file_fcntl)(struct file *file, unsigned int cmd, + unsigned long arg); + void (*file_set_fowner)(struct file *file); + int (*file_send_sigiotask)(struct task_struct *tsk, + struct fown_struct *fown, int sig); + int (*file_receive)(struct file *file); + int (*file_open)(struct file *file, const struct cred *cred); + + int (*task_create)(unsigned long clone_flags); + void (*task_free)(struct task_struct *task); + int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); + void (*cred_free)(struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_transfer)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); + int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); + int (*task_fix_setuid)(struct cred *new, const struct cred *old, + int flags); + int (*task_setpgid)(struct task_struct *p, pid_t pgid); + int (*task_getpgid)(struct task_struct *p); + int (*task_getsid)(struct task_struct *p); + void 
(*task_getsecid)(struct task_struct *p, u32 *secid); + int (*task_setnice)(struct task_struct *p, int nice); + int (*task_setioprio)(struct task_struct *p, int ioprio); + int (*task_getioprio)(struct task_struct *p); + int (*task_setrlimit)(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); + int (*task_setscheduler)(struct task_struct *p); + int (*task_getscheduler)(struct task_struct *p); + int (*task_movememory)(struct task_struct *p); + int (*task_kill)(struct task_struct *p, struct siginfo *info, + int sig, u32 secid); + int (*task_wait)(struct task_struct *p); + int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); + void (*task_to_inode)(struct task_struct *p, struct inode *inode); + + int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag); + void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid); + + int (*msg_msg_alloc_security)(struct msg_msg *msg); + void (*msg_msg_free_security)(struct msg_msg *msg); + + int (*msg_queue_alloc_security)(struct msg_queue *msq); + void (*msg_queue_free_security)(struct msg_queue *msq); + int (*msg_queue_associate)(struct msg_queue *msq, int msqflg); + int (*msg_queue_msgctl)(struct msg_queue *msq, int cmd); + int (*msg_queue_msgsnd)(struct msg_queue *msq, struct msg_msg *msg, + int msqflg); + int (*msg_queue_msgrcv)(struct msg_queue *msq, struct msg_msg *msg, + struct task_struct *target, long type, + int mode); + + int (*shm_alloc_security)(struct shmid_kernel *shp); + void (*shm_free_security)(struct shmid_kernel *shp); + int (*shm_associate)(struct shmid_kernel *shp, int shmflg); + int (*shm_shmctl)(struct shmid_kernel *shp, int cmd); + int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr, + int shmflg); + + int (*sem_alloc_security)(struct sem_array *sma); + void (*sem_free_security)(struct sem_array *sma); + int (*sem_associate)(struct sem_array *sma, int semflg); + int (*sem_semctl)(struct sem_array *sma, int 
cmd); + int (*sem_semop)(struct sem_array *sma, struct sembuf *sops, + unsigned nsops, int alter); + + int (*netlink_send)(struct sock *sk, struct sk_buff *skb); + + void (*d_instantiate)(struct dentry *dentry, struct inode *inode); + + int (*getprocattr)(struct task_struct *p, char *name, char **value); + int (*setprocattr)(struct task_struct *p, char *name, void *value, + size_t size); + int (*ismaclabel)(const char *name); + int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); + int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); + void (*release_secctx)(char *secdata, u32 seclen); + + int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); + int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); + int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); + +#ifdef CONFIG_SECURITY_NETWORK + int (*unix_stream_connect)(struct sock *sock, struct sock *other, + struct sock *newsk); + int (*unix_may_send)(struct socket *sock, struct socket *other); + + int (*socket_create)(int family, int type, int protocol, int kern); + int (*socket_post_create)(struct socket *sock, int family, int type, + int protocol, int kern); + int (*socket_bind)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_connect)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_listen)(struct socket *sock, int backlog); + int (*socket_accept)(struct socket *sock, struct socket *newsock); + int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg, + int size); + int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg, + int size, int flags); + int (*socket_getsockname)(struct socket *sock); + int (*socket_getpeername)(struct socket *sock); + int (*socket_getsockopt)(struct socket *sock, int level, int optname); + int (*socket_setsockopt)(struct socket *sock, int level, int optname); + int (*socket_shutdown)(struct socket *sock, int how); + int 
(*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb); + int (*socket_getpeersec_stream)(struct socket *sock, + char __user *optval, + int __user *optlen, unsigned len); + int (*socket_getpeersec_dgram)(struct socket *sock, + struct sk_buff *skb, u32 *secid); + int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority); + void (*sk_free_security)(struct sock *sk); + void (*sk_clone_security)(const struct sock *sk, struct sock *newsk); + void (*sk_getsecid)(struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock *sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, + const struct request_sock *req); + void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb); + int (*secmark_relabel_packet)(u32 secid); + void (*secmark_refcount_inc)(void); + void (*secmark_refcount_dec)(void); + void (*req_classify_flow)(const struct request_sock *req, + struct flowi *fl); + int (*tun_dev_alloc_security)(void **security); + void (*tun_dev_free_security)(void *security); + int (*tun_dev_create)(void); + int (*tun_dev_attach_queue)(void *security); + int (*tun_dev_attach)(struct sock *sk, void *security); + int (*tun_dev_open)(void *security); +#endif /* CONFIG_SECURITY_NETWORK */ + +#ifdef CONFIG_SECURITY_NETWORK_XFRM + int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp); + int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctx); + void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_state_alloc)(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_acquire)(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid); + void (*xfrm_state_free_security)(struct xfrm_state *x); + int (*xfrm_state_delete_security)(struct 
xfrm_state *x); + int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, + u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, + const struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + + /* key management security hooks */ +#ifdef CONFIG_KEYS + int (*key_alloc)(struct key *key, const struct cred *cred, + unsigned long flags); + void (*key_free)(struct key *key); + int (*key_permission)(key_ref_t key_ref, const struct cred *cred, + unsigned perm); + int (*key_getsecurity)(struct key *key, char **_buffer); +#endif /* CONFIG_KEYS */ + +#ifdef CONFIG_AUDIT + int (*audit_rule_init)(u32 field, u32 op, char *rulestr, + void **lsmrule); + int (*audit_rule_known)(struct audit_krule *krule); + int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule, + struct audit_context *actx); + void (*audit_rule_free)(void *lsmrule); +#endif /* CONFIG_AUDIT */ +}; + +struct security_hook_heads { + struct list_head binder_set_context_mgr; + struct list_head binder_transaction; + struct list_head binder_transfer_binder; + struct list_head binder_transfer_file; + struct list_head ptrace_access_check; + struct list_head ptrace_traceme; + struct list_head capget; + struct list_head capset; + struct list_head capable; + struct list_head quotactl; + struct list_head quota_on; + struct list_head syslog; + struct list_head settime; + struct list_head vm_enough_memory; + struct list_head bprm_set_creds; + struct list_head bprm_check_security; + struct list_head bprm_secureexec; + struct list_head bprm_committing_creds; + struct list_head bprm_committed_creds; + struct list_head sb_alloc_security; + struct list_head sb_free_security; + struct list_head sb_copy_data; + struct list_head sb_remount; + struct list_head sb_kern_mount; + struct list_head sb_show_options; + struct list_head sb_statfs; + struct list_head sb_mount; + struct list_head 
sb_umount; + struct list_head sb_pivotroot; + struct list_head sb_set_mnt_opts; + struct list_head sb_clone_mnt_opts; + struct list_head sb_parse_opts_str; + struct list_head dentry_init_security; +#ifdef CONFIG_SECURITY_PATH + struct list_head path_unlink; + struct list_head path_mkdir; + struct list_head path_rmdir; + struct list_head path_mknod; + struct list_head path_truncate; + struct list_head path_symlink; + struct list_head path_link; + struct list_head path_rename; + struct list_head path_chmod; + struct list_head path_chown; + struct list_head path_chroot; +#endif + struct list_head inode_alloc_security; + struct list_head inode_free_security; + struct list_head inode_init_security; + struct list_head inode_create; + struct list_head inode_link; + struct list_head inode_unlink; + struct list_head inode_symlink; + struct list_head inode_mkdir; + struct list_head inode_rmdir; + struct list_head inode_mknod; + struct list_head inode_rename; + struct list_head inode_readlink; + struct list_head inode_follow_link; + struct list_head inode_permission; + struct list_head inode_setattr; + struct list_head inode_getattr; + struct list_head inode_setxattr; + struct list_head inode_post_setxattr; + struct list_head inode_getxattr; + struct list_head inode_listxattr; + struct list_head inode_removexattr; + struct list_head inode_need_killpriv; + struct list_head inode_killpriv; + struct list_head inode_getsecurity; + struct list_head inode_setsecurity; + struct list_head inode_listsecurity; + struct list_head inode_getsecid; + struct list_head file_permission; + struct list_head file_alloc_security; + struct list_head file_free_security; + struct list_head file_ioctl; + struct list_head mmap_addr; + struct list_head mmap_file; + struct list_head file_mprotect; + struct list_head file_lock; + struct list_head file_fcntl; + struct list_head file_set_fowner; + struct list_head file_send_sigiotask; + struct list_head file_receive; + struct list_head file_open; + struct 
list_head task_create; + struct list_head task_free; + struct list_head cred_alloc_blank; + struct list_head cred_free; + struct list_head cred_prepare; + struct list_head cred_transfer; + struct list_head kernel_act_as; + struct list_head kernel_create_files_as; + struct list_head kernel_fw_from_file; + struct list_head kernel_module_request; + struct list_head kernel_module_from_file; + struct list_head task_fix_setuid; + struct list_head task_setpgid; + struct list_head task_getpgid; + struct list_head task_getsid; + struct list_head task_getsecid; + struct list_head task_setnice; + struct list_head task_setioprio; + struct list_head task_getioprio; + struct list_head task_setrlimit; + struct list_head task_setscheduler; + struct list_head task_getscheduler; + struct list_head task_movememory; + struct list_head task_kill; + struct list_head task_wait; + struct list_head task_prctl; + struct list_head task_to_inode; + struct list_head ipc_permission; + struct list_head ipc_getsecid; + struct list_head msg_msg_alloc_security; + struct list_head msg_msg_free_security; + struct list_head msg_queue_alloc_security; + struct list_head msg_queue_free_security; + struct list_head msg_queue_associate; + struct list_head msg_queue_msgctl; + struct list_head msg_queue_msgsnd; + struct list_head msg_queue_msgrcv; + struct list_head shm_alloc_security; + struct list_head shm_free_security; + struct list_head shm_associate; + struct list_head shm_shmctl; + struct list_head shm_shmat; + struct list_head sem_alloc_security; + struct list_head sem_free_security; + struct list_head sem_associate; + struct list_head sem_semctl; + struct list_head sem_semop; + struct list_head netlink_send; + struct list_head d_instantiate; + struct list_head getprocattr; + struct list_head setprocattr; + struct list_head ismaclabel; + struct list_head secid_to_secctx; + struct list_head secctx_to_secid; + struct list_head release_secctx; + struct list_head inode_notifysecctx; + struct list_head 
inode_setsecctx; + struct list_head inode_getsecctx; +#ifdef CONFIG_SECURITY_NETWORK + struct list_head unix_stream_connect; + struct list_head unix_may_send; + struct list_head socket_create; + struct list_head socket_post_create; + struct list_head socket_bind; + struct list_head socket_connect; + struct list_head socket_listen; + struct list_head socket_accept; + struct list_head socket_sendmsg; + struct list_head socket_recvmsg; + struct list_head socket_getsockname; + struct list_head socket_getpeername; + struct list_head socket_getsockopt; + struct list_head socket_setsockopt; + struct list_head socket_shutdown; + struct list_head socket_sock_rcv_skb; + struct list_head socket_getpeersec_stream; + struct list_head socket_getpeersec_dgram; + struct list_head sk_alloc_security; + struct list_head sk_free_security; + struct list_head sk_clone_security; + struct list_head sk_getsecid; + struct list_head sock_graft; + struct list_head inet_conn_request; + struct list_head inet_csk_clone; + struct list_head inet_conn_established; + struct list_head secmark_relabel_packet; + struct list_head secmark_refcount_inc; + struct list_head secmark_refcount_dec; + struct list_head req_classify_flow; + struct list_head tun_dev_alloc_security; + struct list_head tun_dev_free_security; + struct list_head tun_dev_create; + struct list_head tun_dev_attach_queue; + struct list_head tun_dev_attach; + struct list_head tun_dev_open; + struct list_head skb_owned_by; +#endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + struct list_head xfrm_policy_alloc_security; + struct list_head xfrm_policy_clone_security; + struct list_head xfrm_policy_free_security; + struct list_head xfrm_policy_delete_security; + struct list_head xfrm_state_alloc; + struct list_head xfrm_state_alloc_acquire; + struct list_head xfrm_state_free_security; + struct list_head xfrm_state_delete_security; + struct list_head xfrm_policy_lookup; + struct list_head xfrm_state_pol_flow_match; + 
struct list_head xfrm_decode_session; +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_KEYS + struct list_head key_alloc; + struct list_head key_free; + struct list_head key_permission; + struct list_head key_getsecurity; +#endif /* CONFIG_KEYS */ +#ifdef CONFIG_AUDIT + struct list_head audit_rule_init; + struct list_head audit_rule_known; + struct list_head audit_rule_match; + struct list_head audit_rule_free; +#endif /* CONFIG_AUDIT */ +}; + +/* + * Security module hook list structure. + * For use with generic list macros for common operations. + */ +struct security_hook_list { + struct list_head list; + struct list_head *head; + union security_list_options hook; +}; + +/* + * Initializing a security_hook_list structure takes + * up a lot of space in a source file. This macro takes + * care of the common case and reduces the amount of + * text involved. + */ +#define LSM_HOOK_INIT(HEAD, HOOK) \ + { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } + +extern struct security_hook_heads security_hook_heads; + +static inline void security_add_hooks(struct security_hook_list *hooks, + int count) +{ + int i; + + for (i = 0; i < count; i++) + list_add_tail_rcu(&hooks[i].list, hooks[i].head); +} + +#ifdef CONFIG_SECURITY_SELINUX_DISABLE +/* + * Assuring the safety of deleting a security module is up to + * the security module involved. This may entail ordering the + * module's hook list in a particular way, refusing to disable + * the module once a policy is loaded or any number of other + * actions better imagined than described. + * + * The name of the configuration option reflects the only module + * that currently uses the mechanism. Any developer who thinks + * disabling their module is a good idea needs to be at least as + * careful as the SELinux team. 
+ */ +static inline void security_delete_hooks(struct security_hook_list *hooks, + int count) +{ + int i; + + for (i = 0; i < count; i++) + list_del_rcu(&hooks[i].list); +} +#endif /* CONFIG_SECURITY_SELINUX_DISABLE */ + +extern int __init security_module_enable(const char *module); +extern void __init capability_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA_STACKED +void __init yama_add_hooks(void); +#endif + +#endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 1726ccbd8009..44348710953f 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -40,6 +40,8 @@ struct mbox_client { void (*tx_done)(struct mbox_client *cl, void *mssg, int r); }; +struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, + const char *name); struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index); int mbox_send_message(struct mbox_chan *chan, void *mssg); void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index d4cf96f07cfc..68c42454439b 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -72,7 +72,7 @@ struct mbox_chan_ops { */ struct mbox_controller { struct device *dev; - struct mbox_chan_ops *ops; + const struct mbox_chan_ops *ops; struct mbox_chan *chans; int num_chans; bool txdone_irq; diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 0819d36a3a74..a16b1f9c1aca 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -7,6 +7,42 @@ struct mei_cl_device; +typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, + u32 events, void *context); + +/** + * struct mei_cl_device - MEI device handle + * An mei_cl_device pointer is returned from mei_add_device() + * and links MEI bus clients to their actual ME host client pointer. 
+ * Drivers for MEI devices will get an mei_cl_device pointer + * when being probed and shall use it for doing ME bus I/O. + * + * @dev: linux driver model device pointer + * @me_cl: me client + * @cl: mei client + * @name: device name + * @event_work: async work to execute event callback + * @event_cb: Drivers register this callback to get asynchronous ME + * events (e.g. Rx buffer pending) notifications. + * @event_context: event callback run context + * @events: Events bitmask sent to the driver. + * @priv_data: client private data + */ +struct mei_cl_device { + struct device dev; + + struct mei_me_client *me_cl; + struct mei_cl *cl; + char name[MEI_CL_NAME_SIZE]; + + struct work_struct event_work; + mei_cl_event_cb_t event_cb; + void *event_context; + unsigned long events; + + void *priv_data; +}; + struct mei_cl_driver { struct device_driver driver; const char *name; @@ -28,8 +64,6 @@ void mei_cl_driver_unregister(struct mei_cl_driver *driver); ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, - u32 events, void *context); int mei_cl_register_event_cb(struct mei_cl_device *device, mei_cl_event_cb_t read_cb, void *context); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c8918114804..73b02b0a8f60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -41,6 +41,7 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_DIRTY, /* # of dirty pages in page cache */ MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, @@ -67,6 +68,8 @@ enum mem_cgroup_events_index { }; #ifdef CONFIG_MEMCG +extern 
struct cgroup_subsys_state *mem_cgroup_root_css; + void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr); @@ -112,6 +115,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, } extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); +extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, @@ -195,6 +199,8 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; +#define mem_cgroup_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) + static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) @@ -382,6 +388,29 @@ enum { OVER_LIMIT, }; +#ifdef CONFIG_CGROUP_WRITEBACK + +struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); +struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, + unsigned long *pdirty, unsigned long *pwriteback); + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) +{ + return NULL; +} + +static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, + unsigned long *pavail, + unsigned long *pdirty, + unsigned long *pwriteback) +{ +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + struct sock; #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) void sock_update_memcg(struct sock *sk); diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index f6722677e6d0..43db4faad143 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -121,6 +121,9 @@ struct arizona_pdata { /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; + /** Channel to use for headphone detection */ + unsigned int hpdet_channel; + /** Extra debounce timeout used during initial 
mic detection (ms) */ int micd_detect_debounce; diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 02f97dc568ac..c2aa853fb412 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -368,4 +368,9 @@ struct axp20x_chrg_pdata { int def_cv; }; +struct axp288_extcon_pdata { + /* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */ + struct gpio_desc *gpio_mux_cntl; +}; + #endif /* __LINUX_MFD_AXP20X_H */ diff --git a/include/linux/mfd/syscon/atmel-mc.h b/include/linux/mfd/syscon/atmel-mc.h new file mode 100644 index 000000000000..afd9b8f1e363 --- /dev/null +++ b/include/linux/mfd/syscon/atmel-mc.h @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Memory Controllers (MC, EBI, SMC, SDRAMC, BFC) - System peripherals + * registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef _LINUX_MFD_SYSCON_ATMEL_MC_H_ +#define _LINUX_MFD_SYSCON_ATMEL_MC_H_ + +/* Memory Controller */ +#define AT91_MC_RCR 0x00 +#define AT91_MC_RCB BIT(0) + +#define AT91_MC_ASR 0x04 +#define AT91_MC_UNADD BIT(0) +#define AT91_MC_MISADD BIT(1) +#define AT91_MC_ABTSZ GENMASK(9, 8) +#define AT91_MC_ABTSZ_BYTE (0 << 8) +#define AT91_MC_ABTSZ_HALFWORD (1 << 8) +#define AT91_MC_ABTSZ_WORD (2 << 8) +#define AT91_MC_ABTTYP GENMASK(11, 10) +#define AT91_MC_ABTTYP_DATAREAD (0 << 10) +#define AT91_MC_ABTTYP_DATAWRITE (1 << 10) +#define AT91_MC_ABTTYP_FETCH (2 << 10) +#define AT91_MC_MST(n) BIT(16 + (n)) +#define AT91_MC_SVMST(n) BIT(24 + (n)) + +#define AT91_MC_AASR 0x08 + +#define AT91_MC_MPR 0x0c +#define AT91_MPR_MSTP(n) GENMASK(2 + ((x) * 4), ((x) * 4)) + +/* External Bus Interface (EBI) registers */ +#define AT91_MC_EBI_CSA 0x60 +#define AT91_MC_EBI_CS(n) BIT(x) +#define AT91_MC_EBI_NUM_CS 8 + +#define AT91_MC_EBI_CFGR 0x64 +#define AT91_MC_EBI_DBPUC BIT(0) + +/* Static Memory Controller (SMC) registers */ +#define AT91_MC_SMC_CSR(n) (0x70 + ((n) * 4)) +#define AT91_MC_SMC_NWS GENMASK(6, 0) +#define AT91_MC_SMC_NWS_(x) ((x) << 0) +#define AT91_MC_SMC_WSEN BIT(7) +#define AT91_MC_SMC_TDF GENMASK(11, 8) +#define AT91_MC_SMC_TDF_(x) ((x) << 8) +#define AT91_MC_SMC_TDF_MAX 0xf +#define AT91_MC_SMC_BAT BIT(12) +#define AT91_MC_SMC_DBW GENMASK(14, 13) +#define AT91_MC_SMC_DBW_16 (1 << 13) +#define AT91_MC_SMC_DBW_8 (2 << 13) +#define AT91_MC_SMC_DPR BIT(15) +#define AT91_MC_SMC_ACSS GENMASK(17, 16) +#define AT91_MC_SMC_ACSS_(x) ((x) << 16) +#define AT91_MC_SMC_ACSS_MAX 3 +#define AT91_MC_SMC_RWSETUP GENMASK(26, 24) +#define AT91_MC_SMC_RWSETUP_(x) ((x) << 24) +#define AT91_MC_SMC_RWHOLD GENMASK(30, 28) +#define AT91_MC_SMC_RWHOLD_(x) ((x) << 28) +#define AT91_MC_SMC_RWHOLDSETUP_MAX 7 + +/* SDRAM Controller registers */ +#define AT91_MC_SDRAMC_MR 0x90 +#define AT91_MC_SDRAMC_MODE GENMASK(3, 0) +#define AT91_MC_SDRAMC_MODE_NORMAL (0 << 0) +#define 
AT91_MC_SDRAMC_MODE_NOP (1 << 0) +#define AT91_MC_SDRAMC_MODE_PRECHARGE (2 << 0) +#define AT91_MC_SDRAMC_MODE_LMR (3 << 0) +#define AT91_MC_SDRAMC_MODE_REFRESH (4 << 0) +#define AT91_MC_SDRAMC_DBW_16 BIT(4) + +#define AT91_MC_SDRAMC_TR 0x94 +#define AT91_MC_SDRAMC_COUNT GENMASK(11, 0) + +#define AT91_MC_SDRAMC_CR 0x98 +#define AT91_MC_SDRAMC_NC GENMASK(1, 0) +#define AT91_MC_SDRAMC_NC_8 (0 << 0) +#define AT91_MC_SDRAMC_NC_9 (1 << 0) +#define AT91_MC_SDRAMC_NC_10 (2 << 0) +#define AT91_MC_SDRAMC_NC_11 (3 << 0) +#define AT91_MC_SDRAMC_NR GENMASK(3, 2) +#define AT91_MC_SDRAMC_NR_11 (0 << 2) +#define AT91_MC_SDRAMC_NR_12 (1 << 2) +#define AT91_MC_SDRAMC_NR_13 (2 << 2) +#define AT91_MC_SDRAMC_NB BIT(4) +#define AT91_MC_SDRAMC_NB_2 (0 << 4) +#define AT91_MC_SDRAMC_NB_4 (1 << 4) +#define AT91_MC_SDRAMC_CAS GENMASK(6, 5) +#define AT91_MC_SDRAMC_CAS_2 (2 << 5) +#define AT91_MC_SDRAMC_TWR GENMASK(10, 7) +#define AT91_MC_SDRAMC_TRC GENMASK(14, 11) +#define AT91_MC_SDRAMC_TRP GENMASK(18, 15) +#define AT91_MC_SDRAMC_TRCD GENMASK(22, 19) +#define AT91_MC_SDRAMC_TRAS GENMASK(26, 23) +#define AT91_MC_SDRAMC_TXSR GENMASK(30, 27) + +#define AT91_MC_SDRAMC_SRR 0x9c +#define AT91_MC_SDRAMC_SRCB BIT(0) + +#define AT91_MC_SDRAMC_LPR 0xa0 +#define AT91_MC_SDRAMC_LPCB BIT(0) + +#define AT91_MC_SDRAMC_IER 0xa4 +#define AT91_MC_SDRAMC_IDR 0xa8 +#define AT91_MC_SDRAMC_IMR 0xac +#define AT91_MC_SDRAMC_ISR 0xb0 +#define AT91_MC_SDRAMC_RES BIT(0) + +/* Burst Flash Controller register */ +#define AT91_MC_BFC_MR 0xc0 +#define AT91_MC_BFC_BFCOM GENMASK(1, 0) +#define AT91_MC_BFC_BFCOM_DISABLED (0 << 0) +#define AT91_MC_BFC_BFCOM_ASYNC (1 << 0) +#define AT91_MC_BFC_BFCOM_BURST (2 << 0) +#define AT91_MC_BFC_BFCC GENMASK(3, 2) +#define AT91_MC_BFC_BFCC_MCK (1 << 2) +#define AT91_MC_BFC_BFCC_DIV2 (2 << 2) +#define AT91_MC_BFC_BFCC_DIV4 (3 << 2) +#define AT91_MC_BFC_AVL GENMASK(7, 4) +#define AT91_MC_BFC_PAGES GENMASK(10, 8) +#define AT91_MC_BFC_PAGES_NO_PAGE (0 << 8) +#define AT91_MC_BFC_PAGES_16 (1 
<< 8) +#define AT91_MC_BFC_PAGES_32 (2 << 8) +#define AT91_MC_BFC_PAGES_64 (3 << 8) +#define AT91_MC_BFC_PAGES_128 (4 << 8) +#define AT91_MC_BFC_PAGES_256 (5 << 8) +#define AT91_MC_BFC_PAGES_512 (6 << 8) +#define AT91_MC_BFC_PAGES_1024 (7 << 8) +#define AT91_MC_BFC_OEL GENMASK(13, 12) +#define AT91_MC_BFC_BAAEN BIT(16) +#define AT91_MC_BFC_BFOEH BIT(17) +#define AT91_MC_BFC_MUXEN BIT(18) +#define AT91_MC_BFC_RDYEN BIT(19) + +#endif /* _LINUX_MFD_SYSCON_ATMEL_MC_H_ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 24ad583596d1..99959a34f4f1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,6 +27,7 @@ struct anon_vma_chain; struct file_ra_state; struct user_struct; struct writeback_control; +struct bdi_writeback; #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1211,10 +1212,13 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_dirtied(struct page *page, struct address_space *mapping); -void account_page_cleaned(struct page *page, struct address_space *mapping); +void account_page_dirtied(struct page *page, struct address_space *mapping, + struct mem_cgroup *memcg); +void account_page_cleaned(struct page *page, struct address_space *mapping, + struct mem_cgroup *memcg, struct bdi_writeback *wb); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); +void cancel_dirty_page(struct page *page); int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3bfd56778c29..8183d6640ca7 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -599,9 +599,22 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define 
MEI_CL_NAME_SIZE 32 +#define MEI_CL_UUID_FMT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" +#define MEI_CL_UUID_ARGS(_u) \ + _u[0], _u[1], _u[2], _u[3], _u[4], _u[5], _u[6], _u[7], \ + _u[8], _u[9], _u[10], _u[11], _u[12], _u[13], _u[14], _u[15] +/** + * struct mei_cl_device_id - MEI client device identifier + * @name: helper name + * @uuid: client uuid + * @driver_info: information used by the driver. + * + * identifies mei client device by uuid and name + */ struct mei_cl_device_id { char name[MEI_CL_NAME_SIZE]; + uuid_le uuid; kernel_ulong_t driver_info; }; @@ -629,4 +642,10 @@ struct mcb_device_id { kernel_ulong_t driver_data; }; +struct ulpi_device_id { + __u16 vendor; + __u16 product; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module.h b/include/linux/module.h index 1e5436042eb0..d67b1932cc59 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,6 +17,7 @@ #include <linux/moduleparam.h> #include <linux/jump_label.h> #include <linux/export.h> +#include <linux/rbtree_latch.h> #include <linux/percpu.h> #include <asm/module.h> @@ -210,6 +211,13 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; +struct module; + +struct mod_tree_node { + struct module *mod; + struct latch_tree_node node; +}; + struct module { enum module_state state; @@ -232,6 +240,9 @@ struct module { unsigned int num_syms; /* Kernel parameters. */ +#ifdef CONFIG_SYSFS + struct mutex param_lock; +#endif struct kernel_param *kp; unsigned int num_kp; @@ -257,6 +268,8 @@ struct module { bool sig_ok; #endif + bool async_probe_requested; + /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const unsigned long *gpl_future_crcs; @@ -269,8 +282,15 @@ struct module { /* Startup function. 
*/ int (*init)(void); - /* If this is non-NULL, vfree after init() returns */ - void *module_init; + /* + * If this is non-NULL, vfree() after init() returns. + * + * Cacheline align here, such that: + * module_init, module_core, init_size, core_size, + * init_text_size, core_text_size and mtn_core::{mod,node[0]} + * are on the same cacheline. + */ + void *module_init ____cacheline_aligned; /* Here is the actual code + data, vfree'd on unload. */ void *module_core; @@ -281,6 +301,16 @@ struct module { /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; +#ifdef CONFIG_MODULES_TREE_LOOKUP + /* + * We want mtn_core::{mod,node[0]} to be in the same cacheline as the + * above entries such that a regular lookup will only touch one + * cacheline. + */ + struct mod_tree_node mtn_core; + struct mod_tree_node mtn_init; +#endif + /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; @@ -336,7 +366,7 @@ struct module { const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call **trace_events; + struct trace_event_call **trace_events; unsigned int num_trace_events; struct trace_enum_map **trace_enums; unsigned int num_trace_enums; @@ -367,7 +397,7 @@ struct module { ctor_fn_t *ctors; unsigned int num_ctors; #endif -}; +} ____cacheline_aligned; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif @@ -421,14 +451,22 @@ struct symsearch { bool unused; }; -/* Search for an exported symbol by name. */ +/* + * Search for an exported symbol by name. + * + * Must be called with module_mutex held or preemption disabled. + */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, bool gplok, bool warn); -/* Walk the exported symbol table */ +/* + * Walk the exported symbol table + * + * Must be called with module_mutex held or preemption disabled. 
+ */ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data); @@ -508,6 +546,11 @@ int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); +static inline bool module_requested_async_probing(struct module *module) +{ + return module && module->async_probe_requested; +} + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. */ @@ -618,6 +661,12 @@ static inline int unregister_module_notifier(struct notifier_block *nb) static inline void print_modules(void) { } + +static inline bool module_requested_async_probing(struct module *module) +{ + return false; +} + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1c9effa25e26..c12f2147c350 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,8 +67,9 @@ enum { struct kernel_param { const char *name; + struct module *mod; const struct kernel_param_ops *ops; - u16 perm; + const u16 perm; s8 level; u8 flags; union { @@ -108,7 +109,7 @@ struct kparam_array * * @perm is 0 if the the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it - * is writable, you may need to use kparam_block_sysfs_write() around + * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). * * The @type is simply pasted to refer to a param_ops_##type and a @@ -216,16 +217,16 @@ struct kparam_array parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? 
*/ \ - static const char __param_str_##name[] = prefix #name; \ + static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, flags, { arg } } + = { __param_str_##name, THIS_MODULE, ops, \ + VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ - static struct kernel_param_ops __param_ops_##name = \ + static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ @@ -238,58 +239,14 @@ __check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) return 0; } -/** - * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs. - * @name: the name of the parameter - * - * There's no point blocking write on a paramter that isn't writable via sysfs! - */ -#define kparam_block_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again. - * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_unlock(); \ - } while (0) - -/** - * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs. - * @name: the name of the parameter - * - * This also blocks sysfs writes. - */ -#define kparam_block_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_read - allows sysfs to read a parameter again. 
- * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_unlock(); \ - } while (0) - #ifdef CONFIG_SYSFS -extern void __kernel_param_lock(void); -extern void __kernel_param_unlock(void); +extern void kernel_param_lock(struct module *mod); +extern void kernel_param_unlock(struct module *mod); #else -static inline void __kernel_param_lock(void) +static inline void kernel_param_lock(struct module *mod) { } -static inline void __kernel_param_unlock(void) +static inline void kernel_param_unlock(struct module *mod) { } #endif @@ -310,6 +267,15 @@ static inline void __kernel_param_unlock(void) #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, &param_ops_##type, &var, perm, -1, 0) + +/** + * core_param_unsafe - same as core_param but taints kernel + */ +#define core_param_unsafe(name, var, type, perm) \ + param_check_##type(name, &(var)); \ + __module_param_call("", name, &param_ops_##type, &var, perm, \ + -1, KERNEL_PARAM_FL_UNSAFE) + #endif /* !MODULE */ /** @@ -357,8 +323,9 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, - const char *doing)); + const char *doing, void *arg)); /* Called by module remove. 
*/ #ifdef CONFIG_SYSFS @@ -376,64 +343,70 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -extern struct kernel_param_ops param_ops_byte; +extern const struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); #define param_check_byte(name, p) __param_check(name, p, unsigned char) -extern struct kernel_param_ops param_ops_short; +extern const struct kernel_param_ops param_ops_short; extern int param_set_short(const char *val, const struct kernel_param *kp); extern int param_get_short(char *buffer, const struct kernel_param *kp); #define param_check_short(name, p) __param_check(name, p, short) -extern struct kernel_param_ops param_ops_ushort; +extern const struct kernel_param_ops param_ops_ushort; extern int param_set_ushort(const char *val, const struct kernel_param *kp); extern int param_get_ushort(char *buffer, const struct kernel_param *kp); #define param_check_ushort(name, p) __param_check(name, p, unsigned short) -extern struct kernel_param_ops param_ops_int; +extern const struct kernel_param_ops param_ops_int; extern int param_set_int(const char *val, const struct kernel_param *kp); extern int param_get_int(char *buffer, const struct kernel_param *kp); #define param_check_int(name, p) __param_check(name, p, int) -extern struct kernel_param_ops param_ops_uint; +extern const struct kernel_param_ops param_ops_uint; extern int param_set_uint(const char *val, const struct kernel_param *kp); extern int param_get_uint(char *buffer, const struct kernel_param *kp); #define param_check_uint(name, p) __param_check(name, p, unsigned int) -extern struct kernel_param_ops param_ops_long; +extern const struct kernel_param_ops param_ops_long; extern int param_set_long(const char *val, const struct kernel_param *kp); 
extern int param_get_long(char *buffer, const struct kernel_param *kp); #define param_check_long(name, p) __param_check(name, p, long) -extern struct kernel_param_ops param_ops_ulong; +extern const struct kernel_param_ops param_ops_ulong; extern int param_set_ulong(const char *val, const struct kernel_param *kp); extern int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) -extern struct kernel_param_ops param_ops_ullong; +extern const struct kernel_param_ops param_ops_ullong; extern int param_set_ullong(const char *val, const struct kernel_param *kp); extern int param_get_ullong(char *buffer, const struct kernel_param *kp); #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) -extern struct kernel_param_ops param_ops_charp; +extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! 
*/ -extern struct kernel_param_ops param_ops_bool; +extern const struct kernel_param_ops param_ops_bool; extern int param_set_bool(const char *val, const struct kernel_param *kp); extern int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) __param_check(name, p, bool) -extern struct kernel_param_ops param_ops_invbool; +extern const struct kernel_param_ops param_ops_bool_enable_only; +extern int param_set_bool_enable_only(const char *val, + const struct kernel_param *kp); +/* getter is the same as for the regular bool */ +#define param_check_bool_enable_only param_check_bool + +extern const struct kernel_param_ops param_ops_invbool; extern int param_set_invbool(const char *val, const struct kernel_param *kp); extern int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) /* An int, which can only be set like a bool (though it shows as an int). */ -extern struct kernel_param_ops param_ops_bint; +extern const struct kernel_param_ops param_ops_bint; extern int param_set_bint(const char *val, const struct kernel_param *kp); #define param_get_bint param_get_int #define param_check_bint param_check_int @@ -477,9 +450,9 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) -extern struct kernel_param_ops param_array_ops; +extern const struct kernel_param_ops param_array_ops; -extern struct kernel_param_ops param_ops_string; +extern const struct kernel_param_ops param_ops_string; extern int param_set_copystring(const char *val, const struct kernel_param *); extern int param_get_string(char *buffer, const struct kernel_param *kp); diff --git a/include/linux/nd.h b/include/linux/nd.h new file mode 100644 index 000000000000..507e47c86737 --- /dev/null +++ b/include/linux/nd.h @@ -0,0 +1,151 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LINUX_ND_H__ +#define __LINUX_ND_H__ +#include <linux/fs.h> +#include <linux/ndctl.h> +#include <linux/device.h> + +struct nd_device_driver { + struct device_driver drv; + unsigned long type; + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); +}; + +static inline struct nd_device_driver *to_nd_device_driver( + struct device_driver *drv) +{ + return container_of(drv, struct nd_device_driver, drv); +}; + +/** + * struct nd_namespace_common - core infrastructure of a namespace + * @force_raw: ignore other personalities for the namespace (e.g. 
btt) + * @dev: device model node + * @claim: when set a another personality has taken ownership of the namespace + * @rw_bytes: access the raw namespace capacity with byte-aligned transfers + */ +struct nd_namespace_common { + int force_raw; + struct device dev; + struct device *claim; + int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, + void *buf, size_t size, int rw); +}; + +static inline struct nd_namespace_common *to_ndns(struct device *dev) +{ + return container_of(dev, struct nd_namespace_common, dev); +} + +/** + * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * @dev: namespace device created by the nd region driver + * @res: struct resource conversion of a NFIT SPA table + */ +struct nd_namespace_io { + struct nd_namespace_common common; + struct resource res; +}; + +/** + * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory + * @nsio: device and system physical address range to drive + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + */ +struct nd_namespace_pmem { + struct nd_namespace_io nsio; + char *alt_name; + u8 *uuid; +}; + +/** + * struct nd_namespace_blk - namespace for dimm-bounded persistent memory + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + * @id: ida allocated id + * @lbasize: blk namespaces have a native sector size when btt not present + * @num_resources: number of dpa extents to claim + * @res: discontiguous dpa extents for given dimm + */ +struct nd_namespace_blk { + struct nd_namespace_common common; + char *alt_name; + u8 *uuid; + int id; + unsigned long lbasize; + int num_resources; + struct resource **res; +}; + +static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +{ + return container_of(dev, struct nd_namespace_io, common.dev); +} + +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device 
*dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return container_of(nsio, struct nd_namespace_pmem, nsio); +} + +static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev) +{ + return container_of(dev, struct nd_namespace_blk, common.dev); +} + +/** + * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to fill + * @size: transfer length + * + * @buf is up-to-date upon return from this routine. + */ +static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, READ); +} + +/** + * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to drain + * @size: transfer length + * + * NVDIMM Namepaces disks do not implement sectors internally. Depending on + * the @ndns, the contents of @buf may be in cpu cache, platform buffers, + * or on backing memory media upon return from this routine. Flushing + * to media is handled internal to the @ndns driver, if at all. 
+ */ +static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, WRITE); +} + +#define MODULE_ALIAS_ND_DEVICE(type) \ + MODULE_ALIAS("nd:t" __stringify(type) "*") +#define ND_DEVICE_MODALIAS_FMT "nd:t%d" + +int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, + struct module *module, const char *mod_name); +#define nd_driver_register(driver) \ + __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#endif /* __LINUX_ND_H__ */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8dbd05e70f09..c0d94ed8ce9a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,7 +74,7 @@ struct nvme_dev { struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; - struct pci_dev *pci_dev; + struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; @@ -146,25 +147,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -/** - * nvme_free_iod - frees an nvme_iod - * @dev: The device that the I/O was submitted to - * @iod: The memory to free - */ -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); - -int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t); -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length); -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod); -int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *, - struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); -int nvme_submit_admin_cmd(struct nvme_dev *, struct 
nvme_command *, - u32 *result); -int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, - dma_addr_t dma_addr); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result); int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, diff --git a/include/linux/of.h b/include/linux/of.h index b871ff9d81d7..f05fdcea4e66 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -128,7 +128,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_OF; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return fwnode ? 
container_of(fwnode, struct device_node, fwnode) : NULL; } @@ -387,7 +387,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return false; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 56bc026c143f..98ba7525929e 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -23,6 +23,9 @@ struct of_dma { struct device_node *of_node; struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *); + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *); + struct dma_router *dma_router; void *of_dma_data; }; @@ -37,12 +40,20 @@ extern int of_dma_controller_register(struct device_node *np, (struct of_phandle_args *, struct of_dma *), void *data); extern void of_dma_controller_free(struct device_node *np); + +extern int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router); +#define of_dma_router_free of_dma_controller_free + extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name); extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma); extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, struct of_dma *ofdma); + #else static inline int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) @@ -56,6 +67,16 @@ static inline void of_dma_controller_free(struct device_node *np) { } +static inline int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + return -ENODEV; +} + +#define of_dma_router_free of_dma_controller_free + static inline struct dma_chan 
*of_dma_request_slave_channel(struct device_node *np, const char *name) { diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 7bc92e050608..f8bcd0e21a26 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -45,6 +45,8 @@ int of_graph_parse_endpoint(const struct device_node *node, struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); +struct device_node *of_graph_get_endpoint_by_regs( + const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_port_parent( const struct device_node *node); struct device_node *of_graph_get_remote_port(const struct device_node *node); @@ -69,6 +71,12 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } +static inline struct device_node *of_graph_get_endpoint_by_regs( + const struct device_node *parent, int port_reg, int reg) +{ + return NULL; +} + static inline struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3736f7065c..fb0814ca65c7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -651,7 +651,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow); +extern void __delete_from_page_cache(struct page *page, void *shadow, + struct mem_cgroup *memcg); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); /* diff --git a/include/linux/parport.h b/include/linux/parport.h index c22f12547324..58e3c64c6b49 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -13,6 +13,7 @@ 
#include <linux/wait.h> #include <linux/irqreturn.h> #include <linux/semaphore.h> +#include <linux/device.h> #include <asm/ptrace.h> #include <uapi/linux/parport.h> @@ -145,6 +146,8 @@ struct pardevice { unsigned int flags; struct pardevice *next; struct pardevice *prev; + struct device dev; + bool devmodel; struct parport_state *state; /* saved status over preemption */ wait_queue_head_t wait_q; unsigned long int time; @@ -156,6 +159,8 @@ struct pardevice { void * sysctl_table; }; +#define to_pardevice(n) container_of(n, struct pardevice, dev) + /* IEEE1284 information */ /* IEEE1284 phases. These are exposed to userland through ppdev IOCTL @@ -195,7 +200,7 @@ struct parport { * This may unfortulately be null if the * port has a legacy driver. */ - + struct device bus_dev; /* to link with the bus */ struct parport *physport; /* If this is a non-default mux parport, i.e. we're a clone of a real @@ -245,15 +250,26 @@ struct parport { struct parport *slaves[3]; }; +#define to_parport_dev(n) container_of(n, struct parport, bus_dev) + #define DEFAULT_SPIN_TIME 500 /* us */ struct parport_driver { const char *name; void (*attach) (struct parport *); void (*detach) (struct parport *); + void (*match_port)(struct parport *); + int (*probe)(struct pardevice *); + struct device_driver driver; + bool devmodel; struct list_head list; }; +#define to_parport_driver(n) container_of(n, struct parport_driver, driver) + +int parport_bus_init(void); +void parport_bus_exit(void); + /* parport_register_port registers a new parallel port at the given address (if one does not already exist) and returns a pointer to it. This entails claiming the I/O region, IRQ and DMA. NULL is returned @@ -272,10 +288,20 @@ void parport_announce_port (struct parport *port); extern void parport_remove_port(struct parport *port); /* Register a new high-level driver. 
*/ -extern int parport_register_driver (struct parport_driver *); + +int __must_check __parport_register_driver(struct parport_driver *, + struct module *, + const char *mod_name); +/* + * parport_register_driver must be a macro so that KBUILD_MODNAME can + * be expanded + */ +#define parport_register_driver(driver) \ + __parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) /* Unregister a high-level driver. */ extern void parport_unregister_driver (struct parport_driver *); +void parport_unregister_driver(struct parport_driver *); /* If parport_register_driver doesn't fit your needs, perhaps * parport_find_xxx does. */ @@ -288,6 +314,15 @@ extern irqreturn_t parport_irq_handler(int irq, void *dev_id); /* Reference counting for ports. */ extern struct parport *parport_get_port (struct parport *); extern void parport_put_port (struct parport *); +void parport_del_port(struct parport *); + +struct pardev_cb { + int (*preempt)(void *); + void (*wakeup)(void *); + void *private; + void (*irq_func)(void *); + unsigned int flags; +}; /* parport_register_device declares that a device is connected to a port, and tells the kernel all it needs to know. @@ -301,6 +336,10 @@ struct pardevice *parport_register_device(struct parport *port, void (*irq_func)(void *), int flags, void *handle); +struct pardevice * +parport_register_dev_model(struct parport *port, const char *name, + const struct pardev_cb *par_dev_cb, int cnt); + /* parport_unregister unlinks a device from the chain. */ extern void parport_unregister_device(struct pardevice *dev); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1b82d44b0a02..2027809433b3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -300,6 +300,11 @@ struct pmu { * Free pmu-private AUX data structures */ void (*free_aux) (void *aux); /* optional */ + + /* + * Filter events for PMU-specific reasons. 
+ */ + int (*filter_match) (struct perf_event *event); /* optional */ }; /** @@ -479,7 +484,7 @@ struct perf_event { void *overflow_handler_context; #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call *tp_event; + struct trace_event_call *tp_event; struct event_filter *filter; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h new file mode 100644 index 000000000000..50aed92ea89c --- /dev/null +++ b/include/linux/phy/phy-sun4i-usb.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2015 Hans de Goede <hdegoede@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef PHY_SUN4I_USB_H_ +#define PHY_SUN4I_USB_H_ + +#include "phy.h" + +/** + * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect + * @phy: reference to a sun4i usb phy + * @enabled: whether to enable or disable squelch detect + */ +void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled); + +#endif diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index a0197fa1b116..8cf05e341cff 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -133,6 +133,8 @@ struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, const char *con_id); +struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, + int index); void phy_put(struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); @@ -261,6 +263,13 @@ static inline struct phy *devm_of_phy_get(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline struct phy *devm_of_phy_get_by_index(struct device *dev, + struct device_node *np, + int index) +{ + return ERR_PTR(-ENOSYS); +} + static inline void phy_put(struct phy *phy) { } diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h deleted file mode 100644 index 471fffebbeb4..000000000000 --- a/include/linux/platform_data/dma-rcar-audmapp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * This is for Renesas R-Car Audio-DMAC-peri-peri. 
- * - * Copyright (C) 2014 Renesas Electronics Corporation - * Copyright (C) 2014 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> - * - * This file is based on the include/linux/sh_dma.h - * - * Header for the new SH dmaengine driver - * - * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef SH_AUDMAPP_H -#define SH_AUDMAPP_H - -#include <linux/dmaengine.h> - -struct audmapp_slave_config { - int slave_id; - dma_addr_t src; - dma_addr_t dst; - u32 chcr; -}; - -struct audmapp_pdata { - struct audmapp_slave_config *slave; - int slave_num; -}; - -#endif /* SH_AUDMAPP_H */ diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h new file mode 100644 index 000000000000..88b0db7bee74 --- /dev/null +++ b/include/linux/platform_data/gpio-ath79.h @@ -0,0 +1,19 @@ +/* + * Atheros AR7XXX/AR9XXX GPIO controller platform data + * + * Copyright (C) 2015 Alban Bedel <albeu@free.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H +#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H + +struct ath79_gpio_platform_data { + unsigned ngpios; + bool oe_inverted; +}; + +#endif diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h deleted file mode 100644 index dd3ba46c0d90..000000000000 --- a/include/linux/platform_data/usb-rcar-gen2-phy.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2013 Renesas Solutions Corp. - * Copyright (C) 2013 Cogent Embedded, Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __USB_RCAR_GEN2_PHY_H -#define __USB_RCAR_GEN2_PHY_H - -#include <linux/types.h> - -struct rcar_gen2_phy_platform_data { - /* USB channel 0 configuration */ - bool chan0_pci:1; /* true: PCI USB host 0, false: USBHS */ - /* USB channel 2 configuration */ - bool chan2_pci:1; /* true: PCI USB host 2, false: USBSS */ -}; - -#endif diff --git a/include/linux/pmem.h b/include/linux/pmem.h new file mode 100644 index 000000000000..d2114045a6c4 --- /dev/null +++ b/include/linux/pmem.h @@ -0,0 +1,152 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __PMEM_H__ +#define __PMEM_H__ + +#include <linux/io.h> + +#ifdef CONFIG_ARCH_HAS_PMEM_API +#include <asm/cacheflush.h> +#else +static inline void arch_wmb_pmem(void) +{ + BUG(); +} + +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} + +static inline void __pmem *arch_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return NULL; +} + +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + BUG(); +} +#endif + +/* + * Architectures that define ARCH_HAS_PMEM_API must provide + * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(), + * arch_wmb_pmem(), and __arch_has_wmb_pmem(). 
+ */ + +static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +{ + memcpy(dst, (void __force const *) src, size); +} + +static inline void memunmap_pmem(void __pmem *addr) +{ + iounmap((void __force __iomem *) addr); +} + +/** + * arch_has_wmb_pmem - true if wmb_pmem() ensures durability + * + * For a given cpu implementation within an architecture it is possible + * that wmb_pmem() resolves to a nop. In the case this returns + * false, pmem api users are unable to ensure durability and may want to + * fall back to a different data consistency model, or otherwise notify + * the user. + */ +static inline bool arch_has_wmb_pmem(void) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) + return __arch_has_wmb_pmem(); + return false; +} + +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); +} + +/* + * These defaults seek to offer decent performance and minimize the + * window between i/o completion and writes being durable on media. + * However, it is undefined / architecture specific whether + * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * making data durable relative to i/o completion. + */ +static void default_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t size) +{ + memcpy((void __force *) dst, src, size); +} + +static void __pmem *default_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return (void __pmem __force *)ioremap_wt(offset, size); +} + +/** + * memremap_pmem - map physical persistent memory for pmem api + * @offset: physical address of persistent memory + * @size: size of the mapping + * + * Establish a mapping of the architecture specific memory type expected + * by memcpy_to_pmem() and wmb_pmem(). 
For example, it may be + * the case that an uncacheable or writethrough mapping is sufficient, + * or a writeback mapping provided memcpy_to_pmem() and + * wmb_pmem() arrange for the data to be written through the + * cache to persistent media. + */ +static inline void __pmem *memremap_pmem(resource_size_t offset, + unsigned long size) +{ + if (arch_has_pmem_api()) + return arch_memremap_pmem(offset, size); + return default_memremap_pmem(offset, size); +} + +/** + * memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Perform a memory copy that results in the destination of the copy + * being effectively evicted from, or never written to, the processor + * cache hierarchy after the copy completes. After memcpy_to_pmem() + * data may still reside in cpu or platform buffers, so this operation + * must be followed by a wmb_pmem(). + */ +static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) +{ + if (arch_has_pmem_api()) + arch_memcpy_to_pmem(dst, src, n); + else + default_memcpy_to_pmem(dst, src, n); +} + +/** + * wmb_pmem - synchronize writes to persistent memory + * + * After a series of memcpy_to_pmem() operations this drains data from + * cpu write buffers and any platform (memory controller) buffers to + * ensure that written data is durable on persistent memory media. + */ +static inline void wmb_pmem(void) +{ + if (arch_has_pmem_api()) + arch_wmb_pmem(); +} +#endif /* __PMEM_H__ */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9b30871c9149..58b1fec40d37 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +#define CONSOLE_EXT_LOG_MAX 8192 + /* printk's without a loglevel use this.. 
*/ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index d7a974d5f57c..6e7d5ec65838 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify @@ -16,6 +16,17 @@ extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); +#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 + +struct qcom_scm_hdcp_req { + u32 addr; + u32 val; +}; + +extern bool qcom_scm_hdcp_available(void); +extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, + u32 *resp); + #define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fb31765e935a..830c4992088d 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/stddef.h> +#include <linux/rcupdate.h> struct rb_node { unsigned long __rb_parent_color; @@ -73,11 +74,11 @@ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = 
NULL; @@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, *rb_link = node; } +static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + rcu_assign_pointer(*rb_link, node); +} + #define rb_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? rb_entry(____ptr, type, member) : NULL; \ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 378c5ee75f78..14d7b831b63a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, { if (parent) { if (parent->rb_left == old) - parent->rb_left = new; + WRITE_ONCE(parent->rb_left, new); else - parent->rb_right = new; + WRITE_ONCE(parent->rb_right, new); } else - root->rb_node = new; + WRITE_ONCE(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, @@ -137,7 +137,8 @@ static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; @@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, tmp = parent; } else { struct rb_node *successor = child, *child2; + tmp = child->rb_left; if (!tmp) { /* @@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, */ parent = successor; child2 = successor->rb_right; + augment->copy(node, successor); } else { /* @@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, successor = tmp; tmp = tmp->rb_left; } while (tmp); - 
parent->rb_left = child2 = successor->rb_right; - successor->rb_right = child; + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); + augment->copy(node, successor); augment->propagate(parent, successor); } - successor->rb_left = tmp = node->rb_left; + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); + if (child2) { successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h new file mode 100644 index 000000000000..4f3432c61d12 --- /dev/null +++ b/include/linux/rbtree_latch.h @@ -0,0 +1,212 @@ +/* + * Latched RB-trees + * + * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org> + * + * Since RB-trees have non-atomic modifications they're not immediately suited + * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for + * lockless lookups; we cannot guarantee they return a correct result. + * + * The simplest solution is a seqlock + RB-tree, this will allow lockless + * lookups; but has the constraint (inherent to the seqlock) that read sides + * cannot nest in write sides. + * + * If we need to allow unconditional lookups (say as required for NMI context + * usage) we need a more complex setup; this data structure provides this by + * employing the latch technique -- see @raw_write_seqcount_latch -- to + * implement a latched RB-tree which does allow for unconditional lookups by + * virtue of always having (at least) one stable copy of the tree. + * + * However, while we have the guarantee that there is at all times one stable + * copy, this does not guarantee an iteration will not observe modifications. 
+ * What might have been a stable copy at the start of the iteration, need not + * remain so for the duration of the iteration. + * + * Therefore, this does require a lockless RB-tree iteration to be non-fatal; + * see the comment in lib/rbtree.c. Note however that we only require the first + * condition -- not seeing partial stores -- because the latch thing isolates + * us from loops. If we were to interrupt a modification the lookup would be + * pointed at the stable tree and complete while the modification was halted. + */ + +#ifndef RB_TREE_LATCH_H +#define RB_TREE_LATCH_H + +#include <linux/rbtree.h> +#include <linux/seqlock.h> + +struct latch_tree_node { + struct rb_node node[2]; +}; + +struct latch_tree_root { + seqcount_t seq; + struct rb_root tree[2]; +}; + +/** + * latch_tree_ops - operators to define the tree order + * @less: used for insertion; provides the (partial) order between two elements. + * @comp: used for lookups; provides the order between the search key and an element. + * + * The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * latch_tree_find(). 
+ */ +struct latch_tree_ops { + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b); + int (*comp)(void *key, struct latch_tree_node *b); +}; + +static __always_inline struct latch_tree_node * +__lt_from_rb(struct rb_node *node, int idx) +{ + return container_of(node, struct latch_tree_node, node[idx]); +} + +static __always_inline void +__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx, + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b)) +{ + struct rb_root *root = &ltr->tree[idx]; + struct rb_node **link = &root->rb_node; + struct rb_node *node = &ltn->node[idx]; + struct rb_node *parent = NULL; + struct latch_tree_node *ltp; + + while (*link) { + parent = *link; + ltp = __lt_from_rb(parent, idx); + + if (less(ltn, ltp)) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, root); +} + +static __always_inline void +__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx) +{ + rb_erase(&ltn->node[idx], &ltr->tree[idx]); +} + +static __always_inline struct latch_tree_node * +__lt_find(void *key, struct latch_tree_root *ltr, int idx, + int (*comp)(void *key, struct latch_tree_node *node)) +{ + struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node); + struct latch_tree_node *ltn; + int c; + + while (node) { + ltn = __lt_from_rb(node, idx); + c = comp(key, ltn); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return ltn; + } + + return NULL; +} + +/** + * latch_tree_insert() - insert @node into the trees @root + * @node: nodes to insert + * @root: trees to insert @node into + * @ops: operators defining the node order + * + * It inserts @node into @root in an ordered fashion such that we can always + * observe one complete tree. See the comment for raw_write_seqcount_latch(). 
+ * + * The inserts use rcu_assign_pointer() to publish the element such that the + * tree structure is stored before we can observe the new @node. + * + * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_insert(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 0, ops->less); + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 1, ops->less); +} + +/** + * latch_tree_erase() - removes @node from the trees @root + * @node: nodes to remove + * @root: trees to remove @node from + * @ops: operators defining the node order + * + * Removes @node from the trees @root in an ordered fashion such that we can + * always observe one complete tree. See the comment for + * raw_write_seqcount_latch(). + * + * It is assumed that @node will observe one RCU quiescent state before being + * reused or freed. + * + * All modifications (latch_tree_insert, latch_tree_erase) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_erase(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 0); + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 1); +} + +/** + * latch_tree_find() - find the node matching @key in the trees @root + * @key: search key + * @root: trees to search for @key + * @ops: operators defining the node order + * + * Does a lockless lookup in the trees @root for the node matching @key. + * + * It is assumed that this is called while holding the appropriate RCU read + * side lock. + * + * If the operators define a partial order on the elements (there are multiple + * elements which have the same key value) it is undefined which of these + * elements will be found. 
Nor is it possible to iterate the tree to find + * further elements with the same key value. + * + * Returns: a pointer to the node matching @key or NULL. + */ +static __always_inline struct latch_tree_node * +latch_tree_find(void *key, struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + struct latch_tree_node *node; + unsigned int seq; + + do { + seq = raw_read_seqcount_latch(&root->seq); + node = __lt_find(key, root, seq & 1, ops->comp); + } while (read_seqcount_retry(&root->seq, seq)); + + return node; +} + +#endif /* RB_TREE_LATCH_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 33a056bb886f..4cf5f51b4c9c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -633,21 +633,6 @@ static inline void rcu_preempt_sleep_check(void) #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** - * lockless_dereference() - safely load a pointer for later dereference - * @p: The pointer to load - * - * Similar to rcu_dereference(), but for situations where the pointed-to - * object's lifetime is managed by something other than RCU. That - * "something other" might be reference counting or simple immortality. - */ -#define lockless_dereference(p) \ -({ \ - typeof(p) _________p1 = READ_ONCE(p); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. 
*/ \ - (_________p1); \ -}) - -/** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) diff --git a/include/linux/reset/bcm63xx_pmb.h b/include/linux/reset/bcm63xx_pmb.h new file mode 100644 index 000000000000..bb4af7b5eb36 --- /dev/null +++ b/include/linux/reset/bcm63xx_pmb.h @@ -0,0 +1,88 @@ +/* + * Broadcom BCM63xx Processor Monitor Bus shared routines (SMP and reset) + * + * Copyright (C) 2015, Broadcom Corporation + * Author: Florian Fainelli <f.fainelli@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __BCM63XX_PMB_H +#define __BCM63XX_PMB_H + +#include <linux/io.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/err.h> + +/* PMB Master controller register */ +#define PMB_CTRL 0x00 +#define PMC_PMBM_START (1 << 31) +#define PMC_PMBM_TIMEOUT (1 << 30) +#define PMC_PMBM_SLAVE_ERR (1 << 29) +#define PMC_PMBM_BUSY (1 << 28) +#define PMC_PMBM_READ (0 << 20) +#define PMC_PMBM_WRITE (1 << 20) +#define PMB_WR_DATA 0x04 +#define PMB_TIMEOUT 0x08 +#define PMB_RD_DATA 0x0C + +#define PMB_BUS_ID_SHIFT 8 + +/* Perform the low-level PMB master operation, shared between reads and + * writes. 
+ */ +static inline int __bpcm_do_op(void __iomem *master, unsigned int addr, + u32 off, u32 op) +{ + unsigned int timeout = 1000; + u32 cmd; + + cmd = (PMC_PMBM_START | op | (addr & 0xff) << 12 | off); + writel(cmd, master + PMB_CTRL); + do { + cmd = readl(master + PMB_CTRL); + if (!(cmd & PMC_PMBM_START)) + return 0; + + if (cmd & PMC_PMBM_SLAVE_ERR) + return -EIO; + + if (cmd & PMC_PMBM_TIMEOUT) + return -ETIMEDOUT; + + udelay(1); + } while (timeout-- > 0); + + return -ETIMEDOUT; +} + +static inline int bpcm_rd(void __iomem *master, unsigned int addr, + u32 off, u32 *val) +{ + int ret = 0; + + ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_READ); + *val = readl(master + PMB_RD_DATA); + + return ret; +} + +static inline int bpcm_wr(void __iomem *master, unsigned int addr, + u32 off, u32 val) +{ + int ret = 0; + + writel(val, master + PMB_WR_DATA); + ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_WRITE); + + return ret; +} + +#endif /* __BCM63XX_PMB_H */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 8dcf6825fa88..3359f0422c6b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -24,6 +24,14 @@ extern void rtc_time64_to_tm(time64_t time, struct rtc_time *tm); ktime_t rtc_tm_to_ktime(struct rtc_time tm); struct rtc_time rtc_ktime_to_tm(ktime_t kt); +/* + * rtc_tm_sub - Return the difference in seconds. + */ +static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) +{ + return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); +} + /** * Deprecated. Use rtc_time64_to_tm(). 
*/ @@ -101,8 +109,7 @@ struct rtc_timer { /* flags */ #define RTC_DEV_BUSY 0 -struct rtc_device -{ +struct rtc_device { struct device dev; struct module *owner; @@ -161,7 +168,6 @@ extern void devm_rtc_device_unregister(struct device *dev, extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); extern int rtc_set_ntp_time(struct timespec64 now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, @@ -198,10 +204,10 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); -void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data); -int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer, - ktime_t expires, ktime_t period); -int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer); +void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data); +int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, + ktime_t expires, ktime_t period); +void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); void rtc_timer_do_work(struct work_struct *work); static inline bool is_leap_year(unsigned int year) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index a0edb992c9c3..50a8486c524b 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -2,13 +2,39 @@ #define _LINUX_SCATTERLIST_H #include <linux/string.h> +#include <linux/types.h> #include <linux/bug.h> #include <linux/mm.h> - -#include <asm/types.h> -#include <asm/scatterlist.h> #include <asm/io.h> +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +#ifdef 
CONFIG_NEED_SG_DMA_LENGTH + unsigned int dma_length; +#endif +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries dma_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) + +#ifdef CONFIG_NEED_SG_DMA_LENGTH +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif + struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ @@ -18,10 +44,9 @@ struct sg_table { /* * Notes on SG table design. * - * Architectures must provide an unsigned long page_link field in the - * scatterlist struct. We use that to place the page pointer AND encode - * information about the sg table as well. The two lower bits are reserved - * for this information. + * We use the unsigned long page_link field in the scatterlist struct to place + * the page pointer AND encode information about the sg table as well. The two + * lower bits are reserved for this information. * * If bit 0 is set, then the page_link contains a pointer to the next sg * table list. Otherwise the next entry is at sg + 1. diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83e608a..8aa4a251742f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -58,6 +58,7 @@ struct sched_param { #include <linux/uidgid.h> #include <linux/gfp.h> #include <linux/magic.h> +#include <linux/cgroup-defs.h> #include <asm/processor.h> @@ -755,18 +756,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. 
fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ @@ -2432,7 +2421,6 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); -extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); @@ -2556,8 +2544,22 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. 
*/ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif extern void flush_thread(void); extern void exit_thread(void); @@ -2576,6 +2578,7 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); @@ -2710,53 +2713,33 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -#ifdef CONFIG_CGROUPS -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - down_read(&tsk->signal->group_rwsem); -} -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - up_read(&tsk->signal->group_rwsem); -} - /** - * threadgroup_lock - lock threadgroup - * @tsk: member task of the threadgroup to lock - * - * Lock the threadgroup @tsk belongs to. No new task is allowed to enter - * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * change ->group_leader/pid. This is useful for cases where the threadgroup - * needs to stay stable across blockable operations. + * threadgroup_change_begin - mark the beginning of changes to a threadgroup + * @tsk: task causing the changes * - * fork and exit paths explicitly call threadgroup_change_{begin|end}() for - * synchronization. While held, no new task will be added to threadgroup - * and no existing live task will have its PF_EXITING set. - * - * de_thread() does threadgroup_change_{begin|end}() when a non-leader - * sub-thread becomes a new leader. 
+ * All operations which modify a threadgroup - a new thread joining the + * group, death of a member thread (the assertion of PF_EXITING) and + * exec(2) dethreading the process and replacing the leader - are wrapped + * by threadgroup_change_{begin|end}(). This is to provide a place which + * subsystems needing threadgroup stability can hook into for + * synchronization. */ -static inline void threadgroup_lock(struct task_struct *tsk) +static inline void threadgroup_change_begin(struct task_struct *tsk) { - down_write(&tsk->signal->group_rwsem); + might_sleep(); + cgroup_threadgroup_change_begin(tsk); } /** - * threadgroup_unlock - unlock threadgroup - * @tsk: member task of the threadgroup to unlock + * threadgroup_change_end - mark the end of changes to a threadgroup + * @tsk: task causing the changes * - * Reverse threadgroup_lock(). + * See threadgroup_change_begin(). */ -static inline void threadgroup_unlock(struct task_struct *tsk) +static inline void threadgroup_change_end(struct task_struct *tsk) { - up_write(&tsk->signal->group_rwsem); + cgroup_threadgroup_change_end(tsk); } -#else -static inline void threadgroup_change_begin(struct task_struct *tsk) {} -static inline void threadgroup_change_end(struct task_struct *tsk) {} -static inline void threadgroup_lock(struct task_struct *tsk) {} -static inline void threadgroup_unlock(struct task_struct *tsk) {} -#endif #ifndef __HAVE_THREAD_FUNCTIONS diff --git a/include/linux/scif.h b/include/linux/scif.h new file mode 100644 index 000000000000..44f4f3898bbe --- /dev/null +++ b/include/linux/scif.h @@ -0,0 +1,993 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef __SCIF_H__ +#define __SCIF_H__ + +#include <linux/types.h> +#include <linux/poll.h> +#include <linux/scif_ioctl.h> + +#define SCIF_ACCEPT_SYNC 1 +#define SCIF_SEND_BLOCK 1 +#define SCIF_RECV_BLOCK 1 + +enum { + SCIF_PROT_READ = (1 << 0), + SCIF_PROT_WRITE = (1 << 1) +}; + +enum { + SCIF_MAP_FIXED = 0x10, + SCIF_MAP_KERNEL = 0x20, +}; + +enum { + SCIF_FENCE_INIT_SELF = (1 << 0), + SCIF_FENCE_INIT_PEER = (1 << 1), + SCIF_SIGNAL_LOCAL = (1 << 4), + SCIF_SIGNAL_REMOTE = (1 << 5) +}; + +enum { + SCIF_RMA_USECPU = (1 << 0), + SCIF_RMA_USECACHE = (1 << 1), + SCIF_RMA_SYNC = (1 << 2), + SCIF_RMA_ORDERED = (1 << 3) +}; + +/* End of SCIF Admin Reserved Ports */ +#define SCIF_ADMIN_PORT_END 1024 + +/* End of SCIF Reserved Ports */ +#define SCIF_PORT_RSVD 1088 + +typedef struct scif_endpt *scif_epd_t; + +#define SCIF_OPEN_FAILED ((scif_epd_t)-1) +#define SCIF_REGISTER_FAILED ((off_t)-1) +#define SCIF_MMAP_FAILED ((void *)-1) + +/** + * scif_open() - Create an endpoint + * + * Return: + * Upon successful completion, scif_open() returns an endpoint descriptor to + * be used in subsequent SCIF functions calls to refer to that endpoint; + * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is + * returned and errno is set to indicate the error; in kernel mode a NULL + * scif_epd_t is returned. 
+ * + * Errors: + * ENOMEM - Insufficient kernel memory was available + */ +scif_epd_t scif_open(void); + +/** + * scif_bind() - Bind an endpoint to a port + * @epd: endpoint descriptor + * @pn: port number + * + * scif_bind() binds endpoint epd to port pn, where pn is a port number on the + * local node. If pn is zero, a port number greater than or equal to + * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to + * exactly one local port. Ports less than 1024 when requested can only be bound + * by system (or root) processes or by processes executed by privileged users. + * + * Return: + * Upon successful completion, scif_bind() returns the port number to which epd + * is bound; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint or the port is already bound + * EISCONN - The endpoint is already connected + * ENOSPC - No port number available for assignment + * EACCES - The port requested is protected and the user is not the superuser + */ +int scif_bind(scif_epd_t epd, u16 pn); + +/** + * scif_listen() - Listen for connections on an endpoint + * @epd: endpoint descriptor + * @backlog: maximum pending connection requests + * + * scif_listen() marks the endpoint epd as a listening endpoint - that is, as + * an endpoint that will be used to accept incoming connection requests. Once + * so marked, the endpoint is said to be in the listening state and may not be + * used as the endpoint of a connection. + * + * The endpoint, epd, must have been bound to a port. + * + * The backlog argument defines the maximum length to which the queue of + * pending connections for epd may grow. If a connection request arrives when + * the queue is full, the client may receive an error with an indication that + * the connection was refused. 
+ * + * Return: + * Upon successful completion, scif_listen() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint is not bound to a port + * EISCONN - The endpoint is already connected or listening + */ +int scif_listen(scif_epd_t epd, int backlog); + +/** + * scif_connect() - Initiate a connection on a port + * @epd: endpoint descriptor + * @dst: global id of port to which to connect + * + * The scif_connect() function requests the connection of endpoint epd to remote + * port dst. If the connection is successful, a peer endpoint, bound to dst, is + * created on node dst.node. On successful return, the connection is complete. + * + * If the endpoint epd has not already been bound to a port, scif_connect() + * will bind it to an unused local port. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * In user space, scif_connect() supports an asynchronous connection mode + * if the application has set the O_NONBLOCK flag on the endpoint via the + * fcntl() system call. Setting this flag will result in the calling process + * not to wait during scif_connect(). + * + * Return: + * Upon successful completion, scif_connect() returns the port ID to which the + * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is + * set to indicate the error; in kernel mode the negative of one of the + * following errors is returned. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNREFUSED - The destination was not listening for connections or refused + * the connection request + * EINVAL - dst.port is not a valid port ID + * EISCONN - The endpoint is already connected + * ENOMEM - No buffer space is available + * ENODEV - The destination node does not exist, or the node is lost or existed, + * but is not currently in the network since it may have crashed + * ENOSPC - No port number available for assignment + * EOPNOTSUPP - The endpoint is listening and cannot be connected + */ +int scif_connect(scif_epd_t epd, struct scif_port_id *dst); + +/** + * scif_accept() - Accept a connection on an endpoint + * @epd: endpoint descriptor + * @peer: global id of port to which connected + * @newepd: new connected endpoint descriptor + * @flags: flags + * + * The scif_accept() call extracts the first connection request from the queue + * of pending connections for the port on which epd is listening. scif_accept() + * creates a new endpoint, bound to the same port as epd, and allocates a new + * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new + * endpoint is connected to the endpoint through which the connection was + * requested. epd is unaffected by this call, and remains in the listening + * state. + * + * On successful return, peer holds the global port identifier (node id and + * local port number) of the port which requested the connection. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * The number of connections that can (subsequently) be accepted on epd is only + * limited by system resources (memory). + * + * The flags argument is formed by OR'ing together zero or more of the + * following values. + * SCIF_ACCEPT_SYNC - block until a connection request is presented. 
If + * SCIF_ACCEPT_SYNC is not in flags, and no pending + * connections are present on the queue, scif_accept() + * fails with an EAGAIN error + * + * In user mode, the select() and poll() functions can be used to determine + * when there is a connection request. In kernel mode, the scif_poll() + * function may be used for this purpose. A readable event will be delivered + * when a connection is requested. + * + * Return: + * Upon successful completion, scif_accept() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be + * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete + * its connection request + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINTR - Interrupted function + * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is + * NULL, or newepd is NULL + * ENODEV - The requesting node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOENT - Secondary part of epd registration failed + */ +int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t + *newepd, int flags); + +/** + * scif_close() - Close an endpoint + * @epd: endpoint descriptor + * + * scif_close() closes an endpoint and performs necessary teardown of + * facilities associated with that endpoint. + * + * If epd is a listening endpoint then it will no longer accept connection + * requests on the port to which it is bound. Any pending connection requests + * are rejected. + * + * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs + * which are in-process through epd or its peer endpoint will complete before + * scif_close() returns. 
Registered windows of the local and peer endpoints are + * released as if scif_unregister() was called against each window. + * + * Closing a SCIF endpoint does not affect local registered memory mapped by + * a SCIF endpoint on a remote node. The local memory remains mapped by the peer + * SCIF endpoint until explicitly removed by calling munmap(..) by the peer. + * + * If the peer endpoint's receive queue is not empty at the time that epd is + * closed, then the peer endpoint can be passed as the endpoint parameter to + * scif_recv() until the receive queue is empty. + * + * epd is freed and may no longer be accessed. + * + * Return: + * Upon successful completion, scif_close() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + */ +int scif_close(scif_epd_t epd); + +/** + * scif_send() - Send a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message length + * @flags: blocking mode flags + * + * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data + * are copied from memory starting at address msg. On successful execution the + * return value of scif_send() is the number of bytes that were sent, and is + * zero if no bytes were sent because len was zero. scif_send() may be called + * only when the endpoint is in a connected state. + * + * If a scif_send() call is non-blocking, then it sends only those bytes which + * can be sent without waiting, up to a maximum of len bytes. + * + * If a scif_send() call is blocking, then it normally returns after sending + * all len bytes. If a blocking call is interrupted or the connection is + * reset, the call is considered successful if some bytes were sent or len is + * zero, otherwise the call is considered unsuccessful.
+ * + * In user mode, the select() and poll() functions can be used to determine + * when the send queue is not full. In kernel mode, the scif_poll() function + * may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK + * is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_send() returns the number of bytes sent; + * otherwise in user mode -1 is returned and errno is set to indicate the + * error; in kernel mode the negative of one of the following errors is + * returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_recv() - Receive a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message buffer length + * @flags: blocking mode flags + * + * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of + * data are copied to memory starting at address msg. On successful execution + * the return value of scif_recv() is the number of bytes that were received, + * and is zero if no bytes were received because len was zero. scif_recv() may + * be called only when the endpoint is in a connected state. 
+ * + * If a scif_recv() call is non-blocking, then it receives only those bytes + * which can be received without waiting, up to a maximum of len bytes. + * + * If a scif_recv() call is blocking, then it normally returns after receiving + * all len bytes. If the blocking call was interrupted due to a disconnection, + * subsequent calls to scif_recv() will copy all bytes received up to the point + * of disconnection. + * + * In user mode, the select() and poll() functions can be used to determine + * when data is available to be received. In kernel mode, the scif_poll() + * function may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_recv() will block until the entire message is received if + * SCIF_RECV_BLOCK is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_recv() returns the number of bytes + * received; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + * Errors: + * EAGAIN - The destination node is returning from a low power state + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_register() - Mark a memory region for remote access.
+ * @epd: endpoint descriptor + * @addr: starting virtual address + * @len: length of range + * @offset: offset of window + * @prot_flags: read/write protection flags + * @map_flags: mapping flags + * + * The scif_register() function opens a window, a range of whole pages of the + * registered address space of the endpoint epd, starting at offset po and + * continuing for len bytes. The value of po, further described below, is a + * function of the parameters offset and len, and the value of map_flags. Each + * page of the window represents the physical memory page which backs the + * corresponding page of the range of virtual address pages starting at addr + * and continuing for len bytes. addr and len are constrained to be multiples + * of the page size. A successful scif_register() call returns po. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register() will not replace any existing + * registration; an error is returned if any page within the range [offset, + * offset + len - 1] intersects an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po value so chosen will + * be an area of the registered address space that the implementation deems + * suitable for a mapping of len bytes. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. 
+ * + * The physical pages which are so represented by a window are available for + * access in calls to mmap(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap()) have no effect on + * existing windows. + * + * If the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values. + * SCIF_PROT_READ - allow read operations from the window + * SCIF_PROT_WRITE - allow write operations to the window + * + * The map_flags argument can be set to SCIF_MAP_FIXED, which causes offset to + * be interpreted as a fixed offset. + * + * Return: + * Upon successful completion, scif_register() returns the offset at which the + * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that + * is (off_t)-1) is returned and errno is set to indicate the error; in + * kernel mode the negative of one of the following errors is returned.
+ * + * Errors: + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range + * [offset, offset + len - 1] are already registered + * EAGAIN - The mapping could not be performed due to lack of resources + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is + * set in map_flags, and offset is not a multiple of the page size, or addr is + * not a multiple of the page size, or len is not a multiple of the page size, + * or is 0, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot_flags, int map_flags); + +/** + * scif_unregister() - Close windows previously registered for remote access. + * @epd: endpoint descriptor + * @offset: start of range to unregister + * @len: length of range to unregister + * + * The scif_unregister() function closes those previously registered windows + * which are entirely within the range [offset, offset + len - 1]. It is an + * error to specify a range which intersects only a subrange of a window. + * + * On a successful return, pages within the window may no longer be specified + * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), + * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, + * however, continues to exist until all previous references against it are + * removed. A window is referenced if there is a mapping to it created by + * mmap(), or if scif_get_pages() was called against the window + * (and the pages have not been returned via scif_put_pages()).
A window is + * also referenced while an RMA, in which some range of the window is a source + * or destination, is in progress. Finally a window is referenced while some + * offset in that window was specified to scif_fence_signal(), and the RMAs + * marked by that call to scif_fence_signal() have not completed. While a + * window is in this state, its registered address space pages are not + * available for use in a new registered window. + * + * When all such references to the window have been removed, its references to + * all the physical pages which it represents are removed. Similarly, the + * registered address space pages of the window become available for + * registration in a new window. + * + * Return: + * Upon successful completion, scif_unregister() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. In the event of an + * error, no windows are unregistered. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a + * window, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_unregister(scif_epd_t epd, off_t offset, size_t len); + +/** + * scif_readfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space to + * which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_readfrom() copies len bytes from the remote registered address space of + * the peer of 
endpoint epd, starting at the offset roffset to the local + * registered address space of epd, starting at the offset loffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags + * includes SCIF_RMA_SYNC, then scif_readfrom() will return after the transfer + * is complete. Otherwise, the transfer may be performed asynchronously. The + * order in which any two asynchronous RMA operations complete is + * non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if loffset and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_readfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCES - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or the range [roffset, roffset + len - 1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_writeto() - Copy to a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space + * from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_writeto() copies len bytes from the local registered address space of + * epd, starting at the offset loffset to the remote registered address space + * of the peer of endpoint
epd, starting at the offset roffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags + * includes SCIF_RMA_SYNC, then scif_writeto() will return after the transfer + * is complete. Otherwise, the transfer may be performed asynchronously. The + * order in which any two asynchronous RMA operations complete is + * non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if loffset and roffset are not separated by a multiple + * of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_writeto() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCES - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or the range [roffset, roffset + len - 1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_vreadfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @addr: address to which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_vreadfrom() copies len bytes from the remote registered address + * space of the peer of endpoint epd, starting at the offset roffset, to local + * memory, starting at addr.
+ * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may + * intersect multiple registered windows, but only if those windows are + * contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags + * includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the transfer + * is complete. Otherwise, the transfer may be performed asynchronously. The + * order in which any two asynchronous RMA operations complete is + * non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if addr and roffset are not separated by a + * multiple of 64.
+ * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - enable registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. 
+ * + * Errors: + * EACCES - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of the peer of epd + */ +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_vwriteto() - Copy to a remote address space + * @epd: endpoint descriptor + * @addr: address from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_vwriteto() copies len bytes from the local memory, starting at addr, to + * the remote registered address space of the peer of endpoint epd, starting at + * the offset roffset. + * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may intersect + * multiple registered windows, but only if those windows are contiguous in the + * registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags + * includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the transfer + * is complete. Otherwise, the transfer may be performed asynchronously. The + * order in which any two asynchronous RMA operations complete is + * non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint.
+ * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and roffset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if addr and roffset are not separated by a multiple of + * 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - allow registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vwriteto() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCES - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of the peer of epd + */ +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_fence_mark() - Mark previously issued RMAs + * @epd: endpoint descriptor + * @flags: control flags + * @mark: marked value returned as output. + * + * scif_fence_mark() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are + * marked with a value returned at mark. The application may subsequently call + * scif_fence_wait(), passing the value returned at mark, to await completion + * of all RMAs so marked. + * + * The flags argument has exactly one of the following values.
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * + * Return: + * Upon successful completion, scif_fence_mark() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_mark(scif_epd_t epd, int flags, int *mark); + +/** + * scif_fence_wait() - Wait for completion of marked RMAs + * @epd: endpoint descriptor + * @mark: mark request + * + * scif_fence_wait() returns after all RMAs marked with mark have completed. + * The value passed in mark must have been obtained in a previous call to + * scif_fence_mark(). + * + * Return: + * Upon successful completion, scif_fence_wait() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_wait(scif_epd_t epd, int mark); + +/** + * scif_fence_signal() - Request a memory update on completion of RMAs + * @epd: endpoint descriptor + * @loff: local offset + * @lval: local value to write to loff + * @roff: remote offset + * @rval: remote value to write to roff + * @flags: flags + * + * scif_fence_signal() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or marking the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. + * + * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the + * marked set, lval is written to memory at the address corresponding to offset + * loff in the local registered address space of epd. loff must be within a + * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion + * of the RMAs in the marked set, rval is written to memory at the address + * corresponding to offset roff in the remote registered address space of epd. + * roff must be within a remote registered window of the peer of epd. Note + * that any specified offset must be DWORD (4 byte / 32 bit) aligned. + * + * The flags argument is formed by OR'ing together the following. + * Exactly one of the following values. + * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * One or more of the following values.
+ * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to + * memory at the address corresponding to offset loff in the local + * registered address space of epd. + * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to + * memory at the address corresponding to offset roff in the remote + * registered address space of epd. + * + * Return: + * Upon successful completion, scif_fence_signal() returns 0; otherwise in + * user mode -1 is returned and errno is set to indicate the error; in kernel + * mode the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid, or loff or roff are not DWORD aligned + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - loff is invalid for the registered address space of epd, or roff is + * invalid for the registered address space of the peer of epd + */ +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, + u64 rval, int flags); + +/** + * scif_get_node_ids() - Return information about online nodes + * @nodes: array in which to return online node IDs + * @len: number of entries in the nodes array + * @self: address to place the node ID of the local node + * + * scif_get_node_ids() fills in the nodes array with up to len node IDs of the + * nodes in the SCIF network. If there is not enough space in nodes, as + * indicated by the len parameter, only len node IDs are returned in nodes. The + * return value of scif_get_node_ids() is the total number of nodes currently in + * the SCIF network. By checking the return value against the len parameter, + * the user may determine if enough space for nodes was allocated. + * + * The node ID of the local node is returned at self.
+ * + * Return: + * Upon successful completion, scif_get_node_ids() returns the actual number of + * online nodes in the SCIF network including 'self'; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode no + * errors are returned. + * + * Errors: + * EFAULT - Bad address + */ +int scif_get_node_ids(u16 *nodes, int len, u16 *self); + +#endif /* __SCIF_H__ */ diff --git a/include/linux/security.h b/include/linux/security.h index 52febde52479..79d85ddf8093 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/err.h> #include <linux/string.h> +#include <linux/mm.h> struct linux_binprm; struct cred; @@ -53,9 +54,6 @@ struct xattr; struct xfrm_sec_ctx; struct mm_struct; -/* Maximum number of letters for an LSM name string */ -#define SECURITY_NAME_MAX 10 - /* If capable should audit the security request */ #define SECURITY_CAP_NOAUDIT 0 #define SECURITY_CAP_AUDIT 1 @@ -68,10 +66,7 @@ struct audit_krule; struct user_namespace; struct timezone; -/* - * These functions are in security/capability.c and are used - * as the default capabilities functions - */ +/* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); extern int cap_settime(const struct timespec *ts, const struct timezone *tz); @@ -113,10 +108,6 @@ struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); - -void reset_security_ops(void); - #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; @@ -187,1583 +178,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/** - * struct security_operations - main security structure - * - * Security module identifier. 
- * - * @name: - * A string that acts as a unique identifier for the LSM with max number - * of characters = SECURITY_NAME_MAX. - * - * Security hooks for program execution operations. - * - * @bprm_set_creds: - * Save security information in the bprm->security field, typically based - * on information about the bprm->file, for later use by the apply_creds - * hook. This hook may also optionally check permissions (e.g. for - * transitions between security domains). - * This hook may be called multiple times during a single execve, e.g. for - * interpreters. The hook can tell whether it has already been called by - * checking to see if @bprm->security is non-NULL. If so, then the hook - * may decide either to retain the security information saved earlier or - * to replace it. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in the - * preceding set_creds call. The primary difference from set_creds is - * that the argv list and envp list are reliably available in @bprm. This - * hook may be called multiple times during a single execve; and in each - * pass set_creds is called first. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_committing_creds: - * Prepare to install the new security attributes of a process being - * transformed by an execve operation, based on the old credentials - * pointed to by @current->cred and the information set in @bprm->cred by - * the bprm_set_creds hook. @bprm points to the linux_binprm structure. - * This hook is a good place to perform state changes on the process such - * as closing open file descriptors to which access will no longer be - * granted when the attributes are changed. 
This is called immediately - * before commit_creds(). - * @bprm_committed_creds: - * Tidy up after the installation of the new security attributes of a - * process being transformed by an execve operation. The new credentials - * have, by this point, been set to @current->cred. @bprm points to the - * linux_binprm structure. This hook is a good place to perform state - * changes on the process such as clearing out non-inheritable signal - * state. This is called immediately after commit_creds(). - * @bprm_secureexec: - * Return a boolean value (0 or 1) indicating whether a "secure exec" - * is required. The flag is passed in the auxiliary table - * on the initial stack to the ELF interpreter to indicate whether libc - * should enable secure mode. - * @bprm contains the linux_binprm structure. - * - * Security hooks for filesystem operations. - * - * @sb_alloc_security: - * Allocate and attach a security structure to the sb->s_security field. - * The s_security field is initialized to NULL when the structure is - * allocated. - * @sb contains the super_block structure to be modified. - * Return 0 if operation was successful. - * @sb_free_security: - * Deallocate and clear the sb->s_security field. - * @sb contains the super_block structure to be modified. - * @sb_statfs: - * Check permission before obtaining filesystem statistics for the @mnt - * mountpoint. - * @dentry is a handle on the superblock for the filesystem. - * Return 0 if permission is granted. - * @sb_mount: - * Check permission before an object specified by @dev_name is mounted on - * the mount point named by @nd. For an ordinary mount, @dev_name - * identifies a device if the file system type requires a device. For a - * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a - * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the - * pathname of the object being mounted. - * @dev_name contains the name for object being mounted. - * @path contains the path for mount point object. 
- * @type contains the filesystem type. - * @flags contains the mount flags. - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_copy_data: - * Allow mount option data to be copied prior to parsing by the filesystem, - * so that the security module can extract security-specific mount - * options cleanly (a filesystem may modify the data e.g. with strsep()). - * This also allows the original mount data to be stripped of security- - * specific options to avoid having to make filesystems aware of them. - * @type the type of filesystem being mounted. - * @orig the original mount data copied from userspace. - * @copy copied data which will be passed to the security module. - * Returns 0 if the copy was successful. - * @sb_remount: - * Extracts security system specific mount options and verifies no changes - * are being made to those options. - * @sb superblock being remounted - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_umount: - * Check permission before the @mnt file system is unmounted. - * @mnt contains the mounted file system. - * @flags contains the unmount flags, e.g. MNT_FORCE. - * Return 0 if permission is granted. - * @sb_pivotroot: - * Check permission before pivoting the root filesystem. - * @old_path contains the path for the new location of the current root (put_old). - * @new_path contains the path for the new root (new_root). - * Return 0 if permission is granted. 
- * @sb_set_mnt_opts: - * Set the security relevant mount options used for a superblock - * @sb the superblock to set security mount options for - * @opts binary data structure containing all lsm mount data - * @sb_clone_mnt_opts: - * Copy all security options from a given superblock to another - * @oldsb old superblock which contain information to clone - * @newsb new superblock which needs filled in - * @sb_parse_opts_str: - * Parse a string of security data filling in the opts structure - * @options string containing all mount options known by the LSM - * @opts binary data structure usable by the LSM - * @dentry_init_security: - * Compute a context for a dentry as the inode is not yet available - * since NFSv4 has no label backed by an EA anyway. - * @dentry dentry to use in calculating the context. - * @mode mode used to determine resource type. - * @name name of the last path component used to create file - * @ctx pointer to place the pointer to the resulting context in. - * @ctxlen point to place the length of the resulting context. - * - * - * Security hooks for inode operations. - * - * @inode_alloc_security: - * Allocate and attach a security structure to @inode->i_security. The - * i_security field is initialized to NULL when the inode structure is - * allocated. - * @inode contains the inode structure. - * Return 0 if operation was successful. - * @inode_free_security: - * @inode contains the inode structure. - * Deallocate the inode security structure and set @inode->i_security to - * NULL. - * @inode_init_security: - * Obtain the security attribute name suffix and value to set on a newly - * created inode and set up the incore security field for the new inode. - * This hook is called by the fs code as part of the inode creation - * transaction and provides for atomic labeling of the inode, unlike - * the post_create/mkdir/... hooks called by the VFS. 
The hook function - * is expected to allocate the name and value via kmalloc, with the caller - * being responsible for calling kfree after using them. - * If the security module does not use security attributes or does - * not wish to put a security attribute on this particular inode, - * then it should return -EOPNOTSUPP to skip this processing. - * @inode contains the inode structure of the newly created inode. - * @dir contains the inode structure of the parent directory. - * @qstr contains the last path component of the new object - * @name will be set to the allocated name suffix (e.g. selinux). - * @value will be set to the allocated attribute value. - * @len will be set to the length of the value. - * Returns 0 if @name and @value have been successfully set, - * -EOPNOTSUPP if no security attribute is needed, or - * -ENOMEM on memory allocation failure. - * @inode_create: - * Check permission to create a regular file. - * @dir contains inode structure of the parent of the new file. - * @dentry contains the dentry structure for the file to be created. - * @mode contains the file mode of the file to be created. - * Return 0 if permission is granted. - * @inode_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link to the file. - * @dir contains the inode structure of the parent directory of the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @path_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link - * to the file. - * @new_dir contains the path structure of the parent directory of - * the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @inode_unlink: - * Check the permission to remove a hard link to a file. 
- * @dir contains the inode structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @path_unlink: - * Check the permission to remove a hard link to a file. - * @dir contains the path structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @inode_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the inode structure of parent directory of the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @path_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the path structure of parent directory of - * the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @inode_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with inode structure @dir. - * @dir contains the inode structure of parent of the directory to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @path_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with path structure @path. - * @dir contains the path structure of parent of the directory - * to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @inode_rmdir: - * Check the permission to remove a directory. - * @dir contains the inode structure of parent of the directory to be removed. - * @dentry contains the dentry structure of directory to be removed. 
- * Return 0 if permission is granted. - * @path_rmdir: - * Check the permission to remove a directory. - * @dir contains the path structure of parent of the directory to be - * removed. - * @dentry contains the dentry structure of directory to be removed. - * Return 0 if permission is granted. - * @inode_mknod: - * Check permissions when creating a special file (or a socket or a fifo - * file created via the mknod system call). Note that if mknod operation - * is being done for a regular file, then the create hook will be called - * and not this hook. - * @dir contains the inode structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the device number. - * Return 0 if permission is granted. - * @path_mknod: - * Check permissions when creating a file. Note that this hook is called - * even if mknod operation is being done for a regular file. - * @dir contains the path structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the undecoded device number. Use new_decode_dev() to get - * the decoded device number. - * Return 0 if permission is granted. - * @inode_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the inode structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the inode structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. - * Return 0 if permission is granted. - * @path_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the path structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the path structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. 
- * Return 0 if permission is granted. - * @path_chmod: - * Check for permission to change DAC's permission of a file or directory. - * @dentry contains the dentry structure. - * @mnt contains the vfsmnt structure. - * @mode contains DAC's mode. - * Return 0 if permission is granted. - * @path_chown: - * Check for permission to change owner/group of a file or directory. - * @path contains the path structure. - * @uid contains new owner's ID. - * @gid contains new group's ID. - * Return 0 if permission is granted. - * @path_chroot: - * Check for permission to change root directory. - * @path contains the path structure. - * Return 0 if permission is granted. - * @inode_readlink: - * Check the permission to read the symbolic link. - * @dentry contains the dentry structure for the file link. - * Return 0 if permission is granted. - * @inode_follow_link: - * Check permission to follow a symbolic link when looking up a pathname. - * @dentry contains the dentry structure for the link. - * @inode contains the inode, which itself is not stable in RCU-walk - * @rcu indicates whether we are in RCU-walk mode. - * Return 0 if permission is granted. - * @inode_permission: - * Check permission before accessing an inode. This hook is called by the - * existing Linux permission function, so a security module can use it to - * provide additional checking for existing Linux permission checks. - * Notice that this hook is called when a file is opened (as well as many - * other operations), whereas the file_security_ops permission hook is - * called when the actual read/write operations are performed. - * @inode contains the inode structure to check. - * @mask contains the permission mask. - * Return 0 if permission is granted. - * @inode_setattr: - * Check permission before setting file attributes. 
Note that the kernel - * call to notify_change is performed from several locations, whenever - * file attributes change (such as when a file is truncated, chown/chmod - * operations, transferring disk quotas, etc). - * @dentry contains the dentry structure for the file. - * @attr is the iattr structure containing the new file attributes. - * Return 0 if permission is granted. - * @path_truncate: - * Check permission before truncating a file. - * @path contains the path structure for the file. - * Return 0 if permission is granted. - * @inode_getattr: - * Check permission before obtaining file attributes. - * @mnt is the vfsmount where the dentry was looked up - * @dentry contains the dentry structure for the file. - * Return 0 if permission is granted. - * @inode_setxattr: - * Check permission before setting the extended attributes - * @value identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_post_setxattr: - * Update inode security field after successful setxattr operation. - * @value identified by @name for @dentry. - * @inode_getxattr: - * Check permission before obtaining the extended attributes - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_listxattr: - * Check permission before obtaining the list of extended attribute - * names for @dentry. - * Return 0 if permission is granted. - * @inode_removexattr: - * Check permission before removing the extended attribute - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_getsecurity: - * Retrieve a copy of the extended attribute representation of the - * security label associated with @name for @inode via @buffer. Note that - * @name is the remainder of the attribute name after the security prefix - * has been removed. @alloc is used to specify if the call should return a - * value via the buffer or just the value length. Return size of buffer on - * success. 
- * @inode_setsecurity: - * Set the security label associated with @name for @inode from the - * extended attribute value @value. @size indicates the size of the - * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. - * Note that @name is the remainder of the attribute name after the - * security. prefix has been removed. - * Return 0 on success. - * @inode_listsecurity: - * Copy the extended attribute names for the security labels - * associated with @inode into @buffer. The maximum size of @buffer - * is specified by @buffer_size. @buffer may be NULL to request - * the size of the buffer required. - * Returns number of bytes used/required on success. - * @inode_need_killpriv: - * Called when an inode has been changed. - * @dentry is the dentry being changed. - * Return <0 on error to abort the inode change operation. - * Return 0 if inode_killpriv does not need to be called. - * Return >0 if inode_killpriv does need to be called. - * @inode_killpriv: - * The setuid bit is being removed. Remove similar security labels. - * Called with the dentry->d_inode->i_mutex held. - * @dentry is the dentry being changed. - * Return 0 on success. If error is returned, then the operation - * causing setuid bit removal is failed. - * @inode_getsecid: - * Get the secid associated with the inode. - * @inode contains a pointer to the inode. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. - * - * Security hooks for file operations - * - * @file_permission: - * Check file permissions before accessing an open file. This hook is - * called by various operations that read or write files. A security - * module can use this hook to perform additional checking on these - * operations, e.g. to revalidate permissions on use to support privilege - * bracketing or policy changes. 
Notice that this hook is used when the - * actual read/write operations are performed, whereas the - * inode_security_ops hook is called when a file is opened (as well as - * many other operations). - * Caveat: Although this hook can be used to revalidate permissions for - * various system call operations that read or write files, it does not - * address the revalidation of permissions for memory-mapped files. - * Security modules must handle this separately if they need such - * revalidation. - * @file contains the file structure being accessed. - * @mask contains the requested permissions. - * Return 0 if permission is granted. - * @file_alloc_security: - * Allocate and attach a security structure to the file->f_security field. - * The security field is initialized to NULL when the structure is first - * created. - * @file contains the file structure to secure. - * Return 0 if the hook is successful and permission is granted. - * @file_free_security: - * Deallocate and free any security structures stored in file->f_security. - * @file contains the file structure being modified. - * @file_ioctl: - * @file contains the file structure. - * @cmd contains the operation to perform. - * @arg contains the operational arguments. - * Check permission for an ioctl operation on @file. Note that @arg - * sometimes represents a user space pointer; in other cases, it may be a - * simple integer value. When @arg represents a user space pointer, it - * should never be used by the security module. - * Return 0 if permission is granted. - * @mmap_addr : - * Check permissions for a mmap operation at @addr. - * @addr contains virtual address that will be used for the operation. - * Return 0 if permission is granted. - * @mmap_file : - * Check permissions for a mmap operation. The @file may be NULL, e.g. - * if mapping anonymous memory. - * @file contains the file structure for file to map (may be NULL). - * @reqprot contains the protection requested by the application. 
- * @prot contains the protection that will be applied by the kernel. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @file_mprotect: - * Check permissions before changing memory access permissions. - * @vma contains the memory region to modify. - * @reqprot contains the protection requested by the application. - * @prot contains the protection that will be applied by the kernel. - * Return 0 if permission is granted. - * @file_lock: - * Check permission before performing file locking operations. - * Note: this hook mediates both flock and fcntl style locks. - * @file contains the file structure. - * @cmd contains the posix-translated lock operation to perform - * (e.g. F_RDLCK, F_WRLCK). - * Return 0 if permission is granted. - * @file_fcntl: - * Check permission before allowing the file operation specified by @cmd - * from being performed on the file @file. Note that @arg sometimes - * represents a user space pointer; in other cases, it may be a simple - * integer value. When @arg represents a user space pointer, it should - * never be used by the security module. - * @file contains the file structure. - * @cmd contains the operation to be performed. - * @arg contains the operational arguments. - * Return 0 if permission is granted. - * @file_set_fowner: - * Save owner security information (typically from current->security) in - * file->f_security for later use by the send_sigiotask hook. - * @file contains the file structure to update. - * Return 0 on success. - * @file_send_sigiotask: - * Check permission for the file owner @fown to send SIGIO or SIGURG to the - * process @tsk. Note that this hook is sometimes called from interrupt. - * Note that the fown_struct, @fown, is never outside the context of a - * struct file, so the file structure (and associated security information) - * can always be obtained: - * container_of(fown, struct file, f_owner) - * @tsk contains the structure of task receiving signal. 
- * @fown contains the file owner information. - * @sig is the signal that will be sent. When 0, kernel sends SIGIO. - * Return 0 if permission is granted. - * @file_receive: - * This hook allows security modules to control the ability of a process - * to receive an open file descriptor via socket IPC. - * @file contains the file structure being received. - * Return 0 if permission is granted. - * @file_open - * Save open-time permission checking state for later use upon - * file_permission, and recheck access if anything has changed - * since inode_permission. - * - * Security hooks for task operations. - * - * @task_create: - * Check permission before creating a child process. See the clone(2) - * manual page for definitions of the @clone_flags. - * @clone_flags contains the flags indicating what should be shared. - * Return 0 if permission is granted. - * @task_free: - * @task task being freed - * Handle release of task-related resources. (Note that this can be called - * from interrupt context.) - * @cred_alloc_blank: - * @cred points to the credentials. - * @gfp indicates the atomicity of any memory allocations. - * Only allocate sufficient memory and attach to @cred such that - * cred_transfer() will not get ENOMEM. - * @cred_free: - * @cred points to the credentials. - * Deallocate and clear the cred->security field in a set of credentials. - * @cred_prepare: - * @new points to the new credentials. - * @old points to the original credentials. - * @gfp indicates the atomicity of any memory allocations. - * Prepare a new set of credentials by copying the data from the old set. - * @cred_transfer: - * @new points to the new credentials. - * @old points to the original credentials. - * Transfer data from original creds to new creds - * @kernel_act_as: - * Set the credentials for a kernel service to act as (subjective context). - * @new points to the credentials to be modified. 
- * @secid specifies the security ID to be set - * The current task must be the one that nominated @secid. - * Return 0 if successful. - * @kernel_create_files_as: - * Set the file creation context in a set of credentials to be the same as - * the objective context of the specified inode. - * @new points to the credentials to be modified. - * @inode points to the inode to use as a reference. - * The current task must be the one that nominated @inode. - * Return 0 if successful. - * @kernel_fw_from_file: - * Load firmware from userspace (not called for built-in firmware). - * @file contains the file structure pointing to the file containing - * the firmware to load. This argument will be NULL if the firmware - * was loaded via the uevent-triggered blob-based interface exposed - * by CONFIG_FW_LOADER_USER_HELPER. - * @buf pointer to buffer containing firmware contents. - * @size length of the firmware contents. - * Return 0 if permission is granted. - * @kernel_module_request: - * Ability to trigger the kernel to automatically upcall to userspace for - * userspace to load a kernel module with the given name. - * @kmod_name name of the module requested by the kernel - * Return 0 if successful. - * @kernel_module_from_file: - * Load a kernel module from userspace. - * @file contains the file structure pointing to the file containing - * the kernel module to load. If the module is being loaded from a blob, - * this argument will be NULL. - * Return 0 if permission is granted. - * @task_fix_setuid: - * Update the module's state after setting one or more of the user - * identity attributes of the current process. The @flags parameter - * indicates which of the set*uid system calls invoked this hook. - * @new is the set of credentials that will be installed. Modifications - * should be made to this rather than to @current->cred. - * @old is the set of credentials that are being replaced - * @flags contains one of the LSM_SETID_* values. - * Return 0 on success. 
- * @task_setpgid: - * Check permission before setting the process group identifier of the - * process @p to @pgid. - * @p contains the task_struct for process being modified. - * @pgid contains the new pgid. - * Return 0 if permission is granted. - * @task_getpgid: - * Check permission before getting the process group identifier of the - * process @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsid: - * Check permission before getting the session identifier of the process - * @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsecid: - * Retrieve the security identifier of the process @p. - * @p contains the task_struct for the process and place is into @secid. - * In case of failure, @secid will be set to zero. - * - * @task_setnice: - * Check permission before setting the nice value of @p to @nice. - * @p contains the task_struct of process. - * @nice contains the new nice value. - * Return 0 if permission is granted. - * @task_setioprio - * Check permission before setting the ioprio value of @p to @ioprio. - * @p contains the task_struct of process. - * @ioprio contains the new ioprio value - * Return 0 if permission is granted. - * @task_getioprio - * Check permission before getting the ioprio value of @p. - * @p contains the task_struct of process. - * Return 0 if permission is granted. - * @task_setrlimit: - * Check permission before setting the resource limits of the current - * process for @resource to @new_rlim. The old resource limit values can - * be examined by dereferencing (current->signal->rlim + resource). - * @resource contains the resource whose limit is being set. - * @new_rlim contains the new limits for @resource. - * Return 0 if permission is granted. - * @task_setscheduler: - * Check permission before setting scheduling policy and/or parameters of - * process @p based on @policy and @lp. - * @p contains the task_struct for process. 
- * @policy contains the scheduling policy. - * @lp contains the scheduling parameters. - * Return 0 if permission is granted. - * @task_getscheduler: - * Check permission before obtaining scheduling information for process - * @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_movememory - * Check permission before moving memory owned by process @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_kill: - * Check permission before sending signal @sig to @p. @info can be NULL, - * the constant 1, or a pointer to a siginfo structure. If @info is 1 or - * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming - * from the kernel and should typically be permitted. - * SIGIO signals are handled separately by the send_sigiotask hook in - * file_security_ops. - * @p contains the task_struct for process. - * @info contains the signal information. - * @sig contains the signal value. - * @secid contains the sid of the process where the signal originated - * Return 0 if permission is granted. - * @task_wait: - * Check permission before allowing a process to reap a child process @p - * and collect its status information. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_prctl: - * Check permission before performing a process control operation on the - * current process. - * @option contains the operation. - * @arg2 contains an argument. - * @arg3 contains an argument. - * @arg4 contains an argument. - * @arg5 contains an argument. - * Return -ENOSYS if no-one wanted to handle this op, any other value to - * cause prctl() to return immediately with that value. - * @task_to_inode: - * Set the security attributes for an inode based on an associated task's - * security attributes, e.g. for /proc/pid inodes. - * @p contains the task_struct for the task. - * @inode contains the inode structure for the inode. 
- * - * Security hooks for Netlink messaging. - * - * @netlink_send: - * Save security information for a netlink message so that permission - * checking can be performed when the message is processed. The security - * information can be saved using the eff_cap field of the - * netlink_skb_parms structure. Also may be used to provide fine - * grained control over message transmission. - * @sk associated sock of task sending the message. - * @skb contains the sk_buff structure for the netlink message. - * Return 0 if the information was successfully saved and message - * is allowed to be transmitted. - * - * Security hooks for Unix domain networking. - * - * @unix_stream_connect: - * Check permissions before establishing a Unix domain stream connection - * between @sock and @other. - * @sock contains the sock structure. - * @other contains the peer sock structure. - * @newsk contains the new sock structure. - * Return 0 if permission is granted. - * @unix_may_send: - * Check permissions before connecting or sending datagrams from @sock to - * @other. - * @sock contains the socket structure. - * @other contains the peer socket structure. - * Return 0 if permission is granted. - * - * The @unix_stream_connect and @unix_may_send hooks were necessary because - * Linux provides an alternative to the conventional file name space for Unix - * domain sockets. Whereas binding and connecting to sockets in the file name - * space is mediated by the typical file permissions (and caught by the mknod - * and permission hooks in inode_security_ops), binding and connecting to - * sockets in the abstract name space is completely unmediated. Sufficient - * control of Unix domain sockets in the abstract name space isn't possible - * using only the socket layer hooks, since we need to know the actual target - * socket, which is not looked up until we are inside the af_unix code. - * - * Security hooks for socket operations. 
- * - * @socket_create: - * Check permissions prior to creating a new socket. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * Return 0 if permission is granted. - * @socket_post_create: - * This hook allows a module to update or allocate a per-socket security - * structure. Note that the security field was not added directly to the - * socket structure, but rather, the socket security information is stored - * in the associated inode. Typically, the inode alloc_security hook will - * allocate and attach security information to - * sock->inode->i_security. This hook may be used to update the - * sock->inode->i_security field with additional information that wasn't - * available when the inode was allocated. - * @sock contains the newly created socket structure. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * @socket_bind: - * Check permission before socket protocol layer bind operation is - * performed and the socket @sock is bound to the address specified in the - * @address parameter. - * @sock contains the socket structure. - * @address contains the address to bind to. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_connect: - * Check permission before socket protocol layer connect operation - * attempts to connect socket @sock to a remote address, @address. - * @sock contains the socket structure. - * @address contains the address of remote endpoint. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_listen: - * Check permission before socket protocol layer listen operation. - * @sock contains the socket structure. 
- * @backlog contains the maximum length for the pending connection queue. - * Return 0 if permission is granted. - * @socket_accept: - * Check permission before accepting a new connection. Note that the new - * socket, @newsock, has been created and some information copied to it, - * but the accept operation has not actually been performed. - * @sock contains the listening socket structure. - * @newsock contains the newly created server socket for connection. - * Return 0 if permission is granted. - * @socket_sendmsg: - * Check permission before transmitting a message to another socket. - * @sock contains the socket structure. - * @msg contains the message to be transmitted. - * @size contains the size of message. - * Return 0 if permission is granted. - * @socket_recvmsg: - * Check permission before receiving a message from a socket. - * @sock contains the socket structure. - * @msg contains the message structure. - * @size contains the size of message structure. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @socket_getsockname: - * Check permission before the local address (name) of the socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getpeername: - * Check permission before the remote address (name) of a socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getsockopt: - * Check permissions before retrieving the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to retrieve option from. - * @optname contains the name of option to retrieve. - * Return 0 if permission is granted. - * @socket_setsockopt: - * Check permissions before setting the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to set options for. 
- * @optname contains the name of the option to set. - * Return 0 if permission is granted. - * @socket_shutdown: - * Checks permission before all or part of a connection on the socket - * @sock is shut down. - * @sock contains the socket structure. - * @how contains the flag indicating how future sends and receives are handled. - * Return 0 if permission is granted. - * @socket_sock_rcv_skb: - * Check permissions on incoming network packets. This hook is distinct - * from Netfilter's IP input hooks since it is the first time that the - * incoming sk_buff @skb has been associated with a particular socket, @sk. - * Must not sleep inside this hook because some callers hold spinlocks. - * @sk contains the sock (not socket) associated with the incoming sk_buff. - * @skb contains the incoming network data. - * @socket_getpeersec_stream: - * This hook allows the security module to provide peer socket security - * state for unix or connected tcp sockets to userspace via getsockopt - * SO_GETPEERSEC. For tcp sockets this can be meaningful if the - * socket is associated with an ipsec SA. - * @sock is the local socket. - * @optval userspace memory where the security state is to be copied. - * @optlen userspace int where the module should copy the actual length - * of the security state. - * @len as input is the maximum length to copy to userspace provided - * by the caller. - * Return 0 if all is well, otherwise, typical getsockopt return - * values. - * @socket_getpeersec_dgram: - * This hook allows the security module to provide peer socket security - * state for udp sockets on a per-packet basis to userspace via - * getsockopt SO_GETPEERSEC. The application must first have indicated - * the IP_PASSSEC option via getsockopt. It can then retrieve the - * security state returned by this hook for a packet via the SCM_SECURITY - * ancillary message type. 
- * @skb is the skbuff for the packet being queried - * @secdata is a pointer to a buffer in which to copy the security data - * @seclen is the maximum length for @secdata - * Return 0 on success, error on failure. - * @sk_alloc_security: - * Allocate and attach a security structure to the sk->sk_security field, - * which is used to copy security attributes between local stream sockets. - * @sk_free_security: - * Deallocate security structure. - * @sk_clone_security: - * Clone/copy security structure. - * @sk_getsecid: - * Retrieve the LSM-specific secid for the sock to enable caching of network - * authorizations. - * @sock_graft: - * Sets the socket's isec sid to the sock's sid. - * @inet_conn_request: - * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. - * @inet_csk_clone: - * Sets the new child socket's sid to the openreq sid. - * @inet_conn_established: - * Sets the connection's peersid to the secmark on skb. - * @secmark_relabel_packet: - * check if the process should be allowed to relabel packets to the given secid - * @security_secmark_refcount_inc - * tells the LSM to increment the number of secmark labeling rules loaded - * @security_secmark_refcount_dec - * tells the LSM to decrement the number of secmark labeling rules loaded - * @req_classify_flow: - * Sets the flow's sid to the openreq sid. - * @tun_dev_alloc_security: - * This hook allows a module to allocate a security structure for a TUN - * device. - * @security pointer to a security structure pointer. - * Returns a zero on success, negative values on failure. - * @tun_dev_free_security: - * This hook allows a module to free the security structure for a TUN - * device. - * @security pointer to the TUN device's security structure - * @tun_dev_create: - * Check permissions prior to creating a new TUN device. - * @tun_dev_attach_queue: - * Check permissions prior to attaching to a TUN device queue. - * @security pointer to the TUN device's security structure. 
- * @tun_dev_attach: - * This hook can be used by the module to update any security state - * associated with the TUN device's sock structure. - * @sk contains the existing sock structure. - * @security pointer to the TUN device's security structure. - * @tun_dev_open: - * This hook can be used by the module to update any security state - * associated with the TUN device's security structure. - * @security pointer to the TUN devices's security structure. - * @skb_owned_by: - * This hook sets the packet's owning sock. - * @skb is the packet. - * @sk the sock which owns the packet. - * - * Security hooks for XFRM operations. - * - * @xfrm_policy_alloc_security: - * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy - * Database used by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. - * Return 0 if operation was successful (memory to allocate, legal context) - * @gfp is to specify the context for the allocation - * @xfrm_policy_clone_security: - * @old_ctx contains an existing xfrm_sec_ctx. - * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. - * Allocate a security structure in new_ctxp that contains the - * information from the old_ctx structure. - * Return 0 if operation was successful (memory to allocate). - * @xfrm_policy_free_security: - * @ctx contains the xfrm_sec_ctx - * Deallocate xp->security. - * @xfrm_policy_delete_security: - * @ctx contains the xfrm_sec_ctx. - * Authorize deletion of xp->security. - * @xfrm_state_alloc: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level SA generation program (e.g., setkey or racoon). 
- * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to sec_ctx. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_alloc_acquire: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @polsec contains the policy's security context. - * @secid contains the secid from which to take the mls portion of the - * context. - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to secid. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_free_security: - * @x contains the xfrm_state. - * Deallocate x->security. - * @xfrm_state_delete_security: - * @x contains the xfrm_state. - * Authorize deletion of x->security. - * @xfrm_policy_lookup: - * @ctx contains the xfrm_sec_ctx for which the access control is being - * checked. - * @fl_secid contains the flow security label that is used to authorize - * access to the policy xp. - * @dir contains the direction of the flow (input or output). - * Check permission when a flow selects a xfrm_policy for processing - * XFRMs on a packet. The hook is called when selecting either a - * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted, -ESRCH otherwise, or -errno - * on other errors. - * @xfrm_state_pol_flow_match: - * @x contains the state to match. - * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. - * Return 1 if there is a match. - * @xfrm_decode_session: - * @skb points to skb to decode. - * @secid points to the flow key secid to set. - * @ckall says if all xfrms used should be checked for same secid. - * Return 0 if ckall is zero or all xfrms used have the same secid. 
- * - * Security hooks affecting all Key Management operations - * - * @key_alloc: - * Permit allocation of a key and assign security data. Note that key does - * not have a serial number assigned at this point. - * @key points to the key. - * @flags is the allocation flags - * Return 0 if permission is granted, -ve error otherwise. - * @key_free: - * Notification of destruction; free security data. - * @key points to the key. - * No return value. - * @key_permission: - * See whether a specific operational right is granted to a process on a - * key. - * @key_ref refers to the key (key pointer + possession attribute bit). - * @cred points to the credentials to provide the context against which to - * evaluate the security data on the key. - * @perm describes the combination of permissions required of this key. - * Return 0 if permission is granted, -ve error otherwise. - * @key_getsecurity: - * Get a textual representation of the security context attached to a key - * for the purposes of honouring KEYCTL_GETSECURITY. This function - * allocates the storage for the NUL-terminated string and the caller - * should free it. - * @key points to the key to be queried. - * @_buffer points to a pointer that should be set to point to the - * resulting string (if no label or an error occurs). - * Return the length of the string (including terminating NUL) or -ve if - * an error. - * May also return 0 (and a NULL buffer pointer) if there is no label. - * - * Security hooks affecting all System V IPC operations. - * - * @ipc_permission: - * Check permissions for access to IPC - * @ipcp contains the kernel IPC permission structure - * @flag contains the desired (requested) permission set - * Return 0 if permission is granted. - * @ipc_getsecid: - * Get the secid associated with the ipc object. - * @ipcp contains the kernel IPC permission structure. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. 
- * - * Security hooks for individual messages held in System V IPC message queues - * @msg_msg_alloc_security: - * Allocate and attach a security structure to the msg->security field. - * The security field is initialized to NULL when the structure is first - * created. - * @msg contains the message structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_msg_free_security: - * Deallocate the security structure for this message. - * @msg contains the message structure to be modified. - * - * Security hooks for System V IPC Message Queues - * - * @msg_queue_alloc_security: - * Allocate and attach a security structure to the - * msq->q_perm.security field. The security field is initialized to - * NULL when the structure is first created. - * @msq contains the message queue structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_queue_free_security: - * Deallocate security structure for this message queue. - * @msq contains the message queue structure to be modified. - * @msg_queue_associate: - * Check permission when a message queue is requested through the - * msgget system call. This hook is only called when returning the - * message queue identifier for an existing message queue, not when a - * new message queue is created. - * @msq contains the message queue to act upon. - * @msqflg contains the operation control flags. - * Return 0 if permission is granted. - * @msg_queue_msgctl: - * Check permission when a message control operation specified by @cmd - * is to be performed on the message queue @msq. - * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. - * @msq contains the message queue to act upon. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @msg_queue_msgsnd: - * Check permission before a message, @msg, is enqueued on the message - * queue, @msq. - * @msq contains the message queue to send message to. 
- * @msg contains the message to be enqueued. - * @msqflg contains operational flags. - * Return 0 if permission is granted. - * @msg_queue_msgrcv: - * Check permission before a message, @msg, is removed from the message - * queue, @msq. The @target task structure contains a pointer to the - * process that will be receiving the message (not equal to the current - * process when inline receives are being performed). - * @msq contains the message queue to retrieve message from. - * @msg contains the message destination. - * @target contains the task structure for recipient process. - * @type contains the type of message requested. - * @mode contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Shared Memory Segments - * - * @shm_alloc_security: - * Allocate and attach a security structure to the shp->shm_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @shp contains the shared memory structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @shm_free_security: - * Deallocate the security struct for this memory segment. - * @shp contains the shared memory structure to be modified. - * @shm_associate: - * Check permission when a shared memory region is requested through the - * shmget system call. This hook is only called when returning the shared - * memory region identifier for an existing region, not when a new shared - * memory region is created. - * @shp contains the shared memory structure to be modified. - * @shmflg contains the operation control flags. - * Return 0 if permission is granted. - * @shm_shmctl: - * Check permission when a shared memory control operation specified by - * @cmd is to be performed on the shared memory region @shp. - * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. - * @shp contains shared memory structure to be modified. - * @cmd contains the operation to be performed. 
- * Return 0 if permission is granted. - * @shm_shmat: - * Check permissions prior to allowing the shmat system call to attach the - * shared memory segment @shp to the data segment of the calling process. - * The attaching address is specified by @shmaddr. - * @shp contains the shared memory structure to be modified. - * @shmaddr contains the address to attach memory region to. - * @shmflg contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Semaphores - * - * @sem_alloc_security: - * Allocate and attach a security structure to the sma->sem_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @sma contains the semaphore structure - * Return 0 if operation was successful and permission is granted. - * @sem_free_security: - * deallocate security struct for this semaphore - * @sma contains the semaphore structure. - * @sem_associate: - * Check permission when a semaphore is requested through the semget - * system call. This hook is only called when returning the semaphore - * identifier for an existing semaphore, not when a new one must be - * created. - * @sma contains the semaphore structure. - * @semflg contains the operation control flags. - * Return 0 if permission is granted. - * @sem_semctl: - * Check permission when a semaphore operation specified by @cmd is to be - * performed on the semaphore @sma. The @sma may be NULL, e.g. for - * IPC_INFO or SEM_INFO. - * @sma contains the semaphore structure. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @sem_semop - * Check permissions before performing operations on members of the - * semaphore set @sma. If the @alter flag is nonzero, the semaphore set - * may be modified. - * @sma contains the semaphore structure. - * @sops contains the operations to perform. - * @nsops contains the number of operations to perform. 
- * @alter contains the flag indicating whether changes are to be made. - * Return 0 if permission is granted. - * - * @binder_set_context_mgr - * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. - * Return 0 if permission is granted. - * @binder_transaction - * Check whether @from is allowed to invoke a binder transaction call - * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_binder - * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_file - * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. - * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. - * - * @ptrace_access_check: - * Check permission before allowing the current process to trace the - * @child process. - * Security modules may also want to perform a process tracing check - * during an execve in the set_security or apply_creds hooks of - * tracing check during an execve in the bprm_set_creds hook of - * binprm_security_ops if the process is being traced and its security - * attributes would be changed by the execve. - * @child contains the task_struct structure for the target process. - * @mode contains the PTRACE_MODE flags indicating the form of access. - * Return 0 if permission is granted. - * @ptrace_traceme: - * Check that the @parent process has sufficient permission to trace the - * current process before allowing the current process to present itself - * to the @parent process for tracing. - * @parent contains the task_struct structure for debugger process. - * Return 0 if permission is granted. 
- * @capget: - * Get the @effective, @inheritable, and @permitted capability sets for - * the @target process. The hook may also perform permission checking to - * determine if the current process is allowed to see the capability sets - * of the @target process. - * @target contains the task_struct structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if the capability sets were successfully obtained. - * @capset: - * Set the @effective, @inheritable, and @permitted capability sets for - * the current process. - * @new contains the new credentials structure for target process. - * @old contains the current credentials structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 and update @new if permission is granted. - * @capable: - * Check whether the @tsk process has the @cap capability in the indicated - * credentials. - * @cred contains the credentials to use. - * @ns contains the user namespace we want the capability in - * @cap contains the capability <include/linux/capability.h>. - * @audit: Whether to write an audit message or not - * Return 0 if the capability is granted for @tsk. - * @syslog: - * Check permission before accessing the kernel message ring or changing - * logging to the console. - * See the syslog(2) manual page for an explanation of the @type values. - * @type contains the type of action. - * @from_file indicates the context of action (if it came from /proc). - * Return 0 if permission is granted. - * @settime: - * Check permission to change the system time. - * struct timespec and timezone are defined in include/linux/time.h - * @ts contains new time - * @tz contains new timezone - * Return 0 if permission is granted. 
- * @vm_enough_memory: - * Check permissions for allocating a new virtual mapping. - * @mm contains the mm struct it is being added to. - * @pages contains the number of pages. - * Return 0 if permission is granted. - * - * @ismaclabel: - * Check if the extended attribute specified by @name - * represents a MAC label. Returns 1 if name is a MAC - * attribute otherwise returns 0. - * @name full extended attribute name to check against - * LSM as a MAC label. - * - * @secid_to_secctx: - * Convert secid to security context. If secdata is NULL the length of - * the result will be returned in seclen, but no secdata will be returned. - * This does mean that the length could change between calls to check the - * length and the next call which actually allocates and returns the secdata. - * @secid contains the security ID. - * @secdata contains the pointer that stores the converted security context. - * @seclen pointer which contains the length of the data - * @secctx_to_secid: - * Convert security context to secid. - * @secid contains the pointer to the generated security ID. - * @secdata contains the security context. - * - * @release_secctx: - * Release the security context. - * @secdata contains the security context. - * @seclen contains the length of the security context. - * - * Security hooks for Audit - * - * @audit_rule_init: - * Allocate and initialize an LSM audit rule structure. - * @field contains the required Audit action. Fields flags are defined in include/linux/audit.h - * @op contains the operator the rule uses. - * @rulestr contains the context where the rule will be applied to. - * @lsmrule contains a pointer to receive the result. - * Return 0 if @lsmrule has been successfully set, - * -EINVAL in case of an invalid rule. - * - * @audit_rule_known: - * Specifies whether given @rule contains any fields related to current LSM. - * @rule contains the audit rule of interest. - * Return 1 in case of relation found, 0 otherwise. 
- * - * @audit_rule_match: - * Determine if given @secid matches a rule previously approved - * by @audit_rule_known. - * @secid contains the security id in question. - * @field contains the field which relates to current LSM. - * @op contains the operator that will be used for matching. - * @rule points to the audit rule that will be checked against. - * @actx points to the audit context associated with the check. - * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. - * - * @audit_rule_free: - * Deallocate the LSM audit rule structure previously allocated by - * audit_rule_init. - * @rule contains the allocated rule - * - * @inode_notifysecctx: - * Notify the security module of what the security context of an inode - * should be. Initializes the incore security context managed by the - * security module for this inode. Example usage: NFS client invokes - * this hook to initialize the security context in its incore inode to the - * value provided by the server for the file when the server returned the - * file's attributes to the client. - * - * Must be called with inode->i_mutex locked. - * - * @inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. - * - * @inode_setsecctx: - * Change the security context of an inode. Updates the - * incore security context managed by the security module and invokes the - * fs code as needed (via __vfs_setxattr_noperm) to update any backing - * xattrs that represent the context. Example usage: NFS server invokes - * this hook to change the security context in its incore inode and on the - * backing filesystem to a value provided by the client on a SETATTR - * operation. - * - * Must be called with inode->i_mutex locked. - * - * @dentry contains the inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. 
- * - * @inode_getsecctx: - * On success, returns 0 and fills out @ctx and @ctxlen with the security - * context for the given @inode. - * - * @inode we wish to get the security context of. - * @ctx is a pointer in which to place the allocated security context. - * @ctxlen points to the place to put the length of @ctx. - * This is the main security structure. - */ -struct security_operations { - char name[SECURITY_NAME_MAX + 1]; - - int (*binder_set_context_mgr) (struct task_struct *mgr); - int (*binder_transaction) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file) (struct task_struct *from, - struct task_struct *to, struct file *file); - - int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); - int (*ptrace_traceme) (struct task_struct *parent); - int (*capget) (struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset) (struct cred *new, - const struct cred *old, - const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - int (*capable) (const struct cred *cred, struct user_namespace *ns, - int cap, int audit); - int (*quotactl) (int cmds, int type, int id, struct super_block *sb); - int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type); - int (*settime) (const struct timespec *ts, const struct timezone *tz); - int (*vm_enough_memory) (struct mm_struct *mm, long pages); - - int (*bprm_set_creds) (struct linux_binprm *bprm); - int (*bprm_check_security) (struct linux_binprm *bprm); - int (*bprm_secureexec) (struct linux_binprm *bprm); - void (*bprm_committing_creds) (struct linux_binprm *bprm); - void (*bprm_committed_creds) (struct linux_binprm *bprm); - - int (*sb_alloc_security) (struct super_block *sb); - void (*sb_free_security) (struct super_block *sb); - int (*sb_copy_data) (char *orig, char 
*copy); - int (*sb_remount) (struct super_block *sb, void *data); - int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); - int (*sb_show_options) (struct seq_file *m, struct super_block *sb); - int (*sb_statfs) (struct dentry *dentry); - int (*sb_mount) (const char *dev_name, struct path *path, - const char *type, unsigned long flags, void *data); - int (*sb_umount) (struct vfsmount *mnt, int flags); - int (*sb_pivotroot) (struct path *old_path, - struct path *new_path); - int (*sb_set_mnt_opts) (struct super_block *sb, - struct security_mnt_opts *opts, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_clone_mnt_opts) (const struct super_block *oldsb, - struct super_block *newsb); - int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); - int (*dentry_init_security) (struct dentry *dentry, int mode, - struct qstr *name, void **ctx, - u32 *ctxlen); - - -#ifdef CONFIG_SECURITY_PATH - int (*path_unlink) (struct path *dir, struct dentry *dentry); - int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); - int (*path_rmdir) (struct path *dir, struct dentry *dentry); - int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode, - unsigned int dev); - int (*path_truncate) (struct path *path); - int (*path_symlink) (struct path *dir, struct dentry *dentry, - const char *old_name); - int (*path_link) (struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry); - int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod) (struct path *path, umode_t mode); - int (*path_chown) (struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot) (struct path *path); -#endif - - int (*inode_alloc_security) (struct inode *inode); - void (*inode_free_security) (struct inode *inode); - int (*inode_init_security) (struct inode *inode, struct inode *dir, - const struct qstr *qstr, const char 
**name, - void **value, size_t *len); - int (*inode_create) (struct inode *dir, - struct dentry *dentry, umode_t mode); - int (*inode_link) (struct dentry *old_dentry, - struct inode *dir, struct dentry *new_dentry); - int (*inode_unlink) (struct inode *dir, struct dentry *dentry); - int (*inode_symlink) (struct inode *dir, - struct dentry *dentry, const char *old_name); - int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode); - int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); - int (*inode_mknod) (struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev); - int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry); - int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry, struct inode *inode, - bool rcu); - int (*inode_permission) (struct inode *inode, int mask); - int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (const struct path *path); - int (*inode_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - void (*inode_post_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - int (*inode_getxattr) (struct dentry *dentry, const char *name); - int (*inode_listxattr) (struct dentry *dentry); - int (*inode_removexattr) (struct dentry *dentry, const char *name); - int (*inode_need_killpriv) (struct dentry *dentry); - int (*inode_killpriv) (struct dentry *dentry); - int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); - int (*inode_setsecurity) (struct inode *inode, const char *name, const void *value, size_t size, int flags); - int (*inode_listsecurity) (struct inode *inode, char *buffer, size_t buffer_size); - void (*inode_getsecid) (const struct inode *inode, u32 *secid); - - int (*file_permission) (struct file *file, int mask); - int 
(*file_alloc_security) (struct file *file); - void (*file_free_security) (struct file *file); - int (*file_ioctl) (struct file *file, unsigned int cmd, - unsigned long arg); - int (*mmap_addr) (unsigned long addr); - int (*mmap_file) (struct file *file, - unsigned long reqprot, unsigned long prot, - unsigned long flags); - int (*file_mprotect) (struct vm_area_struct *vma, - unsigned long reqprot, - unsigned long prot); - int (*file_lock) (struct file *file, unsigned int cmd); - int (*file_fcntl) (struct file *file, unsigned int cmd, - unsigned long arg); - void (*file_set_fowner) (struct file *file); - int (*file_send_sigiotask) (struct task_struct *tsk, - struct fown_struct *fown, int sig); - int (*file_receive) (struct file *file); - int (*file_open) (struct file *file, const struct cred *cred); - - int (*task_create) (unsigned long clone_flags); - void (*task_free) (struct task_struct *task); - int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp); - void (*cred_free) (struct cred *cred); - int (*cred_prepare)(struct cred *new, const struct cred *old, - gfp_t gfp); - void (*cred_transfer)(struct cred *new, const struct cred *old); - int (*kernel_act_as)(struct cred *new, u32 secid); - int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); - int (*kernel_module_request)(char *kmod_name); - int (*kernel_module_from_file)(struct file *file); - int (*task_fix_setuid) (struct cred *new, const struct cred *old, - int flags); - int (*task_setpgid) (struct task_struct *p, pid_t pgid); - int (*task_getpgid) (struct task_struct *p); - int (*task_getsid) (struct task_struct *p); - void (*task_getsecid) (struct task_struct *p, u32 *secid); - int (*task_setnice) (struct task_struct *p, int nice); - int (*task_setioprio) (struct task_struct *p, int ioprio); - int (*task_getioprio) (struct task_struct *p); - int (*task_setrlimit) (struct task_struct *p, unsigned int resource, - struct 
rlimit *new_rlim); - int (*task_setscheduler) (struct task_struct *p); - int (*task_getscheduler) (struct task_struct *p); - int (*task_movememory) (struct task_struct *p); - int (*task_kill) (struct task_struct *p, - struct siginfo *info, int sig, u32 secid); - int (*task_wait) (struct task_struct *p); - int (*task_prctl) (int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); - void (*task_to_inode) (struct task_struct *p, struct inode *inode); - - int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); - void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid); - - int (*msg_msg_alloc_security) (struct msg_msg *msg); - void (*msg_msg_free_security) (struct msg_msg *msg); - - int (*msg_queue_alloc_security) (struct msg_queue *msq); - void (*msg_queue_free_security) (struct msg_queue *msq); - int (*msg_queue_associate) (struct msg_queue *msq, int msqflg); - int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd); - int (*msg_queue_msgsnd) (struct msg_queue *msq, - struct msg_msg *msg, int msqflg); - int (*msg_queue_msgrcv) (struct msg_queue *msq, - struct msg_msg *msg, - struct task_struct *target, - long type, int mode); - - int (*shm_alloc_security) (struct shmid_kernel *shp); - void (*shm_free_security) (struct shmid_kernel *shp); - int (*shm_associate) (struct shmid_kernel *shp, int shmflg); - int (*shm_shmctl) (struct shmid_kernel *shp, int cmd); - int (*shm_shmat) (struct shmid_kernel *shp, - char __user *shmaddr, int shmflg); - - int (*sem_alloc_security) (struct sem_array *sma); - void (*sem_free_security) (struct sem_array *sma); - int (*sem_associate) (struct sem_array *sma, int semflg); - int (*sem_semctl) (struct sem_array *sma, int cmd); - int (*sem_semop) (struct sem_array *sma, - struct sembuf *sops, unsigned nsops, int alter); - - int (*netlink_send) (struct sock *sk, struct sk_buff *skb); - - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); - - int (*getprocattr) (struct 
task_struct *p, char *name, char **value); - int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size); - int (*ismaclabel) (const char *name); - int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid); - void (*release_secctx) (char *secdata, u32 seclen); - - int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); - int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); - int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); - -#ifdef CONFIG_SECURITY_NETWORK - int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk); - int (*unix_may_send) (struct socket *sock, struct socket *other); - - int (*socket_create) (int family, int type, int protocol, int kern); - int (*socket_post_create) (struct socket *sock, int family, - int type, int protocol, int kern); - int (*socket_bind) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_connect) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_listen) (struct socket *sock, int backlog); - int (*socket_accept) (struct socket *sock, struct socket *newsock); - int (*socket_sendmsg) (struct socket *sock, - struct msghdr *msg, int size); - int (*socket_recvmsg) (struct socket *sock, - struct msghdr *msg, int size, int flags); - int (*socket_getsockname) (struct socket *sock); - int (*socket_getpeername) (struct socket *sock); - int (*socket_getsockopt) (struct socket *sock, int level, int optname); - int (*socket_setsockopt) (struct socket *sock, int level, int optname); - int (*socket_shutdown) (struct socket *sock, int how); - int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb); - int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); - int 
(*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); - void (*sk_free_security) (struct sock *sk); - void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - void (*sk_getsecid) (struct sock *sk, u32 *secid); - void (*sock_graft) (struct sock *sk, struct socket *parent); - int (*inet_conn_request) (struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); - void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); - int (*secmark_relabel_packet) (u32 secid); - void (*secmark_refcount_inc) (void); - void (*secmark_refcount_dec) (void); - void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); - int (*tun_dev_alloc_security) (void **security); - void (*tun_dev_free_security) (void *security); - int (*tun_dev_create) (void); - int (*tun_dev_attach_queue) (void *security); - int (*tun_dev_attach) (struct sock *sk, void *security); - int (*tun_dev_open) (void *security); -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); - int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); - void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_state_alloc_acquire) (struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid); - void (*xfrm_state_free_security) (struct xfrm_state *x); - int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); - int (*xfrm_state_pol_flow_match) (struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl); - int (*xfrm_decode_session) (struct sk_buff *skb, 
u32 *secid, int ckall); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ - - /* key management security hooks */ -#ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); - void (*key_free) (struct key *key); - int (*key_permission) (key_ref_t key_ref, - const struct cred *cred, - unsigned perm); - int (*key_getsecurity)(struct key *key, char **_buffer); -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT - int (*audit_rule_init) (u32 field, u32 op, char *rulestr, void **lsmrule); - int (*audit_rule_known) (struct audit_krule *krule); - int (*audit_rule_match) (u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); - void (*audit_rule_free) (void *lsmrule); -#endif /* CONFIG_AUDIT */ -}; - /* prototypes */ extern int security_init(void); -extern int security_module_enable(struct security_operations *ops); -extern int register_security(struct security_operations *ops); -extern void __init security_fixup_ops(struct security_operations *ops); - /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr); @@ -2049,7 +465,7 @@ static inline int security_settime(const struct timespec *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(mm, pages); + return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_set_creds(struct linux_binprm *bprm) @@ -2653,7 +1069,7 @@ static inline int security_setprocattr(struct task_struct *p, char *name, void * static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) { - return cap_netlink_send(sk, skb); + return 0; } static inline int security_ismaclabel(const char *name) @@ -3221,36 +1637,5 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#ifdef CONFIG_SECURITY_YAMA -extern int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode); -extern int yama_ptrace_traceme(struct 
task_struct *parent); -extern void yama_task_free(struct task_struct *task); -extern int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); -#else -static inline int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode) -{ - return 0; -} - -static inline int yama_ptrace_traceme(struct task_struct *parent) -{ - return 0; -} - -static inline void yama_task_free(struct task_struct *task) -{ -} - -static inline int yama_task_prctl(int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - return -ENOSYS; -} -#endif /* CONFIG_SECURITY_YAMA */ - #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 486e685a226a..e0582106ef4f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -35,6 +35,7 @@ #include <linux/spinlock.h> #include <linux/preempt.h> #include <linux/lockdep.h> +#include <linux/compiler.h> #include <asm/processor.h> /* @@ -274,9 +275,87 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) s->sequence++; } -/* +static inline int raw_read_seqcount_latch(seqcount_t *s) +{ + return lockless_dereference(s->sequence); +} + +/** * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t + * + * The latch technique is a multiversion concurrency control method that allows + * queries during non-atomic modifications. If you can guarantee queries never + * interrupt the modification -- e.g. the concurrency is strictly between CPUs + * -- you most likely do not need this. + * + * Where the traditional RCU/lockless data structures rely on atomic + * modifications to ensure queries observe either the old or the new state the + * latch allows the same for non-atomic updates. The trade-off is doubling the + * cost of storage; we have to maintain two copies of the entire data + * structure. 
+ * + * Very simply put: we first modify one copy and then the other. This ensures + * there is always one copy in a stable state, ready to give us an answer. + * + * The basic form is a data structure like: + * + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; + * + * Where a modification, which is assumed to be externally serialized, does the + * following: + * + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); <- Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[0], ...); + * + * smp_wmb(); <- Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[1], ...); + * } + * + * The query will have a form like: + * + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; + * + * do { + * seq = lockless_dereference(latch->seq); + * + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); + * + * smp_rmb(); + * } while (seq != latch->seq); + * + * return entry; + * } + * + * So during the modification, queries are first redirected to data[1]. Then we + * modify data[0]. When that is complete, we redirect queries back to data[0] + * and we can modify data[1]. + * + * NOTE: The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. 
*/ static inline void raw_write_seqcount_latch(seqcount_t *s) { diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 78097e7a330a..ba82c07feb95 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -12,6 +12,7 @@ #define _LINUX_SERIAL_8250_H #include <linux/serial_core.h> +#include <linux/serial_reg.h> #include <linux/platform_device.h> /* @@ -137,6 +138,8 @@ extern int early_serial_setup(struct uart_port *port); extern unsigned int serial8250_early_in(struct uart_port *port, int offset); extern void serial8250_early_out(struct uart_port *port, int offset, int value); +extern int early_serial8250_setup(struct earlycon_device *device, + const char *options); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); extern int serial8250_do_startup(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 025dad9dcde4..297d4fa1cfe5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -35,7 +35,7 @@ #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else -#define uart_console(port) (0) +#define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 6c5e3bb282b0..7c536ac5be05 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -1,6 +1,7 @@ #ifndef __LINUX_SERIAL_SCI_H #define __LINUX_SERIAL_SCI_H +#include <linux/bitops.h> #include <linux/serial_core.h> #include <linux/sh_dma.h> @@ -10,59 +11,16 @@ #define SCIx_NOT_SUPPORTED (-1) -/* SCSMR (Serial Mode Register) */ -#define SCSMR_CHR (1 << 6) /* 7-bit Character Length */ -#define SCSMR_PE (1 << 5) /* Parity Enable */ -#define SCSMR_ODD (1 << 4) /* Odd Parity */ -#define SCSMR_STOP (1 << 3) /* Stop Bit Length */ -#define SCSMR_CKS 0x0003 /* Clock Select */ - /* Serial Control Register (@ = not supported by all 
parts) */ -#define SCSCR_TIE (1 << 7) /* Transmit Interrupt Enable */ -#define SCSCR_RIE (1 << 6) /* Receive Interrupt Enable */ -#define SCSCR_TE (1 << 5) /* Transmit Enable */ -#define SCSCR_RE (1 << 4) /* Receive Enable */ -#define SCSCR_REIE (1 << 3) /* Receive Error Interrupt Enable @ */ -#define SCSCR_TOIE (1 << 2) /* Timeout Interrupt Enable @ */ -#define SCSCR_CKE1 (1 << 1) /* Clock Enable 1 */ -#define SCSCR_CKE0 (1 << 0) /* Clock Enable 0 */ -/* SCIFA/SCIFB only */ -#define SCSCR_TDRQE (1 << 15) /* Tx Data Transfer Request Enable */ -#define SCSCR_RDRQE (1 << 14) /* Rx Data Transfer Request Enable */ - -/* SCxSR (Serial Status Register) on SCI */ -#define SCI_TDRE 0x80 /* Transmit Data Register Empty */ -#define SCI_RDRF 0x40 /* Receive Data Register Full */ -#define SCI_ORER 0x20 /* Overrun Error */ -#define SCI_FER 0x10 /* Framing Error */ -#define SCI_PER 0x08 /* Parity Error */ -#define SCI_TEND 0x04 /* Transmit End */ - -#define SCI_DEFAULT_ERROR_MASK (SCI_PER | SCI_FER) - -/* SCxSR (Serial Status Register) on SCIF, HSCIF */ -#define SCIF_ER 0x0080 /* Receive Error */ -#define SCIF_TEND 0x0040 /* Transmission End */ -#define SCIF_TDFE 0x0020 /* Transmit FIFO Data Empty */ -#define SCIF_BRK 0x0010 /* Break Detect */ -#define SCIF_FER 0x0008 /* Framing Error */ -#define SCIF_PER 0x0004 /* Parity Error */ -#define SCIF_RDF 0x0002 /* Receive FIFO Data Full */ -#define SCIF_DR 0x0001 /* Receive Data Ready */ - -#define SCIF_DEFAULT_ERROR_MASK (SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK) - -/* SCFCR (FIFO Control Register) */ -#define SCFCR_LOOP (1 << 0) /* Loopback Test */ - -/* SCSPTR (Serial Port Register), optional */ -#define SCSPTR_RTSIO (1 << 7) /* Serial Port RTS Pin Input/Output */ -#define SCSPTR_CTSIO (1 << 5) /* Serial Port CTS Pin Input/Output */ -#define SCSPTR_SPB2IO (1 << 1) /* Serial Port Break Input/Output */ -#define SCSPTR_SPB2DT (1 << 0) /* Serial Port Break Data */ - -/* HSSRR HSCIF */ -#define HSCIF_SRE 0x8000 /* Sampling Rate Register 
Enable */ +#define SCSCR_TIE BIT(7) /* Transmit Interrupt Enable */ +#define SCSCR_RIE BIT(6) /* Receive Interrupt Enable */ +#define SCSCR_TE BIT(5) /* Transmit Enable */ +#define SCSCR_RE BIT(4) /* Receive Enable */ +#define SCSCR_REIE BIT(3) /* Receive Error Interrupt Enable @ */ +#define SCSCR_TOIE BIT(2) /* Timeout Interrupt Enable @ */ +#define SCSCR_CKE1 BIT(1) /* Clock Enable 1 */ +#define SCSCR_CKE0 BIT(0) /* Clock Enable 0 */ + enum { SCIx_PROBE_REGTYPE, @@ -82,28 +40,6 @@ enum { SCIx_NR_REGTYPES, }; -/* - * SCI register subset common for all port types. - * Not all registers will exist on all parts. - */ -enum { - SCSMR, /* Serial Mode Register */ - SCBRR, /* Bit Rate Register */ - SCSCR, /* Serial Control Register */ - SCxSR, /* Serial Status Register */ - SCFCR, /* FIFO Control Register */ - SCFDR, /* FIFO Data Count Register */ - SCxTDR, /* Transmit (FIFO) Data Register */ - SCxRDR, /* Receive (FIFO) Data Register */ - SCLSR, /* Line Status Register */ - SCTFDR, /* Transmit FIFO Data Count Register */ - SCRFDR, /* Receive FIFO Data Count Register */ - SCSPTR, /* Serial Port Register */ - HSSRR, /* Sampling Rate Register */ - - SCIx_NR_REGS, -}; - struct device; struct plat_sci_port_ops { @@ -113,7 +49,7 @@ struct plat_sci_port_ops { /* * Port-specific capabilities */ -#define SCIx_HAVE_RTSCTS (1 << 0) +#define SCIx_HAVE_RTSCTS BIT(0) /* * Platform device specific platform_data struct diff --git a/include/linux/slab.h b/include/linux/slab.h index 9de2fdc8b5e4..a99f0e5243e1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -153,30 +153,8 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) -/* - * The KMALLOC_LOOP_LOW is the definition for the for loop index start number - * to create the kmalloc_caches object in create_kmalloc_caches(). The first - * and the second are 96 and 192. 
You can see that in the kmalloc_index(), if - * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64, - * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't - * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW. - */ -#if KMALLOC_MIN_SIZE <= 32 -#define KMALLOC_LOOP_LOW 1 -#elif KMALLOC_MIN_SIZE <= 64 -#define KMALLOC_LOOP_LOW 2 -#else -#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW -#endif - #else #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -/* - * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used. - * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be - * initialized. - */ -#define KMALLOC_LOOP_LOW 1 #endif /* diff --git a/include/linux/soc/sunxi/sunxi_sram.h b/include/linux/soc/sunxi/sunxi_sram.h new file mode 100644 index 000000000000..c5f663bba9c2 --- /dev/null +++ b/include/linux/soc/sunxi/sunxi_sram.h @@ -0,0 +1,19 @@ +/* + * Allwinner SoCs SRAM Controller Driver + * + * Copyright (C) 2015 Maxime Ripard + * + * Author: Maxime Ripard <maxime.ripard@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ */ + +#ifndef _SUNXI_SRAM_H_ +#define _SUNXI_SRAM_H_ + +int sunxi_sram_claim(struct device *dev); +int sunxi_sram_release(struct device *dev); + +#endif /* _SUNXI_SRAM_H_ */ diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4568a5cc9ab8..c3d1a525bacc 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -29,10 +29,13 @@ struct ssb_sprom { u8 il0mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11b/g */ u8 et0mac[6] __aligned(sizeof(u16)); /* MAC address for Ethernet */ u8 et1mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11a */ + u8 et2mac[6] __aligned(sizeof(u16)); /* MAC address for extra Ethernet */ u8 et0phyaddr; /* MII address for enet0 */ u8 et1phyaddr; /* MII address for enet1 */ + u8 et2phyaddr; /* MII address for enet2 */ u8 et0mdcport; /* MDIO for enet0 */ u8 et1mdcport; /* MDIO for enet1 */ + u8 et2mdcport; /* MDIO for enet2 */ u16 dev_id; /* Device ID overriding e.g. PCI ID */ u16 board_rev; /* Board revision number from SPROM. */ u16 board_num; /* Board number from SPROM. 
*/ @@ -88,11 +91,14 @@ struct ssb_sprom { u32 ofdm5glpo; /* 5.2GHz OFDM power offset */ u32 ofdm5gpo; /* 5.3GHz OFDM power offset */ u32 ofdm5ghpo; /* 5.8GHz OFDM power offset */ + u32 boardflags; + u32 boardflags2; + u32 boardflags3; + /* TODO: Switch all drivers to new u32 fields and drop below ones */ u16 boardflags_lo; /* Board flags (bits 0-15) */ u16 boardflags_hi; /* Board flags (bits 16-31) */ u16 boardflags2_lo; /* Board flags (bits 32-47) */ u16 boardflags2_hi; /* Board flags (bits 48-63) */ - /* TODO store board flags in a single u64 */ struct ssb_sprom_core_pwr_info core_pwr_info[4]; diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 076af437284d..9c61c7cda936 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -3,7 +3,6 @@ #include <uapi/linux/stddef.h> - #undef NULL #define NULL ((void *)0) @@ -14,10 +13,9 @@ enum { #undef offsetof #ifdef __compiler_offsetof -#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) +#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) #else -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) #endif /** @@ -28,3 +26,5 @@ enum { */ #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + +#endif diff --git a/include/linux/string.h b/include/linux/string.h index e40099e585c9..a8d90db9c4b0 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -111,6 +111,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +char *strreplace(char *s, char old, char new); extern void kfree_const(const void *x); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index df8edf8ec914..cb94ee4181d4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,6 +172,13 @@ struct 
svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 +#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ +#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD +#else +#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#endif + #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ @@ -182,10 +189,9 @@ struct svcxprt_rdma { /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); -extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode, u32 *); + enum rpcrdma_errcode, __be32 *); extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, @@ -212,7 +218,6 @@ extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -struct page *svc_rdma_get_page(void); extern int svc_rdma_post_recv(struct svcxprt_rdma *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); diff --git a/include/linux/swap.h b/include/linux/swap.h index cee108cbe2d5..38874729dc5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); -extern 
void end_swap_bio_read(struct bio *bio, int err); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38aabe1..b45c45b8c829 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -111,14 +111,14 @@ union bpf_attr; #define __SC_STR_ADECL(t, a) #a #define __SC_STR_TDECL(t, a) #t -extern struct ftrace_event_class event_class_syscall_enter; -extern struct ftrace_event_class event_class_syscall_exit; +extern struct trace_event_class event_class_syscall_enter; +extern struct trace_event_class event_class_syscall_exit; extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ event_enter_##sname = { \ .class = &event_class_syscall_enter, \ { \ @@ -128,13 +128,13 @@ extern struct trace_event_functions exit_syscall_print_funcs; .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) \ *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ event_exit_##sname = { \ .class = &event_class_syscall_exit, \ { \ @@ -144,7 +144,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) \ *__event_exit_##sname = &event_exit_##sname; @@ 
-827,15 +827,15 @@ asmlinkage long sys_syncfs(int fd); asmlinkage long sys_fork(void); asmlinkage long sys_vfork(void); #ifdef CONFIG_CLONE_BACKWARDS -asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, +asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); #else #ifdef CONFIG_CLONE_BACKWARDS3 asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, - int __user *, int); + int __user *, unsigned long); #else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, - int __user *, int); + int __user *, unsigned long); #endif #endif diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 4b7b875a7ce1..c3a7f0cc3a27 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -47,12 +47,12 @@ #define SYSLOG_FROM_READER 0 #define SYSLOG_FROM_PROC 1 -int do_syslog(int type, char __user *buf, int count, bool from_file); +int do_syslog(int type, char __user *buf, int count, int source); #ifdef CONFIG_PRINTK -int check_syslog_permissions(int type, bool from_file); +int check_syslog_permissions(int type, int source); #else -static inline int check_syslog_permissions(int type, bool from_file) +static inline int check_syslog_permissions(int type, int source) { return 0; } diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5eac316490ea..037e9df2f610 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,6 +40,9 @@ /* No upper/lower limit requirement */ #define THERMAL_NO_LIMIT ((u32)~0) +/* Default weight of a bound cooling device */ +#define THERMAL_WEIGHT_DEFAULT 0 + /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? 
\ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) @@ -56,10 +59,13 @@ #define DEFAULT_THERMAL_GOVERNOR "fair_share" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) #define DEFAULT_THERMAL_GOVERNOR "user_space" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) +#define DEFAULT_THERMAL_GOVERNOR "power_allocator" #endif struct thermal_zone_device; struct thermal_cooling_device; +struct thermal_instance; enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, @@ -113,6 +119,12 @@ struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); + int (*get_requested_power)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32 *); + int (*state2power)(struct thermal_cooling_device *, + struct thermal_zone_device *, unsigned long, u32 *); + int (*power2state)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32, unsigned long *); }; struct thermal_cooling_device { @@ -144,8 +156,7 @@ struct thermal_attr { * @devdata: private pointer for device private data * @trips: number of trip points the thermal zone supports * @passive_delay: number of milliseconds to wait between polls when - * performing passive cooling. Currenty only used by the - * step-wise governor + * performing passive cooling. * @polling_delay: number of milliseconds to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) @@ -155,13 +166,13 @@ struct thermal_attr { * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. - * Currenty only used by the step-wise governor. * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. 
Currently only used by the * step-wise governor. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone + * @governor_data: private pointer for governor data * @thermal_instances: list of &struct thermal_instance of this thermal zone * @idr: &struct idr to generate unique id for this zone's cooling * devices @@ -186,8 +197,9 @@ struct thermal_zone_device { int passive; unsigned int forced_passive; struct thermal_zone_device_ops *ops; - const struct thermal_zone_params *tzp; + struct thermal_zone_params *tzp; struct thermal_governor *governor; + void *governor_data; struct list_head thermal_instances; struct idr idr; struct mutex lock; @@ -198,12 +210,19 @@ struct thermal_zone_device { /** * struct thermal_governor - structure that holds thermal governor information * @name: name of the governor + * @bind_to_tz: callback called when binding to a thermal zone. If it + * returns 0, the governor is bound to the thermal zone, + * otherwise it fails. + * @unbind_from_tz: callback called when a governor is unbound from a + * thermal zone. * @throttle: callback called for every trip point even if temperature is * below the trip point temperature * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; + int (*bind_to_tz)(struct thermal_zone_device *tz); + void (*unbind_from_tz)(struct thermal_zone_device *tz); int (*throttle)(struct thermal_zone_device *tz, int trip); struct list_head governor_list; }; @@ -214,9 +233,12 @@ struct thermal_bind_params { /* * This is a measure of 'how effectively these devices can - * cool 'this' thermal zone. The shall be determined by platform - * characterization. This is on a 'percentage' scale. - * See Documentation/thermal/sysfs-api.txt for more information. + * cool 'this' thermal zone. It shall be determined by + * platform characterization. 
This value is relative to the + * rest of the weights so a cooling device whose weight is + * double that of another cooling device is twice as + * effective. See Documentation/thermal/sysfs-api.txt for more + * information. */ int weight; @@ -253,6 +275,44 @@ struct thermal_zone_params { int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; + + /* + * Sustainable power (heat) that this thermal zone can dissipate in + * mW + */ + u32 sustainable_power; + + /* + * Proportional parameter of the PID controller when + * overshooting (i.e., when temperature is below the target) + */ + s32 k_po; + + /* + * Proportional parameter of the PID controller when + * undershooting + */ + s32 k_pu; + + /* Integral parameter of the PID controller */ + s32 k_i; + + /* Derivative parameter of the PID controller */ + s32 k_d; + + /* threshold below which the error is no longer accumulated */ + s32 integral_cutoff; + + /* + * @slope: slope of a linear temperature adjustment curve. + * Used by thermal zone drivers. + */ + int slope; + /* + * @offset: offset of a linear temperature adjustment curve. + * Used by thermal zone drivers (default 0). 
+ */ + int offset; }; struct thermal_genl_event { @@ -316,14 +376,25 @@ void thermal_zone_of_sensor_unregister(struct device *dev, #endif #if IS_ENABLED(CONFIG_THERMAL) +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ + return cdev->ops->get_requested_power && cdev->ops->state2power && + cdev->ops->power2state; +} + +int power_actor_get_max_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *max_power); +int power_actor_set_power(struct thermal_cooling_device *, + struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); + struct thermal_zone_params *, int, int); void thermal_zone_device_unregister(struct thermal_zone_device *); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, - unsigned long, unsigned long); + unsigned long, unsigned long, + unsigned int); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *); @@ -343,6 +414,14 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ return false; } +static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *max_power) +{ return 0; } +static inline int power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *tz, u32 power) +{ return 0; } static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, diff --git 
a/include/linux/ftrace_event.h b/include/linux/trace_events.h index f9ecf63d47f1..1063c850dbab 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/trace_events.h @@ -1,6 +1,6 @@ -#ifndef _LINUX_FTRACE_EVENT_H -#define _LINUX_FTRACE_EVENT_H +#ifndef _LINUX_TRACE_EVENT_H +#define _LINUX_TRACE_EVENT_H #include <linux/ring_buffer.h> #include <linux/trace_seq.h> @@ -25,35 +25,35 @@ struct trace_print_flags_u64 { const char *name; }; -const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, - unsigned long flags, - const struct trace_print_flags *flag_array); +const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); -const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array); +const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); #if BITS_PER_LONG == 32 -const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, - unsigned long long val, - const struct trace_print_flags_u64 +const char *trace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 *symbol_array); #endif -const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, - unsigned int bitmask_size); +const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size); -const char *ftrace_print_hex_seq(struct trace_seq *p, - const unsigned char *buf, int len); +const char *trace_print_hex_seq(struct trace_seq *p, + const unsigned char *buf, int len); -const char *ftrace_print_array_seq(struct trace_seq *p, +const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size); struct trace_iterator; struct trace_event; -int ftrace_raw_output_prep(struct trace_iterator *iter, - struct trace_event *event); +int 
trace_raw_output_prep(struct trace_iterator *iter, + struct trace_event *event); /* * The trace entry - the most basic unit of tracing. This is what @@ -68,7 +68,7 @@ struct trace_entry { int pid; }; -#define FTRACE_MAX_EVENT \ +#define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) /* @@ -132,8 +132,8 @@ struct trace_event { struct trace_event_functions *funcs; }; -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); +extern int register_trace_event(struct trace_event *event); +extern int unregister_trace_event(struct trace_event *event); /* Return values for print_line callback */ enum print_line_t { @@ -157,11 +157,11 @@ static inline enum print_line_t trace_handle_return(struct trace_seq *s) void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); -struct ftrace_event_file; +struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, - struct ftrace_event_file *ftrace_file, + struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc); struct ring_buffer_event * @@ -183,7 +183,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer *buffer, void tracing_record_cmdline(struct task_struct *tsk); -int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); +int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); struct event_filter; @@ -200,50 +200,39 @@ enum trace_reg { #endif }; -struct ftrace_event_call; +struct trace_event_call; -struct ftrace_event_class { +struct trace_event_class { const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; #endif - int (*reg)(struct ftrace_event_call *event, + int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); - int (*define_fields)(struct ftrace_event_call *); - struct list_head 
*(*get_fields)(struct ftrace_event_call *); + int (*define_fields)(struct trace_event_call *); + struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; - int (*raw_init)(struct ftrace_event_call *); + int (*raw_init)(struct trace_event_call *); }; -extern int ftrace_event_reg(struct ftrace_event_call *event, +extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); -int ftrace_output_event(struct trace_iterator *iter, struct ftrace_event_call *event, - char *fmt, ...); - -int ftrace_event_define_field(struct ftrace_event_call *call, - char *type, int len, char *item, int offset, - int field_size, int sign, int filter); - -struct ftrace_event_buffer { +struct trace_event_buffer { struct ring_buffer *buffer; struct ring_buffer_event *event; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; void *entry; unsigned long flags; int pc; }; -void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, - struct ftrace_event_file *ftrace_file, +void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, + struct trace_event_file *trace_file, unsigned long len); -void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); - -int ftrace_event_define_field(struct ftrace_event_call *call, - char *type, int len, char *item, int offset, - int field_size, int sign, int filter); +void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { TRACE_EVENT_FL_FILTERED_BIT, @@ -261,11 +250,11 @@ enum { * FILTERED - The event has a filter attached * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored - * IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file + * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file * WAS_ENABLED - Set and stays set when an event was ever enabled * (used for module unloading, if a module event is enabled, * it is best to clear 
the buffers that used it). - * USE_CALL_FILTER - For ftrace internal events, don't use file filter + * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe */ @@ -280,9 +269,9 @@ enum { TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), }; -struct ftrace_event_call { +struct trace_event_call { struct list_head list; - struct ftrace_event_class *class; + struct trace_event_class *class; union { char *name; /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ @@ -297,7 +286,7 @@ struct ftrace_event_call { * bit 0: filter_active * bit 1: allow trace by non root (cap any) * bit 2: failed to apply filter - * bit 3: ftrace internal event (do not enable) + * bit 3: trace internal event (do not enable) * bit 4: Event was enabled by module * bit 5: use call filter rather than file filter * bit 6: Event is a tracepoint @@ -309,13 +298,13 @@ struct ftrace_event_call { struct hlist_head __percpu *perf_events; struct bpf_prog *prog; - int (*perf_perm)(struct ftrace_event_call *, + int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; static inline const char * -ftrace_event_name(struct ftrace_event_call *call) +trace_event_name(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? 
call->tp->name : NULL; @@ -324,21 +313,21 @@ ftrace_event_name(struct ftrace_event_call *call) } struct trace_array; -struct ftrace_subsystem_dir; +struct trace_subsystem_dir; enum { - FTRACE_EVENT_FL_ENABLED_BIT, - FTRACE_EVENT_FL_RECORDED_CMD_BIT, - FTRACE_EVENT_FL_FILTERED_BIT, - FTRACE_EVENT_FL_NO_SET_FILTER_BIT, - FTRACE_EVENT_FL_SOFT_MODE_BIT, - FTRACE_EVENT_FL_SOFT_DISABLED_BIT, - FTRACE_EVENT_FL_TRIGGER_MODE_BIT, - FTRACE_EVENT_FL_TRIGGER_COND_BIT, + EVENT_FILE_FL_ENABLED_BIT, + EVENT_FILE_FL_RECORDED_CMD_BIT, + EVENT_FILE_FL_FILTERED_BIT, + EVENT_FILE_FL_NO_SET_FILTER_BIT, + EVENT_FILE_FL_SOFT_MODE_BIT, + EVENT_FILE_FL_SOFT_DISABLED_BIT, + EVENT_FILE_FL_TRIGGER_MODE_BIT, + EVENT_FILE_FL_TRIGGER_COND_BIT, }; /* - * Ftrace event file flags: + * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch * FILTERED - The event has a filter attached @@ -350,23 +339,23 @@ enum { * TRIGGER_COND - When set, one or more triggers has an associated filter */ enum { - FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), - FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), - FTRACE_EVENT_FL_FILTERED = (1 << FTRACE_EVENT_FL_FILTERED_BIT), - FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT), - FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), - FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), - FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT), - FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), + EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), + EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), + EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), + EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), + EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), + EVENT_FILE_FL_SOFT_DISABLED = (1 << 
EVENT_FILE_FL_SOFT_DISABLED_BIT), + EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), + EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), }; -struct ftrace_event_file { +struct trace_event_file { struct list_head list; - struct ftrace_event_call *event_call; + struct trace_event_call *event_call; struct event_filter *filter; struct dentry *dir; struct trace_array *tr; - struct ftrace_subsystem_dir *system; + struct trace_subsystem_dir *system; struct list_head triggers; /* @@ -399,7 +388,7 @@ struct ftrace_event_file { early_initcall(trace_init_flags_##name); #define __TRACE_EVENT_PERF_PERM(name, expr...) \ - static int perf_perm_##name(struct ftrace_event_call *tp_event, \ + static int perf_perm_##name(struct trace_event_call *tp_event, \ struct perf_event *p_event) \ { \ return ({ expr; }); \ @@ -425,19 +414,19 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); -extern int filter_check_discard(struct ftrace_event_file *file, void *rec, +extern int filter_check_discard(struct trace_event_file *file, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); -extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, +extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); -extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file, +extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); -extern void event_triggers_post_call(struct ftrace_event_file *file, +extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); /** - * ftrace_trigger_soft_disabled - do triggers and test if soft disabled + * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test * * If any triggers without filters are attached to this 
event, they @@ -446,14 +435,14 @@ extern void event_triggers_post_call(struct ftrace_event_file *file, * otherwise false. */ static inline bool -ftrace_trigger_soft_disabled(struct ftrace_event_file *file) +trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { - if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) + if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { + if (eflags & EVENT_FILE_FL_TRIGGER_MODE) event_triggers_call(file, NULL); - if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) + if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; } return false; @@ -473,7 +462,7 @@ ftrace_trigger_soft_disabled(struct ftrace_event_file *file) * Returns true if the event is discarded, false otherwise. */ static inline bool -__event_trigger_test_discard(struct ftrace_event_file *file, +__event_trigger_test_discard(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, @@ -481,10 +470,10 @@ __event_trigger_test_discard(struct ftrace_event_file *file, { unsigned long eflags = file->flags; - if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) + if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry); - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags)) + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) ring_buffer_discard_commit(buffer, event); else if (!filter_check_discard(file, entry, buffer, event)) return false; @@ -506,7 +495,7 @@ __event_trigger_test_discard(struct ftrace_event_file *file, * if the event is soft disabled and should be discarded. 
*/ static inline void -event_trigger_unlock_commit(struct ftrace_event_file *file, +event_trigger_unlock_commit(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) @@ -537,7 +526,7 @@ event_trigger_unlock_commit(struct ftrace_event_file *file, * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void -event_trigger_unlock_commit_regs(struct ftrace_event_file *file, +event_trigger_unlock_commit_regs(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, @@ -570,12 +559,12 @@ enum { FILTER_TRACE_FN, }; -extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, const char *type, +extern int trace_event_raw_init(struct trace_event_call *call); +extern int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); -extern int trace_add_event_call(struct ftrace_event_call *call); -extern int trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_add_event_call(struct trace_event_call *call); +extern int trace_remove_event_call(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) @@ -624,4 +613,4 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, } #endif -#endif /* _LINUX_FTRACE_EVENT_H */ +#endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index d76631f615c2..ad6c8913aa3e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -422,7 +422,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine); -extern char *tty_name(struct tty_struct *tty, char *buf); +extern const char *tty_name(const 
struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h new file mode 100644 index 000000000000..388f6e08b9d4 --- /dev/null +++ b/include/linux/ulpi/driver.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_ULPI_DRIVER_H +#define __LINUX_ULPI_DRIVER_H + +#include <linux/mod_devicetable.h> + +#include <linux/device.h> + +struct ulpi_ops; + +/** + * struct ulpi - describes ULPI PHY device + * @id: vendor and product ids for ULPI device + * @ops: I/O access + * @dev: device interface + */ +struct ulpi { + struct ulpi_device_id id; + struct ulpi_ops *ops; + struct device dev; +}; + +#define to_ulpi_dev(d) container_of(d, struct ulpi, dev) + +static inline void ulpi_set_drvdata(struct ulpi *ulpi, void *data) +{ + dev_set_drvdata(&ulpi->dev, data); +} + +static inline void *ulpi_get_drvdata(struct ulpi *ulpi) +{ + return dev_get_drvdata(&ulpi->dev); +} + +/** + * struct ulpi_driver - describes a ULPI PHY driver + * @id_table: array of device identifiers supported by this driver + * @probe: binds this driver to ULPI device + * @remove: unbinds this driver from ULPI device + * @driver: the name and owner members must be initialized by the drivers + */ +struct ulpi_driver { + const struct ulpi_device_id *id_table; + int (*probe)(struct ulpi *ulpi); + void (*remove)(struct ulpi *ulpi); + struct device_driver driver; +}; + +#define to_ulpi_driver(d) container_of(d, struct ulpi_driver, driver) + +int ulpi_register_driver(struct ulpi_driver *drv); +void ulpi_unregister_driver(struct ulpi_driver *drv); + +#define module_ulpi_driver(__ulpi_driver) \ + module_driver(__ulpi_driver, ulpi_register_driver, \ + ulpi_unregister_driver) + +int ulpi_read(struct ulpi *ulpi, u8 addr); +int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val); + +#endif /* __LINUX_ULPI_DRIVER_H */ diff --git 
a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h new file mode 100644 index 000000000000..4de8ab491038 --- /dev/null +++ b/include/linux/ulpi/interface.h @@ -0,0 +1,23 @@ +#ifndef __LINUX_ULPI_INTERFACE_H +#define __LINUX_ULPI_INTERFACE_H + +#include <linux/types.h> + +struct ulpi; + +/** + * struct ulpi_ops - ULPI register access + * @dev: the interface provider + * @read: read operation for ULPI register access + * @write: write operation for ULPI register access + */ +struct ulpi_ops { + struct device *dev; + int (*read)(struct ulpi_ops *ops, u8 addr); + int (*write)(struct ulpi_ops *ops, u8 addr, u8 val); +}; + +struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *); +void ulpi_unregister_interface(struct ulpi *); + +#endif /* __LINUX_ULPI_INTERFACE_H */ diff --git a/include/linux/ulpi/regs.h b/include/linux/ulpi/regs.h new file mode 100644 index 000000000000..b5b8b8804560 --- /dev/null +++ b/include/linux/ulpi/regs.h @@ -0,0 +1,130 @@ +#ifndef __LINUX_ULPI_REGS_H +#define __LINUX_ULPI_REGS_H + +/* + * Macros for Set and Clear + * See ULPI 1.1 specification to find the registers with Set and Clear offsets + */ +#define ULPI_SET(a) (a + 1) +#define ULPI_CLR(a) (a + 2) + +/* + * Register Map + */ +#define ULPI_VENDOR_ID_LOW 0x00 +#define ULPI_VENDOR_ID_HIGH 0x01 +#define ULPI_PRODUCT_ID_LOW 0x02 +#define ULPI_PRODUCT_ID_HIGH 0x03 +#define ULPI_FUNC_CTRL 0x04 +#define ULPI_IFC_CTRL 0x07 +#define ULPI_OTG_CTRL 0x0a +#define ULPI_USB_INT_EN_RISE 0x0d +#define ULPI_USB_INT_EN_FALL 0x10 +#define ULPI_USB_INT_STS 0x13 +#define ULPI_USB_INT_LATCH 0x14 +#define ULPI_DEBUG 0x15 +#define ULPI_SCRATCH 0x16 +/* Optional Carkit Registers */ +#define ULPI_CARKIT_CTRL 0x19 +#define ULPI_CARKIT_INT_DELAY 0x1c +#define ULPI_CARKIT_INT_EN 0x1d +#define ULPI_CARKIT_INT_STS 0x20 +#define ULPI_CARKIT_INT_LATCH 0x21 +#define ULPI_CARKIT_PLS_CTRL 0x22 +/* Other Optional Registers */ +#define ULPI_TX_POS_WIDTH 0x25 +#define ULPI_TX_NEG_WIDTH 0x26 
+#define ULPI_POLARITY_RECOVERY 0x27 +/* Access Extended Register Set */ +#define ULPI_ACCESS_EXTENDED 0x2f +/* Vendor Specific */ +#define ULPI_VENDOR_SPECIFIC 0x30 +/* Extended Registers */ +#define ULPI_EXT_VENDOR_SPECIFIC 0x80 + +/* + * Register Bits + */ + +/* Function Control */ +#define ULPI_FUNC_CTRL_XCVRSEL BIT(0) +#define ULPI_FUNC_CTRL_XCVRSEL_MASK 0x3 +#define ULPI_FUNC_CTRL_HIGH_SPEED 0x0 +#define ULPI_FUNC_CTRL_FULL_SPEED 0x1 +#define ULPI_FUNC_CTRL_LOW_SPEED 0x2 +#define ULPI_FUNC_CTRL_FS4LS 0x3 +#define ULPI_FUNC_CTRL_TERMSELECT BIT(2) +#define ULPI_FUNC_CTRL_OPMODE BIT(3) +#define ULPI_FUNC_CTRL_OPMODE_MASK (0x3 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NORMAL (0x0 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NONDRIVING (0x1 << 3) +#define ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI (0x2 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP (0x3 << 3) +#define ULPI_FUNC_CTRL_RESET BIT(5) +#define ULPI_FUNC_CTRL_SUSPENDM BIT(6) + +/* Interface Control */ +#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE BIT(0) +#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE BIT(1) +#define ULPI_IFC_CTRL_CARKITMODE BIT(2) +#define ULPI_IFC_CTRL_CLOCKSUSPENDM BIT(3) +#define ULPI_IFC_CTRL_AUTORESUME BIT(4) +#define ULPI_IFC_CTRL_EXTERNAL_VBUS BIT(5) +#define ULPI_IFC_CTRL_PASSTHRU BIT(6) +#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE BIT(7) + +/* OTG Control */ +#define ULPI_OTG_CTRL_ID_PULLUP BIT(0) +#define ULPI_OTG_CTRL_DP_PULLDOWN BIT(1) +#define ULPI_OTG_CTRL_DM_PULLDOWN BIT(2) +#define ULPI_OTG_CTRL_DISCHRGVBUS BIT(3) +#define ULPI_OTG_CTRL_CHRGVBUS BIT(4) +#define ULPI_OTG_CTRL_DRVVBUS BIT(5) +#define ULPI_OTG_CTRL_DRVVBUS_EXT BIT(6) +#define ULPI_OTG_CTRL_EXTVBUSIND BIT(7) + +/* USB Interrupt Enable Rising, + * USB Interrupt Enable Falling, + * USB Interrupt Status and + * USB Interrupt Latch + */ +#define ULPI_INT_HOST_DISCONNECT BIT(0) +#define ULPI_INT_VBUS_VALID BIT(1) +#define ULPI_INT_SESS_VALID BIT(2) +#define ULPI_INT_SESS_END BIT(3) +#define ULPI_INT_IDGRD BIT(4) + +/* Debug */ +#define 
ULPI_DEBUG_LINESTATE0 BIT(0) +#define ULPI_DEBUG_LINESTATE1 BIT(1) + +/* Carkit Control */ +#define ULPI_CARKIT_CTRL_CARKITPWR BIT(0) +#define ULPI_CARKIT_CTRL_IDGNDDRV BIT(1) +#define ULPI_CARKIT_CTRL_TXDEN BIT(2) +#define ULPI_CARKIT_CTRL_RXDEN BIT(3) +#define ULPI_CARKIT_CTRL_SPKLEFTEN BIT(4) +#define ULPI_CARKIT_CTRL_SPKRIGHTEN BIT(5) +#define ULPI_CARKIT_CTRL_MICEN BIT(6) + +/* Carkit Interrupt Enable */ +#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE BIT(0) +#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL BIT(1) +#define ULPI_CARKIT_INT_EN_CARINTDET BIT(2) +#define ULPI_CARKIT_INT_EN_DP_RISE BIT(3) +#define ULPI_CARKIT_INT_EN_DP_FALL BIT(4) + +/* Carkit Interrupt Status and + * Carkit Interrupt Latch + */ +#define ULPI_CARKIT_INT_IDFLOAT BIT(0) +#define ULPI_CARKIT_INT_CARINTDET BIT(1) +#define ULPI_CARKIT_INT_DP BIT(2) + +/* Carkit Pulse Control*/ +#define ULPI_CARKIT_PLS_CTRL_TXPLSEN BIT(0) +#define ULPI_CARKIT_PLS_CTRL_RXPLSEN BIT(1) +#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN BIT(2) +#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN BIT(3) + +#endif /* __LINUX_ULPI_REGS_H */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 68b1e836dff1..c9aa7792de10 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -622,8 +622,6 @@ extern struct list_head usb_bus_list; extern struct mutex usb_bus_list_lock; extern wait_queue_head_t usb_kill_urb_queue; -extern int usb_find_interface_driver(struct usb_device *dev, - struct usb_interface *interface); #define usb_endpoint_out(ep_dir) (!((ep_dir) & USB_DIR_IN)) diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 7dbecf9a4656..e55a1504266e 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -18,6 +18,7 @@ #ifndef __ASM_ARCH_MSM_HSUSB_H #define __ASM_ARCH_MSM_HSUSB_H +#include <linux/extcon.h> #include <linux/types.h> #include <linux/usb/otg.h> #include <linux/clk.h> @@ -120,6 +121,17 @@ struct msm_otg_platform_data { }; /** + * struct 
msm_usb_cable - structure for external connector cable + * state tracking + * @nb: hold event notification callback + * @conn: used for notification registration + */ +struct msm_usb_cable { + struct notifier_block nb; + struct extcon_specific_cable_nb conn; +}; + +/** * struct msm_otg: OTG driver data. Shared by HCD and DCD. * @otg: USB OTG Transceiver structure. * @pdata: otg device platform data. @@ -138,6 +150,11 @@ struct msm_otg_platform_data { * @chg_type: The type of charger attached. * @dcd_retires: The retry count used to track Data contact * detection process. + * @manual_pullup: true if VBUS is not routed to USB controller/phy + * and controller driver therefore enables pull-up explicitly before + * starting controller using usbcmd run/stop bit. + * @vbus: VBUS signal state tracking, using extcon framework + * @id: ID signal state tracking, using extcon framework */ struct msm_otg { struct usb_phy phy; @@ -166,6 +183,11 @@ struct msm_otg { struct reset_control *phy_rst; struct reset_control *link_rst; int vdd_levels[3]; + + bool manual_pullup; + + struct msm_usb_cable vbus; + struct msm_usb_cable id; }; #endif diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h index a29f6030afb1..e159b39f67a2 100644 --- a/include/linux/usb/msm_hsusb_hw.h +++ b/include/linux/usb/msm_hsusb_hw.h @@ -21,6 +21,8 @@ #define USB_AHBBURST (MSM_USB_BASE + 0x0090) #define USB_AHBMODE (MSM_USB_BASE + 0x0098) +#define USB_GENCONFIG_2 (MSM_USB_BASE + 0x00a0) + #define USB_CAPLENGTH (MSM_USB_BASE + 0x0100) /* 8 bit */ #define USB_USBCMD (MSM_USB_BASE + 0x0140) @@ -30,6 +32,9 @@ #define USB_PHY_CTRL (MSM_USB_BASE + 0x0240) #define USB_PHY_CTRL2 (MSM_USB_BASE + 0x0278) +#define GENCONFIG_2_SESS_VLD_CTRL_EN BIT(7) +#define USBCMD_SESS_VLD_CTRL BIT(25) + #define USBCMD_RESET 2 #define USB_USBINTR (MSM_USB_BASE + 0x0148) @@ -50,6 +55,10 @@ #define ULPI_PWR_CLK_MNG_REG 0x88 #define OTG_COMP_DISABLE BIT(0) +#define ULPI_MISC_A 0x96 +#define
ULPI_MISC_A_VBUSVLDEXTSEL BIT(1) +#define ULPI_MISC_A_VBUSVLDEXT BIT(0) + #define ASYNC_INTR_CTRL (1 << 29) /* Enable async interrupt */ #define ULPI_STP_CTRL (1 << 30) /* Block communication with PHY */ #define PHY_RETEN (1 << 1) /* PHY retention enable/disable */ diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h index 148b8fa5b1a2..725120224472 100644 --- a/include/linux/usb/net2280.h +++ b/include/linux/usb/net2280.h @@ -168,6 +168,9 @@ struct net2280_regs { #define ENDPOINT_B_INTERRUPT 2 #define ENDPOINT_A_INTERRUPT 1 #define ENDPOINT_0_INTERRUPT 0 +#define USB3380_IRQSTAT0_EP_INTR_MASK_IN (0xF << 17) +#define USB3380_IRQSTAT0_EP_INTR_MASK_OUT (0xF << 1) + u32 irqstat1; #define POWER_STATE_CHANGE_INTERRUPT 27 #define PCI_ARBITER_TIMEOUT_INTERRUPT 26 diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index bc91b5d380fd..e39f251cf861 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -205,6 +205,8 @@ extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index); extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index); extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, const char *phandle, u8 index); +extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, + struct device_node *node, struct notifier_block *nb); extern void usb_put_phy(struct usb_phy *); extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern int usb_bind_phy(const char *dev_name, u8 index, @@ -238,6 +240,12 @@ static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, return ERR_PTR(-ENXIO); } +static inline struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, + struct device_node *node, struct notifier_block *nb) +{ + return ERR_PTR(-ENXIO); +} + static inline void usb_put_phy(struct usb_phy *x) { } diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index f06529c14141..3dd5a781da99 100644 --- 
a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -169,8 +169,7 @@ struct renesas_usbhs_driver_param { #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -#define USBHS_TYPE_R8A7790 1 -#define USBHS_TYPE_R8A7791 2 +#define USBHS_TYPE_RCAR_GEN2 1 /* * option: diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5c295c26ad37..5f07407a367a 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -12,6 +12,8 @@ #define __LINUX_USB_ULPI_H #include <linux/usb/otg.h> +#include <linux/ulpi/regs.h> + /*-------------------------------------------------------------------------*/ /* @@ -49,138 +51,6 @@ /*-------------------------------------------------------------------------*/ -/* - * Macros for Set and Clear - * See ULPI 1.1 specification to find the registers with Set and Clear offsets - */ -#define ULPI_SET(a) (a + 1) -#define ULPI_CLR(a) (a + 2) - -/*-------------------------------------------------------------------------*/ - -/* - * Register Map - */ -#define ULPI_VENDOR_ID_LOW 0x00 -#define ULPI_VENDOR_ID_HIGH 0x01 -#define ULPI_PRODUCT_ID_LOW 0x02 -#define ULPI_PRODUCT_ID_HIGH 0x03 -#define ULPI_FUNC_CTRL 0x04 -#define ULPI_IFC_CTRL 0x07 -#define ULPI_OTG_CTRL 0x0a -#define ULPI_USB_INT_EN_RISE 0x0d -#define ULPI_USB_INT_EN_FALL 0x10 -#define ULPI_USB_INT_STS 0x13 -#define ULPI_USB_INT_LATCH 0x14 -#define ULPI_DEBUG 0x15 -#define ULPI_SCRATCH 0x16 -/* Optional Carkit Registers */ -#define ULPI_CARCIT_CTRL 0x19 -#define ULPI_CARCIT_INT_DELAY 0x1c -#define ULPI_CARCIT_INT_EN 0x1d -#define ULPI_CARCIT_INT_STS 0x20 -#define ULPI_CARCIT_INT_LATCH 0x21 -#define ULPI_CARCIT_PLS_CTRL 0x22 -/* Other Optional Registers */ -#define ULPI_TX_POS_WIDTH 0x25 -#define ULPI_TX_NEG_WIDTH 0x26 -#define ULPI_POLARITY_RECOVERY 0x27 -/* Access Extended Register Set */ -#define ULPI_ACCESS_EXTENDED 0x2f -/* Vendor Specific */ -#define ULPI_VENDOR_SPECIFIC 0x30 -/* Extended Registers */ -#define 
ULPI_EXT_VENDOR_SPECIFIC 0x80 - -/*-------------------------------------------------------------------------*/ - -/* - * Register Bits - */ - -/* Function Control */ -#define ULPI_FUNC_CTRL_XCVRSEL (1 << 0) -#define ULPI_FUNC_CTRL_XCVRSEL_MASK (3 << 0) -#define ULPI_FUNC_CTRL_HIGH_SPEED (0 << 0) -#define ULPI_FUNC_CTRL_FULL_SPEED (1 << 0) -#define ULPI_FUNC_CTRL_LOW_SPEED (2 << 0) -#define ULPI_FUNC_CTRL_FS4LS (3 << 0) -#define ULPI_FUNC_CTRL_TERMSELECT (1 << 2) -#define ULPI_FUNC_CTRL_OPMODE (1 << 3) -#define ULPI_FUNC_CTRL_OPMODE_MASK (3 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NORMAL (0 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NONDRIVING (1 << 3) -#define ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI (2 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP (3 << 3) -#define ULPI_FUNC_CTRL_RESET (1 << 5) -#define ULPI_FUNC_CTRL_SUSPENDM (1 << 6) - -/* Interface Control */ -#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE (1 << 0) -#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE (1 << 1) -#define ULPI_IFC_CTRL_CARKITMODE (1 << 2) -#define ULPI_IFC_CTRL_CLOCKSUSPENDM (1 << 3) -#define ULPI_IFC_CTRL_AUTORESUME (1 << 4) -#define ULPI_IFC_CTRL_EXTERNAL_VBUS (1 << 5) -#define ULPI_IFC_CTRL_PASSTHRU (1 << 6) -#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE (1 << 7) - -/* OTG Control */ -#define ULPI_OTG_CTRL_ID_PULLUP (1 << 0) -#define ULPI_OTG_CTRL_DP_PULLDOWN (1 << 1) -#define ULPI_OTG_CTRL_DM_PULLDOWN (1 << 2) -#define ULPI_OTG_CTRL_DISCHRGVBUS (1 << 3) -#define ULPI_OTG_CTRL_CHRGVBUS (1 << 4) -#define ULPI_OTG_CTRL_DRVVBUS (1 << 5) -#define ULPI_OTG_CTRL_DRVVBUS_EXT (1 << 6) -#define ULPI_OTG_CTRL_EXTVBUSIND (1 << 7) - -/* USB Interrupt Enable Rising, - * USB Interrupt Enable Falling, - * USB Interrupt Status and - * USB Interrupt Latch - */ -#define ULPI_INT_HOST_DISCONNECT (1 << 0) -#define ULPI_INT_VBUS_VALID (1 << 1) -#define ULPI_INT_SESS_VALID (1 << 2) -#define ULPI_INT_SESS_END (1 << 3) -#define ULPI_INT_IDGRD (1 << 4) - -/* Debug */ -#define ULPI_DEBUG_LINESTATE0 (1 << 0) -#define ULPI_DEBUG_LINESTATE1 
(1 << 1) - -/* Carkit Control */ -#define ULPI_CARKIT_CTRL_CARKITPWR (1 << 0) -#define ULPI_CARKIT_CTRL_IDGNDDRV (1 << 1) -#define ULPI_CARKIT_CTRL_TXDEN (1 << 2) -#define ULPI_CARKIT_CTRL_RXDEN (1 << 3) -#define ULPI_CARKIT_CTRL_SPKLEFTEN (1 << 4) -#define ULPI_CARKIT_CTRL_SPKRIGHTEN (1 << 5) -#define ULPI_CARKIT_CTRL_MICEN (1 << 6) - -/* Carkit Interrupt Enable */ -#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE (1 << 0) -#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL (1 << 1) -#define ULPI_CARKIT_INT_EN_CARINTDET (1 << 2) -#define ULPI_CARKIT_INT_EN_DP_RISE (1 << 3) -#define ULPI_CARKIT_INT_EN_DP_FALL (1 << 4) - -/* Carkit Interrupt Status and - * Carkit Interrupt Latch - */ -#define ULPI_CARKIT_INT_IDFLOAT (1 << 0) -#define ULPI_CARKIT_INT_CARINTDET (1 << 1) -#define ULPI_CARKIT_INT_DP (1 << 2) - -/* Carkit Pulse Control*/ -#define ULPI_CARKIT_PLS_CTRL_TXPLSEN (1 << 0) -#define ULPI_CARKIT_PLS_CTRL_RXPLSEN (1 << 1) -#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN (1 << 2) -#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN (1 << 3) - -/*-------------------------------------------------------------------------*/ - #if IS_ENABLED(CONFIG_USB_ULPI) struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, unsigned int flags); diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h index f92eb635b9d3..11525d8d89a7 100644 --- a/include/linux/usb/usb338x.h +++ b/include/linux/usb/usb338x.h @@ -43,6 +43,10 @@ #define IN_ENDPOINT_TYPE 12 #define OUT_ENDPOINT_ENABLE 10 #define OUT_ENDPOINT_TYPE 8 +#define USB3380_EP_CFG_MASK_IN ((0x3 << IN_ENDPOINT_TYPE) | \ + BIT(IN_ENDPOINT_ENABLE)) +#define USB3380_EP_CFG_MASK_OUT ((0x3 << OUT_ENDPOINT_TYPE) | \ + BIT(OUT_ENDPOINT_ENABLE)) struct usb338x_usb_ext_regs { u32 usbclass; diff --git a/include/linux/vme.h b/include/linux/vme.h index 79242e9c06b8..c0131358f351 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -120,6 +120,8 @@ void vme_free_consistent(struct vme_resource *, size_t, void *, dma_addr_t); size_t 
vme_get_size(struct vme_resource *); +int vme_check_window(u32 aspace, unsigned long long vme_base, + unsigned long long size); struct vme_resource *vme_slave_request(struct vme_dev *, u32, u32); int vme_slave_set(struct vme_resource *, int, unsigned long long, diff --git a/include/linux/wait.h b/include/linux/wait.h index d69ac4ecc88b..1e1bf9f963a9 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -358,6 +358,19 @@ do { \ __ret; \ }) +#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ + cmd1; schedule(); cmd2) +/* + * Just like wait_event_cmd(), except it sets exclusive flag + */ +#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ +} while (0) + #define __wait_event_cmd(wq, condition, cmd1, cmd2) \ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ cmd1; schedule(); cmd2) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index deee212af8e0..738b30b39b68 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -424,6 +424,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); +int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); @@ -434,7 +435,6 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); -extern void flush_scheduled_work(void); extern int schedule_on_each_cpu(work_func_t func); @@ -531,6 +531,35 @@ static inline bool schedule_work(struct work_struct *work) } /** + * flush_scheduled_work - ensure that any scheduled work 
has run to completion. + * + * Forces execution of the kernel-global workqueue and blocks until its + * completion. + * + * Think twice before calling this function! It's very easy to get into + * trouble if you don't take great care. Either of the following situations + * will lead to deadlock: + * + * One of the work items currently on the workqueue needs to acquire + * a lock held by your code or its caller. + * + * Your code is running in the context of a work routine. + * + * They will be detected by lockdep when they occur, but the first might not + * occur very often. It depends on what work items are on the workqueue and + * what locks they need, which you have no control over. + * + * In most situations flushing the entire workqueue is overkill; you merely + * need to know that a particular work item isn't queued and isn't running. + * In such cases you should use cancel_delayed_work_sync() or + * cancel_work_sync() instead. + */ +static inline void flush_scheduled_work(void) +{ + flush_workqueue(system_wq); +} + +/** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b2dd371ec0ca..b333c945e571 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -7,6 +7,8 @@ #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/fs.h> +#include <linux/flex_proportions.h> +#include <linux/backing-dev-defs.h> DECLARE_PER_CPU(int, dirty_throttle_leaks); @@ -84,18 +86,95 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct bdi_writeback *wb; /* wb this writeback is issued under */ + struct inode *inode; /* inode being written out */ + + /* foreign inode detection, see wbc_detach_inode() */ + int wb_id; /* 
current wb id */ + int wb_lcand_id; /* last foreign candidate wb id */ + int wb_tcand_id; /* this foreign candidate wb id */ + size_t wb_bytes; /* bytes written by current wb */ + size_t wb_lcand_bytes; /* bytes written by last candidate */ + size_t wb_tcand_bytes; /* bytes written by this candidate */ +#endif }; /* + * A wb_domain represents a domain that wb's (bdi_writeback's) belong to + * and are measured against each other in. There always is one global + * domain, global_wb_domain, that every wb in the system is a member of. + * This allows measuring the relative bandwidth of each wb to distribute + * dirtyable memory accordingly. + */ +struct wb_domain { + spinlock_t lock; + + /* + * Scale the writeback cache size proportional to the relative + * writeout speed. + * + * We do this by keeping a floating proportion between BDIs, based + * on page writeback completions [end_page_writeback()]. Those + * devices that write out pages fastest will get the larger share, + * while the slower will get a smaller share. + * + * We use page writeout completions because we are interested in + * getting rid of dirty pages. Having them written out is the + * primary goal. + * + * We introduce a concept of time, a period over which we measure + * these events, because demand can/will vary over time. The length + * of this period itself is measured in page writeback completions. + */ + struct fprop_global completions; + struct timer_list period_timer; /* timer for aging of completions */ + unsigned long period_time; + + /* + * The dirtyable memory and dirty threshold could be suddenly + * knocked down by a large amount (eg. on the startup of KVM in a + * swapless system). This may throw the system into deep dirty + * exceeded state and throttle heavy/light dirtiers alike. To + * retain good responsiveness, maintain global_dirty_limit for + * tracking slowly down to the knocked down dirty threshold. + * + * Both fields are protected by ->lock. 
+ */ + unsigned long dirty_limit_tstamp; + unsigned long dirty_limit; +}; + +/** + * wb_domain_size_changed - memory available to a wb_domain has changed + * @dom: wb_domain of interest + * + * This function should be called when the amount of memory available to + * @dom has changed. It resets @dom's dirty limit parameters to prevent + * the past values which don't match the current configuration from skewing + * dirty throttling. Without this, when memory size of a wb_domain is + * greatly reduced, the dirty throttling logic may allow too many pages to + * be dirtied leading to consecutive unnecessary OOMs and may get stuck in + * that situation. + */ +static inline void wb_domain_size_changed(struct wb_domain *dom) +{ + spin_lock(&dom->lock); + dom->dirty_limit_tstamp = jiffies; + dom->dirty_limit = 0; + spin_unlock(&dom->lock); +} + +/* * fs/fs-writeback.c */ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); -int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, - enum wb_reason reason); +bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); @@ -107,6 +186,123 @@ static inline void wait_on_inode(struct inode *inode) wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } +#ifdef CONFIG_CGROUP_WRITEBACK + +#include <linux/cgroup.h> +#include <linux/bio.h> + +void __inode_attach_wb(struct inode *inode, struct page *page); +void wbc_attach_and_unlock_inode(struct writeback_control *wbc, + struct inode *inode) + __releases(&inode->i_lock); +void 
wbc_detach_inode(struct writeback_control *wbc); +void wbc_account_io(struct writeback_control *wbc, struct page *page, + size_t bytes); + +/** + * inode_attach_wb - associate an inode with its wb + * @inode: inode of interest + * @page: page being dirtied (may be NULL) + * + * If @inode doesn't have its wb, associate it with the wb matching the + * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o + * @inode->i_lock. + */ +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ + if (!inode->i_wb) + __inode_attach_wb(inode, page); +} + +/** + * inode_detach_wb - disassociate an inode from its wb + * @inode: inode of interest + * + * @inode is being freed. Detach from its wb. + */ +static inline void inode_detach_wb(struct inode *inode) +{ + if (inode->i_wb) { + wb_put(inode->i_wb); + inode->i_wb = NULL; + } +} + +/** + * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite + * @wbc: writeback_control of interest + * @inode: target inode + * + * This function is to be used by __filemap_fdatawrite_range(), which is an + * alternative entry point into writeback code, and first ensures @inode is + * associated with a bdi_writeback and attaches it to @wbc. + */ +static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode) +{ + spin_lock(&inode->i_lock); + inode_attach_wb(inode, NULL); + wbc_attach_and_unlock_inode(wbc, inode); +} + +/** + * wbc_init_bio - writeback specific initialization of bio + * @wbc: writeback_control for the writeback in progress + * @bio: bio to be initialized + * + * @bio is a part of the writeback in progress controlled by @wbc. Perform + * writeback specific initialization. This is used to apply the cgroup + * writeback context. + */ +static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) +{ + /* + * pageout() path doesn't attach @wbc to the inode being written + * out. 
This is intentional as we don't want the function to block + * behind a slow cgroup. Ultimately, we want pageout() to kick off + * regular writeback instead of writing things out itself. + */ + if (wbc->wb) + bio_associate_blkcg(bio, wbc->wb->blkcg_css); +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ +} + +static inline void inode_detach_wb(struct inode *inode) +{ +} + +static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, + struct inode *inode) + __releases(&inode->i_lock) +{ + spin_unlock(&inode->i_lock); +} + +static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode) +{ +} + +static inline void wbc_detach_inode(struct writeback_control *wbc) +{ +} + +static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) +{ +} + +static inline void wbc_account_io(struct writeback_control *wbc, + struct page *page, size_t bytes) +{ +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + /* * mm/page-writeback.c */ @@ -120,8 +316,12 @@ static inline void laptop_sync_completion(void) { } #endif void throttle_vm_writeout(gfp_t gfp_mask); bool zone_dirty_ok(struct zone *zone); +int wb_domain_init(struct wb_domain *dom, gfp_t gfp); +#ifdef CONFIG_CGROUP_WRITEBACK +void wb_domain_exit(struct wb_domain *dom); +#endif -extern unsigned long global_dirty_limit; +extern struct wb_domain global_wb_domain; /* These are exported to sysctl. 
*/ extern int dirty_background_ratio; @@ -155,19 +355,12 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); -unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, - unsigned long dirty); - -void __bdi_update_bandwidth(struct backing_dev_info *bdi, - unsigned long thresh, - unsigned long bg_thresh, - unsigned long dirty, - unsigned long bdi_thresh, - unsigned long bdi_dirty, - unsigned long start_time); +unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); +void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); void page_writeback_init(void); void balance_dirty_pages_ratelimited(struct address_space *mapping); +bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 56529b34dc63..d30eff3d84d5 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -81,7 +81,8 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops); + void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops, + struct zpool *zpool); void (*destroy)(void *pool); int (*malloc)(void *pool, size_t size, gfp_t gfp, @@ -102,6 +103,4 @@ void zpool_register_driver(struct zpool_driver *driver); int zpool_unregister_driver(struct zpool_driver *driver); -int zpool_evict(void *pool, unsigned long handle); - #endif |
