From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:04:42 -0400 Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers A buffer cache is allocated from the movable area because it is usually referenced for a while and released soon. But some filesystems hold on to the buffer cache for a long time, which can disturb page migration. New APIs are introduced to allocate buffer cache with user-specified flags. The *_gfp APIs are for users who want to set the page allocation flags for page cache allocation, and the *_unmovable APIs are for users who want to allocate page cache from the non-movable area. Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329ceea1e..73b45225a7ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void 
bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. 
+ */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:22 -0700 Subject: leds: Reorder include directives Reorder include directives so that they are arranged in alphabetical order. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..4be2d7623d9e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include #include +#include #include #include -- cgit v1.2.3 From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:23 -0700 Subject: leds: make brightness type consistent across whole subsystem The documentation states that the brightness units type is enum led_brightness and this is the type used by the led API functions. Adjust the type of brightness variables in the struct led_classdev accordingly. 
Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4be2d7623d9e..f2e1cbc25705 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 20 Aug 2014 06:41:55 -0700 Subject: leds: Improve and export led_update_brightness led_update_brightness helper function used to be exploited only locally in the led-class.c module, where its result was being passed to the brightness_show sysfs callback. With the introduction of v4l2-flash subdevice the same functionality becomes required for reading current brightness from a LED device. This patch adds checking of return value of the brightness_get callback and moves the led_update_brightness() function to the LED subsystem public API. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index f2e1cbc25705..a57611d0c94e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. 
+ * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers -- cgit v1.2.3 From 50849db32a9f529235a84bcc84a6b8e631b1d0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Sep 2014 00:58:12 -0400 Subject: jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list __jbd2_journal_clean_checkpoint_list() returns the number of buffers it freed, but no one was using the value, so just stop doing that. This also allows for simplifying the calling convention for journal_clean_once_cp_list(). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0dae71e9971c..704b9a599b26 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -int __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal); int __jbd2_journal_remove_checkpoint(struct journal_head *); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); -- cgit v1.2.3 From 7990da71ebfa887ae6fe4464ab0d99ddeb8efacc Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 3 Sep 2014 17:49:32 +0200 Subject: PM / QoS: Add PM_QOS_MEMORY_BANDWIDTH class Also adds a class type PM_QOS_SUM that aggregates the values by summing them. It can be used by memory controllers to calculate the optimum clock frequency based on the bandwidth needs of the different memory clients. Signed-off-by: Tomeu Vizoso Acked-by: Pavel Machek Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm_qos.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 9ab4bf7c4646..636e82834506 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -15,6 +15,7 @@ enum { PM_QOS_CPU_DMA_LATENCY, PM_QOS_NETWORK_LATENCY, PM_QOS_NETWORK_THROUGHPUT, + PM_QOS_MEMORY_BANDWIDTH, /* insert new class ID */ PM_QOS_NUM_CLASSES, @@ -32,6 +33,7 @@ enum pm_qos_flags_status { #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 +#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) @@ -69,7 +71,8 @@ struct dev_pm_qos_request { enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ - PM_QOS_MIN /* return the smallest value */ + PM_QOS_MIN, /* return the smallest value */ + PM_QOS_SUM /* return the sum */ }; /* -- cgit v1.2.3 From 90a8020278c1598fafd071736a0846b38510309c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2014 21:49:18 -0400 Subject: vfs: fix data corruption when blocksize < pagesize for mmaped data ->page_mkwrite() is used by filesystems to allocate blocks under a page which is becoming writeably mmapped in some process' address space. This allows a filesystem to return a page fault if there is not enough space available, user exceeds quota or similar problem happens, rather than silently discarding data later when writepage is called. However VFS fails to call ->page_mkwrite() in all the cases where filesystems need it when blocksize < pagesize. 
For example when blocksize = 1024, pagesize = 4096 the following is problematic: ftruncate(fd, 0); pwrite(fd, buf, 1024, 0); map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0); map[0] = 'a'; ----> page_mkwrite() for index 0 is called ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */ mremap(map, 1024, 10000, 0); map[4095] = 'a'; ----> no page_mkwrite() called At the moment ->page_mkwrite() is called, filesystem can allocate only one block for the page because i_size == 1024. Otherwise it would create blocks beyond i_size which is generally undesirable. But later at ->writepage() time, we also need to store data at offset 4095 but we don't have block allocated for it. This patch introduces a helper function filesystems can use to have ->page_mkwrite() called at all the necessary moments. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8981cc882ed2..5005464fe012 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1155,6 +1155,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); -- cgit v1.2.3 From f14bb039a4e8206439d3e9abd92bc76bd142f243 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Oct 2014 16:07:03 -0700 Subject: uio: Export definition of struct uio_device In order to prevent a O(n) search of the filesystem to link up its uio node with its target configuration, TCMU needs to know the minor 
number that UIO assigned. Expose the definition of this struct so TCMU can access this field. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/linux/uio_driver.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 1ad4724458de..baa81718d985 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -63,7 +63,17 @@ struct uio_port { #define MAX_UIO_PORT_REGIONS 5 -struct uio_device; +struct uio_device { + struct module *owner; + struct device *dev; + int minor; + atomic_t event; + struct fasync_struct *async_queue; + wait_queue_head_t wait; + struct uio_info *info; + struct kobject *map_dir; + struct kobject *portio_dir; +}; /** * struct uio_info - UIO device capabilities -- cgit v1.2.3 From 5a17dae422d7de4b776a9753cd4673a343a25b4b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 5 Aug 2014 11:52:11 +0100 Subject: efi: Add efi= parameter parsing to the EFI boot stub We need a way to customize the behaviour of the EFI boot stub, in particular, we need a way to disable the "chunking" workaround, used when reading files from the EFI System Partition. One of my machines doesn't cope well when reading files in 1MB chunks to a buffer above the 4GB mark - it appears that the "chunking" bug workaround triggers another firmware bug. This was only discovered with commit 4bf7111f5016 ("x86/efi: Support initrd loaded above 4G"), and that commit is perfectly valid. The symptom I observed was a corrupt initrd rather than any kind of crash. efi= is now used to specify EFI parameters in two very different execution environments, the EFI boot stub and during kernel boot. There is also a slight performance optimization by enabling efi=nochunk, but that's offset by the fact that you're more likely to run into firmware issues, at least on x86. This is the rationale behind leaving the workaround enabled by default. 
Also provide some documentation for EFI_READ_CHUNK_SIZE and why we're using the current value of 1MB. Tested-by: Ard Biesheuvel Cc: Roy Franz Cc: Maarten Lankhorst Cc: Leif Lindholm Cc: Borislav Petkov Signed-off-by: Matt Fleming --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 45cb4ffdea62..518779fb5e90 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1227,4 +1227,6 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, unsigned long *load_addr, unsigned long *load_size); +efi_status_t efi_parse_options(char *cmdline); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From b2e0a54a1296a91b800f316df7bef7d1905e4fd0 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 14 Aug 2014 17:15:26 +0800 Subject: efi: Move noefi early param code out of x86 arch code noefi param can be used for arches other than X86 later, thus move it out of x86 platform code. Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 518779fb5e90..4812ed0b0374 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1229,4 +1229,5 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, efi_status_t efi_parse_options(char *cmdline); +bool efi_runtime_disabled(void); #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 6ccc72b87b83ece31c2a75bbe07f440b0378f7a9 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 14 Aug 2014 17:15:27 +0800 Subject: lib: Add a generic cmdline parse function parse_option_str There should be a generic function to parse params like a=b,c Adding parse_option_str in lib/cmdline.c which will return true if there's specified option set in the params. 
Also updated efi=old_map parsing code to use the new function Signed-off-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 95624bed87ef..f66427ef0628 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -407,6 +407,7 @@ int vsscanf(const char *, const char *, va_list); extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); extern int core_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); -- cgit v1.2.3 From 9c97e0bdd4b4ae44577a1b1ec949e782084e9a78 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 3 Sep 2014 13:32:19 +0200 Subject: efi: Add macro for EFI_MEMORY_UCE memory attribute Add the following macro from the UEFI spec, for completeness: EFI_MEMORY_UCE Memory cacheability attribute: The memory region supports being configured as not cacheable, exported, and supports the "fetch and add" semaphore mechanism. 
Signed-off-by: Laszlo Ersek Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4812ed0b0374..7464032ae00a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -92,6 +92,7 @@ typedef struct { #define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ #define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ #define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ +#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -- cgit v1.2.3 From 98d2a6ca14520904a47c46258d3bad02ffcd3f96 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 3 Sep 2014 13:32:20 +0200 Subject: efi: Introduce efi_md_typeattr_format() At the moment, there are three architectures debug-printing the EFI memory map at initialization: x86, ia64, and arm64. They all use different format strings, plus the EFI memory type and the EFI memory attributes are similarly hard to decode for a human reader. Introduce a helper __init function that formats the memory type and the memory attributes in a unified way, to a user-provided character buffer. The array "memory_type_name" is copied from the arm64 code, temporarily duplicating it. The (otherwise optional) braces around each string literal in the initializer list are dropped in order to match the kernel coding style more closely. The element size is tightened from 32 to 20 bytes (maximum actual string length + 1) so that we can derive the field width from the element size. 
Signed-off-by: Laszlo Ersek Tested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel [ Dropped useless 'register' keyword, which compiler will ignore ] Signed-off-by: Matt Fleming --- include/linux/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 7464032ae00a..78b29b133e14 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -887,6 +887,13 @@ extern bool efi_poweroff_required(void); (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ (md) = (void *)(md) + (m)->desc_size) +/* + * Format an EFI memory descriptor's type and attributes to a user-provided + * character buffer, as per snprintf(), and return the buffer. + */ +char * __init efi_md_typeattr_format(char *buf, size_t size, + const efi_memory_desc_t *md); + /** * efi_range_is_wc - check the WC bit on an address range * @start: starting kvirt address -- cgit v1.2.3 From 6d80dba1c9fe4316ef626980102b92fa30c7845a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 30 Sep 2014 21:58:52 +0100 Subject: efi: Provide a non-blocking SetVariable() operation There are some circumstances that call for trying to write an EFI variable in a non-blocking way. One such scenario is when writing pstore data in efi_pstore_write() via the pstore_dump() kdump callback. Now that we have an EFI runtime spinlock we need a way of aborting if there is contention instead of spinning, since when writing pstore data from the kdump callback, the runtime lock may already be held by the CPU that's running the callback if we crashed in the middle of an EFI variable operation. The situation is sufficiently special that a new EFI variable operation is warranted. Introduce ->set_variable_nonblocking() for this use case. 
It is an optional EFI backend operation, and need only be implemented by those backends that usually acquire locks to serialize access to EFI variables, as is the case for virt_efi_set_variable() where we now grab the EFI runtime spinlock. Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Ard Biesheuvel Cc: Matthew Garrett Signed-off-by: Matt Fleming --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 78b29b133e14..0949f9c7e872 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -503,6 +503,10 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); +typedef efi_status_t +efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data); + typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count); typedef void efi_reset_system_t (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data); @@ -822,6 +826,7 @@ extern struct efi { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; + efi_set_variable_nonblocking_t *set_variable_nonblocking; efi_query_variable_info_t *query_variable_info; efi_update_capsule_t *update_capsule; efi_query_capsule_caps_t *query_capsule_caps; @@ -1042,6 +1047,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; + efi_set_variable_nonblocking_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; }; -- cgit v1.2.3 From f2fc42b6ac31f4d808da7a9da460dd433a71e976 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 12 Jun 2014 22:30:34 +0530 Subject: mailbox: rename pl320-ipc specific mailbox.h The patch 30058677 "ARM / highbank: add 
support for pl320 IPC" added a pl320 IPC specific header file as a generic mailbox.h. This file has been renamed appropriately to allow the introduction of the generic mailbox API framework. Acked-by: Mark Langsdorf Cc: Rafael J. Wysocki Signed-off-by: Suman Anna Reviewed-by: Mark Brown Acked-by: Arnd Bergmann --- include/linux/mailbox.h | 17 ----------------- include/linux/pl320-ipc.h | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) delete mode 100644 include/linux/mailbox.h create mode 100644 include/linux/pl320-ipc.h (limited to 'include/linux') diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h deleted file mode 100644 index 5161f63ec1c8..000000000000 --- a/include/linux/mailbox.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -int pl320_ipc_transmit(u32 *data); -int pl320_ipc_register_notifier(struct notifier_block *nb); -int pl320_ipc_unregister_notifier(struct notifier_block *nb); diff --git a/include/linux/pl320-ipc.h b/include/linux/pl320-ipc.h new file mode 100644 index 000000000000..5161f63ec1c8 --- /dev/null +++ b/include/linux/pl320-ipc.h @@ -0,0 +1,17 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +int pl320_ipc_transmit(u32 *data); +int pl320_ipc_register_notifier(struct notifier_block *nb); +int pl320_ipc_unregister_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 2b6d83e2b8b7de82331a6a1dcd64b51020a6031c Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Thu, 12 Jun 2014 22:31:19 +0530 Subject: mailbox: Introduce framework for mailbox Introduce common framework for client/protocol drivers and controller drivers of Inter-Processor-Communication (IPC). Client driver developers should have a look at include/linux/mailbox_client.h to understand the part of the API exposed to client drivers. Similarly controller driver developers should have a look at include/linux/mailbox_controller.h Reviewed-by: Mark Brown Signed-off-by: Jassi Brar --- include/linux/mailbox_client.h | 46 +++++++++++++ include/linux/mailbox_controller.h | 133 +++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 include/linux/mailbox_client.h create mode 100644 include/linux/mailbox_controller.h (limited to 'include/linux') diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h new file mode 100644 index 000000000000..307d9cab2026 --- /dev/null +++ b/include/linux/mailbox_client.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2013-2014 Linaro Ltd. + * Author: Jassi Brar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MAILBOX_CLIENT_H +#define __MAILBOX_CLIENT_H + +#include +#include + +struct mbox_chan; + +/** + * struct mbox_client - User of a mailbox + * @dev: The client device + * @tx_block: If the mbox_send_message should block until data is + * transmitted. + * @tx_tout: Max block period in ms before TX is assumed failure + * @knows_txdone: If the client could run the TX state machine. Usually + * if the client receives some ACK packet for transmission. + * Unused if the controller already has TX_Done/RTR IRQ. + * @rx_callback: Atomic callback to provide client the data received + * @tx_done: Atomic callback to tell client of data transmission + */ +struct mbox_client { + struct device *dev; + bool tx_block; + unsigned long tx_tout; + bool knows_txdone; + + void (*rx_callback)(struct mbox_client *cl, void *mssg); + void (*tx_done)(struct mbox_client *cl, void *mssg, int r); +}; + +struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index); +int mbox_send_message(struct mbox_chan *chan, void *mssg); +void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ +bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */ +void mbox_free_channel(struct mbox_chan *chan); /* may sleep */ + +#endif /* __MAILBOX_CLIENT_H */ diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h new file mode 100644 index 000000000000..d4cf96f07cfc --- /dev/null +++ b/include/linux/mailbox_controller.h @@ -0,0 +1,133 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MAILBOX_CONTROLLER_H +#define __MAILBOX_CONTROLLER_H + +#include +#include +#include +#include +#include + +struct mbox_chan; + +/** + * struct mbox_chan_ops - methods to control mailbox channels + * @send_data: The API asks the MBOX controller driver, in atomic + * context try to transmit a message on the bus. Returns 0 if + * data is accepted for transmission, -EBUSY while rejecting + * if the remote hasn't yet read the last data sent. Actual + * transmission of data is reported by the controller via + * mbox_chan_txdone (if it has some TX ACK irq). It must not + * sleep. + * @startup: Called when a client requests the chan. The controller + * could ask clients for additional parameters of communication + * to be provided via client's chan_data. This call may + * block. After this call the Controller must forward any + * data received on the chan by calling mbox_chan_received_data. + * The controller may do stuff that need to sleep. + * @shutdown: Called when a client relinquishes control of a chan. + * This call may block too. The controller must not forward + * any received data anymore. + * The controller may do stuff that need to sleep. + * @last_tx_done: If the controller sets 'txdone_poll', the API calls + * this to poll status of last TX. The controller must + * give priority to IRQ method over polling and never + * set both txdone_poll and txdone_irq. Only in polling + * mode 'send_data' is expected to return -EBUSY. + * The controller may do stuff that need to sleep/block. + * Used only if txdone_poll:=true && txdone_irq:=false + * @peek_data: Atomic check for any received data. Return true if controller + * has some data to push to the client. False otherwise. 
+ */ +struct mbox_chan_ops { + int (*send_data)(struct mbox_chan *chan, void *data); + int (*startup)(struct mbox_chan *chan); + void (*shutdown)(struct mbox_chan *chan); + bool (*last_tx_done)(struct mbox_chan *chan); + bool (*peek_data)(struct mbox_chan *chan); +}; + +/** + * struct mbox_controller - Controller of a class of communication channels + * @dev: Device backing this controller + * @ops: Operators that work on each communication chan + * @chans: Array of channels + * @num_chans: Number of channels in the 'chans' array. + * @txdone_irq: Indicates if the controller can report to API when + * the last transmitted data was read by the remote. + * Eg, if it has some TX ACK irq. + * @txdone_poll: If the controller can read but not report the TX + * done. Ex, some register shows the TX status but + * no interrupt rises. Ignored if 'txdone_irq' is set. + * @txpoll_period: If 'txdone_poll' is in effect, the API polls for + * last TX's status after these many millisecs + * @of_xlate: Controller driver specific mapping of channel via DT + * @poll: API private. Used to poll for TXDONE on all channels. + * @node: API private. To hook into list of controllers. + */ +struct mbox_controller { + struct device *dev; + struct mbox_chan_ops *ops; + struct mbox_chan *chans; + int num_chans; + bool txdone_irq; + bool txdone_poll; + unsigned txpoll_period; + struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, + const struct of_phandle_args *sp); + /* Internal to API */ + struct timer_list poll; + struct list_head node; +}; + +/* + * The length of circular buffer for queuing messages from a client. + * 'msg_count' tracks the number of buffered messages while 'msg_free' + * is the index where the next message would be buffered. + * We shouldn't need it too big because every transfer is interrupt + * triggered and if we have lots of data to transfer, the interrupt + * latencies are going to be the bottleneck, not the buffer length. 
+ * Besides, mbox_send_message could be called from atomic context and + * the client could also queue another message from the notifier 'tx_done' + * of the last transfer done. + * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN" + * print, it needs to be taken from config option or somesuch. + */ +#define MBOX_TX_QUEUE_LEN 20 + +/** + * struct mbox_chan - s/w representation of a communication chan + * @mbox: Pointer to the parent/provider of this channel + * @txdone_method: Way to detect TXDone chosen by the API + * @cl: Pointer to the current owner of this channel + * @tx_complete: Transmission completion + * @active_req: Currently active request hook + * @msg_count: No. of mssg currently queued + * @msg_free: Index of next available mssg slot + * @msg_data: Hook for data packet + * @lock: Serialise access to the channel + * @con_priv: Hook for controller driver to attach private data + */ +struct mbox_chan { + struct mbox_controller *mbox; + unsigned txdone_method; + struct mbox_client *cl; + struct completion tx_complete; + void *active_req; + unsigned msg_count, msg_free; + void *msg_data[MBOX_TX_QUEUE_LEN]; + spinlock_t lock; /* Serialise access to the channel */ + void *con_priv; +}; + +int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */ +void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */ +void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */ +void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */ + +#endif /* __MAILBOX_CONTROLLER_H */ -- cgit v1.2.3 From 083bf668cb70e47b84db64856606e94beac87f01 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 14 Mar 2014 14:06:25 +0800 Subject: ACPI: make acpi_create_platform_device() an external API Signed-off-by: Zhang Rui --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 807cbc46d73e..2c24c2c1be45 
100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -432,6 +432,7 @@ static inline bool acpi_driver_match_device(struct device *dev, int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); +struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_PTR(_ptr) (_ptr) #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From 7b83fd9d91a411158f72d36958103c708c3b5a86 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 25 Mar 2014 10:40:09 +0800 Subject: Thermal: move the KELVIN_TO_MILLICELSIUS macro to thermal.h This macro can be used by other component so move it to a common header, but in a slightly different way: define two macros, one macro with an offset and the other doesn't. Signed-off-by: Aaron Lu Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0305cde21a74..79ce6b94884a 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,6 +44,8 @@ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) +#define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" -- cgit v1.2.3 From 77e337c6e23e3b9d22e09ffec202a80f755a54c2 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 3 Sep 2014 15:13:02 +0800 Subject: Thermal: introduce INT3402 thermal driver ACPI INT3402 device object could report temperature for the memory module. To expose such information to user space, a thermal zone device is registered for it so that the thermal sysfs interface can expose such information for userspace to use. 
Signed-off-by: Aaron Lu Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 79ce6b94884a..ef90838b36a0 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -46,6 +46,8 @@ #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) +#define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) +#define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732) /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" -- cgit v1.2.3 From 174e964ec224c3c591b83a6b5f0984d905d3678f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Oct 2014 12:43:27 -0700 Subject: regulator: Include err.h from consumer.h to fix build failure sh:sh2007_defconfig fails to build with the following error: In file included from include/linux/regulator/machine.h:18:0, from arch/sh/boards/board-sh2007.c:10: include/linux/regulator/consumer.h: In function 'regulator_get_optional': include/linux/regulator/consumer.h:271:2: error: implicit declaration of function 'ERR_PTR' include/linux/err.h: At top level: include/linux/err.h:23:35: error: conflicting types for 'ERR_PTR' include/linux/regulator/consumer.h:271:9: note: previous implicit declaration of 'ERR_PTR' was here Since consumer.h uses ERR_PTR, it should include err.h. 
Signed-off-by: Guenter Roeck Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d347c805f923..f540b1496e2f 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -35,6 +35,8 @@ #ifndef __LINUX_REGULATOR_CONSUMER_H_ #define __LINUX_REGULATOR_CONSUMER_H_ +#include <linux/err.h> + struct device; struct notifier_block; struct regmap; -- cgit v1.2.3 From d4c5efdb97773f59a2b711754ca0953f24516739 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 26 Aug 2014 23:16:35 -0400 Subject: random: add and use memzero_explicit() for clearing data zatimend has reported that in his environment (3.16/gcc4.8.3/corei7) memset() calls which clear out sensitive data in extract_{buf,entropy, entropy_user}() in random driver are being optimized away by gcc. Add a helper memzero_explicit() (similarly as explicit_bzero() variants) that can be used in such cases where a variable with sensitive data is being cleared out in the end. Other use cases might also be in crypto code. [ I have put this into lib/string.c though, as it's always built-in and doesn't need any dependencies then. ] Fixes kernel bugzilla: 82041 Reported-by: zatimend@hotmail.co.uk Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Cc: Alexey Dobriyan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/string.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index d36977e029af..3b42b3732da6 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -132,7 +132,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
__printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, - const void *from, size_t available); + const void *from, size_t available); /** * strstarts - does @str start with @prefix? @@ -144,7 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } -extern size_t memweight(const void *ptr, size_t bytes); +size_t memweight(const void *ptr, size_t bytes); +void memzero_explicit(void *s, size_t count); /** * kbasename - return the last part of a pathname. -- cgit v1.2.3 From 70f3ce0510afdad7cbaf27ab7ab961377205c782 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 29 Sep 2014 11:47:54 +0200 Subject: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers currently call spi_nor_match_id() and then spi_nor_scan(). This adds a dependency on struct spi_device_id which we want to avoid. Make spi_nor_scan() do it for them. Signed-off-by: Ben Hutchings Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 9e6294f32ba8..a5a7a086748d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -187,32 +187,18 @@ struct spi_nor { /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure - * @id: the spi_device_id provided by the driver + * @name: the chip type name * @mode: the read mode supported by the driver * * The drivers can use this fuction to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to * fill the mtd_info{} and the spi_nor{}. 
* - * The board may assigns a spi_device_id with @id which be used to compared with - * the spi_device_id detected by the scanning. + * The chip type name can be provided through the @name parameter. * * Return: 0 for success, others for failure. */ -int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, - enum read_mode mode); +int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); extern const struct spi_device_id spi_nor_ids[]; -/** - * spi_nor_match_id() - find the spi_device_id by the name - * @name: the name of the spi_device_id - * - * The drivers use this function to find the spi_device_id - * specified by the @name. - * - * Return: returns the right spi_device_id pointer on success, - * and returns NULL on failure. - */ -const struct spi_device_id *spi_nor_match_id(char *name); - #endif -- cgit v1.2.3 From aa281ac631008b9c18c405c8880007789f659c7d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 19 Oct 2014 19:38:58 +0300 Subject: Boaz Harrosh - Fix broken email address I no longer have access to the Panasas email. So change to an email that can always reach me. Signed-off-by: Boaz Harrosh --- include/linux/pnfs_osd_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h index fe25876c1a5d..17d7d0d20eca 100644 --- a/include/linux/pnfs_osd_xdr.h +++ b/include/linux/pnfs_osd_xdr.h @@ -5,7 +5,7 @@ * All rights reserved. * * Benny Halevy - * Boaz Harrosh + * Boaz Harrosh * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 -- cgit v1.2.3 From 4846e3784585173f48e267b76f968bcb4a12d3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 9 Sep 2014 22:18:31 +0200 Subject: watchdog: simplify definitions of WATCHDOG_NOWAYOUT(_INIT_STATUS)? 
Signed-off-by: Uwe Kleine-König Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 2a3038ee17a3..395b70e0eccf 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -97,13 +97,8 @@ struct watchdog_device { #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ }; -#ifdef CONFIG_WATCHDOG_NOWAYOUT -#define WATCHDOG_NOWAYOUT 1 -#define WATCHDOG_NOWAYOUT_INIT_STATUS (1 << WDOG_NO_WAY_OUT) -#else -#define WATCHDOG_NOWAYOUT 0 -#define WATCHDOG_NOWAYOUT_INIT_STATUS 0 -#endif +#define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) +#define WATCHDOG_NOWAYOUT_INIT_STATUS (WATCHDOG_NOWAYOUT << WDOG_NO_WAY_OUT) /* Use the following function to check whether or not the watchdog is active */ static inline bool watchdog_active(struct watchdog_device *wdd) -- cgit v1.2.3 From 51315cdfa0521fff3059cec5fb8ffecc7f37cba7 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:27 +0200 Subject: cpufreq: allow driver-specific data This commit extends the cpufreq_driver structure with an additional 'void *driver_data' field that can be filled by the ->probe() function of a cpufreq driver to pass additional custom information to the driver itself. A new function called cpufreq_get_driver_data() is added to allow a cpufreq driver to retrieve those driver data, since they are typically needed from a cpufreq_policy->init() callback, which does not have access to the cpufreq_driver structure. This function call is similar to the existing cpufreq_get_current_driver() function call. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- include/linux/cpufreq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 138336b6bb04..503b085b7832 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -219,6 +219,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; u8 flags; + void *driver_data; /* needed by all drivers */ int (*init) (struct cpufreq_policy *policy); @@ -312,6 +313,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); const char *cpufreq_get_current_driver(void); +void *cpufreq_get_driver_data(void); static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) -- cgit v1.2.3 From 34e5a5273d6aa0ee8836bd5d6111b135ffae6931 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 19 Oct 2014 11:30:28 +0200 Subject: cpufreq: cpufreq-dt: extend with platform_data This commit extends the cpufreq-dt driver to take a platform_data structure. This structure is for now used to tell the cpufreq-dt driver the layout of the clocks on the platform, i.e whether all CPUs share the same clock or whether each CPU has a separate clock. Signed-off-by: Thomas Petazzoni Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- include/linux/cpufreq-dt.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/cpufreq-dt.h (limited to 'include/linux') diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h new file mode 100644 index 000000000000..0414009e2c30 --- /dev/null +++ b/include/linux/cpufreq-dt.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2014 Marvell + * Thomas Petazzoni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CPUFREQ_DT_H__ +#define __CPUFREQ_DT_H__ + +struct cpufreq_dt_platform_data { + /* + * True when each CPU has its own clock to control its + * frequency, false when all CPUs are controlled by a single + * clock. + */ + bool independent_clocks; +}; + +#endif /* __CPUFREQ_DT_H__ */ -- cgit v1.2.3 From a5b7616c55e188fe3d6ef686bef402d4703ecb62 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 30 Sep 2014 03:14:55 +0100 Subject: mtd: m25p80,spi-nor: Fix module aliases for m25p80 m25p80's device ID table is now spi_nor_ids, defined in spi-nor. The MODULE_DEVICE_TABLE() macro doesn't work with extern definitions, but its use was also removed at the same time. Now if m25p80 is built as a module it doesn't get the necessary aliases to be loaded automatically. A clean solution to this will involve defining the list of device IDs in spi-nor.h and removing struct spi_device_id from the spi-nor API, but this is quite a large change. As a quick fix suitable for stable, copy the device IDs back into m25p80. 
Fixes: 03e296f613af ("mtd: m25p80: use the SPI nor framework") Cc: # 3.16.x: 32f1b7c8352f: mtd: move support for struct flash_platform_data into m25p80 Cc: # 3.16.x: 90e55b3812a1: mtd: m25p80: get rid of spi_get_device_id Cc: # 3.16.x: 70f3ce0510af: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id Cc: # 3.16.x Signed-off-by: Ben Hutchings Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index a5a7a086748d..046a0a2e4c4e 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -199,6 +199,5 @@ struct spi_nor { * Return: 0 for success, others for failure. */ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); -extern const struct spi_device_id spi_nor_ids[]; #endif -- cgit v1.2.3 From 5695be142e203167e3cb515ef86a88424f3524eb Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 20 Oct 2014 18:12:32 +0200 Subject: OOM, PM: OOM killed task shouldn't escape PM suspend PM freezer relies on having all tasks frozen by the time devices are getting frozen so that no task will touch them while they are getting frozen. But OOM killer is allowed to kill an already frozen task in order to handle OOM situation. In order to protect from late wake ups OOM killer is disabled after all tasks are frozen. This, however, still keeps a window open when a killed task didn't manage to die by the time freeze_processes finishes. Reduce the race window by checking all tasks after OOM killer has been disabled. This is still not race free completely unfortunately because oom_killer_disable cannot stop an already ongoing OOM killer so a task might still wake up from the fridge and get killed without freeze_processes noticing. Full synchronization of OOM and freezer is, however, too heavy weight for this highly unlikely case.
Introduce and check oom_kills counter which gets incremented early when the allocator enters __alloc_pages_may_oom path and only check all the tasks if the counter changes during the freezing attempt. The counter is updated so early to reduce the race window since allocator checked oom_killer_disabled which is set by PM-freezing code. A false positive will push the PM-freezer into a slow path but that is not a big deal. Changes since v1 - push the re-check loop out of freeze_processes into check_frozen_processes and invert the condition to make the code more readable as per Rafael Fixes: f660daac474c6f (oom: thaw threads if oom killed thread is frozen before deferring) Cc: 3.2+ # 3.2+ Signed-off-by: Michal Hocko Signed-off-by: Rafael J. Wysocki --- include/linux/oom.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 647395a1a550..e8d6e1058723 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p) extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); + +extern int oom_kills_count(void); +extern void note_oom_kill(void); extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, nodemask_t *nodemask, -- cgit v1.2.3 From 9e8beeb79ded25c5c1986f80fb8a7f6815345d5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:58:48 -0600 Subject: audit: Remove "weak" from audit_classify_compat_syscall() declaration There's only one audit_classify_compat_syscall() definition, so it doesn't need to be weak. Remove the "weak" attribute from the audit_classify_compat_syscall() declaration. 
Signed-off-by: Bjorn Helgaas Acked-by: Richard Guy Briggs CC: AKASHI Takahiro --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 36dffeccebdb..e58fe7df8b9c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -90,7 +90,7 @@ extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; -extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall); +extern int audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ -- cgit v1.2.3 From 96a2adbc6f501996418da9f7afe39bf0e4d006a9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:59:09 -0600 Subject: clocksource: Remove "weak" from clocksource_default_clock() declaration kernel/time/jiffies.c provides a default clocksource_default_clock() definition explicitly marked "weak". arch/s390 provides its own definition intended to override the default, but the "weak" attribute on the declaration applied to the s390 definition as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the clocksource_default_clock() declaration so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: f1b82746c1e9 ("clocksource: Cleanup clocksource selection") Signed-off-by: Bjorn Helgaas Acked-by: John Stultz Acked-by: Ingo Molnar CC: Daniel Lezcano CC: Martin Schwidefsky --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 653f0e2b6ca9..abcafaa20b86 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -287,7 +287,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); -extern struct clocksource * __init __weak clocksource_default_clock(void); +extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern u64 -- cgit v1.2.3 From 5ab03ac5aaa1f032e071f1b3dc433b7839359c03 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 18:59:41 -0600 Subject: vmcore: Remove "weak" from function declarations For the following functions: elfcorehdr_alloc() elfcorehdr_free() elfcorehdr_read() elfcorehdr_read_notes() remap_oldmem_pfn_range() fs/proc/vmcore.c provides default definitions explicitly marked "weak". arch/s390 provides its own definitions intended to override the default ones, but the "weak" attribute on the declarations applied to the s390 definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declarations so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: be8a8d069e50 ("vmcore: introduce ELF header in new memory feature") Fixes: 9cb218131de1 ("vmcore: introduce remap_oldmem_pfn_range()") Signed-off-by: Bjorn Helgaas Acked-by: Andrew Morton Acked-by: Vivek Goyal CC: Michael Holzheu --- include/linux/crash_dump.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 72ab536ad3de..3849fce7ecfe 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -14,14 +14,13 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; -extern int __weak elfcorehdr_alloc(unsigned long long *addr, - unsigned long long *size); -extern void __weak elfcorehdr_free(unsigned long long addr); -extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos); -extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); -extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, - unsigned long from, unsigned long pfn, - unsigned long size, pgprot_t prot); +extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); +extern void elfcorehdr_free(unsigned long long addr); +extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); +extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot); extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); -- cgit v1.2.3 From 107bcc6d566cb40184068d888637f9aefe6252dd Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:00:25 -0600 Subject: kgdb: Remove "weak" from kgdb_arch_pc() declaration kernel/debug/debug_core.c provides a default kgdb_arch_pc() definition explicitly marked "weak". 
Several architectures provide their own definitions intended to override the default, but the "weak" attribute on the declaration applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declaration so we always prefer a non-weak definition over the weak one, independent of link order. Fixes: 688b744d8bc8 ("kgdb: fix signedness mixmatches, add statics, add declaration to header") Tested-by: Vineet Gupta # for ARC build Signed-off-by: Bjorn Helgaas Reviewed-by: Harvey Harrison --- include/linux/kgdb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6b06d378f3df..e465bb15912d 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -283,7 +283,7 @@ struct kgdb_io { extern struct kgdb_arch arch_kgdb_ops; -extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); +extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); #ifdef CONFIG_SERIAL_KGDB_NMI extern int kgdb_register_nmi_console(void); -- cgit v1.2.3 From e0a8400c6923a163265d52798cdd4c33f3f8ab5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:00:47 -0600 Subject: memory-hotplug: Remove "weak" from memory_block_size_bytes() declaration drivers/base/memory.c provides a default memory_block_size_bytes() definition explicitly marked "weak". Several architectures provide their own definitions intended to override the default, but the "weak" attribute on the declaration applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declaration so we always prefer a non-weak definition over the weak one, independent of link order. 
Fixes: 41f107266b19 ("drivers: base: Add prototype declaration to the header file") Signed-off-by: Bjorn Helgaas Acked-by: Andrew Morton CC: Rashika Kheria CC: Nathan Fontenot CC: Anton Blanchard CC: Heiko Carstens CC: Yinghai Lu --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index bb7384e3c3d8..8b8d8d12348e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -35,7 +35,7 @@ struct memory_block { }; int arch_get_memory_phys_device(unsigned long start_pfn); -unsigned long __weak memory_block_size_bytes(void); +unsigned long memory_block_size_bytes(void); /* These states are exposed to userspace as text strings in sysfs */ #define MEM_ONLINE (1<<0) /* exposed to userspace */ -- cgit v1.2.3 From 271a9c35158910496f6fc3a635c2ed85df6be3d9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 13 Oct 2014 19:01:03 -0600 Subject: uprobes: Remove "weak" from function declarations For the following interfaces: set_swbp() set_orig_insn() is_swbp_insn() is_trap_insn() uprobe_get_swbp_addr() arch_uprobe_ignore() arch_uprobe_copy_ixol() kernel/events/uprobes.c provides default definitions explicitly marked "weak". Some architectures provide their own definitions intended to override the defaults, but the "weak" attribute on the declarations applied to the arch definitions as well, so the linker chose one based on link order (see 10629d711ed7 ("PCI: Remove __weak annotation from pcibios_get_phb_of_node decl")). Remove the "weak" attribute from the declarations so we always prefer a non-weak definition over the weak one, independent of link order. Signed-off-by: Bjorn Helgaas Acked-by: Ingo Molnar Acked-by: Srikar Dronamraju CC: Victor Kamensky CC: Oleg Nesterov CC: David A. 
Long CC: Ananth N Mavinakayanahalli --- include/linux/uprobes.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f844c6b03ee..60beb5dc7977 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -98,11 +98,11 @@ struct uprobes_state { struct xol_area *xol_area; }; -extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); -extern bool __weak is_trap_insn(uprobe_opcode_t *insn); -extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern bool is_swbp_insn(uprobe_opcode_t *insn); +extern bool is_trap_insn(uprobe_opcode_t *insn); +extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); @@ -128,8 +128,8 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); -extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); -extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, +extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct 
pt_regs *regs); +extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v1.2.3 From 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: add i_op->dentry_open() Add a new inode operation i_op->dentry_open(). This is for stacked filesystems that want to return a struct file from a different filesystem. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a957d4366c24..5cf7f6759679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1528,6 +1528,9 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); + + /* WARNING: probably going away soon, do not use! */ + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2040,6 +2043,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); +extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3 From 1c118596a7682912106c80007102ce0184c77780 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export do_splice_direct() to modules Export do_splice_direct() to modules. Needed by overlay filesystem. 
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5cf7f6759679..10ed65b2c31d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2456,6 +2456,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); +extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -- cgit v1.2.3 From bd5d08569cc379f8366663a61558a9ce17c2e460 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:35 +0200 Subject: vfs: export __inode_permission() to modules We need to be able to check inode permissions (but not filesystem implied permissions) for stackable filesystems. Expose this interface for overlayfs. 
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 10ed65b2c31d..5419df70a835 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2257,6 +2257,7 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); static inline bool execute_ok(struct inode *inode) -- cgit v1.2.3 From c771d683a62e5d36bc46036f5c07f4f5bb7dda61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: introduce clone_private_mount() Overlayfs needs a private clone of the mount, so create a function for this and export to modules. Signed-off-by: Miklos Szeredi --- include/linux/mount.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 9262e4bf0cc3..c2c561dc0114 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); +struct path; +extern struct vfsmount *clone_private_mount(struct path *path); + struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, -- cgit v1.2.3 From cbdf35bcb833bfd00f0925d7a9a33a21f41ea582 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: export check_sticky() It's already duplicated in btrfs and about to be used in overlayfs too. Move the sticky bit check to an inline helper and call the out-of-line helper only in the unlikely case of the sticky bit being set.
Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5419df70a835..55cc0a319baa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2259,6 +2259,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { @@ -2745,6 +2746,14 @@ static inline int is_sxid(umode_t mode) return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + + return __check_sticky(dir, inode); +} + static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) -- cgit v1.2.3 From 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:36 +0200 Subject: vfs: add whiteout support Whiteout isn't actually a new file type, but is represented as a char device (Linus's idea) with 0/0 device number. This has several advantages compared to introducing a new whiteout file type: - no userspace API changes (e.g. 
trivial to make backups of upper layer filesystem, without losing whiteouts) - no fs image format changes (you can boot an old kernel/fsck without whiteout support and things won't break) - implementation is trivial Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 55cc0a319baa..69118b3cb917 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -222,6 +222,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) +/* + * Whiteout is represented by a char device. The following constants define the + * mode and device number to use. + */ +#define WHITEOUT_MODE 0 +#define WHITEOUT_DEV 0 + /* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. @@ -1398,6 +1405,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_whiteout(struct inode *, struct dentry *); /* * VFS dentry helper functions. @@ -1628,6 +1636,9 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) +#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ + (inode)->i_rdev == WHITEOUT_DEV) + /* * Inode state bits. 
Protected by inode->i_lock * -- cgit v1.2.3 From 69c433ed2ecd2d3264efd7afec4439524b319121 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 24 Oct 2014 00:14:39 +0200 Subject: fs: limit filesystem stacking depth Add a simple read-only counter to super_block that indicates how deep this is in the stack of filesystems. Previously ecryptfs was the only stackable filesystem and it explicitly disallowed multiple layers of itself. Overlayfs, however, can be stacked recursively and also may be stacked on top of ecryptfs or vice versa. To limit the kernel stack usage we must limit the depth of the filesystem stack. Initially the limit is set to 2. Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 69118b3cb917..4e41a4a331bb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -261,6 +261,12 @@ struct iattr { */ #include +/* + * Maximum number of layers of fs stack. Needs to be limited to + * prevent kernel stack overflow + */ +#define FILESYSTEM_MAX_STACK_DEPTH 2 + /** * enum positive_aop_returns - aop return codes with specific semantics * @@ -1273,6 +1279,11 @@ struct super_block { struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + + /* + * Indicates how deep in a filesystem stack this SB is + */ + int s_stack_depth; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v1.2.3 From 571ee1b6859869a09ed718d390aac2b9414646a2 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 9 Oct 2014 18:30:08 +0800 Subject: kvm: vfio: fix unregister kvm_device_ops of vfio After commit 80ce163 (KVM: VFIO: register kvm_device_ops dynamically), kvm_device_ops of vfio can be registered dynamically. 
Commit 3c3c29fd (kvm-vfio: do not use module_init) move the dynamic register invoked by kvm_init in order to fix broke unloading of the kvm module. However, kvm_device_ops of vfio is unregistered after rmmod kvm-intel module which lead to device type collision detection warning after kvm-intel module reinsmod. WARNING: CPU: 1 PID: 10358 at /root/cathy/kvm/arch/x86/kvm/../../../virt/kvm/kvm_main.c:3289 kvm_init+0x234/0x282 [kvm]() Modules linked in: kvm_intel(O+) kvm(O) nfsv3 nfs_acl auth_rpcgss oid_registry nfsv4 dns_resolver nfs fscache lockd sunrpc pci_stub bridge stp llc autofs4 8021q cpufreq_ondemand ipv6 joydev microcode pcspkr igb i2c_algo_bit ehci_pci ehci_hcd e1000e i2c_i801 ixgbe ptp pps_core hwmon mdio tpm_tis tpm ipmi_si ipmi_msghandler acpi_cpufreq isci libsas scsi_transport_sas button dm_mirror dm_region_hash dm_log dm_mod [last unloaded: kvm_intel] CPU: 1 PID: 10358 Comm: insmod Tainted: G W O 3.17.0-rc1 #2 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 0000000000000cd9 ffff880ff08cfd18 ffffffff814a61d9 0000000000000cd9 0000000000000000 ffff880ff08cfd58 ffffffff810417b7 ffff880ff08cfd48 ffffffffa045bcac ffffffffa049c420 0000000000000040 00000000000000ff Call Trace: [] dump_stack+0x49/0x60 [] warn_slowpath_common+0x7c/0x96 [] ? kvm_init+0x234/0x282 [kvm] [] warn_slowpath_null+0x15/0x17 [] kvm_init+0x234/0x282 [kvm] [] vmx_init+0x1bf/0x42a [kvm_intel] [] ? vmx_check_processor_compat+0x64/0x64 [kvm_intel] [] do_one_initcall+0xe3/0x170 [] ? __vunmap+0xad/0xb8 [] do_init_module+0x2b/0x174 [] load_module+0x43e/0x569 [] ? do_init_module+0x174/0x174 [] ? copy_module_from_user+0x39/0x82 [] ? 
module_sect_show+0x20/0x20 [] SyS_init_module+0x54/0x81 [] system_call_fastpath+0x16/0x1b ---[ end trace 0626f4a3ddea56f3 ]--- The bug can be reproduced by: rmmod kvm_intel.ko insmod kvm_intel.ko without rmmod/insmod kvm.ko This patch fixes the bug by unregistering kvm_device_ops of vfio when the kvm-intel module is removed. Reported-by: Liu Rongrong Fixes: 3c3c29fd0d7cddc32862c350d0700ce69953e3bd Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 28be31f49250..ea53b04993f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); +void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; -- cgit v1.2.3 From dda02fd6278d9e995850b3c1dba484f17cbe4de4 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Fri, 24 Oct 2014 17:47:57 +0800 Subject: mm, cma: make parameters order consistent in func declaration and definition In the current code, the base and size parameters order is not consistent in functions declaration and definition. If someone calls these functions according to the declaration parameters order in cma.h, he will run into some bug and it's hard to find the reason. This patch makes the parameters order consistent in functions declaration and definition. 
Signed-off-by: Weijie Yang Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski --- include/linux/cma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 0430ed05d3b9..a93438beb33c 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,12 +18,12 @@ struct cma; extern phys_addr_t cma_get_base(struct cma *cma); extern unsigned long cma_get_size(struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, +extern int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma); -extern int cma_init_reserved_mem(phys_addr_t size, - phys_addr_t base, int order_per_bit, +extern int cma_init_reserved_mem(phys_addr_t base, + phys_addr_t size, int order_per_bit, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); -- cgit v1.2.3 From e999dbc254044e8d2a5818d92d205f65bae28f37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Oct 2014 17:13:57 +0200 Subject: Revert "block: all blk-mq requests are tagged" This reverts commit fb3ccb5da71273e7f0d50b50bc879e50cedd60e7. SCSI-2/SPI actually needs the tagged/untagged flag in the request to work properly. Revert this patch and add a follow on to set it in the right place. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Acked-by: Jens Axboe Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: stable@vger.kernel.org --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0207a78a8d82..51d0dc2259cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1136,8 +1136,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) /* * tag stuff */ -#define blk_rq_tagged(rq) \ - ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) +#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) extern int blk_queue_start_tag(struct request_queue *, struct request *); extern struct request *blk_queue_find_tag(struct request_queue *, int); extern void blk_queue_end_tag(struct request_queue *, struct request *); -- cgit v1.2.3 From 5631b8fba640a4ab2f8a954f63a603fa34eda96b Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Sat, 25 Oct 2014 15:09:42 -0700 Subject: compiler/gcc4+: Remove inaccurate comment about 'asm goto' miscompiles The bug referenced by the comment in this commit was not completely fixed in GCC 4.8.2, as I mentioned in a thread back in February: https://lkml.org/lkml/2014/2/12/797 The conclusion at that time was to make the quirk unconditional until the bug could be found and fixed in GCC. Unfortunately, when I submitted the patch (commit a9f18034) I left a comment in that claimed the bug was fixed in GCC 4.8.2+. This comment is inaccurate, and should be removed. 
Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar Cc: Jakub Jelinek Cc: Richard Henderson Cc: Linus Torvalds Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1414274982-14040-1-git-send-email-steven@uplinklabs.net Cc: Ingo Molnar --- include/linux/compiler-gcc4.h | 1 - include/linux/compiler-gcc5.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 2507fd2a1eb4..d1a558239b1a 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -71,7 +71,6 @@ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 * * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * Fixed in GCC 4.8.2 and later versions. * * (asm goto is automatically volatile - the naming reflects this.) */ diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index cdd1cc202d51..c8c565952548 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -53,7 +53,6 @@ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 * * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * Fixed in GCC 4.8.2 and later versions. * * (asm goto is automatically volatile - the naming reflects this.) */ -- cgit v1.2.3 From ebcf34f3d4be11f994340aff629f3c17171a4f65 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 26 Oct 2014 19:14:06 -0700 Subject: skbuff.h: fix kernel-doc warning for headers_end Fix kernel-doc warning in <linux/skbuff.h> by making both headers_start and headers_end private fields. Warning(..//include/linux/skbuff.h:654): No description found for parameter 'headers_end[0]' Signed-off-by: Randy Dunlap Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a59d9343c25b..5884f95ff0e9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -557,7 +557,9 @@ struct sk_buff { /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ + /* private: */ __u32 headers_start[0]; + /* public: */ /* if you move pkt_type around you also must adapt those constants */ #ifdef __BIG_ENDIAN_BITFIELD @@ -642,7 +644,9 @@ struct sk_buff { __u16 network_header; __u16 mac_header; + /* private: */ __u32 headers_end[0]; + /* public: */ /* These elements must be at the end, see alloc_skb() for details. */ sk_buff_data_t tail; -- cgit v1.2.3 From 1efed2d06c703489342ab6af2951683e07509c99 Mon Sep 17 00:00:00 2001 From: Olivier Blin Date: Fri, 24 Oct 2014 19:43:00 +0200 Subject: usbnet: add a callback for set_rx_mode To delegate promiscuous mode and multicast filtering to the subdriver. Signed-off-by: Olivier Blin Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 26088feb6608..d9a4905e01d0 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -78,6 +78,7 @@ struct usbnet { # define EVENT_NO_RUNTIME_PM 9 # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 +# define EVENT_SET_RX_MODE 12 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -159,6 +160,9 @@ struct driver_info { /* called by minidriver when receiving indication */ void (*indication)(struct usbnet *dev, void *ind, int indlen); + /* rx mode change (device changes address list filtering) */ + void (*set_rx_mode)(struct usbnet *dev); + /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ int out; /* tx endpoint */ -- cgit v1.2.3 From 54ef6df3f3f1353d99c80c437259d317b2cd1cbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 21:11:27 -0700 Subject: rcu: Provide counterpart to rcu_dereference() for non-RCU situations Although rcu_dereference() and friends can be used in situations where object lifetimes are being managed by something other than RCU, the resulting sparse and lockdep-RCU noise can be annoying. This commit therefore supplies a lockless_dereference(), which provides the protection for dereferences without the RCU-related debugging noise. Reported-by: Al Viro Signed-off-by: Paul E. 
McKenney Signed-off-by: Al Viro --- include/linux/rcupdate.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a4a819ffb2d1..53ff1a752d7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -616,6 +616,21 @@ static inline void rcu_preempt_sleep_check(void) */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to -- cgit v1.2.3 From d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 Oct 2014 15:42:01 +0100 Subject: overlayfs: fix lockdep misannotation In an overlay directory that shadows an empty lower directory, say /mnt/a/empty102, do: touch /mnt/a/empty102/x unlink /mnt/a/empty102/x rmdir /mnt/a/empty102 It's actually harmless, but needs another level of nesting between I_MUTEX_CHILD and I_MUTEX_NORMAL. 
Signed-off-by: Miklos Szeredi Tested-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e41a4a331bb..01036262095f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,11 +639,13 @@ static inline int inode_unhashed(struct inode *inode) * 2: child/target * 3: xattr * 4: second non-directory - * The last is for certain operations (such as rename) which lock two + * 5: second parent (when locking independent directories in rename) + * + * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> second non-directory + * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -651,7 +653,8 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_NONDIR2 + I_MUTEX_NONDIR2, + I_MUTEX_PARENT2, }; void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From cb1a5ab6ece7a37da4ac98ee26b0475b7c3ea79e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 28 Oct 2014 20:27:43 -0600 Subject: block: Fix merge logic when CONFIG_BLK_DEV_INTEGRITY is not defined Commit 4eaf99beadce switched to returning bool and as a result reversed the logic of the integrity merge checks. However, the empty stubs used when the block integrity code is compiled out were still returning 0. Make these stubs return "true". Signed-off-by: Martin K. Petersen Reported-by: Michael L. Semon Tested-by: Michael L. 
Semon Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0207a78a8d82..6cbee8395f60 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1583,13 +1583,13 @@ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) { - return 0; + return true; } static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) { - return 0; + return true; } static inline bool blk_integrity_is_initialized(struct gendisk *g) { -- cgit v1.2.3 From 47f29df7db78ee4fcdb104cf36918d987ddd0278 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 29 Oct 2014 14:50:29 -0700 Subject: drivers: of: add return value to of_reserved_mem_device_init() Driver calling of_reserved_mem_device_init() might be interested if the initialization has been successful or not, so add support for returning error code. This fixes a build warning caused by commit 7bfa5ab6fa1b ("drivers: dma-coherent: add initialization from device tree"), which has been merged without this change and without fixing function return value.
Fixes: 7bfa5ab6fa1b1 ("drivers: dma-coherent: add initialization from device tree") Signed-off-by: Marek Szyprowski Acked-by: Arnd Bergmann Cc: Michal Nazarewicz Cc: Grant Likely Cc: Laura Abbott Cc: Josh Cartwright Cc: Joonsoo Kim Cc: Kyungmin Park Cc: Russell King Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/of_reserved_mem.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 5b5efae09135..ad2f67054372 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -16,7 +16,7 @@ struct reserved_mem { }; struct reserved_mem_ops { - void (*device_init)(struct reserved_mem *rmem, + int (*device_init)(struct reserved_mem *rmem, struct device *dev); void (*device_release)(struct reserved_mem *rmem, struct device *dev); @@ -28,14 +28,17 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) #ifdef CONFIG_OF_RESERVED_MEM -void of_reserved_mem_device_init(struct device *dev); +int of_reserved_mem_device_init(struct device *dev); void of_reserved_mem_device_release(struct device *dev); void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); #else -static inline void of_reserved_mem_device_init(struct device *dev) { } +static inline int of_reserved_mem_device_init(struct device *dev) +{ + return -ENOSYS; +} static inline void of_reserved_mem_device_release(struct device *pdev) { } static inline void fdt_init_reserved_mem(void) { } -- cgit v1.2.3 From 6d50e60cd2edb5a57154db5a6f64eef5aa59b751 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 29 Oct 2014 14:50:31 -0700 Subject: mm, thp: fix collapsing of hugepages on madvise If an anonymous mapping is not allowed to fault thp memory and then 
madvise(MADV_HUGEPAGE) is used after fault, khugepaged will never collapse this memory into thp memory. This occurs because the madvise(2) handler for thp, hugepage_madvise(), clears VM_NOHUGEPAGE on the stack and it isn't stored in vma->vm_flags until the final action of madvise_behavior(). This causes the khugepaged_enter_vma_merge() to be a no-op in hugepage_madvise() when the vma had previously had VM_NOHUGEPAGE set. Fix this by passing the correct vma flags to the khugepaged mm slot handler. There's no chance khugepaged can run on this vma until after madvise_behavior() returns since we hold mm->mmap_sem. It would be possible to clear VM_NOHUGEPAGE directly from vma->vm_flags in hugepage_advise(), but I didn't want to introduce special case behavior into madvise_behavior(). I think it's best to just let it always set vma->vm_flags itself. Signed-off-by: David Rientjes Reported-by: Suleiman Souhlal Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/khugepaged.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 6b394f0b5148..eeb307985715 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -6,7 +6,8 @@ #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); -extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma); +extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags); #define khugepaged_enabled() \ (transparent_hugepage_flags & \ @@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm) __khugepaged_exit(mm); } -static inline int khugepaged_enter(struct vm_area_struct *vma) +static inline int khugepaged_enter(struct vm_area_struct *vma, + unsigned long vm_flags) { if 
(!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || - (khugepaged_req_madv() && - vma->vm_flags & VM_HUGEPAGE)) && - !(vma->vm_flags & VM_NOHUGEPAGE)) + (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) && + !(vm_flags & VM_NOHUGEPAGE)) if (__khugepaged_enter(vma->vm_mm)) return -ENOMEM; return 0; @@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) static inline void khugepaged_exit(struct mm_struct *mm) { } -static inline int khugepaged_enter(struct vm_area_struct *vma) +static inline int khugepaged_enter(struct vm_area_struct *vma, + unsigned long vm_flags) { return 0; } -static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma) +static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma, + unsigned long vm_flags) { return 0; } -- cgit v1.2.3 From 3a3c02ecf7f2852f122d6d16fb9b3d9cb0c6f201 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:46 -0700 Subject: mm: page-writeback: inline account_page_dirtied() into single caller A follow-up patch would have changed the call signature. To save the trouble, just fold it instead. 
Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [3.17.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 27eb1bfbe704..b46461116cd2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1235,7 +1235,6 @@ int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_dirtied(struct page *page, struct address_space *mapping); -void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); -- cgit v1.2.3 From d7365e783edb858279be1d03f61bc8d5d3383d90 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 29 Oct 2014 14:50:48 -0700 Subject: mm: memcontrol: fix missed end-writeback page accounting Commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API") changed page migration to uncharge the old page right away. The page is locked, unmapped, truncated, and off the LRU, but it could race with writeback ending, which then doesn't unaccount the page properly: test_clear_page_writeback() migration wait_on_page_writeback() TestClearPageWriteback() mem_cgroup_migrate() clear PCG_USED mem_cgroup_update_page_stat() if (PageCgroupUsed(pc)) decrease memcg pages under writeback release pc->mem_cgroup->move_lock The per-page statistics interface is heavily optimized to avoid a function call and a lookup_page_cgroup() in the file unmap fast path, which means it doesn't verify whether a page is still charged before clearing PageWriteback() and it has to do it in the stat update later. Rework it so that it looks up the page's memcg once at the beginning of the transaction and then uses it throughout. 
The charge will be verified before clearing PageWriteback() and migration can't uncharge the page as long as that is still set. The RCU lock will protect the memcg past uncharge. As far as losing the optimization goes, the following test results are from a microbenchmark that maps, faults, and unmaps a 4GB sparse file three times in a nested fashion, so that there are two negative passes that don't account but still go through the new transaction overhead. There is no actual difference: old: 33.195102545 seconds time elapsed ( +- 0.01% ) new: 33.199231369 seconds time elapsed ( +- 0.03% ) The time spent in page_remove_rmap()'s callees still adds up to the same, but the time spent in the function itself seems reduced: # Children Self Command Shared Object Symbol old: 0.12% 0.11% filemapstress [kernel.kallsyms] [k] page_remove_rmap new: 0.12% 0.08% filemapstress [kernel.kallsyms] [k] page_remove_rmap Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [3.17.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 58 ++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 19df5d857411..6b75640ef5ab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void) return false; } -void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked, - unsigned long *flags); - -extern atomic_t memcg_moving; - -static inline void mem_cgroup_begin_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) -{ - if (mem_cgroup_disabled()) - return; - rcu_read_lock(); - *locked = false; - if (atomic_read(&memcg_moving)) - __mem_cgroup_begin_update_page_stat(page, locked, flags); -} - -void __mem_cgroup_end_update_page_stat(struct page *page, - unsigned long *flags); -static 
inline void mem_cgroup_end_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) -{ - if (mem_cgroup_disabled()) - return; - if (*locked) - __mem_cgroup_end_update_page_stat(page, flags); - rcu_read_unlock(); -} - -void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, - int val); - -static inline void mem_cgroup_inc_page_stat(struct page *page, +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, + unsigned long *flags); +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked, + unsigned long flags); +void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val); + +static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(page, idx, 1); + mem_cgroup_update_page_stat(memcg, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct page *page, +static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { - mem_cgroup_update_page_stat(page, idx, -1); + mem_cgroup_update_page_stat(memcg, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, @@ -315,13 +290,14 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_begin_update_page_stat(struct page *page, +static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, unsigned long *flags) { + return NULL; } -static inline void mem_cgroup_end_update_page_stat(struct page *page, - bool *locked, unsigned long *flags) +static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, + bool locked, unsigned long flags) { } @@ -343,12 +319,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline void mem_cgroup_inc_page_stat(struct page *page, +static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, 
enum mem_cgroup_stat_index idx) { } -static inline void mem_cgroup_dec_page_stat(struct page *page, +static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { } -- cgit v1.2.3 From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2014 10:32:34 -0700 Subject: net: skb_fclone_busy() needs to detect orphaned skb Some drivers are unable to perform TX completions in a bounded time. They instead call skb_orphan(). The problem is that skb_fclone_busy() has to detect this case; otherwise we block TCP retransmits and can freeze unlucky TCP sessions on mostly idle hosts. Signed-off-by: Eric Dumazet Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5884f95ff0e9..6c8b6f604e76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -799,15 +799,19 @@ struct sk_buff_fclones { * @skb: buffer * * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. 
*/ -static inline bool skb_fclone_busy(const struct sk_buff *skb) +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE; + fclones->skb2.fclone == SKB_FCLONE_CLONE && + fclones->skb2.sk == sk; } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, -- cgit v1.2.3 From b2de525f095708b2adbadaec3f1e4017a23d1e09 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 29 Sep 2014 10:21:10 -0400 Subject: Return short read or 0 at end of a raw device, not EIO Author: David Jeffery Changes to the basic direct I/O code have broken the raw driver when reading to the end of a raw device. Instead of returning a short read for a read that extends partially beyond the device's end or 0 when at the end of the device, these reads now return EIO. The raw driver needs the same end of device handling as was added for normal block devices. Using blkdev_read_iter, which has the needed size checks, prevents the EIO conditions at the end of the device. 
Signed-off-by: David Jeffery Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 01036262095f..9ab779e8a63c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2469,6 +2469,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ +extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From a87fa1d81a9fb5e9adca9820e16008c40ad09f33 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 3 Nov 2014 15:15:35 +0000 Subject: of: Fix overflow bug in string property parsing functions The string property read helpers will run off the end of the buffer if they are handed a malformed string property. Rework the parsers to make sure that doesn't happen. At the same time add new test cases to make sure the functions behave themselves. The original implementations of of_property_read_string_index() and of_property_count_strings() both open-coded the same block of parsing code, each with its own subtly different bugs. The fix here merges functions into a single helper and makes the original functions static inline wrappers around the helper. One non-bugfix aspect of this patch is the addition of a new wrapper, of_property_read_string_array(). The new wrapper is needed by the device_properties feature that Rafael is working on and planning to merge for v3.19. The implementation is identical both with and without the new static inline wrapper, so it just got left in to reduce the churn on the header file. Signed-off-by: Grant Likely Cc: Rafael J. 
Wysocki Cc: Mika Westerberg Cc: Rob Herring Cc: Arnd Bergmann Cc: Darren Hart Cc: # v3.3+: Drop selftest hunks that don't apply --- include/linux/of.h | 84 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 6545e7aec7bb..29f0adc5f3e4 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -267,14 +267,12 @@ extern int of_property_read_u64(const struct device_node *np, extern int of_property_read_string(struct device_node *np, const char *propname, const char **out_string); -extern int of_property_read_string_index(struct device_node *np, - const char *propname, - int index, const char **output); extern int of_property_match_string(struct device_node *np, const char *propname, const char *string); -extern int of_property_count_strings(struct device_node *np, - const char *propname); +extern int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -486,15 +484,9 @@ static inline int of_property_read_string(struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string_index(struct device_node *np, - const char *propname, int index, - const char **out_string) -{ - return -ENOSYS; -} - -static inline int of_property_count_strings(struct device_node *np, - const char *propname) +static inline int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) { return -ENOSYS; } @@ -667,6 +659,70 @@ static inline int of_property_count_u64_elems(const struct device_node *np, return of_property_count_elems_of_size(np, propname, sizeof(u64)); } +/** + * of_property_read_string_array() - Read an array of strings from a 
multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * + * Search for a property in a device tree node and retrieve a list of + * terminated string values (pointer to data, not a copy) in that property. + * + * If @out_strs is NULL, the number of strings in the property is returned. + */ +static inline int of_property_read_string_array(struct device_node *np, + const char *propname, const char **out_strs, + size_t sz) +{ + return of_property_read_string_helper(np, propname, out_strs, sz, 0); +} + +/** + * of_property_count_strings() - Find and return the number of strings from a + * multiple strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device tree node and retrieve the number of null + * terminated string contain in it. Returns the number of strings on + * success, -EINVAL if the property does not exist, -ENODATA if property + * does not have a value, and -EILSEQ if the string is not null-terminated + * within the length of the property data. + */ +static inline int of_property_count_strings(struct device_node *np, + const char *propname) +{ + return of_property_read_string_helper(np, propname, NULL, 0, 0); +} + +/** + * of_property_read_string_index() - Find and read a string from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @index: index of the string in the list of strings + * @out_string: pointer to null terminated return string, modified only if + * return value is 0. 
+ * + * Search for a property in a device tree node and retrieve a null + * terminated string value (pointer to data, not a copy) in the list of strings + * contained in that property. + * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * property does not have a value, and -EILSEQ if the string is not + * null-terminated within the length of the property data. + * + * The out_string pointer is modified only if a valid string can be decoded. + */ +static inline int of_property_read_string_index(struct device_node *np, + const char *propname, + int index, const char **output) +{ + int rc = of_property_read_string_helper(np, propname, output, 1, index); + return rc < 0 ? rc : 0; +} + /** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. -- cgit v1.2.3 From 32f638fc11db0526c706454d9ab4339d55ac89f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Oct 2014 10:17:25 -0600 Subject: PCI: Don't oops on virtual buses in acpi_pci_get_bridge_handle() acpi_pci_get_bridge_handle() returns the ACPI handle for the bridge device (either a host bridge or a PCI-to-PCI bridge) leading to a PCI bus. But SR-IOV virtual functions can be on a virtual bus with no bridge leading to it. Return a NULL acpi_handle in this case instead of trying to dereference the NULL pointer to the bridge. This fixes a NULL pointer dereference oops in pci_get_hp_params() when adding SR-IOV VF devices on virtual buses. 
[bhelgaas: changelog, add comment in code] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Link: https://bugzilla.kernel.org/show_bug.cgi?id=87591 Reported-by: Chao Zhou Reported-by: Joerg Roedel Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- include/linux/pci-acpi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 64dacb7288a6..24c7728ca681 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -41,8 +41,13 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) if (pci_is_root_bus(pbus)) dev = pbus->bridge; - else + else { + /* If pbus is a virtual bus, there is no bridge to it */ + if (!pbus->self) + return NULL; + dev = &pbus->self->dev; + } return ACPI_HANDLE(dev); } -- cgit v1.2.3