From 66ee59af630fd8d5f4f56fb28162857e629aa0ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:56:46 +0100 Subject: fs: remove ki_nbytes There is no need to pass the total request length in the kiocb, as we already get passed in through the iov_iter argument. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index d9c92daa3944..132d1ecba435 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -42,7 +42,6 @@ struct kiocb { __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - size_t ki_nbytes; /* copy of iocb->aio_nbytes */ struct list_head ki_list; /* the aio core uses this * for cancellation */ -- cgit v1.2.3 From 599bd19bdc4c6b20fd91d50f2f79dececbaf80c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:59:44 +0100 Subject: fs: don't allow to complete sync iocbs through aio_complete The AIO interface is fairly complex because it tries to allow filesystems to always work async and then wakeup a synchronous caller through aio_complete. It turns out that basically no one was doing this to avoid the complexity and context switches, and we've already fixed up the remaining users and can now get rid of this case. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 132d1ecba435..f8516430490d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -37,7 +37,6 @@ struct kiocb { union { void __user *user; - struct task_struct *tsk; } ki_obj; __u64 ki_user_data; /* user's data for completion */ @@ -63,13 +62,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) *kiocb = (struct kiocb) { .ki_ctx = NULL, .ki_filp = filp, - .ki_obj.tsk = current, }; } /* prototypes */ #ifdef CONFIG_AIO -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -77,7 +74,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } -- cgit v1.2.3 From 04b2fa9f8f36ec6fb6fd1c9dc9df6fff0cd27323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 14:49:06 +0100 Subject: fs: split generic and aio kiocb Most callers in the kernel want to perform synchronous file I/O, but still have to bloat the stack with a full struct kiocb. Split out the parts needed in filesystem code from those in the aio code, and only allocate those needed to pass down argument on the stack. The aio code embedds the generic iocb in the one it allocates and can easily get back to it by using container_of. Also add a ->ki_complete method to struct kiocb, this is used to call into the aio code and thus removes the dependency on aio for filesystems impementing asynchronous operations. It will also allow other callers to substitute their own completion callback. We also add a new ->ki_flags field to work around the nasty layering violation recently introduced in commit 5e33f6 ("usb: gadget: ffs: add eventfd notification about ffs events"). Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 46 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index f8516430490d..5c40b61285ac 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -14,67 +14,38 @@ struct kiocb; #define KIOCB_KEY 0 -/* - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either - * cancelled or completed (this makes a certain amount of sense because - * successful cancellation - io_cancel() - does deliver the completion to - * userspace). - * - * And since most things don't implement kiocb cancellation and we'd really like - * kiocb completion to be lockless when possible, we use ki_cancel to - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). - */ -#define KIOCB_CANCELLED ((void *) (~0ULL)) - typedef int (kiocb_cancel_fn)(struct kiocb *); +#define IOCB_EVENTFD (1 << 0) + struct kiocb { struct file *ki_filp; - struct kioctx *ki_ctx; /* NULL for sync ops */ - kiocb_cancel_fn *ki_cancel; - void *private; - - union { - void __user *user; - } ki_obj; - - __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - - /* - * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying eventfd context to deliver events to. - */ - struct eventfd_ctx *ki_eventfd; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { - return kiocb->ki_ctx == NULL; + return kiocb->ki_complete == NULL; } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_ctx = NULL, .ki_filp = filp, }; } /* prototypes */ #ifdef CONFIG_AIO -extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, @@ -84,11 +55,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -static inline struct kiocb *list_kiocb(struct list_head *h) -{ - return list_entry(h, struct kiocb, ki_list); -} - /* for sysctl: */ extern unsigned long aio_nr; extern unsigned long aio_max_nr; -- cgit v1.2.3 From e2e40f2c1ed433c5e224525c8c862fd32e5d3df2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Feb 2015 08:58:50 -0800 Subject: fs: move struct kiocb to fs.h struct kiocb now is a generic I/O container, so move it to fs.h. Also do a #include diet for aio.h while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 31 +------------------------------ include/linux/fs.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 5c40b61285ac..9eb42dbc5582 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -1,52 +1,23 @@ #ifndef __LINUX__AIO_H #define __LINUX__AIO_H -#include -#include #include -#include -#include - -#include struct kioctx; struct kiocb; +struct mm_struct; #define KIOCB_KEY 0 typedef int (kiocb_cancel_fn)(struct kiocb *); -#define IOCB_EVENTFD (1 << 0) - -struct kiocb { - struct file *ki_filp; - loff_t ki_pos; - void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); - void *private; - int ki_flags; -}; - -static inline bool is_sync_kiocb(struct kiocb *kiocb) -{ - return kiocb->ki_complete == NULL; -} - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_filp = filp, - }; -} - /* prototypes */ #ifdef CONFIG_AIO -struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user * __user *iocbpp, diff --git a/include/linux/fs.h b/include/linux/fs.h index 447932aed1e1..48c1472bde4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -314,6 +314,28 @@ struct page; struct address_space; struct writeback_control; +#define IOCB_EVENTFD (1 << 0) + +struct kiocb { + struct file *ki_filp; + loff_t ki_pos; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; +}; + +static inline bool is_sync_kiocb(struct kiocb *kiocb) +{ + return kiocb->ki_complete == NULL; +} + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + }; +} + /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet -- cgit v1.2.3 From bc917be8105993c256338ad1189650364a741483 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Mar 2015 17:45:43 -0400 Subject: saner iov_iter initialization primitives iovec-backed iov_iter instances are assumed to satisfy several properties: * no more than UIO_MAXIOV elements in iovec array * total size of all ranges is no more than MAX_RW_COUNT * all ranges pass access_ok(). The problem is, invariants of data structures should be established in the primitives creating those data structures, not in the code using those primitives. And iov_iter_init() violates that principle. For a while we managed to get away with that, but once the use of iov_iter started to spread, it didn't take long for shit to hit the fan - missed check in sys_sendto() had introduced a roothole. We _do_ have primitives for importing and validating iovecs (both native and compat ones) and those primitives are almost always followed by shoving the resulting iovec into iov_iter. Life would be considerably simpler (and safer) if we combined those primitives with initializing iov_iter. That gives us two new primitives - import_iovec() and compat_import_iovec(). Calling conventions: iovec = iov_array; err = import_iovec(direction, uvec, nr_segs, ARRAY_SIZE(iov_array), &iovec, &iter); imports user vector into kernel space (into iov_array if it fits, allocated if it doesn't fit or if iovec was NULL), validates it and sets iter up to refer to it. On success 0 is returned and allocated kernel copy (or NULL if the array had fit into caller-supplied one) is returned via iovec. On failure all allocations are undone and -E... is returned. If the total size of ranges exceeds MAX_RW_COUNT, the excess is silently truncated. compat_import_iovec() expects uvec to be a pointer to user array of compat_iovec; otherwise it's identical to import_iovec(). Finally, import_single_range() sets iov_iter backed by single-element iovec covering a user-supplied range - err = import_single_range(direction, address, size, iovec, &iter); does validation and sets iter up. Again, size in excess of MAX_RW_COUNT gets silently truncated. Next commits will be switching the things up to use of those and reducing the amount of iov_iter_init() instances. Signed-off-by: Al Viro --- include/linux/uio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 71880299ed48..1f4a37f1f025 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -139,4 +139,18 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); + +#ifdef CONFIG_COMPAT +struct compat_iovec; +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); +#endif + +int import_single_range(int type, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i); + #endif -- cgit v1.2.3 From fd2f7cb5bcac58b63717cd45366bff9a6ab961c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Feb 2015 20:07:13 -0500 Subject: kill struct filename.separate just make const char iname[] the last member and compare name->name with name->iname instead of checking name->separate We need to make sure that out-of-line name doesn't end up allocated adjacent to struct filename refering to it; fortunately, it's easy to achieve - just allocate that struct filename with one byte in ->iname[], so that ->iname[0] will be inside the same object and thus have an address different from that of out-of-line name [spotted by Boqun Feng ] Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b4d71b5e1ff2..d70e333988f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2144,7 +2144,7 @@ struct filename { const __user char *uptr; /* original userland pointer */ struct audit_names *aname; int refcnt; - bool separate; /* should "name" be freed? */ + const char iname[]; }; extern long vfs_truncate(struct path *, loff_t); -- cgit v1.2.3 From 3f7036a071b879da017eddaedb10fba173fdf1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Mar 2015 19:28:30 -0400 Subject: switch security_inode_getattr() to struct path * Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..4e14e3d6309f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1556,7 +1556,7 @@ struct security_operations { int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + int (*inode_getattr) (const struct path *path); int (*inode_setxattr) (struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void (*inode_post_setxattr) (struct dentry *dentry, const char *name, @@ -1843,7 +1843,7 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); -int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); +int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void security_inode_post_setxattr(struct dentry *dentry, const char *name, @@ -2259,8 +2259,7 @@ static inline int security_inode_setattr(struct dentry *dentry, return 0; } -static inline int security_inode_getattr(struct vfsmount *mnt, - struct dentry *dentry) +static inline int security_inode_getattr(const struct path *path) { return 0; } -- cgit v1.2.3 From 171a02032bf1e1bb35442a38d6e25e0dcbb85c63 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Wed, 11 Mar 2015 10:43:31 -0400 Subject: VFS: Add iov_iter_fault_in_multipages_readable() simillar to iov_iter_fault_in_readable() but differs in that it is not limited to faulting in the first iovec and instead faults in "bytes" bytes iterating over the iovecs as necessary. Also, instead of only faulting in the first and last page of the range, all pages are faulted in. This function is needed by NTFS when it does multi page file writes. Signed-off-by: Anton Altaparmakov Signed-off-by: Al Viro --- include/linux/uio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 71880299ed48..a69ed25af07f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,6 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3