From 80f5ab097b87c86581cb9736a8e55c5a3047d4bb Mon Sep 17 00:00:00 2001 From: Shaun Ruffell Date: Sun, 19 Aug 2012 01:11:24 -0300 Subject: edac: edac_mc no longer deals with kobjects directly There are no more embedded kobjects in struct mem_ctl_info. Remove a header and a comment that does not reflect the code anymore. Signed-off-by: Shaun Ruffell Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b8c02b36f76..4784213c819d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From d069015e268bcac6c8bd8997b3235f5f977d3ab6 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 19 Nov 2012 15:33:51 +0800 Subject: Introduce THERMAL_TREND_RAISE_FULL and THERMAL_TREND_DROP_FULL These two new thermal_trend types are used to tell the governor that the temperature is raising/dropping quickly. Thermal cooling governors should handle this situation and make proper decisions, e.g. set cooling state to upper/lower limit directly instead of one step each time. 
Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index fe82022478e7..883bcda7e1e4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -74,6 +74,8 @@ enum thermal_trend { THERMAL_TREND_STABLE, /* temperature is stable */ THERMAL_TREND_RAISING, /* temperature is raising */ THERMAL_TREND_DROPPING, /* temperature is dropping */ + THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */ + THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; /* Events supported by Thermal Netlink */ -- cgit v1.2.3 From 548bc8e1b38e48653a90f48f636f8d253504f8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:13 -0800 Subject: block: RCU free request_queue RCU free request_queue so that blkcg_gq->q can be dereferenced under RCU lock. This will be used to implement hierarchical stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..406343c43cda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From 242d98f077ac0ab80920219769eb095503b93f61 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 17 Dec 2012 10:01:27 -0500 Subject: block,elevator: use new hashtable implementation Switch elevator to use the new hashtable implementation. This reduces the amount of generic unrelated code in the elevator. This also removes the dynamic allocation of the hash table. 
The size of the table is constant so there's no point in paying the price of an extra dereference when accessing it. This patch depends on d9b482c ("hashtable: introduce a small and naive hashtable") which was merged in v3.6. Signed-off-by: Sasha Levin Signed-off-by: Jens Axboe --- include/linux/elevator.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c03af7687bb4..7c5a7c9789ee 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -2,6 +2,7 @@ #define _LINUX_ELEVATOR_H #include +#include #ifdef CONFIG_BLOCK @@ -96,6 +97,8 @@ struct elevator_type struct list_head list; }; +#define ELV_HASH_BITS 6 + /* * each queue has an elevator_queue associated with it */ @@ -105,8 +108,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; - struct hlist_head *hash; unsigned int registered:1; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* -- cgit v1.2.3 From 422765c2638924da10ff363b5eed77924911bdc7 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Fri, 11 Jan 2013 14:46:09 +0100 Subject: block: Remove should_sort judgement when flush blk_plug In commit 975927b942c932, it added blk_rq_pos to sort rq when flushing. Although this commit was used for the situation which blk_plug handled multi devices on the same time like md device. I think there must be some situations like this but only single device. So remove the should_sort judgement. Because the parameter should_sort is only for this purpose, it can delete should_sort from blk_plug. 
CC: Shaohua Li Signed-off-by: Jianpeng Ma Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..dbe74279f3d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -974,7 +974,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3 From 10ee27a06cc8eb57f83342a8eabcb75deb872d52 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 10 Jan 2013 13:47:57 +0800 Subject: vfs: re-implement writeback_inodes_sb(_nr)_if_idle() and rename them writeback_inodes_sb(_nr)_if_idle() is re-implemented by replacing down_read() with down_read_trylock() because - If ->s_umount is write locked, then the sb is not idle. That is writeback_inodes_sb(_nr)_if_idle() needn't wait for the lock. - writeback_inodes_sb(_nr)_if_idle() grabs s_umount lock when it want to start writeback, it may bring us deadlock problem when doing umount. In order to fix the problem, ext4 and btrfs implemented their own writeback functions instead of writeback_inodes_sb(_nr)_if_idle(), but it introduced the redundant code, it is better to implement a new writeback_inodes_sb(_nr)_if_idle(). The name of these two functions is cumbersome, so rename them to try_to_writeback_inodes_sb(_nr). This idea came from Christoph Hellwig. Some code is from the patch of Kamal Mostafa. 
Reviewed-by: Jan Kara Signed-off-by: Miao Xie Signed-off-by: Fengguang Wu --- include/linux/writeback.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b82a83aba311..9a9367c0c076 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -87,9 +87,9 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, - enum wb_reason reason); +int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); -- cgit v1.2.3 From 3a366e614d0837d9fc23f78cdb1a1186ebc3387f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:33 -0800 Subject: block: add missing block_bio_complete() tracepoint bio completion didn't kick block_bio_complete TP. Only dm was explicitly triggering the TP on IO completion. This makes block_bio_complete TP useless for tracers which want to know about bios, and all other bio based drivers skip generating blktrace completion events. This patch makes all bio completions via bio_endio() generate block_bio_complete TP. * Explicit trace_block_bio_complete() invocation removed from dm and the trace point is unexported. * @rq dropped from trace_block_bio_complete(). bios may fly around w/o queue associated. Verifying and accessing the associated queue belongs to TP probes. * blktrace now gets both request and bio completions. Make it ignore bio completions if request completion path is happening. 
This makes all bio based drivers generate blktrace completion events properly and makes the block_bio_complete TP actually useful. v2: With this change, block_bio_complete TP could be invoked on sg commands which have bio's with %NULL bi_bdev. Update TP assignment code to check whether bio->bi_bdev is %NULL before dereferencing. Signed-off-by: Tejun Heo Original-patch-by: Namhyung Kim Cc: Tejun Heo Cc: Steven Rostedt Cc: Alasdair Kergon Cc: dm-devel@redhat.com Cc: Neil Brown Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7c2e030e72f1..0ea61e07a91c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,6 +12,7 @@ struct blk_trace { int trace_state; + bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; -- cgit v1.2.3 From f0059afd3e6e7aa1a0ffc23468b74c43d47660b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Jan 2013 13:06:35 -0800 Subject: buffer: make touch_buffer() an exported function We want to add a trace point to touch_buffer() but macros and inline functions defined in header files can't have tracing points. Move touch_buffer() to fs/buffer.c and make it a proper function. The new exported function is also declared inline. As most uses of touch_buffer() are inside buffer.c with nilfs2 as the only other user, the effect of this change should be negligible. 
Signed-off-by: Tejun Heo Cc: Steven Rostedt Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 458f497738a4..5afc4f94d110 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -#define touch_buffer(bh) mark_page_accessed(bh->b_page) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten) void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); +void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); -- cgit v1.2.3 From 8ab3e6a08a98f7ff18c6814065eb30ba2e000233 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Wed, 2 Jan 2013 15:29:39 +0000 Subject: thermal: Use thermal zone device id in netlink messages This patch changes the function thermal_generate_netlink_event to receive a thermal zone device instead of a originator id. This way, the messages will always be bound to a thermal zone. 
Signed-off-by: Eduardo Valentin Reviewed-by: Durgadoss R Signed-off-by: Zhang Rui --- include/linux/thermal.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 883bcda7e1e4..9b78f8c6f773 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -246,9 +246,11 @@ int thermal_register_governor(struct thermal_governor *); void thermal_unregister_governor(struct thermal_governor *); #ifdef CONFIG_NET -extern int thermal_generate_netlink_event(u32 orig, enum events event); +extern int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event); #else -static inline int thermal_generate_netlink_event(u32 orig, enum events event) +static int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { return 0; } -- cgit v1.2.3 From 6e8575faa8fa680d59404a4d58d12190667be815 Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Fri, 28 Dec 2012 09:56:46 -0800 Subject: ceph: Check for created flag in response from mds The mds now sends back a created inode if the create request performed the create. If the file already existed, no inode is returned in the reply. This allows ceph to set the created flag in atomic_open so that permissions are properly checked in the case that the file wasn't created by the create call to the mds. To ensure compatibility with previous kernels, a feature for sending back the inode in the create reply was added, so that the mds will only send back the inode if the client indicates it supports the feature. 
Signed-off-by: Sam Lang Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e6..6b7c6acbb3bf 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -14,13 +14,16 @@ #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* bits 8-17 defined by user-space; not supported yet here */ #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) +/* bits 19-25 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES) + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR) -- cgit v1.2.3 From 1604f488ac2dcce33c8218e75a000e8c5fb57e61 Mon Sep 17 00:00:00 2001 From: Jim Schutt Date: Fri, 30 Nov 2012 09:15:25 -0700 Subject: libceph: for chooseleaf rules, retry CRUSH map descent from root if leaf is failed Add libceph support for a new CRUSH tunable recently added to Ceph servers. Consider the CRUSH rule step chooseleaf firstn 0 type <node_type> This rule means that <n> replicas will be chosen in a manner such that each chosen leaf's branch will contain a unique instance of <node_type>. When an object is re-replicated after a leaf failure, if the CRUSH map uses a chooseleaf rule the remapped replica ends up under the <node_type> bucket that held the failed leaf. This causes uneven data distribution across the storage cluster, to the point that when all the leaves but one fail under a particular <node_type> bucket, that remaining leaf holds all the data from its failed peers. This behavior also limits the number of peers that can participate in the re-replication of the data held by the failed leaf, which increases the time required to re-replicate after a failure. 
For a chooseleaf CRUSH rule, the tree descent has two steps: call them the inner and outer descents. If the tree descent down to <node_type> is the outer descent, and the descent from <node_type> down to a leaf is the inner descent, the issue is that a down leaf is detected on the inner descent, so only the inner descent is retried. In order to disperse re-replicated data as widely as possible across a storage cluster after a failure, we want to retry the outer descent. So, fix up crush_choose() to allow the inner descent to return immediately on choosing a failed leaf. Wire this up as a new CRUSH tunable. Note that after this change, for a chooseleaf rule, if the primary OSD in a placement group has failed, choosing a replacement may result in one of the other OSDs in the PG colliding with the new primary. This requires that OSD's data for that PG to need moving as well. This seems unavoidable but should be relatively rare. This corresponds to ceph.git commit 88f218181a9e6d2292e2697fc93797d0f6d6e5dc. Signed-off-by: Jim Schutt Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 7 +++++-- include/linux/crush/crush.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 6b7c6acbb3bf..2160aab482f6 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -14,7 +14,9 @@ #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* bits 8-17 defined by user-space; not supported yet here */ #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) -/* bits 19-25 defined by user-space; not supported yet here */ +/* bits 19-24 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +/* bit 26 defined by user-space; not supported yet here */ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* @@ -22,7 +24,8 @@ */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES | \ + 
CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; -- cgit v1.2.3 From dd5f049dbdf973d9bceebef1fd73647a5ede6732 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 1 Nov 2012 08:39:27 -0500 Subject: ceph: define ceph_encode_8_safe() It's kind of a silly macro, but ceph_encode_8_safe() is the only one missing from an otherwise pretty complete set. It's not used, but neither are a couple of the others in this set. While in there, insert some whitespace to tidy up the alignment of the line-terminating backslashes in some of the macro definitions. 
Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- include/linux/ceph/decode.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 4bbf2db45f46..cd679f2d348b 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) return end >= *p && n <= end - *p; } -#define ceph_decode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_decode_64_safe(p, end, v, bad) \ @@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) * * There are two possible failures: * - converting the string would require accessing memory at or - * beyond the "end" pointer provided (-E - * - memory could not be allocated for the result + * beyond the "end" pointer provided (-ERANGE) + * - memory could not be allocated for the result (-ENOMEM) */ static inline char *ceph_extract_encoded_string(void **p, void *end, size_t *lenp, gfp_t gfp) @@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } -#define ceph_encode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_encode_64_safe(p, end, v, bad) \ @@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, #define ceph_encode_32_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u32), bad); \ - ceph_encode_32(p, v); \ + ceph_encode_32(p, v); \ } while (0) #define ceph_encode_16_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u16), bad); \ - ceph_encode_16(p, v); \ + 
ceph_encode_16(p, v); \ + } while (0) +#define ceph_encode_8_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u8), bad); \ + ceph_encode_8(p, v); \ } while (0) #define ceph_encode_copy_safe(p, end, pv, n, bad) \ -- cgit v1.2.3 From af77f26caa35a95af09d1dac5c513b3901de7e37 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 9 Nov 2012 08:43:15 -0600 Subject: rbd: drop oid parameters from ceph_osdc_build_request() The last two parameters to ceph_osd_build_request() describe the object id, but the values passed always come from the osd request structure whose address is also provided. Get rid of those last two parameters. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..f2e5d2cdca06 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -227,9 +227,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 *plen, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, -- cgit v1.2.3 From c885837f7d4f8c4f5cb2a744cc6929bc078e9dc0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: always allow trail in osd request An osd request structure contains an optional trail portion, which if present will contain data to be passed in the payload portion of the message containing the request. The trail field is a ceph_pagelist pointer, and if null it indicates there is no trail. A ceph_pagelist structure contains a length field, and it can legitimately hold value 0. 
Make use of this to change the interpretation of the "trail" of an osd request so that every osd request has trailing data, it just might have length 0. This means we change the r_trail field in a ceph_osd_request structure from a pointer to a structure that is always initialized. Note that in ceph_osdc_start_request(), the trail pointer (or now address of that structure) is assigned to a ceph message's trail field. Here's why that's still OK (looking at net/ceph/messenger.c): - What would have resulted in a null pointer previously will now refer to a 0-length page list. That message trail pointer is used in two functions, write_partial_msg_pages() and out_msg_pos_next(). - In write_partial_msg_pages(), a null page list pointer is handled the same as a message with 0-length trail, and both result in a "in_trail" variable set to false. The trail pointer is only used if in_trail is true. - The only other place the message trail pointer is used is out_msg_pos_next(). That function is only called by write_partial_msg_pages() and only touches the trail pointer if the in_trail value it is passed is true. Therefore a null ceph_msg->trail pointer is equivalent to a non-null pointer referring to a 0-length page list structure. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f2e5d2cdca06..61562c792855 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -95,7 +95,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { -- cgit v1.2.3 From 0120be3c60d46d6d55f4bf7a3d654cc705eb0c54 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_osdc_build_request() The len argument to ceph_osdc_build_request() is set up to be passed by address, but that function never updates its value so there's no need to do this. Tighten up the interface by passing the length directly. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61562c792855..4bfb4582439a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -224,7 +224,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct bio *bio); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, struct timespec *mtime); -- cgit v1.2.3 From e8afad656cbcd06d02a7bacd4b318fa0e2907de0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_calc_file_object_mapping() ceph_calc_file_object_mapping() takes (among other things) a "file" offset and length, and based on the layout, determines the object number ("bno") backing the affected portion of the file's data and the offset into that object where the desired range begins. It also computes the size that should be used for the request--either the amount requested or something less if that would exceed the end of the object. This patch changes the input length parameter in this function so it is used only for input. That is, the argument will be passed by value rather than by address, so the value provided won't get updated by the function. The value would only get updated if the length would surpass the current object, and in that case the value it got updated to would be exactly that returned in *oxlen. Only one of the two callers is affected by this change. Update ceph_calc_raw_layout() so it records any updated value. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 5ea57ba69320..1f653e2ff5cc 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -- cgit v1.2.3 From 4d6b250bf18d44571d69a0f4afec4b6a1969729f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: drop snapid in ceph_calc_raw_layout() A snapshot id must be provided to ceph_calc_raw_layout() even though it is not needed at all for calculating the layout. Where the snapshot id *is* needed is when building the request message for an osd operation. Drop the snapid parameter from ceph_calc_raw_layout() and pass that value instead in ceph_osdc_build_request(). 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 4bfb4582439a..0e82a0a967ef 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -209,7 +209,6 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, - u64 snapid, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); @@ -227,6 +226,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, + u64 snap_id, struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, -- cgit v1.2.3 From e75b45cf36565fd8ba206a9d80f670a86e61ba2f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:14 -0600 Subject: libceph: drop osdc from ceph_calc_raw_layout() The osdc parameter to ceph_calc_raw_layout() is not used, so get rid of it. Consequently, the corresponding parameter in calc_layout() becomes unused, so get rid of that as well. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 0e82a0a967ef..fe3a6e8db1f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,8 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, +extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); -- cgit v1.2.3 From d178a9e74006e80f568d87e29f2a68f14fc7cbb1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set flags in ceph_osdc_alloc_request() The only thing ceph_osdc_alloc_request() really does with the flags value it is passed is assign it to the newly-created osd request structure. Do that in the caller instead. Both callers subsequently call ceph_osdc_build_request(), so have that function (instead of ceph_osdc_alloc_request()) issue a warning if a request comes through with neither the read nor write flags set. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fe3a6e8db1f9..6ddda5bbd1a6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -213,7 +213,6 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, struct ceph_osd_req_op *op); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, -- cgit v1.2.3 From 54a5400721da7fa5a16cea151aade5bdfee74111 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set pages or bio in ceph_osdc_alloc_request() Only one of the two callers of ceph_osdc_alloc_request() provides page or bio data for its payload. And essentially all that function was doing with those arguments was assigning them to fields in the osd request structure. 
Simplify ceph_osdc_alloc_request() by having the caller take care of making those assignments Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6ddda5bbd1a6..75f56d372d44 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -216,9 +216,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, -- cgit v1.2.3 From ae7ca4a35b1f5df86e2c32b2cfc01a8d528c7b8c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: pass num_op with ops Both ceph_osdc_alloc_request() and ceph_osdc_build_request() are provided an array of ceph osd request operations. Rather than just passing the number of operations in the array, the caller is required to append an additional zeroed operation structure to signal the end of the array. All callers know the number of operations at the time these functions are called, so drop the silly zero entry and supply that number directly. As a result, get_num_ops() is no longer needed. This also means that ceph_osdc_alloc_request() never uses its ops argument, so that can be dropped. Also rbd_create_rw_ops() no longer needs to add one to reserve room for the additional op. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 75f56d372d44..2b04d054e09d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -214,12 +214,13 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, -- cgit v1.2.3 From 2b5fc648af5eec2f4fe984cb6b926214e02c5cf4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:20 -0600 Subject: rbd: kill ceph_osd_req_op->flags The flags field of struct ceph_osd_req_op is never used, so just get rid of it. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2b04d054e09d..69287ccfe68a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -157,7 +157,6 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ union { struct { u64 offset, length; -- cgit v1.2.3 From 35525b79786b2ba58ef13822198ce22c497bc7a2 Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Mon, 7 Jan 2013 00:12:15 +0200 Subject: sunrpc: Fix lockd sleeping until timeout There is a race in enqueueing thread to a pool and waking up a thread. 
lockd doesn't wake up on reception of lock granted callback if svc_wake_up() is called before lockd's thread is added to a pool. Signed-off-by: Andriy Skulysh Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 676ddf53b3ee..1f0216b9a6c9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ + int sp_task_pending;/* has pending task */ } ____cacheline_aligned_in_smp; /* -- cgit v1.2.3 From 6543becf26fff612cdadeed7250ccc8d49f67f27 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sun, 20 Jan 2013 17:58:47 +0100 Subject: mod/file2alias: make modalias generation safe for cross compiling Use the target compiler to compute the offsets for the fields of the device_id structures, so that it won't be broken by different alignments between the host and target ABIs. This also fixes missing endian corrections for some modaliases. 
Signed-off-by: Andreas Schwab Signed-off-by: Michal Marek --- include/linux/mod_devicetable.h | 58 +++++++---------------------------------- 1 file changed, 9 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index fed3def62818..779cf7c4a3d1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,8 +33,7 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; @@ -148,8 +147,7 @@ struct hid_device_id { __u16 group; __u32 vendor; __u32 product; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; /* s390 CCW devices */ @@ -173,8 +171,6 @@ struct ccw_device_id { struct ap_device_id { __u16 match_flags; /* which fields to match against */ __u8 dev_type; /* device type */ - __u8 pad1; - __u32 pad2; kernel_ulong_t driver_info; }; @@ -184,13 +180,10 @@ struct ap_device_id { struct css_device_id { __u8 match_flags; __u8 type; /* subchannel type */ - __u16 pad2; - __u32 pad3; kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ - /* to workaround crosscompile issues */ +#define ACPI_ID_LEN 9 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; @@ -231,11 +224,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#ifdef __KERNEL__ const void *data; -#else - kernel_ulong_t data; -#endif }; /* VIO */ @@ -260,24 +249,14 @@ struct pcmcia_device_id { /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4] - __attribute__((aligned(sizeof(__u32)))); + __u32 prod_id_hash[4]; /* not matched against in kernelspace*/ -#ifdef __KERNEL__ const char * prod_id[4]; -#else - kernel_ulong_t prod_id[4] - __attribute__((aligned(sizeof(kernel_ulong_t)))); -#endif /* not matched against 
*/ kernel_ulong_t driver_info; -#ifdef __KERNEL__ char * cisfile; -#else - kernel_ulong_t cisfile; -#endif }; #define PCMCIA_DEV_ID_MATCH_MANF_ID 0x0001 @@ -373,8 +352,7 @@ struct sdio_device_id { __u8 class; /* Standard interface or SDIO_ANY_ID */ __u16 vendor; /* Vendor or SDIO_ANY_ID */ __u16 device; /* Device ID or SDIO_ANY_ID */ - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* SSB core, see drivers/ssb/ */ @@ -420,8 +398,7 @@ struct virtio_device_id { */ struct hv_vmbus_device_id { __u8 guid[16]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* rpmsg */ @@ -440,8 +417,7 @@ struct rpmsg_device_id { struct i2c_device_id { char name[I2C_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* spi */ @@ -451,8 +427,7 @@ struct i2c_device_id { struct spi_device_id { char name[SPI_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* dmi */ @@ -484,15 +459,6 @@ struct dmi_strmatch { char substr[79]; }; -#ifndef __KERNEL__ -struct dmi_system_id { - kernel_ulong_t callback; - kernel_ulong_t ident; - struct dmi_strmatch matches[4]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); -}; -#else struct dmi_system_id { int (*callback)(const struct dmi_system_id *); const char *ident; @@ -506,7 +472,6 @@ struct dmi_system_id { * error: storage size of '__mod_dmi_device_table' isn't known */ #define dmi_device_id dmi_system_id -#endif #define DMI_MATCH(a, b) { a, b } @@ -515,8 +480,7 @@ struct dmi_system_id { 
struct platform_device_id { char name[PLATFORM_NAME_SIZE]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; #define MDIO_MODULE_PREFIX "mdio:" @@ -572,11 +536,7 @@ struct isapnp_device_id { struct amba_id { unsigned int id; unsigned int mask; -#ifndef __KERNEL__ - kernel_ulong_t data; -#else void *data; -#endif }; /* -- cgit v1.2.3 From bf22433575ef30a4807f0620498017df0f27df67 Mon Sep 17 00:00:00 2001 From: Philip Avinash Date: Fri, 4 Jan 2013 13:26:50 +0530 Subject: mtd: devices: elm: Add support for ELM error correction The ELM hardware module can be used to speedup BCH 4/8/16 ECC scheme error correction. For now only 4 & 8 bit support is added Signed-off-by: Philip Avinash Signed-off-by: Artem Bityutskiy --- include/linux/platform_data/elm.h | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/platform_data/elm.h (limited to 'include/linux') diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h new file mode 100644 index 000000000000..11ab6aaf2431 --- /dev/null +++ b/include/linux/platform_data/elm.h @@ -0,0 +1,53 @@ +/* + * BCH Error Location Module + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __ELM_H +#define __ELM_H + +enum bch_ecc { + BCH4_ECC = 0, + BCH8_ECC, +}; + +/* ELM support 8 error syndrome process */ +#define ERROR_VECTOR_MAX 8 + +#define BCH8_ECC_OOB_BYTES 13 +#define BCH4_ECC_OOB_BYTES 7 +/* RBL requires 14 byte even though BCH8 uses only 13 byte */ +#define BCH8_SIZE (BCH8_ECC_OOB_BYTES + 1) +#define BCH4_SIZE (BCH4_ECC_OOB_BYTES) + +/** + * struct elm_errorvec - error vector for elm + * @error_reported: set true for vectors error is reported + * @error_uncorrectable: number of uncorrectable errors + * @error_count: number of correctable errors in the sector + * @error_loc: buffer for error location + * + */ +struct elm_errorvec { + bool error_reported; + bool error_uncorrectable; + int error_count; + int error_loc[ERROR_VECTOR_MAX]; +}; + +void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, + struct elm_errorvec *err_vec); +void elm_config(struct device *dev, enum bch_ecc bch_type); +#endif /* __ELM_H */ -- cgit v1.2.3 From 62116e5171e00f85a8d53f76e45b84423c89ff34 Mon Sep 17 00:00:00 2001 From: Philip Avinash Date: Fri, 4 Jan 2013 13:26:51 +0530 Subject: mtd: nand: omap2: Support for hardware BCH error correction. ELM module can be used for hardware error correction of BCH 4 & 8 bit. ELM module functionality is verified by checking the availability of handle for ELM module in device tree. Hence supporting 1. ELM module available, BCH error correction done by ELM module. Also support read & write page in one shot by adding custom read_page and write_page methods. This helps in optimizing code for NAND flashes with page size less than 4 KB. 2. If ELM module not available fall back to software BCH error correction support. New structure member is added to omap_nand_info 1. "is_elm_used" to know the status of whether the ELM module is used for error correction or not. 2. "elm_dev" device pointer to elm device on detection of ELM module. 
While here, also update the device tree documentation of gpmc-nand to add the optional property elm_id. Note: ECC layout uses 1 extra byte per 512 bytes of data to handle erased pages. Extra byte programmed to zero for programmed pages. Also BCH8 requires 14 byte ecc to maintain compatibility with RBL ECC layout. This results in a common ecc layout across RBL, U-boot & Linux with BCH8. Signed-off-by: Philip Avinash Signed-off-by: Artem Bityutskiy --- include/linux/platform_data/elm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 11ab6aaf2431..1bd5244d1dcd 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -30,7 +30,8 @@ enum bch_ecc { #define BCH4_ECC_OOB_BYTES 7 /* RBL requires 14 byte even though BCH8 uses only 13 byte */ #define BCH8_SIZE (BCH8_ECC_OOB_BYTES + 1) -#define BCH4_SIZE (BCH4_ECC_OOB_BYTES) +/* Uses 1 extra byte to handle erased pages */ +#define BCH4_SIZE (BCH4_ECC_OOB_BYTES + 1) /** * struct elm_errorvec - error vector for elm -- cgit v1.2.3 From a2f74a7dacc1c17a0b146eb3112217874c5db436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 6 Jan 2013 21:28:50 +0100 Subject: mtd: bcm47xxsflash: add own struct for abstracting bus type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki --- include/linux/bcma/bcma_driver_chipcommon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 9a0e3fa3ca95..a5bfda6b0a76 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -528,6 +528,7 @@ struct bcma_sflash { u32 size; struct mtd_info *mtd; + void *priv; }; #endif -- cgit v1.2.3 From 1648eaaa1575ea686acb82fb8cb3d8839764ef2c Mon Sep 17
00:00:00 2001 From: Stefan Roese Date: Fri, 18 Jan 2013 13:10:05 +0100 Subject: mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking Currently cfi_cmdset_0002.c does not support PPB locking of sectors. This patch adds support for this locking/unlocking mechanism. It is needed on some platforms, since newer U-Boot versions do support this PPB locking and protect for example their environment sector(s) this way. This PPB locking/unlocking will be enabled for all devices supported by cfi_cmdset_0002 reporting 8 in the CFI word 0x49 (Sector Protect/Unprotect scheme). Please note that PPB locking does support sector-by-sector locking. But the whole chip can only be unlocked together. So unlocking one sector will automatically unlock all sectors of this device. Because of this chip limitation, the PPB unlocking function saves the current locking status of all sectors before unlocking the whole device. After unlocking the saved locking status is re-configured. This way only the addressed sectors will be unlocked. To selectively enable this advanced sector protection mechanism, the device-tree property "use-advanced-sector-protection" has been created. To enable support for this locking this property needs to be present in the flash DT node. E.g.: nor_flash@0,0 { compatible = "amd,s29gl256n", "cfi-flash"; bank-width = <2>; use-advanced-sector-protection; ... Tested with Spansion S29GL512S10THI and Micron JS28F512M29EWx flash devices. 
Signed-off-by: Stefan Roese Tested-by: Holger Brunck Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index f6eb4332ac92..8b9bfd7dcaa3 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -245,6 +245,7 @@ struct map_info { unsigned long pfow_base; unsigned long map_priv_1; unsigned long map_priv_2; + struct device_node *device_node; void *fldrv_priv; struct mtd_chip_driver *fldrv; }; -- cgit v1.2.3 From 7b9e8522a65886d8ae168547a67c3617b6ba83f1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 28 Jan 2013 14:41:07 -0500 Subject: nfsd: fix IPv6 address handling in the DRC Currently, it only stores the first 16 bytes of any address. struct sockaddr_in6 is 28 bytes however, so we're currently ignoring the last 12 bytes of the address. Expand the c_addr field to a sockaddr_in6, and cast it to a sockaddr_in as necessary. Also fix the comparator to use the existing RPC helpers for this. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8da..47354a25a927 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -263,7 +263,9 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, * @sap1: first sockaddr * @sap2: second sockaddr * - * Just compares the family and address portion. Ignores port, scope, etc. + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * * Returns true if the addrs are equal, false if they aren't.
*/ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, -- cgit v1.2.3 From 155a345a52e6cda18946efe2529d99d5040fad6d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 08:10:08 -0500 Subject: sunrpc: copy scope ID in __rpc_copy_addr6 When copying an address, we should also copy the scopeid in the event that this is a link-local address and the scope matters. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 47354a25a927..6a7c2619a355 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -242,6 +242,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, dsin6->sin6_family = ssin6->sin6_family; dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; return true; } #else /* !(IS_ENABLED(CONFIG_IPV6) */ -- cgit v1.2.3 From 5976687a2b3d1969f02aba16b80ad3ed79be6ad3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 12:50:00 -0500 Subject: sunrpc: move address copy/cmp/convert routines and prototypes from clnt.h to addr.h These routines are used by server and client code, so having them in a separate header would be best. Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/addr.h | 170 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/sunrpc/clnt.h | 155 ---------------------------------------- 2 files changed, 170 insertions(+), 155 deletions(-) create mode 100644 include/linux/sunrpc/addr.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 000000000000..07d8e53bedfc --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include +#include +#include +#include + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return 
sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; +} +#else /* !(IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't. 
+ */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. 
+ */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6a7c2619a355..4a4abde000cb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -165,160 +165,5 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -static inline bool __rpc_copy_addr4(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct 
sockaddr_in *ssin = (struct sockaddr_in *) src; - struct sockaddr_in *dsin = (struct sockaddr_in *) dst; - - dsin->sin_family = ssin->sin_family; - dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; - return true; -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - - if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) - return false; - else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) - return sin1->sin6_scope_id == sin2->sin6_scope_id; - - return true; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; - struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; - - dsin6->sin6_family = ssin6->sin6_family; - dsin6->sin6_addr = ssin6->sin6_addr; - dsin6->sin6_scope_id = ssin6->sin6_scope_id; - return true; -} -#else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return false; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - return false; -} -#endif /* !(IS_ENABLED(CONFIG_IPV6) */ - -/** - * rpc_cmp_addr - compare the address portion of two sockaddrs. - * @sap1: first sockaddr - * @sap2: second sockaddr - * - * Just compares the family and address portion. Ignores port, but - * compares the scope if it's a link-local address. - * - * Returns true if the addrs are equal, false if they aren't. 
- */ -static inline bool rpc_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); - case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); - } - } - return false; -} - -/** - * rpc_copy_addr - copy the address portion of one sockaddr to another - * @dst: destination sockaddr - * @src: source sockaddr - * - * Just copies the address portion and family. Ignores port, scope, etc. - * Caller is responsible for making certain that dst is large enough to hold - * the address in src. Returns true if address family is supported. Returns - * false otherwise. - */ -static inline bool rpc_copy_addr(struct sockaddr *dst, - const struct sockaddr *src) -{ - switch (src->sa_family) { - case AF_INET: - return __rpc_copy_addr4(dst, src); - case AF_INET6: - return __rpc_copy_addr6(dst, src); - } - return false; -} - -/** - * rpc_get_scope_id - return scopeid for a given sockaddr - * @sa: sockaddr to get scopeid from - * - * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if - * not an AF_INET6 address. - */ -static inline u32 rpc_get_scope_id(const struct sockaddr *sa) -{ - if (sa->sa_family != AF_INET6) - return 0; - - return ((struct sockaddr_in6 *) sa)->sin6_scope_id; -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From de0b65ca55dc62b6b477f6e02088df2281da7b51 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 4 Feb 2013 12:51:17 -0500 Subject: sunrpc: fix comment in struct xdr_buf definition ...these pages aren't necessarily contiguous. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 63988990bd36..224d06047e45 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -56,7 +56,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ - struct page ** pages; /* Array of contiguous pages */ + struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ -- cgit v1.2.3 From e6e238c38bd4d42d5e2cddb2165e1a46e0fb1200 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Mon, 4 Feb 2013 00:30:15 +0000 Subject: thermal: sysfs: Add a new sysfs node emul_temp for thermal emulation This patch adds support to set the emulated temperature method in thermal zone (sensor). After setting this feature thermal zone may report this temperature and not the actual temperature. The emulation implementation may be based on sensor capability through platform specific handler or pure software emulation if no platform handler is defined. This is useful in debugging different temperature thresholds and their associated cooling actions. Critical thresholds cannot be emulated. Writing 0 on this node should disable emulation.
Signed-off-by: Amit Daniel Kachhap Acked-by: Kukjin Kim Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9b78f8c6f773..f0bd7f90a90d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -123,6 +123,7 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, unsigned long); int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); + int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -165,6 +166,7 @@ struct thermal_zone_device { int polling_delay; int temperature; int last_temperature; + int emul_temperature; int passive; unsigned int forced_passive; const struct thermal_zone_device_ops *ops; -- cgit v1.2.3 From 4f0a6847815837b63b05fc23878ba391701d8f6a Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Fri, 8 Feb 2013 01:13:06 +0000 Subject: Thermal: exynos: Add support for temperature falling interrupt. This patch introduces using temperature falling interrupt in exynos thermal driver. Before this patch, it only used polling to check whether the system temperature had fallen. However, the exynos SoC also provides a temperature falling interrupt to do the same thing in hardware. This feature is not supported in exynos4210.
Acked-by: Kukjin Kim Signed-off-by: Jonghwa Lee Signed-off-by: Amit Daniel Kachhap Signed-off-by: Zhang Rui --- include/linux/platform_data/exynos_thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h index a7bdb2f63b73..da7e6274b175 100644 --- a/include/linux/platform_data/exynos_thermal.h +++ b/include/linux/platform_data/exynos_thermal.h @@ -53,6 +53,8 @@ struct freq_clip_table { * struct exynos_tmu_platform_data * @threshold: basic temperature for generating interrupt * 25 <= threshold <= 125 [unit: degree Celsius] + * @threshold_falling: differntial value for setting threshold + * of temperature falling interrupt. * @trigger_levels: array for each interrupt levels * [unit: degree Celsius] * 0: temperature for trigger_level0 interrupt @@ -97,6 +99,7 @@ struct freq_clip_table { */ struct exynos_tmu_platform_data { u8 threshold; + u8 threshold_falling; u8 trigger_levels[4]; bool trigger_level0_en; bool trigger_level1_en; -- cgit v1.2.3 From 4c190e2f913f038c9c91ee63b59cd037260ba353 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Feb 2013 08:28:55 -0500 Subject: sunrpc: trim off trailing checksum before returning decrypted or integrity authenticated buffer When GSSAPI integrity signatures are in use, or when we're using GSSAPI privacy with the v2 token format, there is a trailing checksum on the xdr_buf that is returned. It's checked during the authentication stage, and afterward nothing cares about it. Ordinarily, it's not a problem since the XDR code generally ignores it, but it will be when we try to compute a checksum over the buffer to help prevent XID collisions in the duplicate reply cache. Fix the code to trim off the checksums after verifying them. Note that in unwrap_integ_data, we must avoid trying to reverify the checksum if the request was deferred since it will no longer be present when it's revisited. 
Signed-off-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 224d06047e45..15f9204ee70b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -- cgit v1.2.3 From 3ebc21f7bc2f9c0145bbbf0f12430b766a200f9f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 31 Jan 2013 16:02:01 -0600 Subject: libceph: fix messenger CONFIG_BLOCK dependencies The ceph messenger has a few spots that are only used when bio messages are supported, and that's only when CONFIG_BLOCK is defined. This surrounds a couple of spots with #ifdef's that would cause a problem if CONFIG_BLOCK were not present in the kernel configuration. 
This resolves: http://tracker.ceph.com/issues/3976 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/messenger.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; -- cgit v1.2.3 From 72fe25e3460c8673984370208e0e6261101372d6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 30 Jan 2013 11:13:33 -0600 Subject: libceph: add a compatibility check interface An upcoming change implements a semantic change that could lead to a crash if an old version of the libceph kernel module is used with a new version of the rbd kernel module. In order to preclude that possibility, this adds a compatibility check interface. If this interface doesn't exist, the modules are obviously not compatible. But if it does exist, this provides a way of letting the caller know whether it will operate properly with this libceph module. Perhaps confusingly, it returns false right now. The semantic change mentioned above will make it return true. 
This resolves: http://tracker.ceph.com/issues/3800 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..c44275ab375c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; -- cgit v1.2.3 From 73fb847a44224d5708550e4be7baba9da75e00af Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:45 +0300 Subject: SUNRPC: introduce cache_detail->cache_request callback This callback will allow to simplify upcalls in further patches in this series. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4d616e..4f1c8582053c 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); -- cgit v1.2.3 From 21cd1254d3402a72927ed744e8ac1a7cf532f1ea Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 4 Feb 2013 14:02:55 +0300 Subject: SUNRPC: remove "cache_request" argument in sunrpc_cache_pipe_upcall() function Passing this pointer is redundant since it's stored on cache_detail structure, which is also passed to sunrpc_cache_pipe_upcall () function. 
Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 4f1c8582053c..303399b1ba59 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -161,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); -- cgit v1.2.3 From 686855f5d833178e518d79e7912cdb3268a9fa69 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Feb 2013 18:19:58 +0400 Subject: sched: add wait_for_completion_io[_timeout] The only difference between wait_for_completion[_timeout]() and wait_for_completion_io[_timeout]() is that the latter calls io_schedule_timeout() instead of schedule_timeout() so that the caller is accounted as waiting for IO, not just sleeping. These functions can be used for correct iowait time accounting when the completion struct is actually used for waiting for IO (e.g. completion of a bio request in the block layer). 
Signed-off-by: Vladimir Davydov Acked-by: Ingo Molnar Signed-off-by: Jens Axboe --- include/linux/completion.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..33f0280fd533 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( -- cgit v1.2.3 From f25cc71e634edcf8a15bc60a48f2b5f3ec9fbb1d Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Wed, 13 Feb 2013 08:40:16 -0700 Subject: lockd: nlmclnt_reclaim(): avoid stack overflow Even though nlmclnt_reclaim() is only one call into the stack frame, 928 bytes on the stack seems like a lot. Recode to dynamically allocate the request structure once from within the reclaimer task, then pass this pointer into nlmclnt_reclaim() for reuse on subsequent calls. smatch analysis: fs/lockd/clntproc.c:620 nlmclnt_reclaim() warn: 'reqst' puts 928 bytes on stack Also remove redundant assignment of 0 after memset. Cc: Trond Myklebust Signed-off-by: Tim Gardner Reviewed-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index f5a051a79273..a395f1e7998f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* -- cgit v1.2.3 From e7e319a9c51409c7effe34333ea26facf2fab9e1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 14 Feb 2013 12:16:43 -0600 Subject: libceph: improve packing in struct ceph_osd_req_op The layout of struct ceph_osd_req_op leaves lots of holes. Rearranging things a little for better field alignment reduces the size by a third. 
This resolves: http://tracker.ceph.com/issues/4163 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 69287ccfe68a..82bf6338d6c1 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -157,6 +157,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -165,23 +166,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -189,12 +191,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, -- cgit v1.2.3 From 87f979d390f9ecfa3d0038a9f9a002a62f8a1895 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "nofail" parameter There is only one caller of ceph_osdc_writepages(), and it always passes the value true as its "nofail" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with the constant value true. 
This and a number of cleanup patches that follow resolve: http://tracker.ceph.com/issues/4126 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 82bf6338d6c1..afcb255b016a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + int flags, int do_sync); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, -- cgit v1.2.3 From fbf8685fb155e12a9f4d4b966c7b3442ed557687 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "dosync" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "dosync" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index afcb255b016a..7a63100a3e69 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync); + int flags); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, -- cgit v1.2.3 From 2480882611e3ab844563dd3d0a822227604ab8fe Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "flags" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "flags" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7a63100a3e69..6540e8861998 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -274,8 +274,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, -- cgit v1.2.3 From a3bea47e8bdd51d921e5b2045720d60140612c7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_new_request() "num_reply" parameter The "num_reply" parameter to ceph_osdc_new_request() is never used inside that function, so get rid of it. Note that ceph_sync_write() passes 2 for that argument, while all other callers pass 1. It doesn't matter, but perhaps someone should verify this doesn't indicate a problem. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6540e8861998..5812802bd8ae 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -234,8 +234,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -- cgit v1.2.3 From 60e56f138180e72fa8487d4b9c1c916013494f46 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: libceph: kill ceph_calc_raw_layout() There is no caller of ceph_calc_raw_layout() outside of libceph, so there's no need to export from the module. Furthermore, there is only one caller, in calc_layout(), and it is not much more than a simple wrapper for that function. So get rid of ceph_calc_raw_layout() and embed it instead within calc_layout(). While touching "osd_client.c", get rid of the unnecessary forward declaration of __send_request(). 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5812802bd8ae..c39e7ed4b203 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,11 +207,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_op, -- cgit v1.2.3 From 3c663bbdcdf9296e0fe3362acb9e81f49d7b72c6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_create_event() "one_shot" parameter There is only one caller of ceph_osdc_create_event(), and it provides 0 as its "one_shot" argument. Get rid of that argument and just use 0 in its place. Replace the code in handle_watch_notify() that executes if one_shot is nonzero in the event with a BUG_ON() call. While modifying "osd_client.c", give handle_watch_notify() static scope. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c39e7ed4b203..39c55d61e159 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -273,8 +273,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); extern int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout); -- cgit v1.2.3 From 2d2f522699fe8b827087941eb31b9a12cf465f17 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_wait_event() There are no actual users of ceph_osdc_wait_event(). This would have been one-shot events, but we no longer support those so just get rid of this function. Since this leaves nothing else that waits for the completion of an event, we can get rid of the completion in a struct ceph_osd_event. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 39c55d61e159..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -107,7 +107,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -275,8 +274,6 @@ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif -- cgit v1.2.3 From 0315a7770983bbe69211efed1aaee08324acd54c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update rados.h Update most of "include/linux/ceph/rados.h" to match its user space counterpart in "src/include/rados.h" in the ceph tree. Almost everything that has changed is either: - added or revised comments - added definitions (therefore no real effect on existing code) - defining the same value a different way (e.g., "1 << 0" vs "1") The only exceptions are: - The declaration of ceph_osd_state_name() was excluded; that will be inserted in the next patch. - ceph_osd_op_mode_read() and ceph_osd_op_mode_modify() are defined differently, but they were never used in the kernel - CEPH_OSD_FLAG_PEERSTAT is now CEPH_OSD_FLAG_PEERSTAT_OLD, but that was never used in the kernel Anything that was present in this file but not in its user space counterpart was left intact here. I left the definitions of EOLDSNAPC and EBLACKLISTED using numerical values here; I'm not sure the right way to go with those. 
This and the next two commits resolve: http://tracker.ceph.com/issues/4164 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 91 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..9c3b4aaf516b 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -145,8 +145,10 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +163,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. 
*/ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +195,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +236,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +274,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +285,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ 
CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +313,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +324,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +338,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. 
*/ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,7 +429,11 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + 
struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); @@ -424,5 +476,4 @@ struct ceph_osd_reply_head { } __attribute__ ((packed)); - #endif -- cgit v1.2.3 From 4b568b1aaf23d0ce64b98d01d5ad1bcc7694440a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: add ceph_osd_state_name() Add the definition of ceph_osd_state_name(), to match its counterpart in user space. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 9c3b4aaf516b..b65182aba6f7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -150,6 +150,8 @@ struct ceph_eversion { #define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ +extern const char *ceph_osd_state_name(int s); + /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 -- cgit v1.2.3 From dd6f5e105d85e02bc41db0891eb07152b1746ad9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update ceph_fs.h Update most of "include/linux/ceph/ceph_fs.h" to match its user space counterpart in "src/include/ceph_fs.h" in the ceph tree. Everything that has changed is either: - added definitions (therefore no real effect on existing code) - deleting unused symbols - added or revised comments There were some differences between the struct definitions for ceph_mon_subscribe_item and the open field of ceph_mds_request_args; those differences remain. 
This and the next commit resolve: http://tracker.ceph.com/issues/4165 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_fs.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. 
for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack { struct ceph_fsid fsid; } __attribute__ ((packed)); +/* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + /* * mds states * > 0 -> in @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. 
+ */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) -- cgit v1.2.3 From f84adf4921ae3115502f44ff467b04bf2f88cf04 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 13 Feb 2013 13:01:55 -0500 Subject: xen-blkfront: drop the use of llist_for_each_entry_safe Replace llist_for_each_entry_safe with a while loop. llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best to remove it and use a while loop and do the deletion manually. Specifically this bug can be triggered by hot-unplugging a disk, either by doing xm block-detach or by save/restore cycle. BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] blkif_free+0x63/0x130 [xen_blkfront] The crash call trace is: ... bad_area_nosemaphore+0x13/0x20 do_page_fault+0x25e/0x4b0 page_fault+0x25/0x30 ? blkif_free+0x63/0x130 [xen_blkfront] blkfront_resume+0x46/0xa0 [xen_blkfront] xenbus_dev_resume+0x6c/0x140 pm_op+0x192/0x1b0 device_resume+0x82/0x1e0 dpm_resume+0xc9/0x1a0 dpm_resume_end+0x15/0x30 do_suspend+0x117/0x1e0 When drilling down to the assembler code, on newer GCC it does .L29: cmpq $-16, %r12 #, persistent_gnt check je .L30 #, out of the loop .L25: ... 
code in the loop testq %r13, %r13 # n je .L29 #, back to the top of the loop cmpq $-16, %r12 #, persistent_gnt check movq 16(%r12), %r13 # .node.next, n jne .L25 #, back to the top of the loop .L30: While on GCC 4.1, it is: L78: ... code in the loop testq %r13, %r13 # n je .L78 #, back to the top of the loop movq 16(%rbx), %r13 # .node.next, n jmp .L78 #, back to the top of the loop Which basically means that the exit loop condition instead of being: &(pos)->member != NULL; is: ; which makes the loop unbound. Since xen-blkfront is the only user of the llist_for_each_entry_safe macro remove it from llist.h. Orabug: 16263164 CC: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/llist.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index d0ab98f73d38..a5199f6d0e82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -124,31 +124,6 @@ static inline void init_llist_head(struct llist_head *list) &(pos)->member != NULL; \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) -/** - * llist_for_each_entry_safe - iterate safely against remove over some entries - * of lock-less list of given type. - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as a temporary storage. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. This variant allows removal of entries - * as we iterate. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. 
- */ -#define llist_for_each_entry_safe(pos, n, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member), \ - (n) = (pos)->member.next; \ - &(pos)->member != NULL; \ - (pos) = llist_entry(n, typeof(*(pos)), member), \ - (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) - /** * llist_empty - tests whether a lock-less list is empty * @head: the list to test -- cgit v1.2.3 From b324814e8436772cb3367b14149ba003a9954525 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: use void pointers in page vector functions The functions used for working with ceph page vectors are defined with char pointers, but they're really intended to operate on untyped data. Change the types of these function parameters to (void *) to reflect this. (Note that the functions now assume void pointer arithmetic works like arithmetic on char pointers.) Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c44275ab375c..2250f8bb2490 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -222,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -230,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char 
__user *data, + const void __user *data, loff_t off, size_t len); extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, + const void *data, loff_t off, size_t len); extern int ceph_copy_from_page_vector(struct page **pages, - char *data, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); -- cgit v1.2.3 From 903bb32e890237ca43ab847e561e5377cfe0fdb3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: drop return value from page vector copy routines The return values provided for ceph_copy_to_page_vector() and ceph_copy_from_page_vector() serve no purpose, so get rid of them. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2250f8bb2490..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -232,10 +232,10 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, +extern void ceph_copy_to_page_vector(struct page **pages, const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, +extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, -- cgit v1.2.3 From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 29 Jan 2013 
06:04:50 +0000 Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h The header file will then be installed under /usr/include/linux so that userspace applications can refer to Btrfs ioctls by name and use the same structs used internally in the kernel. Signed-off-by: Filipe Brandenburger Signed-off-by: Josef Bacik --- include/linux/btrfs.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/btrfs.h (limited to 'include/linux') diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_BTRFS_H +#define _LINUX_BTRFS_H + +#include + +#endif /* _LINUX_BTRFS_H */ -- cgit v1.2.3 From 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Mon, 13 Aug 2012 17:25:44 +0200 Subject: lib/lzo: Update LZO compression to current upstream version This commit updates the kernel LZO code to the current upstream version which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows a doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines. Signed-off-by: Markus F.X.J. Oberhumer --- include/linux/lzo.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lzo.h b/include/linux/lzo.h index d793497ec1ca..a0848d9377e5 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -4,28 +4,28 @@ * LZO Public Kernel Interface * A mini subset of the LZO real-time data compression library * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) -#define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS +#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short)) +#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) -/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ +/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len, void *wrkmem); + unsigned char *dst, size_t *dst_len, void *wrkmem); /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, - unsigned char *dst, size_t *dst_len); + unsigned char *dst, size_t *dst_len); /* * Return values (< 0 = Error) @@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, #define LZO_E_EOF_NOT_FOUND (-7) #define LZO_E_INPUT_NOT_CONSUMED (-8) #define LZO_E_NOT_YET_IMPLEMENTED (-9) +#define LZO_E_INVALID_ARGUMENT (-10) #endif -- cgit v1.2.3 From 52608ba20546139dc76cca8a46c1d901455d5450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 8 Aug 2012 12:30:56 -0300 Subject: i5100_edac: probe for device 19 function 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probe and store the device handle for the device 19 function 0 during driver initialization. The device is used during fault injection. 
Signed-off-by: Niklas Söderlund Signed-off-by: Mauro Carvalho Chehab --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0eb65796bcb9..d0d1e801e350 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2776,6 +2776,7 @@ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 -- cgit v1.2.3 From c66b5a79a9348ccd6d1cd81416027d0e12da965d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 15 Feb 2013 07:21:08 -0300 Subject: edac: add a new memory layer type There are some cases where the memory controller layout is completely hidden. This is the case of firmware-driven error code, like the one provided by GHES. Add a new layer to be used on such memory error report mechanisms. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 4784213c819d..1b7744c219b8 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -375,6 +375,9 @@ enum scrub_type { * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" + * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped + * as a single memory area. This is used when + * retrieving errors from a firmware driven driver. * * This enum is used by the drivers to tell edac_mc_sysfs what name should * be used when describing a memory stick location. 
@@ -384,6 +387,7 @@ enum edac_mc_layer_type { EDAC_MC_LAYER_CHANNEL, EDAC_MC_LAYER_SLOT, EDAC_MC_LAYER_CHIP_SELECT, + EDAC_MC_LAYER_ALL_MEM, }; /** -- cgit v1.2.3 From c2c93dbc97622e26dc19edc71e50ebaa996d7804 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 06:50:05 -0300 Subject: edac: remove proc_name from mci structure proc_name isn't used anywhere. Remove it. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b7744c219b8..ff18efc754f3 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -48,7 +48,6 @@ static inline void opstate_init(void) } #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 /** * enum dev_type - describe the type of memory DRAM chips used at the stick @@ -633,7 +632,6 @@ struct mem_ctl_info { const char *mod_ver; const char *ctl_name; const char *dev_name; - char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; unsigned long start_time; /* mci load start time (in jiffies) */ -- cgit v1.2.3 From c7ef7645544131b0750478d1cf94cdfa945c809d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 21 Feb 2013 13:36:45 -0300 Subject: edac: reduce stack pressure by using a pre-allocated buffer The number of variables at the stack is too big. Reduces the stack usage by using a pre-allocated error buffer. 
Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index ff18efc754f3..096b7fcdf484 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -47,8 +47,18 @@ static inline void opstate_init(void) return; } +/* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 +/* Maximum size of the location string */ +#define LOCATION_SIZE 80 + +/* Defines the maximum number of labels that can be reported */ +#define EDAC_MAX_LABELS 8 + +/* String used to join two or more labels */ +#define OTHER_LABEL " or " + /** * enum dev_type - describe the type of memory DRAM chips used at the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it @@ -553,6 +563,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; +/** + * edac_raw_error_desc - Raw error report structure + * @grain: minimum granularity for an error report, in bytes + * @error_count: number of errors of the same type + * @top_layer: top layer of the error (layer[0]) + * @mid_layer: middle layer of the error (layer[1]) + * @low_layer: low layer of the error (layer[2]) + * @page_frame_number: page where the error happened + * @offset_in_page: page offset + * @syndrome: syndrome of the error (or 0 if unknown or if + * the syndrome is not applicable) + * @msg: error message + * @location: location of the error + * @label: label of the affected DIMM(s) + * @other_detail: other driver-specific detail about the error + * @enable_per_layer_report: if false, the error affects all layers + * (typically, a memory controller error) + */ +struct edac_raw_error_desc { + /* + * NOTE: everything before grain won't be cleaned by + * edac_raw_error_desc_clean() + */ + char location[LOCATION_SIZE]; + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; + long grain; + + /* the vars 
below and grain will be cleaned on every new error report */ + u16 error_count; + int top_layer; + int mid_layer; + int low_layer; + unsigned long page_frame_number; + unsigned long offset_in_page; + unsigned long syndrome; + const char *msg; + const char *other_detail; + bool enable_per_layer_report; +}; + /* MEMORY controller information structure */ struct mem_ctl_info { @@ -660,6 +710,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; + /* + * Used to report an error - by being at the global struct + * makes the memory allocated by the EDAC core + */ + struct edac_raw_error_desc error_desc; + /* the internal state of this controller instance */ int op_state; -- cgit v1.2.3 From 8dd93d450bff251575c56b8f058393124e1f00fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 21:26:22 -0300 Subject: edac: add support for error type "Info" The CPER spec defines a fourth type of error: informational logs. Add support for it at the edac API and at the trace event interface. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 096b7fcdf484..4fd4999ccb5b 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -109,8 +109,24 @@ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, HW_EVENT_ERR_UNCORRECTED, HW_EVENT_ERR_FATAL, + HW_EVENT_ERR_INFO, }; +static inline char *mc_event_error_type(const unsigned int err_type) +{ + switch (err_type) { + case HW_EVENT_ERR_CORRECTED: + return "Corrected"; + case HW_EVENT_ERR_UNCORRECTED: + return "Uncorrected"; + case HW_EVENT_ERR_FATAL: + return "Fatal"; + default: + case HW_EVENT_ERR_INFO: + return "Info"; + } +} + /** * enum mem_type - memory types. 
For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM -- cgit v1.2.3 From 24dea0c9feccf699749f860fa2f4ccd84d30390d Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 31 Jan 2013 21:06:34 +0400 Subject: mtd: map: BUG() in non handled cases Several map-related functions look like a series of ifs, checking widths of map. Those functions do not have any handling for default case. Instead of fiddling with uninitialized_var in those functions, let's just add a (correct) BUG() to the default case on those maps. This will also allow us to catch potential errors in maps setup in future. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Artem Bityutskiy --- include/linux/mtd/map.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 8b9bfd7dcaa3..4b02512e421c 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -329,7 +329,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word static inline map_word map_word_load(struct map_info *map, const void *ptr) { - map_word r = {{0} }; + map_word r; if (map_bankwidth_is_1(map)) r.x[0] = *(unsigned char *)ptr; @@ -343,6 +343,8 @@ static inline map_word map_word_load(struct map_info *map, const void *ptr) #endif else if (map_bankwidth_is_large(map)) memcpy(r.x, ptr, map->bankwidth); + else + BUG(); return r; } @@ -392,7 +394,7 @@ static inline map_word map_word_ff(struct map_info *map) static inline map_word inline_map_read(struct map_info *map, unsigned long ofs) { - map_word uninitialized_var(r); + map_word r; if (map_bankwidth_is_1(map)) r.x[0] = __raw_readb(map->virt + ofs); @@ -426,6 +428,8 @@ static inline void inline_map_write(struct map_info *map, const map_word datum, #endif else if (map_bankwidth_is_large(map)) memcpy_toio(map->virt+ofs, datum.x, map->bankwidth); + else + BUG(); mb(); } -- cgit v1.2.3 From 
561c6731978fa128f29342495f47fc3365898b3d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 10:52:26 -0500 Subject: switch lseek to COMPAT_SYSCALL_DEFINE Signed-off-by: Al Viro --- include/linux/compat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index de095b0462a7..59c72048bf20 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -359,6 +359,7 @@ asmlinkage ssize_t compat_sys_preadv(unsigned long fd, asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen, u32 pos_low, u32 pos_high); +asmlinkage long comat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, const compat_uptr_t __user *envp); -- cgit v1.2.3 From 3f6d078d4accfff8b114f968259a060bfdc7c682 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 13:49:08 -0500 Subject: fix compat truncate/ftruncate Signed-off-by: Al Viro --- include/linux/compat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 59c72048bf20..76a87fb57ac2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -536,6 +536,8 @@ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, asmlinkage long compat_sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); +asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, -- cgit v1.2.3 From 12979354a1d6ef25d86f381e4d5f9e103f29913a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 8 Jan 2013 09:15:10 -0800 Subject: libceph: rename ceph_pg -> ceph_pg_v1 Rename the old version 
of this type to distinguish it from the new version. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- include/linux/ceph/osdmap.h | 7 ++++--- include/linux/ceph/rados.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 388158ff0cbc..be2867330e23 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -56,7 +56,7 @@ struct ceph_osd_request { struct list_head r_linger_item; struct list_head r_linger_osd; struct ceph_osd *r_osd; - struct ceph_pg r_pgid; + struct ceph_pg_v1 r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_num_pg_osds; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c83a838f89f5..eb4989aa48e8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -28,7 +28,7 @@ struct ceph_pg_pool_info { struct ceph_pg_mapping { struct rb_node node; - struct ceph_pg pgid; + struct ceph_pg_v1 pgid; int len; int osds[]; }; @@ -118,10 +118,11 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, + struct ceph_pg_v1 pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg pgid); + struct ceph_pg_v1 pgid); extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index b65182aba6f7..e7cece69b13f 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -64,7 +64,7 @@ struct ceph_timespec { * placement group. * we encode this into one __le64. 
*/ -struct ceph_pg { +struct ceph_pg_v1 { __le16 preferred; /* preferred primary osd */ __le16 ps; /* placement seed */ __le32 pool; /* object pool */ @@ -128,7 +128,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) * object layout - how a given object should be stored. */ struct ceph_object_layout { - struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ __le32 ol_stripe_unit; /* for per-object parity, if any */ } __attribute__ ((packed)); -- cgit v1.2.3 From 5b191d9914eb68257f47de9d5bfe099b77f0687c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:38:16 -0800 Subject: libceph: decode into cpu-native ceph_pg type Always decode data into our cpu-native ceph_pg type that has the correct field widths. Limit any remaining uses of ceph_pg_v1 to dealing with the legacy protocol. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- include/linux/ceph/osdmap.h | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index be2867330e23..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -56,7 +56,7 @@ struct ceph_osd_request { struct list_head r_linger_item; struct list_head r_linger_osd; struct ceph_osd *r_osd; - struct ceph_pg_v1 r_pgid; + struct ceph_pg r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_num_pg_osds; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index eb4989aa48e8..8a612df4c248 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -18,6 +18,11 @@ * The map can be updated either via an incremental map (diff) describing * the change between two successive epochs, or as a fully encoded map. 
*/ +struct ceph_pg { + uint64_t pool; + uint32_t seed; +}; + struct ceph_pg_pool_info { struct rb_node node; int id; @@ -28,7 +33,7 @@ struct ceph_pg_pool_info { struct ceph_pg_mapping { struct rb_node node; - struct ceph_pg_v1 pgid; + struct ceph_pg pgid; int len; int osds[]; }; @@ -119,10 +124,10 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, - struct ceph_pg_v1 pgid, + struct ceph_pg pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, - struct ceph_pg_v1 pgid); + struct ceph_pg pgid); extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); -- cgit v1.2.3 From ec73a754989c27628c9037887df919561280519c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Feb 2013 14:23:07 -0600 Subject: ceph: update "ceph_features.h" This updates "include/linux/ceph/ceph_features.h" so all the feature bits defined in the user space code are defined here. The features supported by this implementation will still differ so that's not updated here. 
Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 2160aab482f6..9e0f5a8ba247 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -12,12 +12,28 @@ #define CEPH_FEATURE_MONNAMES (1<<5) #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) -/* bits 8-17 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) +#define CEPH_FEATURE_PGID64 (1<<9) +#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) +#define CEPH_FEATURE_PGPOOL3 (1<<11) +#define CEPH_FEATURE_OSDREPLYMUX (1<<12) +#define CEPH_FEATURE_OSDENC (1<<13) +#define CEPH_FEATURE_OMAP (1<<14) +#define CEPH_FEATURE_MONENC (1<<15) +#define CEPH_FEATURE_QUERY_T (1<<16) +#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) -/* bits 19-24 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) +#define CEPH_FEATURE_MON_NULLROUTE (1<<20) +#define CEPH_FEATURE_MON_GV (1<<21) +#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) +#define CEPH_FEATURE_MSG_AUTH (1<<23) +#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) #define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) -/* bit 26 defined by user-space; not supported yet here */ -#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_OSD_HBMSGS (1<<28) +#define CEPH_FEATURE_MDSENC (1<<29) /* * Features supported. 
-- cgit v1.2.3 From 4f6a7e5ee1393ec4b243b39dac9f36992d161540 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 23 Feb 2013 10:41:09 -0800 Subject: ceph: update support for PGID64, PGPOOL3, OSDENC protocol features Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features. These have been present in ceph.git since v0.42, Feb 2012. Require these features to simplify support; nobody is running older userspace. Note that the new request and reply encoding is still not in place, so the new code is not yet functional. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 14 ++++++++++---- include/linux/ceph/mdsmap.h | 4 ++-- include/linux/ceph/osdmap.h | 16 +++++++++++++--- include/linux/ceph/rados.h | 23 ----------------------- 4 files changed, 25 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 9e0f5a8ba247..ab0a54286e0d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -39,11 +39,17 @@ * Features supported. 
*/ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES | \ - CEPH_FEATURE_CRUSH_TUNABLES2 | \ + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC) #endif diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index cb15b5d867c7..87ed09f54800 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -29,8 +29,8 @@ struct ceph_mdsmap { /* which object pools file data can be stored in */ int m_num_data_pg_pools; - u32 *m_data_pg_pools; - u32 m_cas_pg_pool; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; }; static inline struct ceph_entity_addr * diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8a612df4c248..8587746b7f0e 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -25,12 +25,22 @@ struct ceph_pg { struct ceph_pg_pool_info { struct rb_node node; - int id; - struct ceph_pg_pool v; - int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + s64 id; + u8 type; + u8 size; + u8 crush_ruleset; + u8 object_hash; + u32 pg_num, pgp_num; + int pg_num_mask, pgp_num_mask; + u64 flags; char *name; }; +struct ceph_object_locator { + uint64_t pool; + char *key; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index e7cece69b13f..d784c8dfb09a 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -8,14 +8,6 @@ #include -/* - * osdmap encoding versions - */ -#define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 6 -#define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 6 - /* * 
fs id */ @@ -91,21 +83,6 @@ struct ceph_pg_v1 { #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 -#define CEPH_PG_POOL_VERSION 2 -struct ceph_pg_pool { - __u8 type; /* CEPH_PG_TYPE_* */ - __u8 size; /* number of osds in each pg */ - __u8 crush_ruleset; /* crush placement rule */ - __u8 object_hash; /* hash mapping object name to ps */ - __le32 pg_num, pgp_num; /* number of pg's */ - __le32 lpg_num, lpgp_num; /* number of localized pg's */ - __le32 last_change; /* most recent epoch changed */ - __le64 snap_seq; /* seq for per-pool snapshot */ - __le32 snap_epoch; /* epoch of last snap */ - __le32 num_snaps; - __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ - __le64 auid; /* who owns the pg */ -} __attribute__ ((packed)); /* * stable_mod func is used to control number of placement groups. -- cgit v1.2.3 From 2169aea649c08374bec7d220a3b8f64712275356 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:13:08 -0800 Subject: libceph: calculate placement based on the internal data types Instead of using the old ceph_object_layout struct, update our internal ceph_calc_object_layout method to use the ceph_pg type. This allows us to pass the full 32-bit precision of the pgid.seed to the callers. It also allows some callers to avoid reaching into the request structures for the struct ceph_object_layout fields. 
Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/osdmap.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 388158ff0cbc..ad8899fc3157 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -82,6 +82,7 @@ struct ceph_osd_request { char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; + u64 r_snapid; unsigned long r_stamp; /* send OR check time */ struct ceph_file_layout r_file_layout; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8587746b7f0e..35985125f118 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -129,7 +129,7 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_object_layout *ol, +extern int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -- cgit v1.2.3 From 1b83bef24c6746a146d39915a18fb5425f2facb0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Feb 2013 16:11:12 -0800 Subject: libceph: update osd request/reply encoding Use the new version of the encoding for osd requests and replies. In the process, update the way we are tracking request ops and reply lengths and results in the struct ceph_osd_request. Update the rbd and fs/ceph users appropriately. The main changes are: - we keep pointers into the request memory for fields we need to update each time the request is sent out over the wire - we keep information about the result in an array in the request struct where the users can easily get at it. 
Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 19 ++++++++++++++++++- include/linux/ceph/rados.h | 38 -------------------------------------- 2 files changed, 18 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ad8899fc3157..1dd5d466b6f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -47,6 +47,9 @@ struct ceph_osd { struct list_head o_keepalive_item; }; + +#define CEPH_OSD_MAX_OP 10 + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ @@ -63,9 +66,23 @@ struct ceph_osd_request { struct ceph_connection *r_con_filling_msg; struct ceph_msg *r_request, *r_reply; - int r_result; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ + int r_num_ops; + + /* encoded message content */ + struct ceph_osd_op *r_request_ops; + /* these are updated on each send */ + __le32 *r_request_osdmap_epoch; + __le32 *r_request_flags; + __le64 *r_request_pool; + void *r_request_pgid; + __le32 *r_request_attempts; + struct ceph_eversion *r_request_reassert_version; + + int r_result; + int r_reply_op_len[CEPH_OSD_MAX_OP]; + s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index d784c8dfb09a..68c96a508ac2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -416,43 +416,5 @@ struct ceph_osd_op { __le32 payload_len; } __attribute__ ((packed)); -/* - * osd request message header. each request may include multiple - * ceph_osd_op object operations. 
- */ -struct ceph_osd_request_head { - __le32 client_inc; /* client incarnation */ - struct ceph_object_layout layout; /* pgid */ - __le32 osdmap_epoch; /* client's osdmap epoch */ - - __le32 flags; - - struct ceph_timespec mtime; /* for mutations only */ - struct ceph_eversion reassert_version; /* if we are replaying op */ - - __le32 object_len; /* length of object name */ - - __le64 snapid; /* snapid to read */ - __le64 snap_seq; /* writer's snap context */ - __le32 num_snaps; - - __le16 num_ops; - struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ -} __attribute__ ((packed)); - -struct ceph_osd_reply_head { - __le32 client_inc; /* client incarnation */ - __le32 flags; - struct ceph_object_layout layout; - __le32 osdmap_epoch; - struct ceph_eversion reassert_version; /* for replaying uncommitted */ - - __le32 result; /* result code */ - - __le32 object_len; /* length of object name */ - __le32 num_ops; - struct ceph_osd_op ops[0]; /* ops[], object */ -} __attribute__ ((packed)); - #endif -- cgit v1.2.3 From 83ca14fdd35821554058e5fd4fa7b118ee504a33 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Feb 2013 10:39:09 -0800 Subject: libceph: add support for HASHPSPOOL pool flag The legacy behavior adds the pgid seed and pool together as the input for CRUSH. That is problematic because each pool's PGs end up mapping to the same OSDs: 1.5 == 2.4 == 3.3 == ... Instead, if the HASHPSPOOL flag is set, we hash the ps and pool together and feed that into CRUSH. This ensures that two adjacent pools will map to an independent pseudorandom set of OSDs. Advertise our support for this via a protocol feature flag. 
Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 4 +++- include/linux/ceph/osdmap.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index ab0a54286e0d..76554cecaab2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -34,6 +34,7 @@ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) #define CEPH_FEATURE_OSD_HBMSGS (1<<28) #define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. @@ -45,7 +46,8 @@ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ - CEPH_FEATURE_REPLY_CREATE_INODE) + CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 35985125f118..c819190d1642 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -23,6 +23,8 @@ struct ceph_pg { uint32_t seed; }; +#define CEPH_POOL_FLAG_HASHPSPOOL 1 + struct ceph_pg_pool_info { struct rb_node node; s64 id; -- cgit v1.2.3 From f00b4dad9d9eb001a04cf72e8351a2a1b9e99322 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 20 Dec 2012 14:14:23 +0100 Subject: dma-buf: implement vmap refcounting in the interface logic All drivers which implement this need to have some sort of refcount to allow concurrent vmap usage. Hence implement this in the dma-buf core. To protect against concurrent calls we need a lock, which potentially causes new funny locking inversions. But this shouldn't be a problem for exporters with statically allocated backing storage, and more dynamic drivers have decent issues already anyway. Inspired by some refactoring patches from Aaron Plattner, who implemented the same idea, but only for drm/prime drivers. 
v2: Check in dma_buf_release that no dangling vmaps are left. Suggested by Aaron Plattner. We might want to do similar checks for attachments, but that's for another patch. Also fix up ERR_PTR return for vmap. v3: Check whether the passed-in vmap address matches with the cached one for vunmap. Eventually we might want to remove that parameter - compared to the kmap functions there's no need for the vaddr for unmapping. Suggested by Chris Wilson. v4: Fix a brown-paper-bag bug spotted by Aaron Plattner. Cc: Aaron Plattner Reviewed-by: Aaron Plattner Tested-by: Aaron Plattner Reviewed-by: Rob Clark Signed-off-by: Daniel Vetter Signed-off-by: Sumit Semwal --- include/linux/dma-buf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3d754a394e92..9978b614a1aa 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -119,8 +119,10 @@ struct dma_buf { struct file *file; struct list_head attachments; const struct dma_buf_ops *ops; - /* mutex to serialize list manipulation and attach/detach */ + /* mutex to serialize list manipulation, attach/detach and vmap/unmap */ struct mutex lock; + unsigned vmapping_counter; + void *vmap_ptr; void *priv; }; -- cgit v1.2.3 From 864ef69b2d9b34e7c85baa9c5c601d5e735b208a Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Fri, 1 Feb 2013 18:22:52 +0000 Subject: dmaengine: add dma_request_slave_channel_compat() Adds a dma_request_slave_channel_compat() wrapper which accepts both the arguments from dma_request_channel() and dma_request_slave_channel(). Based on whether the driver is instantiated via DT, the appropriate channel request call will be made. This allows for a much cleaner migration of drivers to the dmaengine DT API as platforms continue to be mixed between those that boot using DT and those that do not. 
Suggested-by: Tony Lindgren Signed-off-by: Matt Porter Acked-by: Tony Lindgren Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index f5939999cb65..91ac8da25020 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1001,6 +1001,22 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); struct dma_chan *net_dma_find_channel(void); #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) +#define dma_request_slave_channel_compat(mask, x, y, dev, name) \ + __dma_request_slave_channel_compat(&(mask), x, y, dev, name) + +static inline struct dma_chan +*__dma_request_slave_channel_compat(dma_cap_mask_t *mask, dma_filter_fn fn, + void *fn_param, struct device *dev, + char *name) +{ + struct dma_chan *chan; + + chan = dma_request_slave_channel(dev, name); + if (chan) + return chan; + + return __dma_request_channel(mask, fn, fn_param); +} /* --- Helper iov-locking functions --- */ -- cgit v1.2.3 From c5a51053cf3b499ddba60a89ab067ea05ad15840 Mon Sep 17 00:00:00 2001 From: "Kim, Milo" Date: Wed, 27 Feb 2013 17:02:43 -0800 Subject: backlight: add new lp8788 backlight driver TI LP8788 PMU supports regulators, battery charger, RTC, ADC, backlight driver and current sinks. This patch enables LP8788 backlight module. (Brightness mode) The brightness is controlled by PWM input or I2C register. All modes are supported in the driver. (Platform data) Configurable data can be defined in the platform side. name : backlight driver name. 
(default: "lcd-backlight") initial_brightness : initial value of backlight brightness bl_mode : brightness control by PWM or lp8788 register dim_mode : dimming mode selection full_scale : full scale current setting rise_time : brightness ramp up step time fall_time : brightness ramp down step time pwm_pol : PWM polarity setting when bl_mode is PWM based period_ns : platform specific PWM period value. unit is nano. The default values are set in case no platform data is defined. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Milo(Woogyom) Kim Cc: Richard Purdie Cc: Samuel Ortiz Cc: Thierry Reding Cc: "devendra.aaru" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mfd/lp8788.h | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 2a32b16f79cb..786bf6679a28 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -16,6 +16,7 @@ #include #include +#include #include #define LP8788_DEV_BUCK "lp8788-buck" @@ -124,11 +125,6 @@ enum lp8788_bl_ramp_step { LP8788_RAMP_65538us, }; -enum lp8788_bl_pwm_polarity { - LP8788_PWM_ACTIVE_HIGH, - LP8788_PWM_ACTIVE_LOW, -}; - enum lp8788_isink_scale { LP8788_ISINK_SCALE_100mA, LP8788_ISINK_SCALE_120mA, @@ -228,16 +224,6 @@ struct lp8788_charger_platform_data { enum lp8788_charger_event event); }; -/* - * struct lp8788_bl_pwm_data - * @pwm_set_intensity : set duty of pwm - * @pwm_get_intensity : get current duty of pwm - */ -struct lp8788_bl_pwm_data { - void (*pwm_set_intensity) (int brightness, int max_brightness); - int (*pwm_get_intensity) (int max_brightness); -}; - /* * struct lp8788_backlight_platform_data * @name : backlight driver name. 
(default: "lcd-backlight") @@ -248,8 +234,8 @@ struct lp8788_bl_pwm_data { * @rise_time : brightness ramp up step time * @fall_time : brightness ramp down step time * @pwm_pol : pwm polarity setting when bl_mode is pwm based - * @pwm_data : platform specific pwm generation functions - * only valid when bl_mode is pwm based + * @period_ns : platform specific pwm period value. unit is nano. + Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED */ struct lp8788_backlight_platform_data { char *name; @@ -259,8 +245,8 @@ struct lp8788_backlight_platform_data { enum lp8788_bl_full_scale_current full_scale; enum lp8788_bl_ramp_step rise_time; enum lp8788_bl_ramp_step fall_time; - enum lp8788_bl_pwm_polarity pwm_pol; - struct lp8788_bl_pwm_data pwm_data; + enum pwm_polarity pwm_pol; + unsigned int period_ns; }; /* -- cgit v1.2.3 From a321e91b6d73ed011ffceed384c40d2785cf723b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:56 -0800 Subject: lib/scatterlist: add simple page iterator Add an iterator to walk through a scatter list a page at a time starting at a specific page offset. As opposed to the mapping iterator this is meant to be small, performing well even in simple loops like collecting all pages on the scatterlist into an array or setting up an iommu table based on the pages' DMA address. 
Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Tested-by: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/scatterlist.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4bd6c06eb28e..788a853aa7a7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -231,6 +231,41 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * sg page iterator + * + * Iterates over sg entries page-by-page. On each successful iteration, + * @piter->page points to the current page, @piter->sg to the sg holding this + * page and @piter->sg_pgoffset to the page's page offset within the sg. The + * iteration will stop either when a maximum number of sg entries was reached + * or a terminating sg (sg_last(sg) == true) was reached. 
+ */ +struct sg_page_iter { + struct page *page; /* current page */ + struct scatterlist *sg; /* sg holding the page */ + unsigned int sg_pgoffset; /* page offset within the sg */ + + /* these are internal states, keep away */ + unsigned int __nents; /* remaining sg entries */ + int __pg_advance; /* nr pages to advance at the + * next step */ +}; + +bool __sg_page_iter_next(struct sg_page_iter *piter); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset); + +/** + * for_each_sg_page - iterate over the pages of the given sg list + * @sglist: sglist to iterate over + * @piter: page iterator to hold current page, sg, sg_pgoffset + * @nents: maximum number of sg entries to iterate over + * @pgoffset: starting page offset + */ +#define for_each_sg_page(sglist, piter, nents, pgoffset) \ + for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ + __sg_page_iter_next(piter);) /* * Mapping sg iterator -- cgit v1.2.3 From 4225fc8555a992c7f91d174ef424384d6781e144 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:57 -0800 Subject: lib/scatterlist: use page iterator in the mapping iterator For better code reuse use the newly added page iterator to iterate through the pages. The offset, length within the page is still calculated by the mapping iterator as well as the actual mapping. Idea from Tejun Heo. 
Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Cc: James Hogan Cc: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/scatterlist.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 788a853aa7a7..2d8bdaef9611 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -293,11 +293,11 @@ struct sg_mapping_iter { void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ + struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ - struct scatterlist *__sg; /* current entry */ - unsigned int __nents; /* nr of remaining entries */ - unsigned int __offset; /* offset within sg */ + unsigned int __offset; /* offset within page */ + unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; -- cgit v1.2.3 From e579d2c259be42b6f29458327e5153b22414b031 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 27 Feb 2013 17:03:15 -0800 Subject: coredump: remove redundant defines for dumpable states The existing SUID_DUMP_* defines duplicate the newer SUID_DUMPABLE_* defines introduced in 54b501992dd2 ("coredump: warn about unsafe suid_dumpable / core_pattern combo"). Remove the new ones, and use the prior values instead. Signed-off-by: Kees Cook Reported-by: Chen Gang Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. 
Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6853bf947fde..d35d2b6ddbfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -/* get/set_dumpable() values */ -#define SUID_DUMPABLE_DISABLED 0 -#define SUID_DUMPABLE_ENABLED 1 -#define SUID_DUMPABLE_SAFE 2 - /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ -- cgit v1.2.3 From 6aa9707099c4b25700940eb3d016f16c4434360d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Wed, 27 Feb 2013 17:03:18 -0800 Subject: lockdep: check that no locks held at freeze time We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. [akpm@linux-foundation.org: export debug_check_no_locks_held] Signed-off-by: Mandeep Singh Baines Cc: Ben Chan Cc: Oleg Nesterov Cc: Tejun Heo Cc: Rafael J. 
Wysocki Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debug_locks.h | 4 ++-- include/linux/freezer.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f766751..a975de1ff59f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e70df40d84f6..043a5cf8b5ba 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include #include #include #include @@ -48,6 +49,8 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; -- cgit v1.2.3 From fe6e24ec90b753392c3f9ec1fbca196c4e88e511 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:50 -0800 Subject: idr: deprecate idr_remove_all() There was only one legitimate use of idr_remove_all() and a lot more of incorrect uses (or lack of it). Now that idr_destroy() implies idr_remove_all() and all the in-kernel users updated not to use it, there's no reason to keep it around. Mark it deprecated so that we can later unexport it. 
idr_remove_all() is made an inline function calling __idr_remove_all() to avoid triggering deprecated warning on EXPORT_SYMBOL(). Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index e5eb125effe6..4cf042da3892 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -110,10 +110,22 @@ int idr_for_each(struct idr *idp, void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); -void idr_remove_all(struct idr *idp); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +void __idr_remove_all(struct idr *idp); /* don't use */ + +/** + * idr_remove_all - remove all ids from the given idr tree + * @idp: idr handle + * + * If you're trying to destroy @idp, calling idr_destroy() is enough. + * This is going away. Don't use. + */ +static inline void __deprecated idr_remove_all(struct idr *idp) +{ + __idr_remove_all(idp); +} /* * IDA - IDR based id allocator, use when translation from id to -- cgit v1.2.3 From 4106ecaf59b79efff3f9b466baf9e8c67e19ac5a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:51 -0800 Subject: idr: cosmetic updates to struct / initializer definitions * Tab align fields like a normal person. * Drop the unnecessary 0 inits from IDR_INIT(). This patch is purely cosmetic. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 4cf042da3892..8f4980db3524 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -49,28 +49,24 @@ #define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< Date: Wed, 27 Feb 2013 17:03:52 -0800 Subject: idr: relocate idr_for_each_entry() and reorganize id[r|a]_get_new() * Move idr_for_each_entry() definition next to other idr related definitions. * Make id[r|a]_get_new() inline wrappers of id[r|a]_get_new_above(). This changes the implementation of idr_get_new() but the new implementation is trivial. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 8f4980db3524..ff44bc83f3cb 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -99,7 +99,6 @@ struct idr { void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); @@ -109,6 +108,30 @@ void idr_remove(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the 
allocated handle + * + * Simple wrapper around idr_get_new_above() w/ @starting_id of zero. + */ +static inline int idr_get_new(struct idr *idp, void *ptr, int *id) +{ + return idr_get_new_above(idp, ptr, 0, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + void __idr_remove_all(struct idr *idp); /* don't use */ /** @@ -149,7 +172,6 @@ struct ida { int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); -int ida_get_new(struct ida *ida, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); @@ -158,17 +180,18 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); -void __init idr_init_cache(void); - /** - * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. 
*/ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); #endif /* __IDR_H__ */ -- cgit v1.2.3 From 12d1b4393e0d8df36b2646a5e512f0513fb532d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:53 -0800 Subject: idr: remove _idr_rc_to_errno() hack idr uses -1, IDR_NEED_TO_GROW and IDR_NOMORE_SPACE to communicate exception conditions internally. The return value is later translated to errno values using _idr_rc_to_errno(). This is confusing. Drop the custom ones and consistently use -EAGAIN for "tree needs to grow", -ENOMEM for "need more memory" and -ENOSPC for "ran out of ID space". Due to the weird memory preloading mechanism, [ra]_get_new*() return -EAGAIN on memory shortage, so we need to substitute -ENOMEM w/ -EAGAIN on those interface functions. They'll eventually be cleaned up and the translations will go away. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index ff44bc83f3cb..837f152b1383 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -70,12 +70,6 @@ struct idr { } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) -/* Actions to be taken after a call to _idr_sub_alloc */ -#define IDR_NEED_TO_GROW -2 -#define IDR_NOMORE_SPACE -3 - -#define _idr_rc_to_errno(rc) ((rc) == -1 ? 
-EAGAIN : -ENOSPC) - /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) -- cgit v1.2.3 From d5c7409f79e14db49d00785692334657592c07ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:55 -0800 Subject: idr: implement idr_preload[_end]() and idr_alloc() The current idr interface is very cumbersome. * For all allocations, two function calls - idr_pre_get() and idr_get_new*() - should be made. * idr_pre_get() doesn't guarantee that the following idr_get_new*() will not fail from memory shortage. If idr_get_new*() returns -EAGAIN, the caller is expected to retry pre_get and allocation. * idr_get_new*() can't enforce upper limit. Upper limit can only be enforced by allocating and then freeing if above limit. * idr_layer buffer is unnecessarily per-idr. Each idr ends up keeping around MAX_IDR_FREE idr_layers. The memory consumed per idr is under two pages but it makes it difficult to make idr_layer larger. This patch implements the following new set of allocation functions. * idr_preload[_end]() - Similar to radix preload but doesn't fail. The first idr_alloc() inside preload section can be treated as if it were called with @gfp_mask used for idr_preload(). * idr_alloc() - Allocate an ID w/ lower and upper limits. Takes @gfp_flags and can be used w/o preloading. When used inside preloaded section, the allocation mask of preloading can be assumed. If idr_alloc() can be called from a context which allows sufficiently relaxed @gfp_mask, it can be used by itself. If, for example, idr_alloc() is called inside spinlock protected region, preloading can be used like the following. idr_preload(GFP_KERNEL); spin_lock(lock); id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); spin_unlock(lock); idr_preload_end(); if (id < 0) error; which is much simpler and less error-prone than idr_pre_get and idr_get_new*() loop. 
The new interface uses per-cpu idr_layer buffer and thus the number of idr's in the system doesn't affect the amount of memory used for preloading. idr_layer_alloc() is introduced to handle idr_layer allocations for both old and new ID allocation paths. This is a bit hairy now but the new interface is expected to replace the old and the internal implementation eventually will become simpler. Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 837f152b1383..6dcf133f208a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -94,14 +94,28 @@ struct idr { void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +void idr_preload(gfp_t gfp_mask); +int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); +void idr_free(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. 
+ */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + /** * idr_get_new - allocate new idr entry * @idp: idr handle -- cgit v1.2.3 From e8c8d1bc063bc88cfa1356266027b5075d3a82d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:04 -0800 Subject: idr: remove MAX_IDR_MASK and move left MAX_IDR_* into idr.c MAX_IDR_MASK is another weirdness in the idr interface. As idr covers whole positive integer range, it's defined as 0x7fffffff or INT_MAX. Its usage in idr_find(), idr_replace() and idr_remove() is bizarre. They basically mask off the sign bit and operate on the rest, so if the caller, by accident, passes in a negative number, the sign bit will be masked off and the remaining part will be used as if that was the input, which is worse than crashing. The constant is visible in idr.h and there are several users in the kernel. * drivers/i2c/i2c-core.c:i2c_add_numbered_adapter() Basically used to test if adap->nr is a negative number which isn't -1 and returns -EINVAL if so. idr_alloc() already has negative @start checking (w/ WARN_ON_ONCE), so this can go away. * drivers/infiniband/core/cm.c:cm_alloc_id() drivers/infiniband/hw/mlx4/cm.c:id_map_alloc() Used to wrap cyclic @start. Can be replaced with max(next, 0). Note that this type of cyclic allocation using idr is buggy. These are prone to spurious -ENOSPC failure after the first wraparound. * fs/super.c:get_anon_bdev() The ID allocated from ida is masked off before being tested whether it's inside valid range. ida allocated ID can never be a negative number and the masking is unnecessary. Update idr_*() functions to fail with -EINVAL when negative @id is specified and update other MAX_IDR_MASK users as described above. This leaves MAX_IDR_MASK without any user, remove it and relocate other MAX_IDR_* constants to lib/idr.c. 
Signed-off-by: Tejun Heo Cc: Jean Delvare Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: "Marciniszyn, Mike" Cc: Jack Morgenstein Cc: Or Gerlitz Cc: Al Viro Acked-by: Wolfram Sang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 6dcf133f208a..99b0ce533f0e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -38,16 +38,6 @@ #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -#define MAX_IDR_SHIFT (sizeof(int)*8 - 1) -#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) -#define MAX_IDR_MASK (MAX_IDR_BIT - 1) - -/* Leave the possibility of an incomplete final layer */ -#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) - -/* Number of id_layer structs to leave in free list */ -#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) - struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< Date: Wed, 27 Feb 2013 17:05:05 -0800 Subject: idr: remove length restriction from idr_layer->bitmap Currently, idr->bitmap is declared as an unsigned long which restricts the number of bits an idr_layer can contain. All bitops can handle arbitrary positive integer bit number and there's no reason for this restriction. Declare idr_layer->bitmap using DECLARE_BITMAP() instead of a single unsigned long. * idr_layer->bitmap is now an array. '&' dropped from params to bitops. * Replaced "== IDR_FULL" tests with bitmap_full() and removed IDR_FULL. * Replaced find_next_bit() on ~bitmap with find_next_zero_bit(). * Replaced "bitmap = 0" with bitmap_clear(). This patch doesn't (or at least shouldn't) introduce any behavior changes. 
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 99b0ce533f0e..63aa542da49b 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -19,18 +19,8 @@ #if BITS_PER_LONG == 32 # define IDR_BITS 5 -# define IDR_FULL 0xfffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (5 bits * 7 levels = 35 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 30) #elif BITS_PER_LONG == 64 # define IDR_BITS 6 -# define IDR_FULL 0xfffffffffffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (6 bits * 6 levels = 36 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 62) #else # error "BITS_PER_LONG is not 32 or 64" #endif @@ -39,7 +29,7 @@ #define IDR_MASK ((1 << IDR_BITS)-1) struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< Date: Wed, 27 Feb 2013 17:05:06 -0800 Subject: idr: make idr_layer larger With recent preloading changes, idr no longer keeps full layer cache per each idr instance (used to be ~6.5k per idr on 64bit) and the previous patch removed restriction on the bitmap size. Both now allow us to have larger layers. Increase IDR_BITS to 8 regardless of BITS_PER_LONG. Each layer is slightly larger than 2k on 64bit and 1k on 32bit and carries 256 entries. The size isn't too large, especially compared to what we used to waste on per-idr caches, and 256 entries should be able to serve most use cases with single layer. 
The max tree depth is 4 which is much better than the previous 6 on 64bit and 7 on 32bit. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 63aa542da49b..43b87b1c77a3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -17,14 +17,13 @@ #include #include -#if BITS_PER_LONG == 32 -# define IDR_BITS 5 -#elif BITS_PER_LONG == 64 -# define IDR_BITS 6 -#else -# error "BITS_PER_LONG is not 32 or 64" -#endif - +/* + * We want shallower trees and thus more bits covered at each layer. 8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. + */ +#define IDR_BITS 8 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -- cgit v1.2.3 From 54616283c2948812a44240858ced610e7cacbde1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:07 -0800 Subject: idr: add idr_layer->prefix Add a field which carries the prefix of ID the idr_layer covers. This will be used to implement lookup hint. This patch doesn't make use of the new field and doesn't introduce any behavior difference. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 43b87b1c77a3..7b1c5c6f9a06 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -28,6 +28,7 @@ #define IDR_MASK ((1 << IDR_BITS)-1) struct idr_layer { + int prefix; /* the ID prefix of this idr_layer */ DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1< Date: Wed, 27 Feb 2013 17:05:08 -0800 Subject: idr: implement lookup hint While idr lookup isn't a particularly heavy operation, it still is too substantial to use in hot paths without worrying about the performance implications. With recent changes, each idr_layer covers 256 slots which should be enough to cover most use cases with single idr_layer making lookup hint very attractive. This patch adds idr->hint which points to the idr_layer which allocated an ID most recently and the fast path lookup becomes if (look up target's prefix matches that of the hinted layer) return hint->ary[ID's offset in the leaf layer]; which can be inlined. idr->hint is set to the leaf node on idr_fill_slot() and cleared from free_layer(). [andriy.shevchenko@linux.intel.com: always do slow path when hint is uninitialized] Signed-off-by: Tejun Heo Cc: Kirill A. 
Shutemov Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 7b1c5c6f9a06..a6f38b5c34e4 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -37,6 +37,7 @@ struct idr_layer { }; struct idr { + struct idr_layer __rcu *hint; /* the last layer allocated from */ struct idr_layer __rcu *top; struct idr_layer *id_free; int layers; /* only valid w/o concurrent changes */ @@ -71,7 +72,7 @@ struct idr { * This is what we export. */ -void *idr_find(struct idr *idp, int id); +void *idr_find_slowpath(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); void idr_preload(gfp_t gfp_mask); @@ -96,6 +97,28 @@ static inline void idr_preload_end(void) preempt_enable(); } +/** + * idr_find - return pointer for given id + * @idp: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + /** * idr_get_new - allocate new idr entry * @idp: idr handle -- cgit v1.2.3 From 59fb1b9f5d9910c2eb97107dd0eb7e3bce8f0dde Mon Sep 17 00:00:00 2001 From: "Robert P. J. 
Day" Date: Wed, 27 Feb 2013 17:05:11 -0800 Subject: ipmi: remove superfluous kernel/userspace explanation Given the obvious distinction between kernel and userspace supported by uapi/, it seems unnecessary to comment on that. Signed-off-by: Robert P. J. Day Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include - -/* - * The in-kernel interface. - */ #include #include -- cgit v1.2.3 From 1d730c49a91dc5b7660269b98ad76e5a9b85740f Mon Sep 17 00:00:00 2001 From: Martin Sustrik Date: Wed, 27 Feb 2013 17:05:42 -0800 Subject: include/linux/eventfd.h: fix incorrect filename in a comment Comment in eventfd.h referred to 'include/asm-generic/fcntl.h' while the correct path is 'include/uapi/asm-generic/fcntl.h'. Signed-off-by: Martin Sustrik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventfd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 3c3ef19a625a..cf5d2af61b81 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,7 +13,7 @@ #include /* - * CAREFUL: Check include/asm-generic/fcntl.h when defining + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. 
We want * to re-use O_* flags that couldn't possibly have a meaning * from eventfd, in order to leave a free define-space for -- cgit v1.2.3 From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Feb 2013 17:06:00 -0800 Subject: hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... 
when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin Acked-by: Paul E. 
McKenney Signed-off-by: Sasha Levin Cc: Wu Fengguang Cc: Marcelo Tosatti Cc: Gleb Natapov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hashtable.h | 40 ++++++++++++++++----------------- include/linux/if_team.h | 6 ++--- include/linux/list.h | 49 +++++++++++++++++++---------------------- include/linux/pid.h | 3 +-- include/linux/rculist.h | 56 +++++++++++++++++++++++------------------------ 5 files changed, 72 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 227c62424f3c..a9df51f5d54c 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -115,51 +115,50 @@ static inline void hash_del_rcu(struct hlist_node *node) * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry(obj, node, &name[bkt], member) +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_rcu(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_rcu(obj, node, &name[bkt], member) +#define hash_for_each_rcu(name, bkt, 
obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member) +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible(name, obj, node, member, key) \ - hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the @@ -167,25 +166,24 @@ static inline void hash_del_rcu(struct hlist_node *node) * in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the 
struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_rcu(name, obj, node, member, key) \ - hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ - hlist_for_each_entry_safe(obj, node, tmp, \ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 4648d8021244..cfd21e3d5506 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -216,11 +216,10 @@ static inline struct hlist_head *team_port_index_hash(struct team *team, static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, p, head, hlist) + hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; @@ -228,11 +227,10 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - 
hlist_for_each_entry_rcu(port, p, head, hlist) + hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; diff --git a/include/linux/list.h b/include/linux/list.h index cc6d2aa6b415..d991cc147c98 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -666,54 +666,49 @@ static inline void hlist_move_list(struct hlist_head *old, for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ pos = n) +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + /** * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. 
*/ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_continue(pos, member) \ + for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_from(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. 
*/ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) #endif diff --git a/include/linux/pid.h b/include/linux/pid.h index 2381c973d897..a089a3c447fc 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -176,9 +176,8 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ - struct hlist_node *pos___; \ if ((pid) != NULL) \ - hlist_for_each_entry_rcu((task), pos___, \ + hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pids[type].node) { /* diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c92dd28eaa6c..8089e35d47ac 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -445,8 +445,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, /** * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -454,16 +453,16 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). 
*/ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(hlist_next_rcu(pos))) +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -471,35 +470,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ - for (pos = rcu_dereference_bh((head)->first); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_rcu_bh(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. 
*/ -#define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ - for (pos = rcu_dereference((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(pos->next)) +#define hlist_for_each_entry_continue_rcu(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ - for (pos = rcu_dereference_bh((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_continue_rcu_bh(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ -- cgit v1.2.3 From f9c6a655a94042f94c0adb30d07d93cfd8915e95 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Feb 2013 21:36:03 +0000 Subject: dmaengine: dw_dmac: move to generic DMA binding The original device tree binding for this driver, from Viresh Kumar unfortunately conflicted with the generic DMA binding, and did not allow to completely separate slave device configuration from the controller. This is an attempt to replace it with an implementation of the generic binding, but it is currently completely untested, because I do not have any hardware with this particular controller.
The patch applies on top of the slave-dma tree, which contains both the base support for the generic DMA binding, as well as the earlier attempt from Viresh. Both of these are currently not merged upstream however. This version incorporates feedback from Viresh Kumar, Andy Shevchenko and Russell King. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Acked-by: Andy Shevchenko Cc: Vinod Koul Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Vinod Koul --- include/linux/dw_dmac.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 41766de66e33..481ab2345d6b 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -27,7 +27,6 @@ */ struct dw_dma_slave { struct device *dma_dev; - const char *bus_id; u32 cfg_hi; u32 cfg_lo; u8 src_master; @@ -60,9 +59,6 @@ struct dw_dma_platform_data { unsigned short block_size; unsigned char nr_masters; unsigned char data_width[4]; - - struct dw_dma_slave *sd; - unsigned int sd_count; }; /* bursts size */ @@ -114,6 +110,5 @@ void dw_dma_cyclic_stop(struct dma_chan *chan); dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); -bool dw_dma_generic_filter(struct dma_chan *chan, void *param); #endif /* DW_DMAC_H */ -- cgit v1.2.3 From edddbb1eda61753c886a3c5e159293a7b3a9e30a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 28 Feb 2013 20:30:09 -0500 Subject: SUNRPC: add call to get configured timeout Returns the configured timeout for the xprt of the rpc client. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8da..21d52d0dc15c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -160,6 +160,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); +unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -- cgit v1.2.3 From 3000512137602b84d1ad5fd89d62984993a19bb6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 28 Feb 2013 20:30:10 -0500 Subject: NFSv4.1: LAYOUTGET EDELAY loops timeout to the MDS The client will currently try LAYOUTGETs forever if a server is returning NFS4ERR_LAYOUTTRYLATER or NFS4ERR_RECALLCONFLICT - even if the client no longer needs the layout (i.e. process killed, unmounted). This patch uses the DS timeout value (module parameter 'dataserver_timeo' via rpc layer) to set an upper limit of how long the client tries LAYOUTGETs in this situation. Once the timeout is reached, IO is redirected to the MDS. This also changes how the client checks if a layout is on the clp list to avoid a double list_add.
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 29adb12c7ecf..2250cab6fc4b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget_args { struct inode *inode; struct nfs_open_context *ctx; nfs4_stateid stateid; + unsigned long timestamp; struct nfs4_layoutdriver_data layout; }; -- cgit v1.2.3 From 1a71fb84fda651105e1e194c2d3a3a13a38210a9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 27 Sep 2011 22:21:37 +0200 Subject: rtc: stmp3xxx: add wdt-accessor function This RTC also includes a watchdog timer. Provide an accessor function for setting the watchdog timeout value which will be picked up by a watchdog driver. Also register the platform_device for the watchdog here to get the boot-time dependencies right. Signed-off-by: Wolfram Sang Acked-by: Andrew Morton Signed-off-by: Wim Van Sebroeck --- include/linux/stmp3xxx_rtc_wdt.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/linux/stmp3xxx_rtc_wdt.h (limited to 'include/linux') diff --git a/include/linux/stmp3xxx_rtc_wdt.h b/include/linux/stmp3xxx_rtc_wdt.h new file mode 100644 index 000000000000..1dd12c96231b --- /dev/null +++ b/include/linux/stmp3xxx_rtc_wdt.h @@ -0,0 +1,15 @@ +/* + * stmp3xxx_rtc_wdt.h + * + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This file is released under the GPLv2. 
+ */ +#ifndef __LINUX_STMP3XXX_RTC_WDT_H +#define __LINUX_STMP3XXX_RTC_WDT_H + +struct stmp3xxx_wdt_pdata { + void (*wdt_set_timeout)(struct device *dev, u32 timeout); +}; + +#endif /* __LINUX_STMP3XXX_RTC_WDT_H */ -- cgit v1.2.3 From f82dedf812ecdf0c19c6c240e85a4a487ab62016 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 24 Jan 2013 18:13:34 +0100 Subject: watchdog: bcm47xx_wdt.c: use platform device Instead of accessing the function to set the watchdog timer directly, register a platform driver the platform could register to use this watchdog driver. Signed-off-by: Hauke Mehrtens Signed-off-by: Wim Van Sebroeck --- include/linux/bcm47xx_wdt.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcm47xx_wdt.h b/include/linux/bcm47xx_wdt.h index e5dfc256485b..b708786d4cbf 100644 --- a/include/linux/bcm47xx_wdt.h +++ b/include/linux/bcm47xx_wdt.h @@ -1,7 +1,10 @@ #ifndef LINUX_BCM47XX_WDT_H_ #define LINUX_BCM47XX_WDT_H_ +#include +#include #include +#include struct bcm47xx_wdt { @@ -10,6 +13,12 @@ struct bcm47xx_wdt { u32 max_timer_ms; void *driver_data; + + struct watchdog_device wdd; + struct notifier_block notifier; + + struct timer_list soft_timer; + atomic_t soft_ticks; }; static inline void *bcm47xx_wdt_get_drvdata(struct bcm47xx_wdt *wdt) -- cgit v1.2.3 From 3048253ed957fc6cdc34599178408559aa1e0062 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 8 Jan 2013 11:04:10 +0100 Subject: watchdog: core: dt: add support for the timeout-sec dt property Add support for watchdog drivers to initialize/set the timeout field of the watchdog_device structure. The timeout field is initialised either with the module timeout parameter value (if valid) or with the timeout-sec dt property (if valid). If both are invalid the initial value is unchanged. 
Signed-off-by: Fabio Porcedda Acked-by: Nicolas Ferre Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 3a9df2f43be6..2a3038ee17a3 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -118,6 +118,13 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway set_bit(WDOG_NO_WAY_OUT, &wdd->status); } +/* Use the following function to check if a timeout value is invalid */ +static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) +{ + return ((wdd->max_timeout != 0) && + (t < wdd->min_timeout || t > wdd->max_timeout)); +} + /* Use the following functions to manipulate watchdog driver specific data */ static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) { @@ -130,6 +137,8 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) } /* drivers/watchdog/watchdog_core.c */ +extern int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev); extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); -- cgit v1.2.3 From 8eae508b7c6ff502a71d0293b69e97c5505d5840 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 1 Mar 2013 11:11:47 -0800 Subject: hsi: fix kernel-doc warnings Fix kernel-doc warnings in hsi files: Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'e_handler' description in 'hsi_client' Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'pclaimed' description in 'hsi_client' Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'nb' description in 'hsi_client' Warning(drivers/hsi/hsi.c:434): No description found for parameter 'handler' Warning(drivers/hsi/hsi.c:434): Excess function parameter 'cb' 
description in 'hsi_register_port_event' Don't document "private:" fields with kernel-doc notation. If you want to leave them fully documented, that's OK, but then don't mark them as "private:". Signed-off-by: Randy Dunlap Cc: Carlos Chinea Cc: Linus Walleij Cc: Greg Kroah-Hartman Cc: linux-kernel@vger.kernel.org Cc: linux-omap@vger.kernel.org Acked-by: Nishanth Menon Signed-off-by: Linus Torvalds --- include/linux/hsi/hsi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h index 56fae865e272..0dca785288cf 100644 --- a/include/linux/hsi/hsi.h +++ b/include/linux/hsi/hsi.h @@ -121,9 +121,9 @@ static inline int hsi_register_board_info(struct hsi_board_info const *info, * @device: Driver model representation of the device * @tx_cfg: HSI TX configuration * @rx_cfg: HSI RX configuration - * @e_handler: Callback for handling port events (RX Wake High/Low) - * @pclaimed: Keeps tracks if the clients claimed its associated HSI port - * @nb: Notifier block for port events + * e_handler: Callback for handling port events (RX Wake High/Low) + * pclaimed: Keeps tracks if the clients claimed its associated HSI port + * nb: Notifier block for port events */ struct hsi_client { struct device device; -- cgit v1.2.3 From fd7c092e711ebab55b2688d3859d95dfd0301f73 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:44 +0000 Subject: dm: fix truncated status strings Avoid returning a truncated table or status string instead of setting the DM_BUFFER_FULL_FLAG when the last target of a table fills the buffer. When processing a table or status request, the function retrieve_status calls ti->type->status. If ti->type->status returns non-zero, retrieve_status assumes that the buffer overflowed and sets DM_BUFFER_FULL_FLAG. However, targets don't return non-zero values from their status method on overflow. Most targets always return zero.
If a buffer overflow happens in a target that is not the last in the table, it gets noticed during the next iteration of the loop in retrieve_status; but if a buffer overflow happens in the last target, it goes unnoticed and erroneously truncated data is returned. In the current code, the targets behave in the following way: * dm-crypt returns -ENOMEM if there is not enough space to store the key, but it returns 0 on all other overflows. * dm-thin returns errors from the status method if a disk error happened. This is incorrect because retrieve_status doesn't check the error code, it assumes that all non-zero values mean buffer overflow. * all the other targets always return 0. This patch changes the ti->type->status function to return void (because most targets don't use the return code). Overflow is detected in retrieve_status: if the status method fills up the remaining space completely, it is assumed that buffer overflow happened. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bf6afa2fc432..a5cda3ea6b88 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -68,8 +68,8 @@ typedef void (*dm_postsuspend_fn) (struct dm_target *ti); typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); -typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - unsigned status_flags, char *result, unsigned maxlen); +typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -- cgit v1.2.3 From 55a62eef8d1b50ceff3b7bf46851103bdcc7e5b0 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon 
Date: Fri, 1 Mar 2013 22:45:47 +0000 Subject: dm: rename request variables to bios Use 'bio' in the name of variables and functions that deal with bios rather than 'request' to avoid confusion with the normal block layer use of 'request'. No functional changes. Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a5cda3ea6b88..d5f984b07466 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -187,26 +187,26 @@ struct dm_target { uint32_t max_io_len; /* - * A number of zero-length barrier requests that will be submitted + * A number of zero-length barrier bios that will be submitted * to the target for the purpose of flushing cache. * - * The request number can be accessed with dm_bio_get_target_request_nr. - * It is a responsibility of the target driver to remap these requests + * The bio number can be accessed with dm_bio_get_target_bio_nr. + * It is a responsibility of the target driver to remap these bios * to the real underlying devices. */ - unsigned num_flush_requests; + unsigned num_flush_bios; /* - * The number of discard requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of discard bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_discard_requests; + unsigned num_discard_bios; /* - * The number of WRITE SAME requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of WRITE SAME bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. 
*/ - unsigned num_write_same_requests; + unsigned num_write_same_bios; /* * The minimum number of extra bytes allocated in each bio for the @@ -233,10 +233,10 @@ struct dm_target { bool discards_supported:1; /* - * Set if the target required discard request to be split + * Set if the target required discard bios to be split * on max_io_len boundary. */ - bool split_discard_requests:1; + bool split_discard_bios:1; /* * Set if this target does not return zeroes on discarded blocks. @@ -261,7 +261,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; - unsigned target_request_nr; + unsigned target_bio_nr; struct bio clone; }; @@ -275,9 +275,9 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); } -static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +static inline unsigned dm_bio_get_target_bio_nr(const struct bio *bio) { - return container_of(bio, struct dm_target_io, clone)->target_request_nr; + return container_of(bio, struct dm_target_io, clone)->target_bio_nr; } int dm_register_target(struct target_type *t); -- cgit v1.2.3 From df5d2e9089c7d5b8c46f767e4278610ea3e815b9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm kcopyd: introduce configurable throttling This patch allows the administrator to reduce the rate at which kcopyd issues I/O. Each module that uses kcopyd acquires a throttle parameter that can be set in /sys/module/*/parameters. We maintain a history of kcopyd usage by each module in the variables io_period and total_period in struct dm_kcopyd_throttle. The actual kcopyd activity is calculated as a percentage of time equal to "(100 * io_period / total_period)". This is compared with the user-defined throttle percentage threshold and if it is exceeded, we sleep. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/dm-kcopyd.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 47d9d376e4e7..f486d636b82e 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -21,11 +21,34 @@ #define DM_KCOPYD_IGNORE_ERROR 1 +struct dm_kcopyd_throttle { + unsigned throttle; + unsigned num_io_jobs; + unsigned io_period; + unsigned total_period; + unsigned last_jiffies; +}; + +/* + * kcopyd clients that want to support throttling must pass an initialised + * dm_kcopyd_throttle struct into dm_kcopyd_client_create(). + * Two or more clients may share the same instance of this struct between + * them if they wish to be throttled as a group. + * + * This macro also creates a corresponding module parameter to configure + * the amount of throttling. + */ +#define DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(name, description) \ +static struct dm_kcopyd_throttle dm_kcopyd_throttle = { 100, 0, 0, 0, 0 }; \ +module_param_named(name, dm_kcopyd_throttle.throttle, uint, 0644); \ +MODULE_PARM_DESC(name, description) + /* * To use kcopyd you must first create a dm_kcopyd_client object. + * throttle can be NULL if you don't want any throttling. */ struct dm_kcopyd_client; -struct dm_kcopyd_client *dm_kcopyd_client_create(void); +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3 From b0d8ed4d96a26ef3ac54a4aa8911c9413070662e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm: add target num_write_bios fn Add a num_write_bios function to struct target. 
If an instance of a target sets this, it will be queried before the target's mapping function is called on a write bio, and the response controls the number of copies of the write bio that the target will receive. This provides a convenient way for a target to send the same data to more than one device. The new cache target uses this in writethrough mode, to send the data both to the cache and the backing device. Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d5f984b07466..1e483fa7afb4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -175,6 +175,14 @@ struct target_type { #define DM_TARGET_IMMUTABLE 0x00000004 #define dm_target_is_immutable(type) ((type)->features & DM_TARGET_IMMUTABLE) +/* + * Some targets need to be sent the same WRITE bio severals times so + * that they can send copies of it to different devices. This function + * examines any supplied bio and returns the number of copies of it the + * target requires. + */ +typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio); + struct dm_target { struct dm_table *table; struct target_type *type; @@ -214,6 +222,13 @@ struct dm_target { */ unsigned per_bio_data_size; + /* + * If defined, this function is called to find out how many + * duplicate bios should be sent to the target when writing + * data. + */ + dm_num_write_bios_fn num_write_bios; + /* target specific data */ void *private; -- cgit v1.2.3 From dd37978c50bc8b354e5c4633f69387f16572fdac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Mar 2013 19:48:30 -0500 Subject: cache the value of file_inode() in struct file Note that this thing does *not* contribute to inode refcount; it's pinned down by dentry. 
Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e686a099465..74a907b8b950 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,6 +769,7 @@ struct file { } f_u; struct path f_path; #define f_dentry f_path.dentry + struct inode *f_inode; /* cached value */ const struct file_operations *f_op; /* @@ -2217,7 +2218,7 @@ static inline bool execute_ok(struct inode *inode) static inline struct inode *file_inode(struct file *f) { - return f->f_path.dentry->d_inode; + return f->f_inode; } /* -- cgit v1.2.3 From dcf787f39162ce32ca325b3e784aba2d2444619a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Mar 2013 23:51:07 -0500 Subject: constify path_get/path_put and fs_struct.c stuff Signed-off-by: Al Viro --- include/linux/fs_struct.h | 4 ++-- include/linux/path.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index d0ae3a84bcfb..729eded4b24f 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -17,8 +17,8 @@ struct fs_struct { extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); -extern void set_fs_root(struct fs_struct *, struct path *); -extern void set_fs_pwd(struct fs_struct *, struct path *); +extern void set_fs_root(struct fs_struct *, const struct path *); +extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); diff --git a/include/linux/path.h b/include/linux/path.h index edc98dec6266..d1372186f431 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -9,8 +9,8 @@ struct path { struct dentry *dentry; }; -extern void path_get(struct path *); -extern void path_put(struct path *); +extern void 
path_get(const struct path *); +extern void path_put(const struct path *); static inline int path_equal(const struct path *path1, const struct path *path2) { -- cgit v1.2.3 From 20e6926dcbafa1b361f1c29d967688be14b6ca4b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 1 Mar 2013 14:51:27 -0800 Subject: x86, ACPI, mm: Revert movablemem_map support Tim found: WARNING: at arch/x86/kernel/smpboot.c:324 topology_sane.isra.2+0x6f/0x80() Hardware name: S2600CP sched: CPU #1's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. smpboot: Booting Node 1, Processors #1 Modules linked in: Pid: 0, comm: swapper/1 Not tainted 3.9.0-0-generic #1 Call Trace: set_cpu_sibling_map+0x279/0x449 start_secondary+0x11d/0x1e5 Don Morris reproduced on a HP z620 workstation, and bisected it to commit e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready") It turns out movable_map has some problems, and it breaks several things 1. numa_init is called several times, NOT just for srat. so those nodes_clear(numa_nodes_parsed) memset(&numa_meminfo, 0, sizeof(numa_meminfo)) can not be just removed. Need to consider sequence is: numaq, srat, amd, dummy. and make fall back path working. 2. simply split acpi_numa_init to early_parse_srat. a. that early_parse_srat is NOT called for ia64, so you break ia64. b. for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE) still left in numa_init. So it will just clear result from early_parse_srat. it should be moved before that.... c. it breaks ACPI_TABLE_OVERIDE...as the acpi table scan is moved early before override from INITRD is settled. 3. that patch TITLE is total misleading, there is NO x86 in the title, but it changes critical x86 code. It caused x86 guys did not pay attention to find the problem early. Those patches really should be routed via tip/x86/mm. 4. after that commit, following range can not use movable ram: a. real_mode code.... 
well..funny, legacy Node0 [0,1M) could be hot-removed? b. initrd... it will be freed after booting, so it could be on movable... c. crashkernel for kdump...: looks like we can not put kdump kernel above 4G anymore. d. init_mem_mapping: can not put page table high anymore. e. initmem_init: vmemmap can not be high local node anymore. That is not good. If node is hotplugable, the mem related range like page table and vmemmap could be on the that node without problem and should be on that node. We have workaround patch that could fix some problems, but some can not be fixed. So just remove that offending commit and related ones including: f7210e6c4ac7 ("mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect movablecore_map in memblock_overlaps_region().") 01a178a94e8e ("acpi, memory-hotplug: support getting hotplug info from SRAT") 27168d38fa20 ("acpi, memory-hotplug: extend movablemem_map ranges to the end of node") e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready") fb06bc8e5f42 ("page_alloc: bootmem limit with movablecore_map") 42f47e27e761 ("page_alloc: make movablemem_map have higher priority") 6981ec31146c ("page_alloc: introduce zone_movable_limit[] to keep movable limit for nodes") 34b71f1e04fc ("page_alloc: add movable_memmap kernel parameter") 4d59a75125d5 ("x86: get pg_data_t's memory from other node") Later we should have patches that will make sure kernel put page table and vmemmap on local node ram instead of push them down to node0. Also need to find way to put other kernel used ram to local node ram. 
Reported-by: Tim Gardner Reported-by: Don Morris Bisected-by: Don Morris Tested-by: Don Morris Signed-off-by: Yinghai Lu Cc: Tony Luck Cc: Thomas Renninger Cc: Tejun Heo Cc: Tang Chen Cc: Yasuaki Ishimatsu Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 8 -------- include/linux/memblock.h | 2 -- include/linux/mm.h | 18 ------------------ 3 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f46cfd73a553..bcbdd7484e58 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -485,14 +485,6 @@ static inline bool acpi_driver_match_device(struct device *dev, #endif /* !CONFIG_ACPI */ -#ifdef CONFIG_ACPI_NUMA -void __init early_parse_srat(void); -#else -static inline void early_parse_srat(void) -{ -} -#endif - #ifdef CONFIG_ACPI void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, u32 pm1a_ctrl, u32 pm1b_ctrl)); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3e5ecb2d790e..f388203db7e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,7 +42,6 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -extern struct movablemem_map movablemem_map; #define memblock_dbg(fmt, ...) 
\ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) @@ -61,7 +60,6 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); diff --git a/include/linux/mm.h b/include/linux/mm.h index e7c3f9a0111a..1ede55f292c2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1333,24 +1333,6 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); -#define MOVABLEMEM_MAP_MAX MAX_NUMNODES -struct movablemem_entry { - unsigned long start_pfn; /* start pfn of memory segment */ - unsigned long end_pfn; /* end pfn of memory segment (exclusive) */ -}; - -struct movablemem_map { - bool acpi; /* true if using SRAT info */ - int nr_map; - struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; - nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ - nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ -}; - -extern void __init insert_movablemem_map(unsigned long start_pfn, - unsigned long end_pfn); -extern int __init movablemem_map_overlap(unsigned long start_pfn, - unsigned long end_pfn); #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ -- cgit v1.2.3 From 5698c50d9da4ab2f57d98c64ea97675dcaf2a608 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Oct 2012 10:54:47 +0100 Subject: metag: Internal and external irqchips Meta core internal interrupts (from HWSTATMETA and friends) are vectored onto the TR1 core trigger for the current thread. This is demultiplexed in irq-metag.c to individual Linux IRQs for each internal interrupt. External SoC interrupts (from HWSTATEXT and friends) are vectored onto the TR2 core trigger for the current thread. 
This is demultiplexed in irq-metag-ext.c to individual Linux IRQs for each external SoC interrupt. The external irqchip has devicetree bindings for configuring the number of irq banks and the type of masking available. Signed-off-by: James Hogan Cc: Arnd Bergmann Cc: Grant Likely Cc: Rob Herring Cc: Rob Landley Cc: Dom Cobley Cc: Simon Arlott Cc: Viresh Kumar Cc: Maxime Ripard Cc: devicetree-discuss@lists.ozlabs.org Cc: linux-doc@vger.kernel.org --- include/linux/irqchip/metag-ext.h | 33 +++++++++++++++++++++++++++++++++ include/linux/irqchip/metag.h | 24 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 include/linux/irqchip/metag-ext.h create mode 100644 include/linux/irqchip/metag.h (limited to 'include/linux') diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h new file mode 100644 index 000000000000..697af0fe7c5a --- /dev/null +++ b/include/linux/irqchip/metag-ext.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_EXT_H_ +#define _LINUX_IRQCHIP_METAG_EXT_H_ + +struct irq_data; +struct platform_device; + +/* called from core irq code at init */ +int init_external_IRQ(void); + +/* + * called from SoC init_irq() callback to dynamically indicate the lack of + * HWMASKEXT registers. + */ +void meta_intc_no_mask(void); + +/* + * These allow SoCs to specialise the interrupt controller from their init_irq + * callbacks. 
+ */ + +extern struct irq_chip meta_intc_edge_chip; +extern struct irq_chip meta_intc_level_chip; + +/* this should be called in the mask callback */ +void meta_intc_mask_irq_simple(struct irq_data *data); +/* this should be called in the unmask callback */ +void meta_intc_unmask_irq_simple(struct irq_data *data); + +#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */ diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h new file mode 100644 index 000000000000..4ebdfb3101ab --- /dev/null +++ b/include/linux/irqchip/metag.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2011 Imagination Technologies + */ + +#ifndef _LINUX_IRQCHIP_METAG_H_ +#define _LINUX_IRQCHIP_METAG_H_ + +#include + +#ifdef CONFIG_METAG_PERFCOUNTER_IRQS +extern int init_internal_IRQ(void); +extern int internal_irq_map(unsigned int hw); +#else +static inline int init_internal_IRQ(void) +{ + return 0; +} +static inline int internal_irq_map(unsigned int hw) +{ + return -EINVAL; +} +#endif + +#endif /* _LINUX_IRQCHIP_METAG_H_ */ -- cgit v1.2.3 From 9ca52ed979b6b45ae480a5fc56d593efb3bf16e8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 16 Oct 2012 10:16:14 +0100 Subject: mm: define VM_GROWSUP for CONFIG_METAG Commit cc2383ec06be093789469852e1fe96e1148e9a2c ("mm: introduce arch-specific vma flag VM_ARCH_1") merged in v3.7-rc1. The above commit combined several arch-specific vma flags into one, and in the process it changed the VM_GROWSUP definition to depend on specific architectures rather than CONFIG_STACK_GROWSUP. Therefore add an ifdef for CONFIG_METAG to also set VM_GROWSUP. 
Signed-off-by: James Hogan Cc: Konstantin Khlebnikov Cc: Andrew Morton Cc: Mel Gorman Cc: Michel Lespinasse Cc: Al Viro Cc: linux-mm@kvack.org --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 66e2f7c61e5c..44ac3dc363e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -114,6 +114,8 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 +#elif defined(CONFIG_METAG) +# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) # define VM_GROWSUP VM_ARCH_1 #elif !defined(CONFIG_MMU) -- cgit v1.2.3