From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..513150d8c25b 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..6768727d00d7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -55,8 +55,8 @@ enum dma_transaction_type { DMA_PQ_XOR, DMA_DUAL_XOR, DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, @@ -214,7 +214,7 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -243,7 +243,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, u32 *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25b..9f14cd540cd2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 84 +++++++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd2..00cfb637ddf2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:13:01 -0400 Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL" This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and 4cbc76eadf56399cd11fb736b33c53aec9caab8c. Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging battery/EC problems, and was upset that it was removed. This readds it. Conflicts: Documentation/power_supply_class.txt Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 594c494ac3f0..0ab6aa171241 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,15 @@ enum { POWER_SUPPLY_TECHNOLOGY_LiMn, }; +enum { + POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0, + POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL, + POWER_SUPPLY_CAPACITY_LEVEL_LOW, + POWER_SUPPLY_CAPACITY_LEVEL_NORMAL, + POWER_SUPPLY_CAPACITY_LEVEL_HIGH, + POWER_SUPPLY_CAPACITY_LEVEL_FULL, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -89,6 +98,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENERGY_NOW, POWER_SUPPLY_PROP_ENERGY_AVG, POWER_SUPPLY_PROP_CAPACITY, /* in percents! */ + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, -- cgit v1.2.3 From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 2 Jul 2009 09:45:18 -0400 Subject: power_supply: Add a charge_type property, and use it for olpc driver This adds a new sysfs file called 'charge_type' which displays the type of charging (unknown, n/a, trickle charge, or fast charging). This allows things like battery diagnostics to determine what the battery/EC is doing without resorting to changing the 'status' sysfs output. Signed-off-by: Andres Salomon Acked-by: Mark Brown Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0ab6aa171241..4c7c6fc35487 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -38,6 +38,13 @@ enum { POWER_SUPPLY_STATUS_FULL, }; +enum { + POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, + POWER_SUPPLY_CHARGE_TYPE_NONE, + POWER_SUPPLY_CHARGE_TYPE_TRICKLE, + POWER_SUPPLY_CHARGE_TYPE_FAST, +}; + enum { POWER_SUPPLY_HEALTH_UNKNOWN = 0, POWER_SUPPLY_HEALTH_GOOD, @@ -70,6 +77,7 @@ enum { enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, -- cgit v1.2.3 From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Jul 2009 20:35:53 +0200 Subject: power_supply: get_by_name and set_charged functionality This adds a function that indicates that a battery is fully charged. It also includes functions to get a power_supply device from the class of registered devices by name reference. These can be used to find a specific battery to call power_supply_set_battery_charged() on. Some battery drivers might need this information to calibrate themselves. Signed-off-by: Daniel Mack Cc: Ian Molton Cc: Anton Vorontsov Cc: Matt Reimer Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4c7c6fc35487..b5d096d3a9be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -144,6 +144,7 @@ struct power_supply { enum power_supply_property psp, union power_supply_propval *val); void (*external_power_changed)(struct power_supply *psy); + void (*set_charged)(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ int use_for_apm; @@ -183,8 +184,10 @@ struct power_supply_info { int use_for_apm; }; +extern struct power_supply *power_supply_get_by_name(char *name); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_battery_charged(struct power_supply *psy); #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: add sum check flags Replace the flat zero_sum_result with a collection of flags to contain the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead of DMA_ since these flags will be used on non-dma-zero-sum enabled platforms. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf2..3d21a2517518 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d7..02447afcebad 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a2517518..12a2efcbd565 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 9 +++++ include/linux/dmaengine.h | 87 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 90 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd565..e6ce5f004f98 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afcebad..ce010cd991d2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implemention also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H. Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f98..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From 652696efce135559b98ee5a3d7899295e8d553fa Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Wed, 24 Jun 2009 12:03:51 +0900 Subject: mtd: OneNAND: 4-bit ECC status macros Define ECC status for 4-bit ECC status Signed-off-by: Kyungmin Park Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/onenand_regs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h index 86a6bbef6465..acadbf53a69f 100644 --- a/include/linux/mtd/onenand_regs.h +++ b/include/linux/mtd/onenand_regs.h @@ -207,6 +207,9 @@ #define ONENAND_ECC_2BIT (1 << 1) #define ONENAND_ECC_2BIT_ALL (0xAAAA) #define FLEXONENAND_UNCORRECTABLE_ERROR (0x1010) +#define ONENAND_ECC_3BIT (1 << 2) +#define ONENAND_ECC_4BIT (1 << 3) +#define ONENAND_ECC_4BIT_UNCORRECTABLE (0x1010) /* * One-Time Programmable (OTP) -- cgit v1.2.3 From b8bdc1d0cfc488ac0d94724639f9a61b0a5a1d40 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 31 Aug 2009 06:20:12 +0200 Subject: wm97xx_battery: Use platform_data This patch converts the wm97xx-battery driver to use platform_data supplied by ac97 bus. Signed-off-by: Marek Vasut Signed-off-by: Anton Vorontsov --- include/linux/wm97xx.h | 18 ++++++++++++++++++ include/linux/wm97xx_batt.h | 18 ++++-------------- 2 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 6f69968eab24..b2c2297844e3 100644 --- a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -286,6 +286,24 @@ struct wm97xx { u16 suspend_mode; /* PRP in suspend mode */ }; +struct wm97xx_batt_pdata { + int batt_aux; + int temp_aux; + int charge_gpio; + int min_voltage; + int max_voltage; + int batt_div; + int batt_mult; + int temp_div; + int temp_mult; + int batt_tech; + char *batt_name; +}; + +struct wm97xx_pdata { + struct wm97xx_batt_pdata *batt_pdata; /* battery data */ +}; + /* * Codec GPIO access (not supported on WM9705) * This can be used to set/get codec GPIO and Virtual GPIO status. diff --git a/include/linux/wm97xx_batt.h b/include/linux/wm97xx_batt.h index 9681d1ab0e4f..a1d6419c2ff8 100644 --- a/include/linux/wm97xx_batt.h +++ b/include/linux/wm97xx_batt.h @@ -3,22 +3,12 @@ #include -struct wm97xx_batt_info { - int batt_aux; - int temp_aux; - int charge_gpio; - int min_voltage; - int max_voltage; - int batt_div; - int batt_mult; - int temp_div; - int temp_mult; - int batt_tech; - char *batt_name; -}; +#warning This file will be removed soon, use wm97xx.h instead! + +#define wm97xx_batt_info wm97xx_batt_pdata #ifdef CONFIG_BATTERY_WM97XX -void __init wm97xx_bat_set_pdata(struct wm97xx_batt_info *data); +void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data); #else static inline void wm97xx_bat_set_pdata(struct wm97xx_batt_info *data) {} #endif -- cgit v1.2.3 From 3961f7c3cf247eee5df7fabadc7a40f2deeb98f3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 10 Aug 2009 17:43:53 +0100 Subject: power_supply: Add driver for the PMU on WM831x PMICs The WM831x PMICs provide power path management from three sources: a wall supply, USB and a battery with integrated charger. They also provide an additional backup supply with integrated for maintaining always on functionality such as the RTC and monitoring of power switches. After some initial configuration at startup the device operates autonomously, the driver simply provides reporting of the current state. Signed-off-by: Mark Brown Signed-off-by: Anton Vorontsov --- include/linux/mfd/wm831x/pmu.h | 189 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 include/linux/mfd/wm831x/pmu.h (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/pmu.h b/include/linux/mfd/wm831x/pmu.h new file mode 100644 index 000000000000..b18cbb027bc3 --- /dev/null +++ b/include/linux/mfd/wm831x/pmu.h @@ -0,0 +1,189 @@ +/* + * include/linux/mfd/wm831x/pmu.h -- PMU for WM831x + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM831X_PMU_H__ +#define __MFD_WM831X_PMU_H__ + +/* + * R16387 (0x4003) - Power State + */ +#define WM831X_CHIP_ON 0x8000 /* CHIP_ON */ +#define WM831X_CHIP_ON_MASK 0x8000 /* CHIP_ON */ +#define WM831X_CHIP_ON_SHIFT 15 /* CHIP_ON */ +#define WM831X_CHIP_ON_WIDTH 1 /* CHIP_ON */ +#define WM831X_CHIP_SLP 0x4000 /* CHIP_SLP */ +#define WM831X_CHIP_SLP_MASK 0x4000 /* CHIP_SLP */ +#define WM831X_CHIP_SLP_SHIFT 14 /* CHIP_SLP */ +#define WM831X_CHIP_SLP_WIDTH 1 /* CHIP_SLP */ +#define WM831X_REF_LP 0x1000 /* REF_LP */ +#define WM831X_REF_LP_MASK 0x1000 /* REF_LP */ +#define WM831X_REF_LP_SHIFT 12 /* REF_LP */ +#define WM831X_REF_LP_WIDTH 1 /* REF_LP */ +#define WM831X_PWRSTATE_DLY_MASK 0x0C00 /* PWRSTATE_DLY - [11:10] */ +#define WM831X_PWRSTATE_DLY_SHIFT 10 /* PWRSTATE_DLY - [11:10] */ +#define WM831X_PWRSTATE_DLY_WIDTH 2 /* PWRSTATE_DLY - [11:10] */ +#define WM831X_SWRST_DLY 0x0200 /* SWRST_DLY */ +#define WM831X_SWRST_DLY_MASK 0x0200 /* SWRST_DLY */ +#define WM831X_SWRST_DLY_SHIFT 9 /* SWRST_DLY */ +#define WM831X_SWRST_DLY_WIDTH 1 /* SWRST_DLY */ +#define WM831X_USB100MA_STARTUP_MASK 0x0030 /* USB100MA_STARTUP - [5:4] */ +#define WM831X_USB100MA_STARTUP_SHIFT 4 /* USB100MA_STARTUP - [5:4] */ +#define WM831X_USB100MA_STARTUP_WIDTH 2 /* USB100MA_STARTUP - [5:4] */ +#define WM831X_USB_CURR_STS 0x0008 /* USB_CURR_STS */ +#define WM831X_USB_CURR_STS_MASK 0x0008 /* USB_CURR_STS */ +#define WM831X_USB_CURR_STS_SHIFT 3 /* USB_CURR_STS */ +#define WM831X_USB_CURR_STS_WIDTH 1 /* USB_CURR_STS */ +#define WM831X_USB_ILIM_MASK 0x0007 /* USB_ILIM - [2:0] */ +#define WM831X_USB_ILIM_SHIFT 0 /* USB_ILIM - [2:0] */ +#define WM831X_USB_ILIM_WIDTH 3 /* USB_ILIM - [2:0] */ + +/* + * R16397 (0x400D) - System Status + */ +#define WM831X_THW_STS 0x8000 /* THW_STS */ +#define WM831X_THW_STS_MASK 0x8000 /* THW_STS */ +#define WM831X_THW_STS_SHIFT 15 /* THW_STS */ +#define WM831X_THW_STS_WIDTH 1 /* THW_STS */ +#define WM831X_PWR_SRC_BATT 0x0400 /* PWR_SRC_BATT */ +#define WM831X_PWR_SRC_BATT_MASK 0x0400 /* PWR_SRC_BATT */ +#define WM831X_PWR_SRC_BATT_SHIFT 10 /* PWR_SRC_BATT */ +#define WM831X_PWR_SRC_BATT_WIDTH 1 /* PWR_SRC_BATT */ +#define WM831X_PWR_WALL 0x0200 /* PWR_WALL */ +#define WM831X_PWR_WALL_MASK 0x0200 /* PWR_WALL */ +#define WM831X_PWR_WALL_SHIFT 9 /* PWR_WALL */ +#define WM831X_PWR_WALL_WIDTH 1 /* PWR_WALL */ +#define WM831X_PWR_USB 0x0100 /* PWR_USB */ +#define WM831X_PWR_USB_MASK 0x0100 /* PWR_USB */ +#define WM831X_PWR_USB_SHIFT 8 /* PWR_USB */ +#define WM831X_PWR_USB_WIDTH 1 /* PWR_USB */ +#define WM831X_MAIN_STATE_MASK 0x001F /* MAIN_STATE - [4:0] */ +#define WM831X_MAIN_STATE_SHIFT 0 /* MAIN_STATE - [4:0] */ +#define WM831X_MAIN_STATE_WIDTH 5 /* MAIN_STATE - [4:0] */ + +/* + * R16456 (0x4048) - Charger Control 1 + */ +#define WM831X_CHG_ENA 0x8000 /* CHG_ENA */ +#define WM831X_CHG_ENA_MASK 0x8000 /* CHG_ENA */ +#define WM831X_CHG_ENA_SHIFT 15 /* CHG_ENA */ +#define WM831X_CHG_ENA_WIDTH 1 /* CHG_ENA */ +#define WM831X_CHG_FRC 0x4000 /* CHG_FRC */ +#define WM831X_CHG_FRC_MASK 0x4000 /* CHG_FRC */ +#define WM831X_CHG_FRC_SHIFT 14 /* CHG_FRC */ +#define WM831X_CHG_FRC_WIDTH 1 /* CHG_FRC */ +#define WM831X_CHG_ITERM_MASK 0x1C00 /* CHG_ITERM - [12:10] */ +#define WM831X_CHG_ITERM_SHIFT 10 /* CHG_ITERM - [12:10] */ +#define WM831X_CHG_ITERM_WIDTH 3 /* CHG_ITERM - [12:10] */ +#define WM831X_CHG_FAST 0x0020 /* CHG_FAST */ +#define WM831X_CHG_FAST_MASK 0x0020 /* CHG_FAST */ +#define WM831X_CHG_FAST_SHIFT 5 /* CHG_FAST */ +#define WM831X_CHG_FAST_WIDTH 1 /* CHG_FAST */ +#define WM831X_CHG_IMON_ENA 0x0002 /* CHG_IMON_ENA */ +#define WM831X_CHG_IMON_ENA_MASK 0x0002 /* CHG_IMON_ENA */ +#define WM831X_CHG_IMON_ENA_SHIFT 1 /* CHG_IMON_ENA */ +#define WM831X_CHG_IMON_ENA_WIDTH 1 /* CHG_IMON_ENA */ +#define WM831X_CHG_CHIP_TEMP_MON 0x0001 /* CHG_CHIP_TEMP_MON */ +#define WM831X_CHG_CHIP_TEMP_MON_MASK 0x0001 /* CHG_CHIP_TEMP_MON */ +#define WM831X_CHG_CHIP_TEMP_MON_SHIFT 0 /* CHG_CHIP_TEMP_MON */ +#define WM831X_CHG_CHIP_TEMP_MON_WIDTH 1 /* CHG_CHIP_TEMP_MON */ + +/* + * R16457 (0x4049) - Charger Control 2 + */ +#define WM831X_CHG_OFF_MSK 0x4000 /* CHG_OFF_MSK */ +#define WM831X_CHG_OFF_MSK_MASK 0x4000 /* CHG_OFF_MSK */ +#define WM831X_CHG_OFF_MSK_SHIFT 14 /* CHG_OFF_MSK */ +#define WM831X_CHG_OFF_MSK_WIDTH 1 /* CHG_OFF_MSK */ +#define WM831X_CHG_TIME_MASK 0x0F00 /* CHG_TIME - [11:8] */ +#define WM831X_CHG_TIME_SHIFT 8 /* CHG_TIME - [11:8] */ +#define WM831X_CHG_TIME_WIDTH 4 /* CHG_TIME - [11:8] */ +#define WM831X_CHG_TRKL_ILIM_MASK 0x00C0 /* CHG_TRKL_ILIM - [7:6] */ +#define WM831X_CHG_TRKL_ILIM_SHIFT 6 /* CHG_TRKL_ILIM - [7:6] */ +#define WM831X_CHG_TRKL_ILIM_WIDTH 2 /* CHG_TRKL_ILIM - [7:6] */ +#define WM831X_CHG_VSEL_MASK 0x0030 /* CHG_VSEL - [5:4] */ +#define WM831X_CHG_VSEL_SHIFT 4 /* CHG_VSEL - [5:4] */ +#define WM831X_CHG_VSEL_WIDTH 2 /* CHG_VSEL - [5:4] */ +#define WM831X_CHG_FAST_ILIM_MASK 0x000F /* CHG_FAST_ILIM - [3:0] */ +#define WM831X_CHG_FAST_ILIM_SHIFT 0 /* CHG_FAST_ILIM - [3:0] */ +#define WM831X_CHG_FAST_ILIM_WIDTH 4 /* CHG_FAST_ILIM - [3:0] */ + +/* + * R16458 (0x404A) - Charger Status + */ +#define WM831X_BATT_OV_STS 0x8000 /* BATT_OV_STS */ +#define WM831X_BATT_OV_STS_MASK 0x8000 /* BATT_OV_STS */ +#define WM831X_BATT_OV_STS_SHIFT 15 /* BATT_OV_STS */ +#define WM831X_BATT_OV_STS_WIDTH 1 /* BATT_OV_STS */ +#define WM831X_CHG_STATE_MASK 0x7000 /* CHG_STATE - [14:12] */ +#define WM831X_CHG_STATE_SHIFT 12 /* CHG_STATE - [14:12] */ +#define WM831X_CHG_STATE_WIDTH 3 /* CHG_STATE - [14:12] */ +#define WM831X_BATT_HOT_STS 0x0800 /* BATT_HOT_STS */ +#define WM831X_BATT_HOT_STS_MASK 0x0800 /* BATT_HOT_STS */ +#define WM831X_BATT_HOT_STS_SHIFT 11 /* BATT_HOT_STS */ +#define WM831X_BATT_HOT_STS_WIDTH 1 /* BATT_HOT_STS */ +#define WM831X_BATT_COLD_STS 0x0400 /* BATT_COLD_STS */ +#define WM831X_BATT_COLD_STS_MASK 0x0400 /* BATT_COLD_STS */ +#define WM831X_BATT_COLD_STS_SHIFT 10 /* BATT_COLD_STS */ +#define WM831X_BATT_COLD_STS_WIDTH 1 /* BATT_COLD_STS */ +#define WM831X_CHG_TOPOFF 0x0200 /* CHG_TOPOFF */ +#define WM831X_CHG_TOPOFF_MASK 0x0200 /* CHG_TOPOFF */ +#define WM831X_CHG_TOPOFF_SHIFT 9 /* CHG_TOPOFF */ +#define WM831X_CHG_TOPOFF_WIDTH 1 /* CHG_TOPOFF */ +#define WM831X_CHG_ACTIVE 0x0100 /* CHG_ACTIVE */ +#define WM831X_CHG_ACTIVE_MASK 0x0100 /* CHG_ACTIVE */ +#define WM831X_CHG_ACTIVE_SHIFT 8 /* CHG_ACTIVE */ +#define WM831X_CHG_ACTIVE_WIDTH 1 /* CHG_ACTIVE */ +#define WM831X_CHG_TIME_ELAPSED_MASK 0x00FF /* CHG_TIME_ELAPSED - [7:0] */ +#define WM831X_CHG_TIME_ELAPSED_SHIFT 0 /* CHG_TIME_ELAPSED - [7:0] */ +#define WM831X_CHG_TIME_ELAPSED_WIDTH 8 /* CHG_TIME_ELAPSED - [7:0] */ + +#define WM831X_CHG_STATE_OFF (0 << WM831X_CHG_STATE_SHIFT) +#define WM831X_CHG_STATE_TRICKLE (1 << WM831X_CHG_STATE_SHIFT) +#define WM831X_CHG_STATE_FAST (2 << WM831X_CHG_STATE_SHIFT) +#define WM831X_CHG_STATE_TRICKLE_OT (3 << WM831X_CHG_STATE_SHIFT) +#define WM831X_CHG_STATE_FAST_OT (4 << WM831X_CHG_STATE_SHIFT) +#define WM831X_CHG_STATE_DEFECTIVE (5 << WM831X_CHG_STATE_SHIFT) + +/* + * R16459 (0x404B) - Backup Charger Control + */ +#define WM831X_BKUP_CHG_ENA 0x8000 /* BKUP_CHG_ENA */ +#define WM831X_BKUP_CHG_ENA_MASK 0x8000 /* BKUP_CHG_ENA */ +#define WM831X_BKUP_CHG_ENA_SHIFT 15 /* BKUP_CHG_ENA */ +#define WM831X_BKUP_CHG_ENA_WIDTH 1 /* BKUP_CHG_ENA */ +#define WM831X_BKUP_CHG_STS 0x4000 /* BKUP_CHG_STS */ +#define WM831X_BKUP_CHG_STS_MASK 0x4000 /* BKUP_CHG_STS */ +#define WM831X_BKUP_CHG_STS_SHIFT 14 /* BKUP_CHG_STS */ +#define WM831X_BKUP_CHG_STS_WIDTH 1 /* BKUP_CHG_STS */ +#define WM831X_BKUP_CHG_MODE 0x1000 /* BKUP_CHG_MODE */ +#define WM831X_BKUP_CHG_MODE_MASK 0x1000 /* BKUP_CHG_MODE */ +#define WM831X_BKUP_CHG_MODE_SHIFT 12 /* BKUP_CHG_MODE */ +#define WM831X_BKUP_CHG_MODE_WIDTH 1 /* BKUP_CHG_MODE */ +#define WM831X_BKUP_BATT_DET_ENA 0x0800 /* BKUP_BATT_DET_ENA */ +#define WM831X_BKUP_BATT_DET_ENA_MASK 0x0800 /* BKUP_BATT_DET_ENA */ +#define WM831X_BKUP_BATT_DET_ENA_SHIFT 11 /* BKUP_BATT_DET_ENA */ +#define WM831X_BKUP_BATT_DET_ENA_WIDTH 1 /* BKUP_BATT_DET_ENA */ +#define WM831X_BKUP_BATT_STS 0x0400 /* BKUP_BATT_STS */ +#define WM831X_BKUP_BATT_STS_MASK 0x0400 /* BKUP_BATT_STS */ +#define WM831X_BKUP_BATT_STS_SHIFT 10 /* BKUP_BATT_STS */ +#define WM831X_BKUP_BATT_STS_WIDTH 1 /* BKUP_BATT_STS */ +#define WM831X_BKUP_CHG_VLIM 0x0010 /* BKUP_CHG_VLIM */ +#define WM831X_BKUP_CHG_VLIM_MASK 0x0010 /* BKUP_CHG_VLIM */ +#define WM831X_BKUP_CHG_VLIM_SHIFT 4 /* BKUP_CHG_VLIM */ +#define WM831X_BKUP_CHG_VLIM_WIDTH 1 /* BKUP_CHG_VLIM */ +#define WM831X_BKUP_CHG_ILIM_MASK 0x0003 /* BKUP_CHG_ILIM - [1:0] */ +#define WM831X_BKUP_CHG_ILIM_SHIFT 0 /* BKUP_CHG_ILIM - [1:0] */ +#define WM831X_BKUP_CHG_ILIM_WIDTH 2 /* BKUP_CHG_ILIM - [1:0] */ + +#endif -- cgit v1.2.3 From c746b5519a88b8803d43946ad06326ece4829116 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 5 Sep 2009 14:09:21 +0100 Subject: leds: Add WM831x status LED driver The WM831x devices feature two software controlled status LEDs with hardware assisted blinking. The device can also autonomously control the LEDs based on a selection of sources. This can be configured at boot time using either platform data or the chip OTP. A sysfs file in the style of that for triggers allowing the control source to be configured at run time. Triggers can't be used here since they can't depend on the implementation details of a specific LED type. Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/mfd/wm831x/status.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/mfd/wm831x/status.h (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/status.h b/include/linux/mfd/wm831x/status.h new file mode 100644 index 000000000000..6bc090d0e3ac --- /dev/null +++ b/include/linux/mfd/wm831x/status.h @@ -0,0 +1,34 @@ +/* + * include/linux/mfd/wm831x/status.h -- Status LEDs for WM831x + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM831X_STATUS_H__ +#define __MFD_WM831X_STATUS_H__ + +#define WM831X_LED_SRC_MASK 0xC000 /* LED_SRC - [15:14] */ +#define WM831X_LED_SRC_SHIFT 14 /* LED_SRC - [15:14] */ +#define WM831X_LED_SRC_WIDTH 2 /* LED_SRC - [15:14] */ +#define WM831X_LED_MODE_MASK 0x0300 /* LED_MODE - [9:8] */ +#define WM831X_LED_MODE_SHIFT 8 /* LED_MODE - [9:8] */ +#define WM831X_LED_MODE_WIDTH 2 /* LED_MODE - [9:8] */ +#define WM831X_LED_SEQ_LEN_MASK 0x0030 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_SEQ_LEN_SHIFT 4 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_SEQ_LEN_WIDTH 2 /* LED_SEQ_LEN - [5:4] */ +#define WM831X_LED_DUR_MASK 0x000C /* LED_DUR - [3:2] */ +#define WM831X_LED_DUR_SHIFT 2 /* LED_DUR - [3:2] */ +#define WM831X_LED_DUR_WIDTH 2 /* LED_DUR - [3:2] */ +#define WM831X_LED_DUTY_CYC_MASK 0x0003 /* LED_DUTY_CYC - [1:0] */ +#define WM831X_LED_DUTY_CYC_SHIFT 0 /* LED_DUTY_CYC - [1:0] */ +#define WM831X_LED_DUTY_CYC_WIDTH 2 /* LED_DUTY_CYC - [1:0] */ + +#endif -- cgit v1.2.3 From 5036cc41e07d6614350e329666ee8a79cea6f793 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 6 Aug 2009 16:07:10 -0700 Subject: backlight: spi driver for LMS283GF05 LCD ADd support for the SPI part of LMS283GF05 LCD. The LCD uses SPI for initialization and powerdown sequences. No further defails are specified in the datasheet about the initialization/powerdown sequence, just the magic numbers that have to be sent over SPI bus. This LCD can be found in the Aeronix Zipit Z2 handheld. Signed-off-by: Marek Vasut Signed-off-by: Andrew Morton Signed-off-by: Richard Purdie --- include/linux/spi/lms283gf05.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/spi/lms283gf05.h (limited to 'include/linux') diff --git a/include/linux/spi/lms283gf05.h b/include/linux/spi/lms283gf05.h new file mode 100644 index 000000000000..555d254e6606 --- /dev/null +++ b/include/linux/spi/lms283gf05.h @@ -0,0 +1,28 @@ +/* + * lms283gf05.h - Platform glue for Samsung LMS283GF05 LCD + * + * Copyright (C) 2009 Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef _INCLUDE_LINUX_SPI_LMS283GF05_H_ +#define _INCLUDE_LINUX_SPI_LMS283GF05_H_ + +struct lms283gf05_pdata { + unsigned long reset_gpio; + bool reset_inverted; +}; + +#endif /* _INCLUDE_LINUX_SPI_LMS283GF05_H_ */ -- cgit v1.2.3 From 0403e3827788d878163f9ef0541b748b0f88ca5d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:50 -0700 Subject: dmaengine: add fence support Some engines optimize operation by reading ahead in the descriptor chain such that descriptor2 may start execution before descriptor1 completes. If descriptor2 depends on the result from descriptor1 then a fence is required (on descriptor2) to disable this optimization. The async_tx api could implicitly identify dependencies via the 'depend_tx' parameter, but that would constrain cases where the dependency chain only specifies a completion order rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly identify data dependencies. Signed-off-by: Dan Williams --- include/linux/async_tx.h | 3 +++ include/linux/dmaengine.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 866e61c4e2e0..a1c486a88e88 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,11 +58,14 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), }; /** diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 1012f1abcb54..4d6c1c925fd4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -87,6 +87,8 @@ enum dma_transaction_type { * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as * sources that were the result of a previous operation, in the case of a PQ * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -98,6 +100,7 @@ enum dma_ctrl_flags { DMA_PREP_PQ_DISABLE_P = (1 << 6), DMA_PREP_PQ_DISABLE_Q = (1 << 7), DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** -- cgit v1.2.3 From 138f4c359d23d2ec38d18bd70dd9613ae515fe93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:51 -0700 Subject: dmaengine, async_tx: add a "no channel switch" allocator Channel switching is problematic for some dmaengine drivers as the architecture precludes separating the ->prep from ->submit. In these cases the driver can select ASYNC_TX_DISABLE_CHANNEL_SWITCH to modify the async_tx allocator to only return channels that support all of the required asynchronous operations. For example MD_RAID456=y selects support for asynchronous xor, xor validate, pq, pq validate, and memcpy. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=y any channel with all these capabilities is marked DMA_ASYNC_TX allowing async_tx_find_channel() to quickly locate compatible channels with the guarantee that dependency chains will remain on one channel. When ASYNC_TX_DISABLE_CHANNEL_SWITCH=n async_tx_find_channel() may select channels that lead to operation chains that need to cross channel boundaries using the async_tx channel switch capability. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4d6c1c925fd4..86853ed7970b 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,6 +48,9 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, @@ -61,6 +64,7 @@ enum dma_transaction_type { DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -396,7 +400,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -409,7 +417,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); -- cgit v1.2.3 From 9308add6ea4fedeba37b0d7c4630a542bd34f214 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:52 -0700 Subject: dmaengine: cleanup unused transaction types No drivers currently implement these operation types, so they can be deleted. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 86853ed7970b..db23fd583f98 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -56,12 +56,9 @@ enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, DMA_PQ, - DMA_DUAL_XOR, - DMA_PQ_UPDATE, DMA_XOR_VAL, DMA_PQ_VAL, DMA_MEMSET, - DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, DMA_ASYNC_TX, -- cgit v1.2.3 From 83544ae9f3991bfc7d5e0fe9a3008cd05a8d57b7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:53 -0700 Subject: dmaengine, async_tx: support alignment checks Some engines have transfer size and address alignment restrictions. Add a per-operation alignment property to struct dma_device that the async routines and dmatest can use to check alignment capabilities. Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db23fd583f98..835b9c7bf1c2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -245,6 +245,10 @@ struct dma_async_tx_descriptor { * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -271,6 +275,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -314,6 +322,42 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3 From b265b11fc1a0bd6ae5a7fde12e374583a52ab326 Mon Sep 17 00:00:00 2001 From: Tom Picard Date: Tue, 8 Sep 2009 17:43:01 -0700 Subject: ioat3: ioat3.2 pci ids for Jasper Forest Jasper Forest introduces raid offload support via ioat3.2 support. When raid offload is enabled two (out of 8 channels) will report raid5/raid6 offload capabilities. The remaining channels will only report ioat3.0 capabilities (memcpy). Signed-off-by: Tom Picard Signed-off-by: Dan Williams --- include/linux/pci_ids.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d3..2b4b8ce53256 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2529,6 +2529,16 @@ #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit v1.2.3 From 0803172778901e24a75ab074798d98c2b7411559 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: dmaengine: kill tx_list The tx_list attribute of struct dma_async_tx_descriptor is common to most, but not all dma driver implementations. None of the upper level code (dmaengine/async_tx) uses it, so allow drivers to implement it locally if they need it. This saves sizeof(struct list_head) bytes for drivers that do not manage descriptors with a linked list (e.g.: ioatdma v2,3). Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c818..f114bc7790bc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -180,8 +180,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param); * @flags: flags to augment operation preparation, control completion, and * communicate status * @phys: physical address of the descriptor - * @tx_list: driver common field for operations that require multiple - * descriptors * @chan: target channel for this operation * @tx_submit: set the prepared descriptor(s) to be executed by the engine * @callback: routine to call after this operation is complete @@ -195,7 +193,6 @@ struct dma_async_tx_descriptor { dma_cookie_t cookie; enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ dma_addr_t phys; - struct list_head tx_list; struct dma_chan *chan; dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; -- cgit v1.2.3 From 1a5aeeecd550ee4344cfba1791f1134739b16dc6 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 10 Sep 2009 15:05:58 +0200 Subject: dca: registering requesters in multiple dca domains This patch enables DCA support on multiple-IOH/multiple-IIO architectures. It modifies dca module by replacing single dca_providers list with dca_domains list, each domain containing separate list of providers. This approach lets dca driver manage multiple domains, i.e. sets of providers and requesters mapped back to the same PCI root complex device. The driver takes care to register each requester to a provider from the same domain. Signed-off-by: Dan Williams Signed-off-by: Maciej Sosnowski --- include/linux/dca.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0a..d27a7a05718d 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -20,6 +20,9 @@ */ #ifndef DCA_H #define DCA_H + +#include + /* DCA Provider API */ /* DCA Notifier Interface */ @@ -36,6 +39,12 @@ struct dca_provider { int id; }; +struct dca_domain { + struct list_head node; + struct list_head dca_providers; + struct pci_bus *pci_rc; +}; + struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); @@ -47,7 +56,7 @@ struct dca_ops { struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); void free_dca_provider(struct dca_provider *dca); int register_dca_provider(struct dca_provider *dca, struct device *dev); -void unregister_dca_provider(struct dca_provider *dca); +void unregister_dca_provider(struct dca_provider *dca, struct device *dev); static inline void *dca_priv(struct dca_provider *dca) { -- cgit v1.2.3 From 52a7a1cec88acdaf3f8b36a6b1fe904f6eca7ee5 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 10 Sep 2009 15:26:30 +0200 Subject: [ARM] pxafb: add accelerator ID for PXA3xx GCU Add ID 99 for PXA3xx frame buffers and report it in the pxa frame buffer conditionally, depending on a new flag in struct pxafb_mach_info. Signed-off-by: Daniel Mack Cc: linux-fbdev-devel@lists.sourceforge.net Cc: Dennis Oliver Kropp Cc: Sven Neumann Cc: Guennadi Liakhovetski Signed-off-by: Eric Miao --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index f847df9e99b6..a34bdf5a9d23 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -133,6 +133,7 @@ struct dentry; #define FB_ACCEL_NEOMAGIC_NM2230 96 /* NeoMagic NM2230 */ #define FB_ACCEL_NEOMAGIC_NM2360 97 /* NeoMagic NM2360 */ #define FB_ACCEL_NEOMAGIC_NM2380 98 /* NeoMagic NM2380 */ +#define FB_ACCEL_PXA3XX 99 /* PXA3xx */ #define FB_ACCEL_SAVAGE4 0x80 /* S3 Savage4 */ #define FB_ACCEL_SAVAGE3D 0x81 /* S3 Savage3D */ -- cgit v1.2.3 From d466f2fcb32cd97fd586bfa33f5dba3ac78aadb0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:03 +0200 Subject: HWPOISON: Add page flag for poisoned pages Hardware poisoned pages need special handling in the VM and shouldn't be touched again. This requires a new page flag. Define it here. The page flags wars seem to be over, so it shouldn't be a problem to get a new one. v2: Add TestSetHWPoison (suggested by Johannes Weiner) Acked-by: Christoph Lameter Signed-off-by: Andi Kleen --- include/linux/page-flags.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2b87acfc5f87..9bc5fd9fdbf6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -51,6 +51,9 @@ * PG_buddy is set to indicate that the page is free and in the buddy system * (see mm/page_alloc.c). * + * PG_hwpoison indicates that a page got corrupted in hardware and contains + * data with incorrect ECC bits that triggered a machine check. Accessing is + * not safe since it may cause another machine check. Don't touch! */ /* @@ -101,6 +104,9 @@ enum pageflags { #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ +#endif +#ifdef CONFIG_MEMORY_FAILURE + PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif __NR_PAGEFLAGS, @@ -263,6 +269,15 @@ PAGEFLAG(Uncached, uncached) PAGEFLAG_FALSE(Uncached) #endif +#ifdef CONFIG_MEMORY_FAILURE +PAGEFLAG(HWPoison, hwpoison) +TESTSETFLAG(HWPoison, hwpoison) +#define __PG_HWPOISON (1UL << PG_hwpoison) +#else +PAGEFLAG_FALSE(HWPoison) +#define __PG_HWPOISON 0 +#endif + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); @@ -387,7 +402,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) /* * Flags checked when a page is prepped for return by the page allocator. -- cgit v1.2.3 From 10be22dfe1e6ad978269dc275147e0ed049187bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:04 +0200 Subject: HWPOISON: Export some rmap vma locking to outside world Needed for later patch that walks rmap entries on its own. This used to be very frowned upon, but memory-failure.c does some rather specialized rmap walking and rmap has been stable for quite some time, so I think it's ok now to export it. Signed-off-by: Andi Kleen --- include/linux/rmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf116d0dbf23..8dff2ffab82c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -112,6 +112,12 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma(struct page *page); +void page_unlock_anon_vma(struct anon_vma *anon_vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) -- cgit v1.2.3 From a7420aa54dbf699a5a05feba3c859b6baaa3938c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:05 +0200 Subject: HWPOISON: Add support for poison swap entries v2 Memory migration uses special swap entry types to trigger special actions on page faults. Extend this mechanism to also support poisoned swap entries, to trigger poison handling on page faults. This allows follow-on patches to prevent processes from faulting in poisoned pages again. v2: Fix overflow in MAX_SWAPFILES (Fengguang Wu) v3: Better overflow fix (Hidehiro Kawai) Signed-off-by: Andi Kleen --- include/linux/swap.h | 34 ++++++++++++++++++++++++++++------ include/linux/swapops.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7c15334f3ff2..f077e454c659 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -34,15 +34,37 @@ static inline int current_is_kswapd(void) * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 -#ifndef CONFIG_MIGRATION -#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) + +/* + * Use some of the swap files numbers for other purposes. This + * is a convenient way to hook into the VM to trigger special + * actions on faults. + */ + +/* + * NUMA node memory migration support + */ +#ifdef CONFIG_MIGRATION +#define SWP_MIGRATION_NUM 2 +#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) +#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else -/* Use last two entries for page migration swap entries */ -#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) -#define SWP_MIGRATION_READ MAX_SWAPFILES -#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) +#define SWP_MIGRATION_NUM 0 #endif +/* + * Handling of hardware poisoned pages with memory corruption. + */ +#ifdef CONFIG_MEMORY_FAILURE +#define SWP_HWPOISON_NUM 1 +#define SWP_HWPOISON MAX_SWAPFILES +#else +#define SWP_HWPOISON_NUM 0 +#endif + +#define MAX_SWAPFILES \ + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + /* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4b..cd42e30b7c6e 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +static inline int non_swap_entry(swp_entry_t entry) +{ + return swp_type(entry) >= MAX_SWAPFILES; +} +#else +static inline int non_swap_entry(swp_entry_t entry) +{ + return 0; +} +#endif -- cgit v1.2.3 From d1737fdbec7f90edc52dd0c5c3767457f28e78d8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:06 +0200 Subject: HWPOISON: Add basic support for poisoned pages in fault handler v3 - Add a new VM_FAULT_HWPOISON error code to handle_mm_fault. Right now architectures have to explicitely enable poison page support, so this is forward compatible to all architectures. They only need to add it when they enable poison page support. - Add poison page handling in swap in fault code v2: Add missing delayacct_clear_flag (Hidehiro Kawai) v3: Really use delayacct_clear_flag (Hidehiro Kawai) Signed-off-by: Andi Kleen --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a72cc78e6b8..082b68cb5ffe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -685,11 +685,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. -- cgit v1.2.3 From 14fa31b89c5ae79e4131da41761378a6df674352 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:10 +0200 Subject: HWPOISON: Use bitmask/action code for try_to_unmap behaviour try_to_unmap currently has multiple modi (migration, munlock, normal unmap) which are selected by magic flag variables. The logic is not very straight forward, because each of these flag change multiple behaviours (e.g. migration turns off aging, not only sets up migration ptes etc.) Also the different flags interact in magic ways. A later patch in this series adds another mode to try_to_unmap, so this becomes quickly unmanageable. Replace the different flags with a action code (migration, munlock, munmap) and some additional flags as modifiers (ignore mlock, ignore aging). This makes the logic more straight forward and allows easier extension to new behaviours. Change all the caller to declare what they want to do. This patch is supposed to be a nop in behaviour. If anyone can prove it is not that would be a bug. Cc: Lee.Schermerhorn@hp.com Cc: npiggin@suse.de Signed-off-by: Andi Kleen --- include/linux/rmap.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 8dff2ffab82c..4c4a2d4d289e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -85,7 +85,18 @@ static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt, unsigned long *vm_flags); -int try_to_unmap(struct page *, int ignore_refs); +enum ttu_flags { + TTU_UNMAP = 0, /* unmap mode */ + TTU_MIGRATION = 1, /* migration mode */ + TTU_MUNLOCK = 2, /* munlock mode */ + TTU_ACTION_MASK = 0xff, + + TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ + TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ +}; +#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +int try_to_unmap(struct page *, enum ttu_flags flags); /* * Called from mm/filemap_xip.c to unmap empty zero page -- cgit v1.2.3 From 888b9f7c58ebe8303bad817cd554df887a683957 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:11 +0200 Subject: HWPOISON: Handle hardware poisoned pages in try_to_unmap When a page has the poison bit set replace the PTE with a poison entry. This causes the right error handling to be done later when a process runs into it. v2: add a new flag to not do that (needed for the memory-failure handler later) (Fengguang) v3: remove unnecessary is_migration_entry() test (Fengguang, Minchan) Reviewed-by: Minchan Kim Reviewed-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/rmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 4c4a2d4d289e..ce989f1fc2ed 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -93,6 +93,7 @@ enum ttu_flags { TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ + TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ }; #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) -- cgit v1.2.3 From 750b4987b0cd4d408e54cb83a80a067cbe690feb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 16 Sep 2009 11:50:12 +0200 Subject: HWPOISON: Refactor truncate to allow direct truncating of page v2 Extract out truncate_inode_page() out of the truncate path so that it can be used by memory-failure.c [AK: description, headers, fix typos] v2: Some white space changes from Fengguang Wu Signed-off-by: Andi Kleen --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 082b68cb5ffe..8cbc0aafd5bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -794,6 +794,8 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +int truncate_inode_page(struct address_space *mapping, struct page *page); + #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); -- cgit v1.2.3 From 83f786680aec8d030184f7ced1a0a3dd8ac81764 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Sep 2009 11:50:13 +0200 Subject: HWPOISON: Add invalidate_inode_page Add a simple way to invalidate a single page This is just a refactoring of the truncate.c code. Originally from Fengguang, modified by Andi Kleen. Signed-off-by: Andi Kleen --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8cbc0aafd5bd..b05bbde0296d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -796,6 +796,8 @@ extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); +int invalidate_inode_page(struct page *page); + #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); -- cgit v1.2.3 From 257187362123f15d9d1e09918cf87cebbea4e786 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:13 +0200 Subject: HWPOISON: Define a new error_remove_page address space op for async truncation Truncating metadata pages is not safe right now before we haven't audited all file systems. To enable truncation only for data address space define a new address_space callback error_remove_page. This is used for memory_failure.c memory error handling. This can be then set to truncate_inode_page() This patch just defines the new operation and adds documentation. Callers and users come in followon patches. Signed-off-by: Andi Kleen --- include/linux/fs.h | 1 + include/linux/mm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b21cf6b9c80b..4f47afd37647 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index b05bbde0296d..a16018f7d61c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -795,6 +795,7 @@ extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); -- cgit v1.2.3 From 4db96cf077aa938b11fe7ac79ecc9b29ec00fbab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:14 +0200 Subject: HWPOISON: Add PR_MCE_KILL prctl to control early kill behaviour per process This allows processes to override their early/late kill behaviour on hardware memory errors. Typically applications which are memory error aware is better of with early kill (see the error as soon as possible), all others with late kill (only see the error when the error is really impacting execution) There's a global sysctl, but this way an application can set its specific policy. We're using two bits, one to signify that the process stated its intention and that I also made the prctl future proof by enforcing the unused arguments are 0. The state is inherited to children. Note this makes us officially run out of process flags on 32bit, but the next patch can easily add another field. Manpage patch will be supplied separately. Signed-off-by: Andi Kleen --- include/linux/prctl.h | 2 ++ include/linux/sched.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index b00df4c79c63..3dc303197e67 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,4 +88,6 @@ #define PR_TASK_PERF_COUNTERS_DISABLE 31 #define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_MCE_KILL 33 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f3d74bd04d18..29eae73c951d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1687,6 +1687,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1706,6 +1707,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.3 From 6a46079cf57a7f7758e8b926980a4f852f89b34d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Sep 2009 11:50:15 +0200 Subject: HWPOISON: The high level memory error handler in the VM v7 Add the high level memory handler that poisons pages that got corrupted by hardware (typically by a two bit flip in a DIMM or a cache) on the Linux level. The goal is to prevent everyone from accessing these pages in the future. This done at the VM level by marking a page hwpoisoned and doing the appropriate action based on the type of page it is. The code that does this is portable and lives in mm/memory-failure.c To quote the overview comment: High level machine check handler. Handles pages reported by the hardware as being corrupted usually due to a 2bit ECC memory or cache failure. This focuses on pages detected as corrupted in the background. When the current CPU tries to consume corruption the currently running process can just be killed directly instead. This implies that if the error cannot be handled for some reason it's safe to just ignore it because no corruption has been consumed yet. Instead when that happens another machine check will happen. Handles page cache pages in various states. The tricky part here is that we can access any page asynchronous to other VM users, because memory failures could happen anytime and anywhere, possibly violating some of their assumptions. This is why this code has to be extremely careful. Generally it tries to use normal locking rules, as in get the standard locks, even if that means the error handling takes potentially a long time. Some of the operations here are somewhat inefficient and have non linear algorithmic complexity, because the data structures have not been optimized for this case. This is in particular the case for the mapping from a vma to a process. Since this case is expected to be rare we hope we can get away with this. There are in principle two strategies to kill processes on poison: - just unmap the data and wait for an actual reference before killing - kill as soon as corruption is detected. Both have advantages and disadvantages and should be used in different situations. Right now both are implemented and can be switched with a new sysctl vm.memory_failure_early_kill The default is early kill. The patch does some rmap data structure walking on its own to collect processes to kill. This is unusual because normally all rmap data structure knowledge is in rmap.c only. I put it here for now to keep everything together and rmap knowledge has been seeping out anyways Includes contributions from Johannes Weiner, Chris Mason, Fengguang Wu, Nick Piggin (who did a lot of great work) and others. Cc: npiggin@suse.de Cc: riel@redhat.com Signed-off-by: Andi Kleen Acked-by: Rik van Riel Reviewed-by: Hidehiro Kawai --- include/linux/mm.h | 7 +++++++ include/linux/rmap.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a16018f7d61c..1ffca03f34b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1309,5 +1309,12 @@ void vmemmap_populate_print_last(void); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern atomic_long_t mce_bad_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index ce989f1fc2ed..3c1004e50747 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -129,6 +129,7 @@ int try_to_munlock(struct page *); */ struct anon_vma *page_lock_anon_vma(struct page *page); void page_unlock_anon_vma(struct anon_vma *anon_vma); +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From e2d4304b7d2b85c45de89ec420037d6b9261a12d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 17 Sep 2009 09:40:31 -0700 Subject: PCI: fix VGA arbiter header file Remove reference to vgaarb.c and replace it with a comment about the arbiter itself. Reported-by: Tiago Vignatti Signed-off-by: Jesse Barnes --- include/linux/vgaarb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index e81c64af80c1..a1fa1da8dc1a 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,5 +1,6 @@ /* - * vgaarb.c + * The VGA aribiter manages VGA space routing and VGA resource decode to + * allow multiple VGA devices to be used in a system in a safe way. * * (C) Copyright 2005 Benjamin Herrenschmidt * (C) Copyright 2007 Paulo R. Zanoni -- cgit v1.2.3 From 181d683d752c432635eda0f182ee71548c1f1820 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 16 Sep 2009 01:06:43 -0700 Subject: Input: libps2 - additional locking for i8042 ports The serio ports on i8042 are not completely isolated; while we provide enough locking to ensure proper serialization when accessing control and data registers AUX and KBD ports can still have an effect on each other on PS/2 protocol level. The most prominent effect is that issuing a command for the device connected to one port may cause abort of the command currently executing by the device connected to another port. Since i8042 nor serio subsystem are not aware of the details of the PS/2 protocol (length of the commands and their replies and so on) the locking should be done on libps2 level by adding special handling when we see that we are dealing with serio port on i8042. Signed-off-by: Dmitry Torokhov --- include/linux/i8042.h | 30 ++++++++++++++++++++++++++++++ include/linux/libps2.h | 2 ++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 7907a72403ee..60c3360ef6ad 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -7,6 +7,7 @@ * the Free Software Foundation. */ +#include /* * Standard commands. @@ -30,6 +31,35 @@ #define I8042_CMD_MUX_PFX 0x0090 #define I8042_CMD_MUX_SEND 0x1090 +struct serio; + +#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE) + +void i8042_lock_chip(void); +void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); +bool i8042_check_port_owner(const struct serio *); + +#else + +void i8042_lock_chip(void) +{ +} + +void i8042_unlock_chip(void) +{ +} + +int i8042_command(unsigned char *param, int command) +{ + return -ENOSYS; +} + +bool i8042_check_port_owner(const struct serio *serio) +{ + return false; +} + +#endif #endif diff --git a/include/linux/libps2.h b/include/linux/libps2.h index fcf5fbe6a50c..79603a6c356f 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -44,6 +44,8 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +void ps2_begin_command(struct ps2dev *ps2dev); +void ps2_end_command(struct ps2dev *ps2dev); int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); -- cgit v1.2.3 From ffd0db97196c1057f09c2ab42dd5b30e94e511d9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 16 Sep 2009 01:06:43 -0700 Subject: Input: add generic suspend and resume for input devices Automatically turn off leds and sound effects as part of suspend process and restore led state, sounds and repeat rate at resume. Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 8b3bc3e0d146..0ccfc30cd40f 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1123,7 +1123,7 @@ struct input_dev { struct mutex mutex; unsigned int users; - int going_away; + bool going_away; struct device dev; -- cgit v1.2.3 From 38e783b38148531c0840ac130b97eb8158f84b48 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 17 Sep 2009 22:35:45 -0700 Subject: Input: add touchscreen driver for MELFAS MCS-5000 controller The MELPAS MCS-5000 is the touchscreen controller. The overview of this controller can see at the following website: http://www.melfas.com/product/product01.asp?k_r=eng_ This driver is tested on s3c6410 NCP board and supports only the i2c interface. Signed-off-by: Joonyoung Shim Signed-off-by: Dmitry Torokhov --- include/linux/i2c/mcs5000_ts.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/i2c/mcs5000_ts.h (limited to 'include/linux') diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h new file mode 100644 index 000000000000..5a117b5ca15e --- /dev/null +++ b/include/linux/i2c/mcs5000_ts.h @@ -0,0 +1,24 @@ +/* + * mcs5000_ts.h + * + * Copyright (C) 2009 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MCS5000_TS_H +#define __LINUX_MCS5000_TS_H + +/* platform data for the MELFAS MCS-5000 touchscreen driver */ +struct mcs5000_ts_platform_data { + void (*cfg_pin)(void); + int x_size; + int y_size; +}; + +#endif /* __LINUX_MCS5000_TS_H */ -- cgit v1.2.3 From 88751dd6ce1fb0627c36c4ab08a40730e5a50d3e Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Thu, 17 Sep 2009 22:39:38 -0700 Subject: Input: add driver for ADP5588 QWERTY I2C Keypad Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- include/linux/i2c/adp5588.h | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 include/linux/i2c/adp5588.h (limited to 'include/linux') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h new file mode 100644 index 000000000000..fc5db826b48e --- /dev/null +++ b/include/linux/i2c/adp5588.h @@ -0,0 +1,92 @@ +/* + * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef _ADP5588_H +#define _ADP5588_H + +#define DEV_ID 0x00 /* Device ID */ +#define CFG 0x01 /* Configuration Register1 */ +#define INT_STAT 0x02 /* Interrupt Status Register */ +#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ +#define Key_EVENTA 0x04 /* Key Event Register A */ +#define Key_EVENTB 0x05 /* Key Event Register B */ +#define Key_EVENTC 0x06 /* Key Event Register C */ +#define Key_EVENTD 0x07 /* Key Event Register D */ +#define Key_EVENTE 0x08 /* Key Event Register E */ +#define Key_EVENTF 0x09 /* Key Event Register F */ +#define Key_EVENTG 0x0A /* Key Event Register G */ +#define Key_EVENTH 0x0B /* Key Event Register H */ +#define Key_EVENTI 0x0C /* Key Event Register I */ +#define Key_EVENTJ 0x0D /* Key Event Register J */ +#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ +#define UNLOCK1 0x0F /* Unlock Key1 */ +#define UNLOCK2 0x10 /* Unlock Key2 */ +#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ +#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ +#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ +#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ +#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ +#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ +#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ +#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ +#define GPI_EM1 0x20 /* GPI Event Mode 1 */ +#define GPI_EM2 0x21 /* GPI Event Mode 2 */ +#define GPI_EM3 0x22 /* GPI Event Mode 3 */ +#define GPIO_DIR1 0x23 /* GPIO Data Direction */ +#define GPIO_DIR2 0x24 /* GPIO Data Direction */ +#define GPIO_DIR3 0x25 /* GPIO Data Direction */ +#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ +#define Debounce_DIS1 0x29 /* Debounce Disable */ +#define Debounce_DIS2 0x2A /* Debounce Disable */ +#define Debounce_DIS3 0x2B /* Debounce Disable */ +#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ +#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ +#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ +#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ +#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ +#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ +#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ +#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ +#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ +#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ +#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ +#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ +#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ +#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ +#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ +#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ +#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ +#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ + +#define ADP5588_DEVICE_ID_MASK 0xF + +/* Put one of these structures in i2c_board_info platform_data */ + +#define ADP5588_KEYMAPSIZE 80 + +struct adp5588_kpad_platform_data { + int rows; /* Number of rows */ + int cols; /* Number of columns */ + const unsigned short *keymap; /* Pointer to keymap */ + unsigned short keymapsize; /* Keymap size */ + unsigned repeat:1; /* Enable key repeat */ + unsigned en_keylock:1; /* Enable Key Lock feature */ + unsigned short unlock_key1; /* Unlock Key 1 */ + unsigned short unlock_key2; /* Unlock Key 2 */ +}; + +#endif -- cgit v1.2.3 From ee2b805c8eb6459cf541ef141ff70dae17af59ca Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 15 Aug 2009 15:12:05 +0100 Subject: ARM: 5678/1: SSP/SPI PL022 polarity terminology fix The definition of the SPI clock phase for the Motorola mode of the PL022 driver was incorrect: the spec had been interpreted as data being recieved on rising or falling edge of the clocks while the correct interpretation is that data can be recieved on the first or second edge transition, falling or rising depending on the polarity setting. Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/pl022.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h index dcad0ffd1755..e4836c6b3dd7 100644 --- a/include/linux/amba/pl022.h +++ b/include/linux/amba/pl022.h @@ -136,12 +136,12 @@ enum ssp_tx_level_trig { /** * enum SPI Clock Phase - clock phase (Motorola SPI interface only) - * @SSP_CLK_RISING_EDGE: Receive data on rising edge - * @SSP_CLK_FALLING_EDGE: Receive data on falling edge + * @SSP_CLK_FIRST_EDGE: Receive data on first edge transition (actual direction depends on polarity) + * @SSP_CLK_SECOND_EDGE: Receive data on second edge transition (actual direction depends on polarity) */ enum ssp_spi_clk_phase { - SSP_CLK_RISING_EDGE, - SSP_CLK_FALLING_EDGE + SSP_CLK_FIRST_EDGE, + SSP_CLK_SECOND_EDGE }; /** -- cgit v1.2.3 From 778dbcc1ebea6f9a560020110987449bf4607e5f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 18 Sep 2009 12:51:44 -0700 Subject: mtd: onenand: make onenand/generic.c more generic Remove the ARM dependency from the generic "onenand" platform device driver. This change makes the driver useful for other architectures as well. Needed for the SuperH kfr2r09 board. Apart from the obvious Kconfig bits, the most important change is the move away from ARM specific includes and platform data. Together with this change the only in-tree board code gets an update, and the driver name is also changed gracefully break potential out of tree drivers. The driver is also updated to allow NULL as platform data together with a few changes to make use of resource_size() and dev_name(). Signed-off-by: Magnus Damm Cc: Paul Mundt Cc: Tony Lindgren Cc: Kyungmin Park Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/onenand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 8ed873374381..4e49f3350678 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -214,4 +214,12 @@ unsigned onenand_block(struct onenand_chip *this, loff_t addr); loff_t onenand_addr(struct onenand_chip *this, int block); int flexonenand_region(struct mtd_info *mtd, loff_t addr); +struct mtd_partition; + +struct onenand_platform_data { + void (*mmcontrol)(struct mtd_info *mtd, int sync_read); + struct mtd_partition *parts; + unsigned int nr_parts; +}; + #endif /* __LINUX_MTD_ONENAND_H */ -- cgit v1.2.3 From 46a8cf2df2232c0051f29716ff8a166ebeb08daf Mon Sep 17 00:00:00 2001 From: Sneha Narnakaje Date: Fri, 18 Sep 2009 12:51:46 -0700 Subject: mtd: nand: add "page" parameter to all read_page/read_page_raw APIs This patch adds a new "page" parameter to all NAND read_page/read_page_raw APIs. The read_page API for the new mode ECC_HW_OOB_FIRST requires the page information to send the READOOB command and read the OOB area before the data area. Reviewed-by: David Brownell Signed-off-by: Sneha Narnakaje Signed-off-by: Sandeep Paulraj Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 4030ebada49e..686f3701f2f4 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -271,13 +271,13 @@ struct nand_ecc_ctrl { uint8_t *calc_ecc); int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf); + uint8_t *buf, int page); void (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf); int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf); + uint8_t *buf, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, -- cgit v1.2.3 From 6e0cb135b3f3713b95ea41a11155e83a8c70f5f8 Mon Sep 17 00:00:00 2001 From: Sneha Narnakaje Date: Fri, 18 Sep 2009 12:51:47 -0700 Subject: mtd: nand: add new ECC mode - ECC_HW_OOB_FIRST This patch adds the new mode NAND_ECC_HW_OOB_FIRST in the nand code to support 4-bit ECC on TI DaVinci devices with large page (up to 2KiB) NAND chips. This ECC mode is similar to NAND_ECC_HW, with the exception of read_page API that first reads the OOB area, reads the data in chunks, feeds the ECC from OOB area to the ECC hw engine and perform any correction on the data as per the ECC status reported by the engine. "ECC_HW_OOB_FIRST" name suggested by Thomas Gleixner Reviewed-by: David Brownell Signed-off-by: Sneha Narnakaje Signed-off-by: Sandeep Paulraj Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 686f3701f2f4..7a232a9bdd62 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -121,6 +121,7 @@ typedef enum { NAND_ECC_SOFT, NAND_ECC_HW, NAND_ECC_HW_SYNDROME, + NAND_ECC_HW_OOB_FIRST, } nand_ecc_modes_t; /* -- cgit v1.2.3 From be2f092bfc4f6a415bb4c3e2dcbf521a1f2a0fe5 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Sat, 5 Sep 2009 01:20:43 +0900 Subject: mtd: nand: add __nand_correct_data helper function Split nand_correct_data() into two part, a pure calculation function and a wrapper for mtd interface. The tmio_nand driver can implement its ecc.correct function easily using this __nand_correct_data helper. Signed-off-by: Atsushi Nemoto Acked-by: Dmitry Eremin-Solenikov Acked-by: Vimal Singh Signed-off-by: David Woodhouse --- include/linux/mtd/nand_ecc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h index 090da505425d..052ea8ca2434 100644 --- a/include/linux/mtd/nand_ecc.h +++ b/include/linux/mtd/nand_ecc.h @@ -20,6 +20,12 @@ struct mtd_info; */ int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code); +/* + * Detect and correct a 1 bit error for eccsize byte block + */ +int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc, + unsigned int eccsize); + /* * Detect and correct a 1 bit error for 256 byte block */ -- cgit v1.2.3 From 42f29a25207dc7b3051d299cc028d4b395d1328d Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 20 Sep 2009 18:14:12 -0400 Subject: kbuild: Don't define ALIGN and ENTRY when preprocessing linker scripts. Adding a reference to to x86's causes the x86 linker script to have syntax errors, because the ALIGN and ENTRY keywords get redefined to the assembly implementations of those. One could fix this by adjusting the include structure, but I think any solution based on that approach would be fragile. Currently, it is impossible when writing a header to do something different for assembly files and linker scripts, even though there are clearly cases where one wants them to define macros differently for the two (ENTRY being an excellent example). So I think the right solution here is to introduce a new preprocessor definition, called LINKER_SCRIPT that is set along with __ASSEMBLY__ for linker scripts, and to use that to not define ALIGN and ENTRY in linker scripts. I suspect we'll find other uses for this mechanism in the future. Signed-off-by: Tim Abbott Signed-off-by: Sam Ravnborg --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 691f59171c6c..5126cceb6ae9 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -57,6 +57,7 @@ #ifdef __ASSEMBLY__ +#ifndef LINKER_SCRIPT #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR @@ -66,6 +67,7 @@ ALIGN; \ name: #endif +#endif /* LINKER_SCRIPT */ #ifndef WEAK #define WEAK(name) \ -- cgit v1.2.3 From 325253a6b2de4bdfa9ef0e28b5df8a4a4fe2b677 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 14 Jul 2009 17:06:02 +0100 Subject: backlight: Allow drivers to update the core, and generate events on changes Certain hardware will send us events when the backlight brightness changes. Add a function to update the value in the core, and additionally send a uevent so that userspace can pop up appropriate UI. The uevents are flagged depending on whether the update originated in the kernel or from userspace, making it easier to only display UI at the appropriate time. Signed-off-by: Matthew Garrett Signed-off-by: Richard Purdie --- include/linux/backlight.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 79ca2da81c87..0f5f57858a23 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -27,6 +27,11 @@ * Any other use of the locks below is probably wrong. */ +enum backlight_update_reason { + BACKLIGHT_UPDATE_HOTKEY, + BACKLIGHT_UPDATE_SYSFS, +}; + struct backlight_device; struct fb_info; @@ -100,6 +105,8 @@ static inline void backlight_update_status(struct backlight_device *bd) extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, struct backlight_ops *ops); extern void backlight_device_unregister(struct backlight_device *bd); +extern void backlight_force_update(struct backlight_device *bd, + enum backlight_update_reason reason); #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) -- cgit v1.2.3 From a6f10a2f5d8c2738b3ac05974bdbea3b68a2aecd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 22 Sep 2009 22:34:24 +1000 Subject: perf_event: Update PERF_EVENT_FORK header definition PERF_EVENT_FORK always outputs the time field, so update the header to reflect this. Signed-off-by: Anton Blanchard Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090922123424.GD19453@kryten> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 +- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 368bd70f1d2d..7b7fbf433cff 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -361,7 +361,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid, ppid; * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME + * u64 time; * }; */ PERF_EVENT_FORK = 7, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index acefaf71e6dd..3a9d36d1e92a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -357,7 +357,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid, ppid; * u32 tid, ptid; - * { u64 time; } && PERF_SAMPLE_TIME + * u64 time; * }; */ PERF_RECORD_FORK = 7, -- cgit v1.2.3 From 6ef297f86b62f187c59475784208f75c2ed8ccd8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Sep 2009 14:29:36 +0100 Subject: ARM: 5720/1: Move MMCI header to amba include dir This moves the mmci platform data definition struct away from arch/arm/include/asm/mach/mmc.h into the more proper place among the other primecells in include/linux/amba/mmci.h and at the same time renames it to "mmci.h", and also the struct in this file confusingly named mmc_platform_data has been renamed mmci_platform_data for clarity. Cc: Catalin Marinas Signed-off-by: Linus Walleij Signed-off-by: Russell King --- include/linux/amba/mmci.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/amba/mmci.h (limited to 'include/linux') diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h new file mode 100644 index 000000000000..6b4241748dda --- /dev/null +++ b/include/linux/amba/mmci.h @@ -0,0 +1,18 @@ +/* + * include/linux/amba/mmci.h + */ +#ifndef AMBA_MMCI_H +#define AMBA_MMCI_H + +#include + +struct mmci_platform_data { + unsigned int ocr_mask; /* available voltages */ + u32 (*translate_vdd)(struct device *, unsigned int); + unsigned int (*status)(struct device *); + int gpio_wp; + int gpio_cd; + unsigned long capabilities; +}; + +#endif -- cgit v1.2.3 From 8cd09a5984c08dafade74c9c1ab4311af2bf2d24 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Tue, 22 Sep 2009 18:00:44 +0800 Subject: tracing: Fix a comment and a trivial format issue in tracepoint.h Fix the tracepoint documentation path in tracepoints headers and a misaligned tabulation. Signed-off-by: Li Hong Cc: Steven Rostedt Cc: Ingo Molnar LKML-Reference: <3a3680030909220300h7cf18849q4d4702b9d4feaa67@mail.gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a80580..2aac8a83e89b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers * @@ -36,7 +36,7 @@ struct tracepoint { #ifndef DECLARE_TRACE #define TP_PROTO(args...) args -#define TP_ARGS(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS -- cgit v1.2.3 From ec4756238239f1a331d9fb95bad8b281dad56855 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 22 Sep 2009 04:00:27 +0000 Subject: smsc95xx: fix transmission where ZLP is expected Usbnet framework assumes USB hardware doesn't handle zero length packets, but SMSC LAN95xx requires these to be sent for correct operation. This patch fixes an easily reproducible tx lockup when sending a frame that results in exactly 512 bytes in a USB transmission (e.g. a UDP frame with 458 data bytes, due to IP headers and our USB headers). It adds an extra flag to usbnet for the hardware driver to indicate that it can handle and requires the zero length packets. This patch should not affect other usbnet users, please also consider for -stable. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index bb69e256cd16..f81473052059 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -89,6 +89,7 @@ struct driver_info { #define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ #define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ +#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */ /* init device ... can sleep, or cause probe() failure */ -- cgit v1.2.3 From 7c329288d72e025db4feac65f0fed95fb3e3ef1c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 23 Sep 2009 09:52:18 +1000 Subject: vgaarb: make client interface config invariant. Fixes build when VGA_ARB is off. Reported-by: Ingo Molnar Signed-off-by: Ingo Molnar Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index e81c64af80c1..923f9040ea20 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -41,7 +41,7 @@ * interrupts at any time. */ extern void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes); + unsigned int decodes); /** * vga_get - acquire & locks VGA resources @@ -193,8 +193,17 @@ static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2) * They driver will get a callback when VGA arbitration is first used * by userspace since we some older X servers have issues. */ +#if defined(CONFIG_VGA_ARB) int vga_client_register(struct pci_dev *pdev, void *cookie, void (*irq_set_state)(void *cookie, bool state), unsigned int (*set_vga_decode)(void *cookie, bool state)); +#else +static inline int vga_client_register(struct pci_dev *pdev, void *cookie, + void (*irq_set_state)(void *cookie, bool state), + unsigned int (*set_vga_decode)(void *cookie, bool state)) +{ + return 0; +} +#endif #endif /* LINUX_VGA_H */ -- cgit v1.2.3 From fc2219d49ef1606e7fd2c88af2b423b01ff3d319 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:41 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett These issues identified during an old-fashioned face-to-face code review extended over many hours. o Bury various forms of the "rsp->completed == rsp->gpnum" comparison into an rcu_gp_in_progress() function, which has the beneficial side-effect of forcing consistent use of ACCESS_ONCE(). o Replace hand-coded arithmetic with DIV_ROUND_UP(). o Bury several "!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x01])" instances into an rcu_preempted_readers() function, as this expression indicates that there are no readers blocked within RCU read-side critical sections blocking the current grace period. (Though there might well be similar readers blocking the next grace period.) o Remove a dangling rcu_restart_cpu() declaration that has been dangling for almost 20 minor releases of the kernel. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246442687-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 37682770e9d2..88109c87f29c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -85,7 +85,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); -- cgit v1.2.3 From 1eba8f84380bede3c602bd7758dea96925cead01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:42 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett, part 2 These issues identified during an old-fashioned face-to-face code review extending over many hours. o Add comments for tricky parts of code, and correct comments that have passed their sell-by date. o Get rid of the vestiges of rcu_init_sched(), which is no longer needed now that PREEMPT_RCU is gone. o Move the #include of rcutree_plugin.h to the end of rcutree.c, which means that, rather than having a random collection of forward declarations, the new set of forward declarations document the set of plugins. The new home for this #include also allows __rcu_init_preempt() to move into rcutree_plugin.h. o Fix rcu_preempt_check_callbacks() to be static. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246443924-git-send-email-> Signed-off-by: Ingo Molnar Peter Zijlstra --- include/linux/rcupdate.h | 4 ++++ include/linux/rcutree.h | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6fe0363724e9..70331218e4b4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -196,6 +196,8 @@ static inline void rcu_read_lock_sched(void) __acquire(RCU_SCHED); rcu_read_acquire(); } + +/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); @@ -213,6 +215,8 @@ static inline void rcu_read_unlock_sched(void) __release(RCU_SCHED); preempt_enable(); } + +/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ static inline notrace void rcu_read_unlock_sched_notrace(void) { __release(RCU_SCHED); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 88109c87f29c..19a3b06943e0 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -90,10 +90,6 @@ extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); -static inline void rcu_init_sched(void) -{ -} - #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); void rcu_exit_nohz(void); @@ -106,7 +102,7 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ -/* A context switch is a grace period for rcutree. */ +/* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { return num_online_cpus() == 1; -- cgit v1.2.3 From 9b2619aff0332e95ea5eb7a0d75b0208818d871c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Sep 2009 09:50:43 -0700 Subject: rcu: Clean up code to address Ingo's checkpatch feedback Move declarations and update storage classes to make checkpatch happy. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12537246441701-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 19a3b06943e0..46e9ab3ee6e1 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,10 +30,14 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H +struct notifier_block; + extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); - +extern int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); +extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3 From 74908a0009eb36054190ab80deb9671014efed96 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 17 Sep 2009 17:47:12 -0700 Subject: include/linux/cred.h: fix build mips allmodconfig: include/linux/cred.h: In function `creds_are_invalid': include/linux/cred.h:187: error: `PAGE_SIZE' undeclared (first use in this function) include/linux/cred.h:187: error: (Each undeclared identifier is reported only once include/linux/cred.h:187: error: for each function it appears in.) Fixes commit b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Author: David Howells AuthorDate: Fri Nov 14 10:39:16 2008 +1100 Commit: James Morris CommitDate: Fri Nov 14 10:39:16 2008 +1100 CRED: Separate task security context from task_struct I think. It's way too large to be inlined anyway. Dunno if this needs an EXPORT_SYMBOL() yet. Cc: David Howells Cc: James Morris Cc: Serge Hallyn Signed-off-by: Andrew Morton Acked-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index fb371601a3b4..4e3387a89cb9 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,23 +176,7 @@ extern void __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); -static inline bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers)) - return true; -#ifdef CONFIG_SECURITY_SELINUX - if (selinux_is_enabled()) { - if ((unsigned long) cred->security < PAGE_SIZE) - return true; - if ((*(u32 *)cred->security & 0xffffff00) == - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)) - return true; - } -#endif - return false; -} +extern bool creds_are_invalid(const struct cred *cred); static inline void __validate_creds(const struct cred *cred, const char *file, unsigned line) -- cgit v1.2.3 From 1b9894f342a39601bb0420b7b8c7e445670c1b51 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Sep 2009 11:12:03 -0700 Subject: serial core: fix new kernel-doc warnings Fix new kernel-doc warnings in serial_core.[hc] files. Warning(include/linux/serial_core.h:485): No description found for parameter 'uport' Warning(include/linux/serial_core.h:485): Excess function parameter 'port' description in 'uart_handle_dcd_change' Warning(include/linux/serial_core.h:511): No description found for parameter 'uport' Warning(include/linux/serial_core.h:511): Excess function parameter 'port' description in 'uart_handle_cts_change' Warning(drivers/serial/serial_core.c:2437): No description found for parameter 'uport' Warning(drivers/serial/serial_core.c:2437): Excess function parameter 'port' description in 'uart_add_one_port' Warning(drivers/serial/serial_core.c:2509): No description found for parameter 'uport' Warning(drivers/serial/serial_core.c:2509): Excess function parameter 'port' description in 'uart_remove_one_port' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d58e460844dd..fe661afe0713 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -477,7 +477,7 @@ static inline int uart_handle_break(struct uart_port *port) /** * uart_handle_dcd_change - handle a change of carrier detect state - * @port: uart_port structure for the open port + * @uport: uart_port structure for the open port * @status: new carrier detect status, nonzero if active */ static inline void @@ -503,7 +503,7 @@ uart_handle_dcd_change(struct uart_port *uport, unsigned int status) /** * uart_handle_cts_change - handle a change of clear-to-send state - * @port: uart_port structure for the open port + * @uport: uart_port structure for the open port * @status: new clear to send status, nonzero if active */ static inline void -- cgit v1.2.3 From 97363c6a4f93a20380b4a9e11f35e27fed68a517 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 23 Sep 2009 14:36:38 -0400 Subject: sunrpc: xdr_xcode_hyper helpers cannot presume 64-bit alignment Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 7da466ba4b0d..f5cc0898bc53 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -11,6 +11,7 @@ #include #include +#include #include /* @@ -117,14 +118,14 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *(__be64 *)p = cpu_to_be64(val); + put_unaligned_be64(val, p); return p + 2; } static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { - *valp = be64_to_cpup((__be64 *)p); + *valp = get_unaligned_be64(p); return p + 2; } -- cgit v1.2.3 From a0219d948dd712561817b0d7c95fd2f10b698203 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:35 -0600 Subject: cpumask: remove dangerous CPU_MASK_ALL_PTR (Thanks to Al Viro for reminding me of this, via Ingo) CPU_MASK_ALL is the (deprecated) "all bits set" cpumask, defined as so: #define CPU_MASK_ALL (cpumask_t) { { ... } } Taking the address of such a temporary is questionable at best, unfortunately 321a8e9d (cpumask: add CPU_MASK_ALL_PTR macro) added CPU_MASK_ALL_PTR: #define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) Which formalizes this practice. One day gcc could bite us over this usage (though we seem to have gotten away with it so far). Now all callers are removed, we kill it. Signed-off-by: Rusty Russell Acked-by: Ingo Molnar Reported-by: Al Viro Cc: Mike Travis --- include/linux/cpumask.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9b1d458aac6e..c0ab3588129d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -324,8 +324,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } -#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) - #else #define CPU_MASK_ALL \ @@ -336,7 +334,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) /* cpu_mask_all is in init/main.c */ extern cpumask_t cpu_mask_all; -#define CPU_MASK_ALL_PTR (&cpu_mask_all) #endif -- cgit v1.2.3 From 72d78d05cbaa69f2a32f5f9d65a4551ba0da571f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:36 -0600 Subject: cpumask: remove unused cpu_mask_all It's only defined for NR_CPUS > BITS_PER_LONG; cpu_all_mask is always defined (and const). Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c0ab3588129d..dbb8367ecf56 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -332,9 +332,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } -/* cpu_mask_all is in init/main.c */ -extern cpumask_t cpu_mask_all; - #endif #define CPU_MASK_NONE \ -- cgit v1.2.3 From ef79f8e191722dbc1fc33bdfc448f572266c37e9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:37 -0600 Subject: cpumask: remove unused mask field from struct irqaction. Up until 1.1.83, the primitive human tribes used struct sigaction for interrupts. The sa_mask field was overloaded to hold a pointer to the name. When someone created the new "struct irqaction" they carried across the "mask" field as a kind of ancestor worship: the fact that it was unused makes clear its spiritual significance. Signed-off-by: Rusty Russell --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8e9e151f811e..894ed7180bff 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -97,7 +97,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - cpumask_t mask; const char *name; void *dev_id; struct irqaction *next; -- cgit v1.2.3 From 144e2ce6115c0a1ee4cb5c935360ea4e2966b0ce Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Mon, 15 Jun 2009 12:16:54 +0900 Subject: cpumask: Remove mask field from comments By 7be23e278f, mask field was deleted by irqaction. However, it was not deleted from comment. Signed-off-by: Nobuhiro Iwamatsu CC: Rusty Russell Signed-off-by: Rusty Russell --- include/linux/interrupt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 894ed7180bff..b78cf8194957 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -84,7 +84,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @flags: flags (see IRQF_* above) - * @mask: no comment as it is useless and about to be removed * @name: name of the device * @dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts -- cgit v1.2.3 From e0ad955680878998ff7dc51ce06ddad12260423a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:38 -0600 Subject: cpumask: don't define set_cpus_allowed() if CONFIG_CPUMASK_OFFSTACK=y You're not supposed to pass cpumasks on the stack in that case. Signed-off-by: Rusty Russell --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbf2a3b46280..848d1f20086e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1817,10 +1817,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return 0; } #endif + +#ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { return set_cpus_allowed_ptr(p, &new_mask); } +#endif /* * Architectures can set this to 1 if they have specified -- cgit v1.2.3 From fe71a3c7dc8cfe0f239c04b4fc6501f4aa56aa0a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:40 -0600 Subject: cpumask: remove the deprecated smp_call_function_mask() Everyone is now using smp_call_function_many(). Signed-off-by: Rusty Russell --- include/linux/smp.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 9e3d8af09207..39c64bae776d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -73,15 +73,6 @@ int smp_call_function(void(*func)(void *info), void *info, int wait); void smp_call_function_many(const struct cpumask *mask, void (*func)(void *info), void *info, bool wait); -/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */ -static inline int -smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, - int wait) -{ - smp_call_function_many(&mask, func, info, wait); - return 0; -} - void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); @@ -144,8 +135,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_mask(mask, func, info, wait) \ - (up_smp_call_function(func, info)) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void init_call_single_data(void) -- cgit v1.2.3 From 6f401420e2822c24c36e6e1c657f6e7f7f777a93 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:40 -0600 Subject: cpumask: remove obsolete topology_core_siblings and topology_thread_siblings: core There were replaced by topology_core_cpumask and topology_thread_cpumask. Signed-off-by: Rusty Russell --- include/linux/topology.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 809b26c07090..fc0bf3edeb67 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,12 +211,6 @@ int arch_update_cpu_topology(void); #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_siblings -#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu) -#endif -#ifndef topology_core_siblings -#define topology_core_siblings(cpu) cpumask_of_cpu(cpu) -#endif #ifndef topology_thread_cpumask #define topology_thread_cpumask(cpu) cpumask_of(cpu) #endif -- cgit v1.2.3 From 4b805b17382c11a8b1c9bb8053ce9d1dcde0701a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:52 -0600 Subject: cpumask: remove unused deprecated functions, avoid accusations of insanity We're not forcing removal of the old cpu_ functions, but we might as well delete the now-unused ones. Especially CPUMASK_ALLOC and friends. I actually got a phone call (!) from a hacker who thought I had introduced them as the new cpumask API. He seemed bewildered that I had lost all taste. Signed-off-by: Rusty Russell Cc: benh@kernel.crashing.org --- include/linux/cpumask.h | 112 +----------------------------------------------- 1 file changed, 1 insertion(+), 111 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index dbb8367ecf56..e162d13c65ab 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -15,10 +15,6 @@ * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of cpulist_scnprintf() and cpulist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c - * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. - * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. - * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. * * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . * Note: The alternate operations with the suffix "_nr" are used @@ -47,22 +43,17 @@ * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 - * void cpus_complement(dst, src) dst = ~src * * int cpus_equal(mask1, mask2) Does mask1 == mask2? * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? * int cpus_empty(mask) Is mask empty (no bits sets)? - * int cpus_full(mask) Is mask full (all bits sets)? * int cpus_weight(mask) Hamming weigh - number of set bits - * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS * - * void cpus_shift_right(dst, src, n) Shift right * void cpus_shift_left(dst, src, n) Shift left * * int first_cpu(mask) Number lowest set bit, or NR_CPUS * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS - * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set * (can be used as an lvalue) @@ -70,45 +61,10 @@ * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * - * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t - * variables, and CPUMASK_PTR provides pointers to each field. - * - * The structure should be defined something like this: - * struct my_cpumasks { - * cpumask_t mask1; - * cpumask_t mask2; - * }; - * - * Usage is then: - * CPUMASK_ALLOC(my_cpumasks); - * CPUMASK_PTR(mask1, my_cpumasks); - * CPUMASK_PTR(mask2, my_cpumasks); - * - * --- DO NOT reference cpumask_t pointers until this check --- - * if (my_cpumasks == NULL) - * "kmalloc failed"... - * - * References are now pointers to the cpumask_t variables (*mask1, ...) - * - *if NR_CPUS > BITS_PER_LONG - * CPUMASK_ALLOC(m) Declares and allocates struct m *m = - * kmalloc(sizeof(*m), GFP_KERNEL) - * CPUMASK_FREE(m) Macro for kfree(m) - *else - * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m - * CPUMASK_FREE(m) Nop - *endif - * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v) - * ------------------------------------------------------------------------ - * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist - * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) - * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src) - * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap - * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids @@ -142,7 +98,6 @@ #include typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -extern cpumask_t _unused_cpumask_arg_; #ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -207,13 +162,6 @@ static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } -#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) -static inline void __cpus_complement(cpumask_t *dstp, - const cpumask_t *srcp, int nbits) -{ - bitmap_complement(dstp->bits, srcp->bits, nbits); -} - #define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) static inline int __cpus_equal(const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -241,26 +189,12 @@ static inline int __cpus_empty(const cpumask_t *srcp, int nbits) return bitmap_empty(srcp->bits, nbits); } -#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) -static inline int __cpus_full(const cpumask_t *srcp, int nbits) -{ - return bitmap_full(srcp->bits, nbits); -} - #define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) static inline int __cpus_weight(const cpumask_t *srcp, int nbits) { return bitmap_weight(srcp->bits, nbits); } -#define cpus_shift_right(dst, src, n) \ - __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_right(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); -} - #define cpus_shift_left(dst, src, n) \ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) static inline void __cpus_shift_left(cpumask_t *dstp, @@ -346,46 +280,6 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) #define cpus_addr(src) ((src).bits) -#if NR_CPUS > BITS_PER_LONG -#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL) -#define CPUMASK_FREE(m) kfree(m) -#else -#define CPUMASK_ALLOC(m) struct m _m, *m = &_m -#define CPUMASK_FREE(m) -#endif -#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) - -#define cpu_remap(oldbit, old, new) \ - __cpu_remap((oldbit), &(old), &(new), NR_CPUS) -static inline int __cpu_remap(int oldbit, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); -} - -#define cpus_remap(dst, src, old, new) \ - __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) -static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); -} - -#define cpus_onto(dst, orig, relmap) \ - __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS) -static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp, - const cpumask_t *relmapp, int nbits) -{ - bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); -} - -#define cpus_fold(dst, orig, sz) \ - __cpus_fold(&(dst), &(orig), sz, NR_CPUS) -static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, - int sz, int nbits) -{ - bitmap_fold(dstp->bits, origp->bits, sz, nbits); -} #endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #if NR_CPUS == 1 @@ -419,18 +313,14 @@ int __any_online_cpu(const cpumask_t *mask); #ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS #if NR_CPUS <= 64 -#define next_cpu_nr(n, src) next_cpu(n, src) -#define cpus_weight_nr(cpumask) cpus_weight(cpumask) #define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) #else /* NR_CPUS > 64 */ int __next_cpu_nr(int n, const cpumask_t *srcp); -#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) -#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) #define for_each_cpu_mask_nr(cpu, mask) \ for ((cpu) = -1; \ - (cpu) = next_cpu_nr((cpu), (mask)), \ + (cpu) = __next_cpu_nr((cpu), &(mask)), \ (cpu) < nr_cpu_ids; ) #endif /* NR_CPUS > 64 */ -- cgit v1.2.3 From 6ba2ef7baac23a5d9bb85e28b882d16b439a2293 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:34:53 -0600 Subject: cpumask: Move deprecated functions to end of header. The new ones have pretty kerneldoc. Move the old ones to the end to avoid confusing people. Signed-off-by: Rusty Russell Cc: benh@kernel.crashing.org --- include/linux/cpumask.h | 593 ++++++++++++++++++++---------------------------- 1 file changed, 252 insertions(+), 341 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e162d13c65ab..789cf5f920ce 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -3,328 +3,37 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. - * - * The new cpumask_ ops take a "struct cpumask *"; the old ones - * use cpumask_t. - * - * See detailed comments in the file linux/bitmap.h describing the - * data type on which these cpumasks are based. - * - * For details of cpumask_scnprintf() and cpumask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of cpulist_scnprintf() and cpulist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Note: The alternate operations with the suffix "_nr" are used - * to limit the range of the loop to nr_cpu_ids instead of - * NR_CPUS when NR_CPUS > 64 for performance reasons. - * If NR_CPUS is <= 64 then most assembler bitmask - * operators execute faster with a constant range, so - * the operator will continue to use NR_CPUS. - * - * Another consideration is that nr_cpu_ids is initialized - * to NR_CPUS and isn't lowered until the possible cpus are - * discovered (including any disabled cpus). So early uses - * will span the entire range of NR_CPUS. - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * - * The obsolescent cpumask operations are: - * - * void cpu_set(cpu, mask) turn on bit 'cpu' in mask - * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask - * void cpus_setall(mask) set all bits - * void cpus_clear(mask) clear all bits - * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask - * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask - * - * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] - * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] - * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 - * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 - * - * int cpus_equal(mask1, mask2) Does mask1 == mask2? - * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? - * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? - * int cpus_empty(mask) Is mask empty (no bits sets)? - * int cpus_weight(mask) Hamming weigh - number of set bits - * - * void cpus_shift_left(dst, src, n) Shift left - * - * int first_cpu(mask) Number lowest set bit, or NR_CPUS - * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS - * - * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - * (can be used as an lvalue) - * CPU_MASK_ALL Initializer - all bits set - * CPU_MASK_NONE Initializer - no bits set - * unsigned long *cpus_addr(mask) Array of unsigned long's in mask - * - * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask - * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing - * int cpulist_parse(buf, map) Parse ascii string as cpulist - * - * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS - * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids - * - * int num_online_cpus() Number of online CPUs - * int num_possible_cpus() Number of all possible CPUs - * int num_present_cpus() Number of present CPUs - * - * int cpu_online(cpu) Is some cpu online? - * int cpu_possible(cpu) Is some cpu possible? - * int cpu_present(cpu) Is some cpu present (can schedule)? - * - * int any_online_cpu(mask) First online cpu in mask - * - * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map - * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map - * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map - * - * Subtlety: - * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway) - * to generate slightly worse code. Note for example the additional - * 40 lines of assembly code compiling the "for each possible cpu" - * loops buried in the disk_stat_read() macros calls when compiling - * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple - * one-line #define for cpu_isset(), instead of wrapping an inline - * inside a macro, the way we do the other calls. + * set of CPU's in a system, one bit position per CPU number. In general, + * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ - #include #include #include typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) -static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) -{ - set_bit(cpu, dstp->bits); -} - -#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) -static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) -{ - clear_bit(cpu, dstp->bits); -} - -#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) -static inline void __cpus_setall(cpumask_t *dstp, int nbits) -{ - bitmap_fill(dstp->bits, nbits); -} - -#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) -static inline void __cpus_clear(cpumask_t *dstp, int nbits) -{ - bitmap_zero(dstp->bits, nbits); -} - -/* No static inline type checking - see Subtlety (1) above. */ -#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) - -#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) -static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) -{ - return test_and_set_bit(cpu, addr->bits); -} - -#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_andnot(dst, src1, src2) \ - __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) -static inline int __cpus_equal(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_equal(src1p->bits, src2p->bits, nbits); -} - -#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) -static inline int __cpus_intersects(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_intersects(src1p->bits, src2p->bits, nbits); -} - -#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) -static inline int __cpus_subset(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_subset(src1p->bits, src2p->bits, nbits); -} - -#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) -static inline int __cpus_empty(const cpumask_t *srcp, int nbits) -{ - return bitmap_empty(srcp->bits, nbits); -} - -#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) -static inline int __cpus_weight(const cpumask_t *srcp, int nbits) -{ - return bitmap_weight(srcp->bits, nbits); -} - -#define cpus_shift_left(dst, src, n) \ - __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_left(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ - /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * - * @bitmap: the bitmap + * cpumask_bits - get the bits in a cpumask + * @maskp: the struct cpumask * * - * There are a few places where cpumask_var_t isn't appropriate and - * static cpumasks must be used (eg. very early boot), yet we don't - * expose the definition of 'struct cpumask'. - * - * This does the conversion, and can be used as a constant initializer. + * You should only assume nr_cpu_ids bits of this mask are valid. This is + * a macro so it's const-correct. */ -#define to_cpumask(bitmap) \ - ((struct cpumask *)(1 ? (bitmap) \ - : (void *)sizeof(__check_is_bitmap(bitmap)))) - -static inline int __check_is_bitmap(const unsigned long *bitmap) -{ - return 1; -} - -/* - * Special-case data structure for "single bit set only" constant CPU masks. - * - * We pre-generate all the 64 (or 32) possible bit positions, with enough - * padding to the left and the right, and return the constant pointer - * appropriately offset. - */ -extern const unsigned long - cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; - -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) -{ - const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; - p -= cpu / BITS_PER_LONG; - return to_cpumask(p); -} - -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -/* - * In cases where we take the address of the cpumask immediately, - * gcc optimizes it out (it's a constant) and there's no huge stack - * variable created: - */ -#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) - - -#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) - -#if NR_CPUS <= BITS_PER_LONG - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -#else - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -#endif - -#define CPU_MASK_NONE \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ -} } - -#define CPU_MASK_CPU0 \ -(cpumask_t) { { \ - [0] = 1UL \ -} } - -#define cpus_addr(src) ((src).bits) - -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#define cpumask_bits(maskp) ((maskp)->bits) #if NR_CPUS == 1 - #define nr_cpu_ids 1 -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#define any_online_cpu(mask) 0 -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ -#else /* NR_CPUS > 1 */ - +#else extern int nr_cpu_ids; -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -int __first_cpu(const cpumask_t *srcp); -int __next_cpu(int n, const cpumask_t *srcp); -int __any_online_cpu(const cpumask_t *mask); - -#define first_cpu(src) __first_cpu(&(src)) -#define next_cpu(n, src) __next_cpu((n), &(src)) -#define any_online_cpu(mask) __any_online_cpu(&(mask)) -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = next_cpu((cpu), (mask)), \ - (cpu) < NR_CPUS; ) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #endif -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#if NR_CPUS <= 64 - -#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) - -#else /* NR_CPUS > 64 */ - -int __next_cpu_nr(int n, const cpumask_t *srcp); -#define for_each_cpu_mask_nr(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = __next_cpu_nr((cpu), &(mask)), \ - (cpu) < nr_cpu_ids; ) - -#endif /* NR_CPUS > 64 */ -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, + * not all bits may be allocated. */ +#define nr_cpumask_bits nr_cpu_ids +#else +#define nr_cpumask_bits NR_CPUS +#endif /* * The following particular system cpumasks and operations manage @@ -371,12 +80,6 @@ extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; -/* These strip const, as traditionally they weren't const. */ -#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) -#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -#define cpu_present_map (*(cpumask_t *)cpu_present_mask) -#define cpu_active_map (*(cpumask_t *)cpu_active_mask) - #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) #define num_possible_cpus() cpumask_weight(cpu_possible_mask) @@ -395,35 +98,6 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_active(cpu) ((cpu) == 0) #endif -#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) - -/* These are the new versions of the cpumask operators: passed by pointer. - * The older versions will be implemented in terms of these, then deleted. */ -#define cpumask_bits(maskp) ((maskp)->bits) - -#if NR_CPUS <= BITS_PER_LONG -#define CPU_BITS_ALL \ -{ \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} - -#else /* NR_CPUS > BITS_PER_LONG */ - -#define CPU_BITS_ALL \ -{ \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} -#endif /* NR_CPUS > BITS_PER_LONG */ - -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits NR_CPUS -#endif - /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) { @@ -984,4 +658,241 @@ void set_cpu_active(unsigned int cpu, bool active); void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return to_cpumask(p); +} + +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* + * + * From here down, all obsolete. Use cpumask_ variants! + * + */ +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) + +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) + +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) + +#if NR_CPUS <= BITS_PER_LONG + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#else + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#endif + +#define CPU_MASK_NONE \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#if NR_CPUS == 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) +#define any_online_cpu(mask) 0 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#else /* NR_CPUS > 1 */ +int __first_cpu(const cpumask_t *srcp); +int __next_cpu(int n, const cpumask_t *srcp); +int __any_online_cpu(const cpumask_t *mask); + +#define first_cpu(src) __first_cpu(&(src)) +#define next_cpu(n, src) __next_cpu((n), &(src)) +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = next_cpu((cpu), (mask)), \ + (cpu) < NR_CPUS; ) +#endif /* SMP */ + +#if NR_CPUS <= 64 + +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + +#else /* NR_CPUS > 64 */ + +int __next_cpu_nr(int n, const cpumask_t *srcp); +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = __next_cpu_nr((cpu), &(mask)), \ + (cpu) < nr_cpu_ids; ) + +#endif /* NR_CPUS > 64 */ + +#define cpus_addr(src) ((src).bits) + +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +{ + set_bit(cpu, dstp->bits); +} + +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +{ + clear_bit(cpu, dstp->bits); +} + +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) +static inline void __cpus_setall(cpumask_t *dstp, int nbits) +{ + bitmap_fill(dstp->bits, nbits); +} + +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) +static inline void __cpus_clear(cpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} + +/* No static inline type checking - see Subtlety (1) above. */ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) +static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) +{ + return test_and_set_bit(cpu, addr->bits); +} + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_subset(src1p->bits, src2p->bits, nbits); +} + +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) +static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +{ + return bitmap_empty(srcp->bits, nbits); +} + +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) +static inline int __cpus_weight(const cpumask_t *srcp, int nbits) +{ + return bitmap_weight(srcp->bits, nbits); +} + +#define cpus_shift_left(dst, src, n) \ + __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_left(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); +} +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ + #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 2bcd57ab61e7cabed626226a3771617981c11ce1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 24 Sep 2009 04:22:25 +0400 Subject: headers: utsname.h redux * remove asm/atomic.h inclusion from linux/utsname.h -- not needed after kref conversion * remove linux/utsname.h inclusion from files which do not need it NOTE: it looks like fs/binfmt_elf.c do not need utsname.h, however due to some personality stuff it _is_ needed -- cowardly leave ELF-related headers and files alone. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/utsname.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 3656b300de3a..69f39974c041 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -36,7 +36,6 @@ struct new_utsname { #include #include #include -#include struct uts_namespace { struct kref kref; -- cgit v1.2.3 From 22fe404218156328a27e66349b1175cd0baa4990 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 18 Sep 2009 13:05:44 -0700 Subject: vfs: split generic_forget_inode() so that hugetlbfs does not have to copy it Hugetlbfs needs to do special things instead of truncate_inode_pages(). Currently, it copied generic_forget_inode() except for truncate_inode_pages() call which is asking for trouble (the code there isn't trivial). So create a separate function generic_detach_inode() which does all the list magic done in generic_forget_inode() and call it from hugetlbfs_forget_inode(). Signed-off-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b095..955e34615cb7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); +extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), -- cgit v1.2.3 From 42cb56ae2ab67390da34906b27bedc3f2ff1393b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 18 Sep 2009 13:05:53 -0700 Subject: vfs: change sb->s_maxbytes to a loff_t sb->s_maxbytes is supposed to indicate the maximum size of a file that can exist on the filesystem. It's declared as an unsigned long long. Even if a filesystem has no inherent limit that prevents it from using every bit in that unsigned long long, it's still problematic to set it to anything larger than MAX_LFS_FILESIZE. There are places in the kernel that cast s_maxbytes to a signed value. If it's set too large then this cast makes it a negative number and generally breaks the comparison. Change s_maxbytes to be loff_t instead. That should help eliminate the temptation to set it too large by making it a signed value. Also, add a warning for couple of releases to help catch filesystems that set s_maxbytes too large. Eventually we can either convert this to a BUG() or just remove it and in the hope that no one will get it wrong now that it's a signed value. Signed-off-by: Jeff Layton Cc: Johannes Weiner Cc: Christoph Hellwig Cc: Al Viro Cc: Robert Love Cc: Mandeep Singh Baines Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 955e34615cb7..cbb7724c11d3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1315,7 +1315,7 @@ struct super_block { unsigned long s_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; -- cgit v1.2.3 From f84398068d9c2babe41500504ef247ae07081857 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Sep 2009 14:48:36 +0200 Subject: vfs: seq_file: add helpers for data filling Add two helpers that allow access to the seq_file's own buffer, but hide the internal details of seq_files. This allows easier implementation of special purpose filling functions. It also cleans up some existing functions which duplicated the seq_file logic. Make these inline functions in seq_file.h, as suggested by Al. Signed-off-by: Miklos Szeredi Acked-by: Hugh Dickins Signed-off-by: Al Viro --- include/linux/seq_file.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0c6a86b79596..8366d8f12e53 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -35,6 +35,44 @@ struct seq_operations { #define SEQ_SKIP 1 +/** + * seq_get_buf - get buffer to write arbitrary data to + * @m: the seq_file handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_get_buf(struct seq_file *m, char **bufp) +{ + BUG_ON(m->count > m->size); + if (m->count < m->size) + *bufp = m->buf + m->count; + else + *bufp = NULL; + + return m->size - m->count; +} + +/** + * seq_commit - commit data to the buffer + * @m: the seq_file handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get. To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value. + */ +static inline void seq_commit(struct seq_file *m, int num) +{ + if (num < 0) { + m->count = m->size; + } else { + BUG_ON(m->count + num > m->size); + m->count += num; + } +} + char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); -- cgit v1.2.3 From 4fadd7bb20a1e7c774ed88dc703d8fbcd00ff917 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Aug 2009 23:28:06 +0200 Subject: freeze_bdev: kill bd_mount_sem Now that we have the freeze count there is not much reason for bd_mount_sem anymore. The actual freeze/thaw operations are serialized using the bd_fsfreeze_mutex, and the only other place we take bd_mount_sem is get_sb_bdev which tries to prevent mounting a filesystem while the block device is frozen. Instead of add a check for bd_fsfreeze_count and return -EBUSY if a filesystem is frozen. While that is a change in user visible behaviour a failing mount is much better for this case rather than having the mount process stuck uninterruptible for a long time. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cbb7724c11d3..72dfbd423974 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -640,7 +640,6 @@ struct block_device { struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; struct list_head bd_inodes; void * bd_holder; int bd_holders; -- cgit v1.2.3 From 4504230a71566785a05d3e6b53fa1ee071b864eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Aug 2009 23:28:35 +0200 Subject: freeze_bdev: grab active reference to frozen superblocks Currently we held s_umount while a filesystem is frozen, despite that we might return to userspace and unlock it from a different process. Instead grab an active reference to keep the file system busy and add an explicit check for frozen filesystems in remount and reject the remount instead of blocking on s_umount. Add a new get_active_super helper to super.c for use by freeze_bdev that grabs an active reference to a superblock from a given block device. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72dfbd423974..502d96ef345d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2334,6 +2334,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); -- cgit v1.2.3 From 25d9e2d15286281ec834b829a4aaf8969011f1cd Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Fri, 21 Aug 2009 02:35:05 +1000 Subject: truncate: new helpers Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c and into mm/truncate.c. Reviewed-by: Christoph Hellwig Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/fs.h | 3 ++- include/linux/mm.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 502d96ef345d..2b08b5ce09b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, struct iattr *); extern void file_update_time(struct file *file); diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144b..8347e938fb2f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, -- cgit v1.2.3 From 57f1f0874f426a9bdfc5cd3f886113dd5cd17834 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Wed, 23 Sep 2009 15:56:10 -0700 Subject: time: add function to convert between calendar time and broken-down time for universal use There are many similar code in kernel for one object: convert time between calendar time and broken-down time. Here is some source I found: fs/ncpfs/dir.c fs/smbfs/proc.c fs/fat/misc.c fs/udf/udftime.c fs/cifs/netmisc.c net/netfilter/xt_time.c drivers/scsi/ips.c drivers/input/misc/hp_sdc_rtc.c drivers/rtc/rtc-lib.c arch/ia64/hp/sim/boot/fw-emu.c arch/m68k/mac/misc.c arch/powerpc/kernel/time.c arch/parisc/include/asm/rtc.h ... We can make a common function for this type of conversion, At least we can get following benefit: 1: Make kernel simple and unify 2: Easy to fix bug in converting code 3: Reduce clone of code in future For example, I'm trying to make ftrace display walltime, this patch will make me easy. This code is based on code from glibc-2.6 Signed-off-by: Zhao Lei Cc: OGAWA Hirofumi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Pavel Machek Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 56787c093345..fe04e5ef6a59 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); +/* + * Similar to the struct tm in userspace , but it needs to be here so + * that the kernel source is self contained. + */ +struct tm { + /* + * the number of seconds after the minute, normally in the range + * 0 to 59, but can be up to 60 to allow for leap seconds + */ + int tm_sec; + /* the number of minutes after the hour, in the range 0 to 59*/ + int tm_min; + /* the number of hours past midnight, in the range 0 to 23 */ + int tm_hour; + /* the day of the month, in the range 1 to 31 */ + int tm_mday; + /* the number of months since January, in the range 0 to 11 */ + int tm_mon; + /* the number of years since 1900 */ + long tm_year; + /* the number of days since Sunday, in the range 0 to 6 */ + int tm_wday; + /* the number of days since January 1, in the range 0 to 365 */ + int tm_yday; +}; + +void time_to_tm(time_t totalsecs, int offset, struct tm *result); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted -- cgit v1.2.3 From 55dff4954ebdeba2be59e19398a607d799c5fa9f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Sep 2009 15:56:17 -0700 Subject: docs: fix various Documentation/ paths in header files Fix various Documentation/ paths in include/linux/. Signed-off-by: Randy Dunlap Reviewed-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 4 ++-- include/linux/debugfs.h | 2 +- include/linux/relay.h | 2 +- include/linux/tracepoint.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 7f627775c947..ddb7a97c78c2 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -27,8 +27,8 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs.txt before using the - * configfs interface, ESPECIALLY the parts about reference counts and + * Please read Documentation/filesystems/configfs/configfs.txt before using + * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index eb5c2ba2f81a..fc1b930f246c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. */ #ifndef _DEBUGFS_H_ diff --git a/include/linux/relay.h b/include/linux/relay.h index 953fc055e875..14a86bc7102b 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -140,7 +140,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relayfs.txt for more info. + * See Documentation/filesystems/relay.txt for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a80580..660a9de96f81 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers * -- cgit v1.2.3 From 8f3ff20862cfcb85500a2bb55ee64622bd59fd0c Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Wed, 23 Sep 2009 15:56:25 -0700 Subject: cgroups: revert "cgroups: fix pid namespace bug" The following series adds a "cgroup.procs" file to each cgroup that reports unique tgids rather than pids, and allows all threads in a threadgroup to be atomically moved to a new cgroup. The subsystem "attach" interface is modified to support attaching whole threadgroups at a time, which could introduce potential problems if any subsystem were to need to access the old cgroup of every thread being moved. The attach interface may need to be revised if this becomes the case. Also added is functionality for read/write locking all CLONE_THREAD fork()ing within a threadgroup, by means of an rwsem that lives in the sighand_struct, for per-threadgroup-ness and also for sharing a cacheline with the sighand's atomic count. This scheme should introduce no extra overhead in the fork path when there's no contention. The final patch reveals potential for a race when forking before a subsystem's attach function is called - one potential solution in case any subsystem has this problem is to hang on to the group's fork mutex through the attach() calls, though no subsystem yet demonstrates need for an extended critical section. This patch: Revert commit 096b7fe012d66ed55e98bc8022405ede0cc80e96 Author: Li Zefan AuthorDate: Wed Jul 29 15:04:04 2009 -0700 Commit: Linus Torvalds CommitDate: Wed Jul 29 19:10:35 2009 -0700 cgroups: fix pid namespace bug This is in preparation for some clashing cgroups changes that subsume the original commit's functionaliy. The original commit fixed a pid namespace bug which Ben Blum fixed independently (in the same way, but with different code) as part of a series of patches. I played around with trying to reconcile Ben's patch series with Li's patch, but concluded that it was simpler to just revert Li's, given that Ben's patch series contained essentially the same fix. Signed-off-by: Paul Menage Cc: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 90bba9e62286..c833d6f23672 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -179,11 +179,14 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects pids_list and cached pid arrays. */ + /* pids_mutex protects the fields below */ struct rw_semaphore pids_mutex; - - /* Linked list of struct cgroup_pids */ - struct list_head pids_list; + /* Array of process ids in the cgroup */ + pid_t *tasks_pids; + /* How many files are using the current tasks_pids array */ + int pids_use_count; + /* Length of the current tasks_pids array */ + int pids_length; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From 102a775e3647628727ae83a9a6abf0564c3ca7cb Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:26 -0700 Subject: cgroups: add a read-only "procs" file similar to "tasks" that shows only unique tgids struct cgroup used to have a bunch of fields for keeping track of the pidlist for the tasks file. Those are now separated into a new struct cgroup_pidlist, of which two are had, one for procs and one for tasks. The way the seq_file operations are set up is changed so that just the pidlist struct gets passed around as the private data. Interface example: Suppose a multithreaded process has pid 1000 and other threads with ids 1001, 1002, 1003: $ cat tasks 1000 1001 1002 1003 $ cat cgroup.procs 1000 $ Signed-off-by: Ben Blum Signed-off-by: Paul Menage Acked-by: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c833d6f23672..2357733a0a80 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,6 +141,17 @@ enum { CGRP_WAIT_ON_RMDIR, }; +struct cgroup_pidlist { + /* protects the other fields */ + struct rw_semaphore mutex; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,14 +190,9 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects the fields below */ - struct rw_semaphore pids_mutex; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the current tasks_pids array */ - int pids_use_count; - /* Length of the current tasks_pids array */ - int pids_length; + /* we will have two separate pidlists, one for pids (the tasks file) + * and one for tgids (the procs file). */ + struct cgroup_pidlist tasks, procs; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From 72a8cb30d10d4041c455a7054607a7d519167c87 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:27 -0700 Subject: cgroups: ensure correct concurrent opening/reading of pidlists across pid namespaces Previously there was the problem in which two processes from different pid namespaces reading the tasks or procs file could result in one process seeing results from the other's namespace. Rather than one pidlist for each file in a cgroup, we now keep a list of pidlists keyed by namespace and file type (tasks versus procs) in which entries are placed on demand. Each pidlist has its own lock, and that the pidlists themselves are passed around in the seq_file's private pointer means we don't have to touch the cgroup or its master list except when creating and destroying entries. Signed-off-by: Ben Blum Signed-off-by: Paul Menage Cc: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2357733a0a80..88e863460726 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,15 +141,36 @@ enum { CGRP_WAIT_ON_RMDIR, }; +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ struct cgroup_pidlist { - /* protects the other fields */ - struct rw_semaphore mutex; + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; /* array of xids */ pid_t *list; /* how many elements the above list has */ int length; /* how many files are using the current array */ int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; }; struct cgroup { @@ -190,9 +211,12 @@ struct cgroup { */ struct list_head release_list; - /* we will have two separate pidlists, one for pids (the tasks file) - * and one for tgids (the procs file). */ - struct cgroup_pidlist tasks, procs; + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; /* For RCU-protected deletion */ struct rcu_head rcu_head; -- cgit v1.2.3 From c378369d8b4fa516ff2b1e79c3eded4e0e955ebb Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:29 -0700 Subject: cgroups: change css_set freeing mechanism to be under RCU Changes css_set freeing mechanism to be under RCU This is a prepatch for making the procs file writable. In order to free the old css_sets for each task to be moved as they're being moved, the freeing mechanism must be RCU-protected, or else we would have to have a call to synchronize_rcu() for each task before freeing its old css_set. Signed-off-by: Ben Blum Signed-off-by: Paul Menage Cc: "Paul E. McKenney" Acked-by: Li Zefan Cc: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 88e863460726..3ac78a2f4b5a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -260,6 +260,9 @@ struct css_set { * during subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* -- cgit v1.2.3 From be367d09927023d081f9199665c8500f69f14d22 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 23 Sep 2009 15:56:31 -0700 Subject: cgroups: let ss->can_attach and ss->attach do whole threadgroups at a time Alter the ss->can_attach and ss->attach functions to be able to deal with a whole threadgroup at a time, for use in cgroup_attach_proc. (This is a pre-patch to cgroup-procs-writable.patch.) Currently, new mode of the attach function can only tell the subsystem about the old cgroup of the threadgroup leader. No subsystem currently needs that information for each thread that's being moved, but if one were to be added (for example, one that counts tasks within a group) this bit would need to be reworked a bit to tell the subsystem the right information. [hidave.darkstar@gmail.com: fix build] Signed-off-by: Ben Blum Signed-off-by: Paul Menage Acked-by: Li Zefan Reviewed-by: Matt Helsley Cc: "Eric W. Biederman" Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3ac78a2f4b5a..b62bb9294d0c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -425,10 +425,11 @@ struct cgroup_subsys { struct cgroup *cgrp); int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, - struct cgroup *cgrp, struct task_struct *tsk); + int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk); + struct cgroup *old_cgrp, struct task_struct *tsk, + bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, -- cgit v1.2.3 From 4b3bde4c983de36c59e6c1a24701f6fe816f9f55 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:32 -0700 Subject: memcg: remove the overhead associated with the root cgroup Change the memory cgroup to remove the overhead associated with accounting all pages in the root cgroup. As a side-effect, we can no longer set a memory hard limit in the root cgroup. A new flag to track whether the page has been accounted or not has been added as well. Flags are now set atomically for page_cgroup, pcg_default_flags is now obsolete and removed. [akpm@linux-foundation.org: fix a few documentation glitches] Signed-off-by: Balbir Singh Signed-off-by: Daisuke Nishimura Reviewed-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index ada779f24178..4b938d4f3ac2 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -38,6 +38,7 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ + PCG_ACCT_LRU, /* page has been accounted for */ }; #define TESTPCGFLAG(uname, lname) \ @@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ { clear_bit(PCG_##lname, &pc->flags); } +#define TESTCLEARPCGFLAG(uname, lname) \ +static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ + { return test_and_clear_bit(PCG_##lname, &pc->flags); } + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) +CLEARPCGFLAG(Cache, CACHE) +SETPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) +SETPCGFLAG(Used, USED) + +SETPCGFLAG(AcctLRU, ACCT_LRU) +CLEARPCGFLAG(AcctLRU, ACCT_LRU) +TESTPCGFLAG(AcctLRU, ACCT_LRU) +TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) static inline int page_cgroup_nid(struct page_cgroup *pc) { -- cgit v1.2.3 From 296c81d89f4f14269f7346f81442910158c0a83a Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:36 -0700 Subject: memory controller: soft limit interface Add an interface to allow get/set of soft limits. Soft limits for memory plus swap controller (memsw) is currently not supported. Resource counters have been enhanced to support soft limits and new type RES_SOFT_LIMIT has been added. Unlike hard limits, soft limits can be directly set and do not need any reclaim or checks before setting them to a newer value. Kamezawa-San raised a question as to whether soft limit should belong to res_counter. Since all resources understand the basic concepts of hard and soft limits, it is justified to add soft limits here. Soft limits are a generic resource usage feature, even file system quotas support soft limits. Signed-off-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 511f42fc6816..fcb9884df618 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -34,6 +34,10 @@ struct res_counter { * the limit that usage cannot exceed */ unsigned long long limit; + /* + * the limit that usage can be exceed + */ + unsigned long long soft_limit; /* * the number of unsuccessful attempts to consume the resource */ @@ -87,6 +91,7 @@ enum { RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, + RES_SOFT_LIMIT, }; /* @@ -132,6 +137,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt) return false; } +static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt) +{ + if (cnt->usage < cnt->soft_limit) + return true; + + return false; +} + +/** + * Get the difference between the usage and the soft limit + * @cnt: The counter + * + * Returns 0 if usage is less than or equal to soft limit + * The difference between usage and soft limit, otherwise. + */ +static inline unsigned long long +res_counter_soft_limit_excess(struct res_counter *cnt) +{ + unsigned long long excess; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage <= cnt->soft_limit) + excess = 0; + else + excess = cnt->usage - cnt->soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return excess; +} + /* * Helper function to detect if the cgroup is within it's limit or * not. It's currently called from cgroup_rss_prepare() @@ -147,6 +182,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + ret = res_counter_soft_limit_check_locked(cnt); + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + static inline void res_counter_reset_max(struct res_counter *cnt) { unsigned long flags; @@ -180,4 +226,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt, return ret; } +static inline int +res_counter_set_soft_limit(struct res_counter *cnt, + unsigned long long soft_limit) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->soft_limit = soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return 0; +} + #endif -- cgit v1.2.3 From f64c3f54940d6929a2b6dcffaab942bd62be2e66 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:37 -0700 Subject: memory controller: soft limit organize cgroups Organize cgroups over soft limit in a RB-Tree Introduce an RB-Tree for storing memory cgroups that are over their soft limit. The overall goal is to 1. Add a memory cgroup to the RB-Tree when the soft limit is exceeded. We are careful about updates, updates take place only after a particular time interval has passed 2. We remove the node from the RB-Tree when the usage goes below the soft limit The next set of patches will exploit the RB-Tree to get the group that is over its soft limit by the largest amount and reclaim from it, when we face memory contention. [hugh.dickins@tiscali.co.uk: CONFIG_CGROUP_MEM_RES_CTLR=y CONFIG_PREEMPT=y fails to boot] Signed-off-by: Balbir Singh Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: KOSAKI Motohiro Signed-off-by: Hugh Dickins Cc: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index fcb9884df618..731af71cddc9 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -114,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at); + unsigned long val, struct res_counter **limit_fail_at, + struct res_counter **soft_limit_at); /* * uncharge - tell that some portion of the resource is released @@ -127,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val); +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { -- cgit v1.2.3 From 4e41695356fb4e0b153be1440ad027e46e0a7ea2 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 23 Sep 2009 15:56:39 -0700 Subject: memory controller: soft limit reclaim on contention Implement reclaim from groups over their soft limit Permit reclaim from memory cgroups on contention (via the direct reclaim path). memory cgroup soft limit reclaim finds the group that exceeds its soft limit by the largest number of pages and reclaims pages from it and then reinserts the cgroup into its correct place in the rbtree. Add additional checks to mem_cgroup_hierarchical_reclaim() to detect long loops in case all swap is turned off. The code has been refactored and the loop check (loop < 2) has been enhanced for soft limits. For soft limits, we try to do more targetted reclaim. Instead of bailing out after two loops, the routine now reclaims memory proportional to the size by which the soft limit is exceeded. The proportion has been empirically determined. [akpm@linux-foundation.org: build fix] [kamezawa.hiroyu@jp.fujitsu.com: fix softlimit css refcnt handling] [nishimura@mxp.nes.nec.co.jp: refcount of the "victim" should be decremented before exiting the loop] Signed-off-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Li Zefan Acked-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++++ include/linux/swap.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e46a0734ab6e..bf9213b2db8f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) extern bool mem_cgroup_oom_called(struct task_struct *task); void mem_cgroup_update_mapped_file_stat(struct page *page, int val); +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, { } +static inline +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, int zid) +{ + return 0; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 6c990e658f4e..4c78fea989b9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -217,6 +217,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, + int nid); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; -- cgit v1.2.3 From a7f0765edfd53aed09cb7b0e15863688b39447de Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:56:44 -0700 Subject: ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee The bug is old, it wasn't cause by recent changes. Test case: static void *tfunc(void *arg) { int pid = (long)arg; assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0); kill(pid, SIGKILL); sleep(1); return NULL; } int main(void) { pthread_t th; long pid = fork(); if (!pid) pause(); signal(SIGCHLD, SIG_IGN); assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0); int r = waitpid(-1, NULL, __WNOTHREAD); printf("waitpid: %d %m\n", r); return 0; } Before the patch this program hangs, after this patch waitpid() correctly fails with errno == -ECHILD. The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its ->real_parent is our sub-thread and we ignore SIGCHLD. But in this case we should wake up other threads which can sleep in do_wait(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Vitaly Mayatskikh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 848d1f20086e..9e5a88afe6be 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2059,6 +2059,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); extern int do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); -- cgit v1.2.3 From ae6d2ed7bb3877ff35b9569402025f40ea2e1803 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 23 Sep 2009 15:56:53 -0700 Subject: signals: tracehook_notify_jctl change This changes tracehook_notify_jctl() so it's called with the siglock held, and changes its argument and return value definition. These clean-ups make it a better fit for what new tracing hooks need to check. Tracing needs the siglock here, held from the time TASK_STOPPED was set, to avoid potential SIGCONT races if it wants to allow any blocking in its tracing hooks. This also folds the finish_stop() function into its caller do_signal_stop(). The function is short, called only once and only unconditionally. It aids readability to fold it in. [oleg@redhat.com: do not call tracehook_notify_jctl() in TASK_STOPPED state] [oleg@redhat.com: introduce tracehook_finish_jctl() helper] Signed-off-by: Roland McGrath Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 17ba82efa483..1eb44a924e56 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -1,7 +1,7 @@ /* * Tracing hooks * - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task, /** * tracehook_notify_jctl - report about job control stop/continue - * @notify: nonzero if this is the last thread in the group to stop + * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED * @why: %CLD_STOPPED or %CLD_CONTINUED * * This is called when we might call do_notify_parent_cldstop(). - * It's called when about to stop for job control; we are already in - * %TASK_STOPPED state, about to call schedule(). It's also called when - * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. * - * Return nonzero to generate a %SIGCHLD with @why, which is - * normal if @notify is nonzero. + * @notify is zero if we would not ordinarily send a %SIGCHLD, + * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. * - * Called with no locks held. + * @why is %CLD_STOPPED when about to stop for job control; + * we are already in %TASK_STOPPED state, about to call schedule(). + * It might also be that we have just exited (check %PF_EXITING), + * but need to report that a group-wide stop is complete. + * + * @why is %CLD_CONTINUED when waking up after job control stop and + * ready to make a delayed @notify report. + * + * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. + * + * Called with the siglock held. */ static inline int tracehook_notify_jctl(int notify, int why) { - return notify || (current->ptrace & PT_PTRACED); + return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; +} + +/** + * tracehook_finish_jctl - report about return from job control stop + * + * This is called by do_signal_stop() after wakeup. + */ +static inline void tracehook_finish_jctl(void) +{ } #define DEATH_REAP -1 -- cgit v1.2.3 From 964ee7df90d799e38fb1556c57cd5c45fc736436 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:56:59 -0700 Subject: exec: fix set_binfmt() vs sys_delete_module() race sys_delete_module() can set MODULE_STATE_GOING after search_binary_handler() does try_module_get(). In this case set_binfmt()->try_module_get() fails but since none of the callers check the returned error, the task will run with the wrong old ->binfmt. The proper fix should change all ->load_binary() methods, but we can rely on fact that the caller must hold a reference to binfmt->module and use __module_get() which never fails. Signed-off-by: Oleg Nesterov Acked-by: Rusty Russell Cc: Hiroshi Shimamoto Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2046b5b8af48..aece486ac734 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); -extern int set_binfmt(struct linux_binfmt *new); +extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 4a30debfb778240a4b1767d4b0c5a5b25ab97160 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Sep 2009 15:57:00 -0700 Subject: signals: introduce do_send_sig_info() helper Introduce do_send_sig_info() and convert group_send_sig_info(), send_sig_info(), do_send_specific() to use this helper. Hopefully it will have more users soon, it allows to specify specific/group behaviour via "bool group" argument. Shaves 80 bytes from .text. Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: stephane eranian Cc: Ingo Molnar Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index c7552836bd95..ab9272cc270c 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig) } extern int next_signal(struct sigpending *pending, sigset_t *mask); +extern int do_send_sig_info(int sig, struct siginfo *info, + struct task_struct *p, bool group); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, -- cgit v1.2.3 From d9588725e52650e82989707f8fd2feb67ad2dc8e Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 23 Sep 2009 15:57:04 -0700 Subject: signals: inline __fatal_signal_pending __fatal_signal_pending inlines to one instruction on x86, probably two instructions on other machines. It takes two longer x86 instructions just to call it and test its return value, not to mention the function itself. On my random x86_64 config, this saved 70 bytes of text (59 of those being __fatal_signal_pending itself). Signed-off-by: Roland McGrath Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9e5a88afe6be..e951bd2eb9fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2337,7 +2337,10 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int __fatal_signal_pending(struct task_struct *p); +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} static inline int fatal_signal_pending(struct task_struct *p) { -- cgit v1.2.3 From 8d65af789f3e2cf4cfbdbf71a0f7a61ebcd41d38 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:19 -0700 Subject: sysctl: remove "struct file *" argument of ->proc_handler It's unused. It isn't needed -- read or write flag is already passed and sysctl shouldn't care about the rest. It _was_ used in two places at arch/frv for some reason. Signed-off-by: Alexey Dobriyan Cc: David Howells Cc: "Eric W. Biederman" Cc: Al Viro Cc: Ralf Baechle Cc: Martin Schwidefsky Cc: Ingo Molnar Cc: "David S. Miller" Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/ftrace.h | 4 ++-- include/linux/hugetlb.h | 6 +++--- include/linux/mm.h | 2 +- include/linux/mmzone.h | 13 ++++++------- include/linux/sched.h | 8 ++++---- include/linux/security.h | 2 +- include/linux/swap.h | 2 +- include/linux/sysctl.h | 19 +++++++++---------- include/linux/writeback.h | 11 +++++------ 10 files changed, 33 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b095..33ed6644abd0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2467,7 +2467,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18daf..cd3d2abaf30a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,7 +19,7 @@ extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); @@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } extern int stack_tracer_enabled; int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73eff..11ab19ac6b3d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144b..87218ae84e36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1279,7 +1279,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be582..6f7561730d88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -struct file; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int numa_zonelist_order_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e951bd2eb9fc..811cd96524d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -309,7 +309,7 @@ extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; @@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -1906,7 +1906,7 @@ extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos); #endif #ifdef CONFIG_SCHED_DEBUG @@ -1924,7 +1924,7 @@ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int sysctl_sched_compat_yield; diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9ef..239e40d0450b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return PAGE_ALIGN(mmap_min_addr); return hint; } -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_SECURITY diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c78fea989b9..82232dbea3f7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -245,7 +245,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, +extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a466..1e4743ee6831 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -29,7 +29,6 @@ #include #include -struct file; struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a @@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dostring(struct ctl_table *, int, struct file *, +extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, +extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff9..66ebddcff664 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, -- cgit v1.2.3 From 9064a6787aa1d8ceaf5ba16fe1dfedb0755dc7eb Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 23 Sep 2009 15:57:23 -0700 Subject: linux/futex.h: place kernel types behind __KERNEL__ The forward decls for some kernel types are only needed by the code behind __KERNEL__, so don't bleed these types to userspace. Signed-off-by: Mike Frysinger Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdebf..8ec17997d94f 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,6 @@ #include #include -struct inode; -struct mm_struct; -struct task_struct; -union ktime; - /* Second argument to futex syscall */ @@ -129,6 +124,11 @@ struct robust_list_head { #define FUTEX_BITSET_MATCH_ANY 0xffffffff #ifdef __KERNEL__ +struct inode; +struct mm_struct; +struct task_struct; +union ktime; + long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -- cgit v1.2.3 From 858f09930b32c11b40fd0c5c467982ba09b10894 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 23 Sep 2009 15:57:32 -0700 Subject: aio: ifdef fields in mm_struct ->ioctx_lock and ->ioctx_list are used only under CONFIG_AIO. Signed-off-by: Alexey Dobriyan Cc: Zach Brown Cc: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0042090a4d70..6b7029ab9c8e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -259,11 +259,10 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ - - /* aio bits */ +#ifdef CONFIG_AIO spinlock_t ioctx_lock; struct hlist_head ioctx_list; - +#endif #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical -- cgit v1.2.3 From 2fa4341074cd02fb39aa23410740764948755635 Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Wed, 23 Sep 2009 15:57:38 -0700 Subject: include/linux/unaligned/{l,b}e_byteshift.h: fix usage for compressed kernels When unaligned accesses are required for uncompressing a kernel (such as for LZO decompression on ARM in a patch that follows), including causes issues as it brings in a lot of things that are not available in the decompression environment. linux/kernel.h brings at least: extern int console_printk[]; extern const char hex_asc[]; which causes errors at link-time as they are not available when compiling the pre-boot environement. There are also a few others: arch/arm/boot/compressed/misc.o: In function `valid_user_regs': arch/arm/include/asm/ptrace.h:158: undefined reference to `elf_hwcap' arch/arm/boot/compressed/misc.o: In function `console_silent': include/linux/kernel.h:292: undefined reference to `console_printk' arch/arm/boot/compressed/misc.o: In function `console_verbose': include/linux/kernel.h:297: undefined reference to `console_printk' arch/arm/boot/compressed/misc.o: In function `pack_hex_byte': include/linux/kernel.h:360: undefined reference to `hex_asc' arch/arm/boot/compressed/misc.o: In function `hweight_long': include/linux/bitops.h:45: undefined reference to `hweight32' arch/arm/boot/compressed/misc.o: In function `__cmpxchg_local_generic': include/asm-generic/cmpxchg-local.h:21: undefined reference to `wrong_size_cmpxchg' include/asm-generic/cmpxchg-local.h:42: undefined reference to `wrong_size_cmpxchg' arch/arm/boot/compressed/misc.o: In function `__xchg': arch/arm/include/asm/system.h:309: undefined reference to `__bad_xchg' However, those files apparently use nothing from , all they need is the declaration of types such as u32 or u64, so should be enough Signed-off-by: Albin Tonnerre Cc: Sam Ravnborg Cc: Russell King Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/unaligned/be_byteshift.h | 2 +- include/linux/unaligned/le_byteshift.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 46dd12c5709e..9356b24223ac 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H #define _LINUX_UNALIGNED_BE_BYTESHIFT_H -#include +#include static inline u16 __get_unaligned_be16(const u8 *p) { diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 59777e951baf..be376fb79b64 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H #define _LINUX_UNALIGNED_LE_BYTESHIFT_H -#include +#include static inline u16 __get_unaligned_le16(const u8 *p) { -- cgit v1.2.3 From 801460d0cf5c5288153b722565773059b0f44348 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 23 Sep 2009 15:57:41 -0700 Subject: task_struct cleanup: move binfmt field to mm_struct Because the binfmt is not different between threads in the same process, it can be moved from task_struct to mm_struct. And binfmt moudle is handled per mm_struct instead of task_struct. Signed-off-by: Hiroshi Shimamoto Acked-by: Oleg Nesterov Cc: Rusty Russell Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b7029ab9c8e..21d6aa45206a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,8 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + struct linux_binfmt *binfmt; + cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 811cd96524d7..8a16f6d11dcd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1271,7 +1271,6 @@ struct task_struct { struct mm_struct *mm, *active_mm; /* task state */ - struct linux_binfmt *binfmt; int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ -- cgit v1.2.3 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937d..1d3ccb173fd6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,9 +308,13 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; char *strtab; /* Section attributes */ -- cgit v1.2.3 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 1d3ccb173fd6..aca980365956 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -315,7 +315,7 @@ struct module */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; - char *strtab; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; -- cgit v1.2.3 From 1d7015caa082d465faeae5d6fd1be077ee6dfa87 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: preferred way to use MODULE_AUTHOR For the longest time now we've been using multiple MODULE_AUTHOR() statements when a module has more than one author, but the comment here disagrees. Signed-off-by: Johannes Berg Cc: Jiri Kosina Cc: Luciano Coelho Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- include/linux/module.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index aca980365956..482efc865acf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name " or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ -- cgit v1.2.3 From 18a1166de994685d770425086b2bcc1ba567f7ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 23 Sep 2009 03:17:11 +0000 Subject: Phonet: error on broadcast sending (unimplemented) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we ever implement this, then we can stop returning an error. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 1ef5a0781831..e5126cff9b2a 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -38,6 +38,7 @@ #define PNPIPE_IFINDEX 2 #define PNADDR_ANY 0 +#define PNADDR_BROADCAST 0xFC #define PNPORT_RESOURCE_ROUTING 0 /* Values for PNPIPE_ENCAP option */ -- cgit v1.2.3 From b8273570f802a7658827dcb077b0b517ba75a289 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Sep 2009 15:44:05 -0700 Subject: genetlink: fix netns vs. netlink table locking (2) Similar to commit d136f1bd366fdb7e747ca7e0218171e7a00a98a5, there's a bug when unregistering a generic netlink family, which is caught by the might_sleep() added in that commit: BUG: sleeping function called from invalid context at net/netlink/af_netlink.c:183 in_atomic(): 1, irqs_disabled(): 0, pid: 1510, name: rmmod 2 locks held by rmmod/1510: #0: (genl_mutex){+.+.+.}, at: [] genl_unregister_family+0x2b/0x130 #1: (rcu_read_lock){.+.+..}, at: [] __genl_unregister_mc_group+0x1c/0x120 Pid: 1510, comm: rmmod Not tainted 2.6.31-wl #444 Call Trace: [] __might_sleep+0x119/0x150 [] netlink_table_grab+0x21/0x100 [] netlink_clear_multicast_users+0x23/0x60 [] __genl_unregister_mc_group+0x71/0x120 [] genl_unregister_family+0x56/0x130 [] nl80211_exit+0x15/0x20 [cfg80211] [] cfg80211_exit+0x1a/0x40 [cfg80211] Fix in the same way by grabbing the netlink table lock before doing rcu_read_lock(). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 080f6ba9e73a..ab5d3126831f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -187,6 +187,7 @@ extern struct sock *netlink_kernel_create(struct net *net, extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); +extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -- cgit v1.2.3 From e9ea0e2d1d00959b451dfc239df126d3c6a22043 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 24 Sep 2009 14:47:45 -0700 Subject: hugetlb_file_setup(): use C, not cpp Why macros are always wrong: mm/mmap.c: In function 'do_mmap_pgoff': mm/mmap.c:953: warning: unused variable 'user' also, move a couple of struct forward-decls outside `#ifdef CONFIG_HUGETLB_PAGE' - it's pointless and frequently harmful to make these conditional (eg, this patch needed `struct user_struct'). Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Eric B Munson Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 11ab19ac6b3d..16937995abd4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -3,15 +3,15 @@ #include +struct ctl_table; +struct user_struct; + #ifdef CONFIG_HUGETLB_PAGE #include #include #include -struct ctl_table; -struct user_struct; - int PageHuge(struct page *page); static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) @@ -187,7 +187,11 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS) +static inline struct file *hugetlb_file_setup(const char *name, size_t size, + int acctflag, struct user_struct **user, int creat_flags) +{ + return ERR_PTR(-ENOSYS); +} #endif /* !CONFIG_HUGETLBFS */ -- cgit v1.2.3 From 934831d060ccd5471ecbc562804a8d3ccd6e562c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Sep 2009 12:33:32 +0100 Subject: NOMMU: Fallback for is_vmalloc_or_module_addr() should be inline The NOMMU fallback for is_vmalloc_or_module_addr() should be static inline, not just static, in linux/mm.h. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index df08551cb0ad..24c395694f4d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -288,7 +288,7 @@ static inline int is_vmalloc_addr(const void *x) #ifdef CONFIG_MMU extern int is_vmalloc_or_module_addr(const void *x); #else -static int is_vmalloc_or_module_addr(const void *x) +static inline int is_vmalloc_or_module_addr(const void *x) { return 0; } -- cgit v1.2.3 From a72bfd4dea053bb8e2233902c3f1893ef5485802 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 26 Sep 2009 00:07:46 +0200 Subject: writeback: pass in super_block to bdi_start_writeback() Sometimes we only want to write pages from a specific super_block, so allow that to be passed in. This fixes a problem with commit 56a131dcf7ed36c3c6e36bea448b674ea85ed5bb causing writeback on all super_blocks on a bdi, where we only really want to sync a specific sb from writeback_inodes_sb(). Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0ee33c2e6129..b449e738533a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -101,7 +101,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); +void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, + long nr_pages); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); -- cgit v1.2.3 From 1d1764c39815db55e10b2d78732db4d6dd9d6039 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Sep 2009 19:37:22 +0400 Subject: headers: kref.h redux * remove asm/atomic.h inclusion from kref.h -- not needed, linux/types.h is enough for atomic_t * remove linux/kref.h inclusion from files which do not need it. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 2 -- include/linux/kref.h | 1 - include/linux/nfs_fs.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index b8826107b518..3b1594d662b0 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -78,8 +78,6 @@ struct ipc_kludge { #define IPCCALL(version,op) ((version)<<16 | (op)) #ifdef __KERNEL__ - -#include #include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ diff --git a/include/linux/kref.h b/include/linux/kref.h index 0cef6badd6fb..b0cb0ebad9e6 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -16,7 +16,6 @@ #define _KREF_H_ #include -#include struct kref { atomic_t refcount; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6b90240dd41..d09db1bc9083 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -40,7 +40,6 @@ #ifdef __KERNEL__ #include -#include #include #include #include -- cgit v1.2.3 From d1f8297a96b0d70f17704296a6666468f2087ce6 Mon Sep 17 00:00:00 2001 From: Sascha Hlusiak Date: Sat, 26 Sep 2009 20:28:07 -0700 Subject: Revert "sit: stateless autoconf for isatap" This reverts commit 645069299a1c7358cf7330afe293f07552f11a5d. While the code does not actually break anything, it does not completely follow RFC5214 yet. After talking back with Fred L. Templin, I agree that completing the ISATAP specific RS/RA code, would pollute the kernel a lot with code that is better implemented in userspace. The kernel should not send RS packages for ISATAP at all. Signed-off-by: Sascha Hlusiak Acked-by: Fred L. Templin Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5eb9b0f857e0..5a9aae4adb44 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -44,7 +44,7 @@ struct ip_tunnel_prl { __u16 flags; __u16 __reserved; __u32 datalen; - __u32 rs_delay; + __u32 __reserved2; /* data follows */ }; -- cgit v1.2.3 From f0f37e2f77731b3473fa6bd5ee53255d9a9cdb40 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 27 Sep 2009 22:29:37 +0400 Subject: const: mark struct vm_struct_operations * mark struct vm_area_struct::vm_ops as const * mark vm_ops in AGP code But leave TTM code alone, something is fishy there with global vm_ops being used. Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/agp_backend.h | 2 +- include/linux/hugetlb.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/ramfs.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 880130f7311f..9101ed64f803 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -53,7 +53,7 @@ struct agp_kern_info { int current_memory; bool cant_use_aperture; unsigned long page_mask; - struct vm_operations_struct *vm_ops; + const struct vm_operations_struct *vm_ops; }; /* diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 16937995abd4..41a59afc70fa 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -163,7 +163,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) } extern const struct file_operations hugetlbfs_file_operations; -extern struct vm_operations_struct hugetlb_vm_ops; +extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, int acct, struct user_struct **user, int creat_flags); int hugetlb_get_quota(struct address_space *mapping, long delta); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21d6aa45206a..84a524afb3dc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -171,7 +171,7 @@ struct vm_area_struct { struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ - struct vm_operations_struct * vm_ops; + const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 37aaf2b39863..4e768dda87b0 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -17,7 +17,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif extern const struct file_operations ramfs_file_operations; -extern struct vm_operations_struct generic_file_vm_ops; +extern const struct vm_operations_struct generic_file_vm_ops; extern int __init init_rootfs(void); #endif -- cgit v1.2.3 From f278a2f7bbc2239f479eaf63d0b3ae573b1d746c Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 27 Sep 2009 16:00:42 +0000 Subject: tty: Fix regressions caused by commit b50989dc The following commit made console open fails while booting: commit b50989dc444599c8b21edc23536fc305f4e9b7d5 Author: Alan Cox Date: Sat Sep 19 13:13:22 2009 -0700 tty: make the kref destructor occur asynchronously Due to tty release routines run in a workqueue now, error like the following will be reported while booting: INIT open /dev/console Input/output error It also causes hibernation regression to appear as reported at http://bugzilla.kernel.org/show_bug.cgi?id=14229 The reason is that now there's latency issue with closing, but when we open a "closing not finished" tty, -EIO will be returned. Fix it as per the following Alan's suggestion: Fun but it's actually not a bug and the fix is wrong in itself as the port may be closing but not yet being destructed, in which case it seems to do the wrong thing. Opening a tty that is closing (and could be closing for long periods) is supposed to return -EIO. I suspect a better way to deal with this and keep the old console timing is to split tty->shutdown into two functions. tty->shutdown() - called synchronously just before we dump the tty onto the waitqueue for destruction tty->cleanup() - called when the destructor runs. We would then do the shutdown part which can occur in IRQ context fine, before queueing the rest of the release (from tty->magic = 0 ... the end) to occur asynchronously The USB update in -next would then need a call like if (tty->cleanup) tty->cleanup(tty); at the top of the async function and the USB shutdown to be split between shutdown and cleanup as the USB resource cleanup and final tidy cannot occur synchronously as it needs to sleep. In other words the logic becomes final kref put make object unfindable async clean it up Signed-off-by: Dave Young [ rjw: Rebased on top of 2.6.31-git, reworked the changelog. ] Signed-off-by: "Rafael J. Wysocki" [ Changed serial naming to match new rules, dropped tty_shutdown as per comments from Alan Stern - Linus ] Signed-off-by: Linus Torvalds --- include/linux/tty_driver.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 3566129384a4..b08677982525 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -45,8 +45,16 @@ * * void (*shutdown)(struct tty_struct * tty); * - * This routine is called when a particular tty device is closed for - * the last time freeing up the resources. + * This routine is called synchronously when a particular tty device + * is closed for the last time freeing up the resources. + * + * + * void (*cleanup)(struct tty_struct * tty); + * + * This routine is called asynchronously when a particular tty device + * is closed for the last time freeing up the resources. This is + * actually the second part of shutdown for routines that might sleep. + * * * int (*write)(struct tty_struct * tty, * const unsigned char *buf, int count); @@ -233,6 +241,7 @@ struct tty_operations { int (*open)(struct tty_struct * tty, struct file * filp); void (*close)(struct tty_struct * tty, struct file * filp); void (*shutdown)(struct tty_struct *tty); + void (*cleanup)(struct tty_struct *tty); int (*write)(struct tty_struct * tty, const unsigned char *buf, int count); int (*put_char)(struct tty_struct *tty, unsigned char ch); -- cgit v1.2.3 From bf6993276f74d46776f35c45ddef29b981b1d1c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Sep 2009 00:32:06 -0400 Subject: jbd2: Use tracepoints for history file The /proc/fs/jbd2//history was maintained manually; by using tracepoints, we can get all of the existing functionality of the /proc file plus extra capabilities thanks to the ftrace infrastructure. We save memory as a bonus. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52695d3dfd0b..f1011f7f3d41 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -464,9 +464,9 @@ struct handle_s */ struct transaction_chp_stats_s { unsigned long cs_chp_time; - unsigned long cs_forced_to_close; - unsigned long cs_written; - unsigned long cs_dropped; + __u32 cs_forced_to_close; + __u32 cs_written; + __u32 cs_dropped; }; /* The transaction_t type is the guts of the journaling mechanism. It @@ -668,23 +668,16 @@ struct transaction_run_stats_s { unsigned long rs_flushing; unsigned long rs_logging; - unsigned long rs_handle_count; - unsigned long rs_blocks; - unsigned long rs_blocks_logged; + __u32 rs_handle_count; + __u32 rs_blocks; + __u32 rs_blocks_logged; }; struct transaction_stats_s { - int ts_type; unsigned long ts_tid; - union { - struct transaction_run_stats_s run; - struct transaction_chp_stats_s chp; - } u; + struct transaction_run_stats_s run; }; -#define JBD2_STATS_RUN 1 -#define JBD2_STATS_CHECKPOINT 2 - static inline unsigned long jbd2_time_diff(unsigned long start, unsigned long end) { @@ -988,12 +981,6 @@ struct journal_s /* * Journal statistics */ - struct transaction_stats_s *j_history; - int j_history_max; - int j_history_cur; - /* - * Protect the transactions statistics history - */ spinlock_t j_history_lock; struct proc_dir_entry *j_proc_entry; struct transaction_stats_s j_stats; -- cgit v1.2.3 From 9fcd66e572b94974365a9119b073e0a43d496eb7 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Fri, 18 Sep 2009 13:04:58 +0200 Subject: MIPS: BCM63xx: Add serial driver for bcm63xx integrated UART. Signed-off-by: Maxime Bizon Acked-by: Greg Kroah-Hartman Signed-off-by: Ralf Baechle --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index fe661afe0713..db532ce288be 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -176,6 +176,9 @@ /* Qualcomm MSM SoCs */ #define PORT_MSM 88 +/* BCM63xx family SoCs */ +#define PORT_BCM63XX 89 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From b7058842c940ad2c08dd829b21e5c92ebe3b8758 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Sep 2009 16:12:20 -0700 Subject: net: Make setsockopt() optlen be unsigned. This provides safety against negative optlen at the type level instead of depending upon (sometimes non-trivial) checks against this sprinkled all over the the place, in each and every implementation. Based upon work done by Arjan van de Ven and feedback from Linus Torvalds. Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- include/linux/mroute.h | 4 ++-- include/linux/mroute6.h | 4 ++-- include/linux/net.h | 8 ++++---- include/linux/netfilter.h | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 086e5c362d3a..817b23705c91 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -397,7 +397,7 @@ struct atmdev_ops { /* only send is required */ int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, void __user *optval,int optlen); int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, - void __user *optval,int optlen); + void __user *optval,unsigned int optlen); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); void (*phy_put)(struct atm_dev *dev,unsigned char value, diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 0d45b4e8d367..08bc776d05e2 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -145,14 +145,14 @@ static inline int ip_mroute_opt(int opt) #endif #ifdef CONFIG_IP_MROUTE -extern int ip_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); extern int ip_mr_init(void); #else static inline int ip_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 43dc97e32183..b191865a6ca3 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -134,7 +134,7 @@ static inline int ip6_mroute_opt(int opt) struct sock; #ifdef CONFIG_IPV6_MROUTE -extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, int); +extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, unsigned int); extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ip6_mr_input(struct sk_buff *skb); extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg); @@ -143,7 +143,7 @@ extern void ip6_mr_cleanup(void); #else static inline int ip6_mroute_setsockopt(struct sock *sock, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { return -ENOPROTOOPT; } diff --git a/include/linux/net.h b/include/linux/net.h index 9040a10584f7..529a0931711d 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -178,11 +178,11 @@ struct proto_ops { int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, int optlen); + int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int (*compat_setsockopt)(struct socket *sock, int level, - int optname, char __user *optval, int optlen); + int optname, char __user *optval, unsigned int optlen); int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int (*sendmsg) (struct kiocb *iocb, struct socket *sock, @@ -256,7 +256,7 @@ extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, extern int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen); extern int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen); + char *optval, unsigned int optlen); extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); @@ -313,7 +313,7 @@ SOCKCALL_WRAP(name, compat_ioctl, (struct socket *sock, unsigned int cmd, \ SOCKCALL_WRAP(name, listen, (struct socket *sock, int len), (sock, len)) \ SOCKCALL_WRAP(name, shutdown, (struct socket *sock, int flags), (sock, flags)) \ SOCKCALL_WRAP(name, setsockopt, (struct socket *sock, int level, int optname, \ - char __user *optval, int optlen), (sock, level, optname, optval, optlen)) \ + char __user *optval, unsigned int optlen), (sock, level, optname, optval, optlen)) \ SOCKCALL_WRAP(name, getsockopt, (struct socket *sock, int level, int optname, \ char __user *optval, int __user *optlen), (sock, level, optname, optval, optlen)) \ SOCKCALL_WRAP(name, sendmsg, (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t len), \ diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 48cfe51bfddc..6132b5e6d9d3 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -221,12 +221,12 @@ __ret;}) /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, - int len); + unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, - char __user *opt, int len); + char __user *opt, unsigned int len); int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); -- cgit v1.2.3 From 48c0d4d4c04dd520c55e0fd756fa4e7c83de3d13 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Fri, 25 Sep 2009 06:19:26 +0200 Subject: Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs introduced in commit 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82. Release kobject also in case the request_fn is NULL. Problem was noticed via kmemleak backtrace when some sysfs entries were note properly destroyed during device removal: unreferenced object 0xffff88001aa76640 (size 80): comm "lvcreate", pid 2120, jiffies 4294885144 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 f0 65 a7 1a 00 88 ff ff .........e...... 90 66 a7 1a 00 88 ff ff 86 1d 53 81 ff ff ff ff .f........S..... backtrace: [] kmemleak_alloc+0x26/0x60 [] kmem_cache_alloc+0x133/0x1c0 [] sysfs_new_dirent+0x41/0x120 [] sysfs_add_file_mode+0x3c/0xb0 [] internal_create_group+0xc1/0x1a0 [] sysfs_create_group+0x13/0x20 [] blk_trace_init_sysfs+0x14/0x20 [] blk_register_queue+0x3c/0xf0 [] add_disk+0x94/0x160 [] dm_create+0x598/0x6e0 [dm_mod] [] dev_create+0x51/0x350 [dm_mod] [] ctl_ioctl+0x1a3/0x240 [dm_mod] [] dm_compat_ctl_ioctl+0x12/0x20 [dm_mod] [] compat_sys_ioctl+0xcd/0x4f0 [] sysenter_dispatch+0x7/0x2c [] 0xffffffffffffffff Signed-off-by: Zdenek Kabelac Reviewed-by: Li Zefan Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7e4350ece0f8..622939a23299 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern void blk_trace_remove_sysfs(struct device *dev); extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -211,6 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; -- cgit v1.2.3 From c15227de132f1295f3db6b7df9079956b1020fd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:52:12 +0200 Subject: block: use normal I/O path for discard requests prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally adding a payload there makes the ownership of the bio backing unclear as it's now allocated by the device driver and not the submitter as usual. It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether the queue supports discard operations or not. blkdev_issue_discard now allocates a one-page, sector-length payload which is the right thing for the common ATA and SCSI implementations. The mtd implementation of prepare_discard_fn() is replaced with simply checking for the request being a discard. Largely based on a previous patch from Matthew Wilcox which did the prepare_discard_fn but not the different payload allocation yet. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5ac..f62d45e87618 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,7 +82,6 @@ enum rq_cmd_type_bits { enum { REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush request */ - REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); -typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -340,7 +338,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; - prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -460,6 +457,7 @@ struct request_queue #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ +#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -591,6 +589,7 @@ enum { #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) +#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From 67efc9258010da35b27b3854d0880c7e193004ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:54:20 +0200 Subject: block: allow large discard requests Currently we set the bio size to the byte equivalent of the blocks to be trimmed when submitting the initial DISCARD ioctl. That means it is subject to the max_hw_sectors limitation of the HBA which is much lower than the size of a DISCARD request we can support. Add a separate max_discard_sectors tunable to limit the size for discard requests. We limit the max discard request size in bytes to 32bit as that is the limit for bio->bi_size. This could be much larger if we had a way to pass that information through the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f62d45e87618..1a03b715dfad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,7 @@ struct queue_limits { unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; + unsigned int max_discard_sectors; unsigned short logical_block_size; unsigned short max_hw_segments; @@ -928,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); +extern void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, -- cgit v1.2.3 From b0da3f0dada78832c9da03ad2152ae76bd9a2496 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 1 Oct 2009 21:16:13 +0200 Subject: Add a tracepoint for block request remapping Since 2.6.31 now has request-based device-mapper, it's useful to have a tracepoint for request-remapping as well as bio-remapping. This patch adds a tracepoint for request-remapping, trace_block_rq_remap(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Cc: Li Zefan Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 622939a23299..3b73b9992b26 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; -- cgit v1.2.3 From 828c09509b9695271bcbdc53e9fc9a6a737148d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 1 Oct 2009 15:43:56 -0700 Subject: const: constify remaining file_operations [akpm@linux-foundation.org: fix KVM] Signed-off-by: Alexey Dobriyan Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b62bb9294d0c..0008dee66514 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -37,7 +37,7 @@ extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); -extern struct file_operations proc_cgroup_operations; +extern const struct file_operations proc_cgroup_operations; /* Define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, diff --git a/include/linux/fs.h b/include/linux/fs.h index 2adaa2529f18..a1e6899d4b6c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2446,7 +2446,7 @@ static int __fops ## _open(struct inode *inode, struct file *file) \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ -static struct file_operations __fops = { \ +static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ -- cgit v1.2.3 From 4e649152cbaa1aedd01821d200ab9d597fe469e4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 1 Oct 2009 15:44:11 -0700 Subject: memcg: some modification to softlimit under hierarchical memory reclaim. This patch clean up/fixes for memcg's uncharge soft limit path. Problems: Now, res_counter_charge()/uncharge() handles softlimit information at charge/uncharge and softlimit-check is done when event counter per memcg goes over limit. Now, event counter per memcg is updated only when memory usage is over soft limit. Here, considering hierarchical memcg management, ancesotors should be taken care of. Now, ancerstors(hierarchy) are handled in charge() but not in uncharge(). This is not good. Prolems: 1. memcg's event counter incremented only when softlimit hits. That's bad. It makes event counter hard to be reused for other purpose. 2. At uncharge, only the lowest level rescounter is handled. This is bug. Because ancesotor's event counter is not incremented, children should take care of them. 3. res_counter_uncharge()'s 3rd argument is NULL in most case. ops under res_counter->lock should be small. No "if" sentense is better. Fixes: * Removed soft_limit_xx poitner and checks in charge and uncharge. Do-check-only-when-necessary scheme works enough well without them. * make event-counter of memcg incremented at every charge/uncharge. (per-cpu area will be accessed soon anyway) * All ancestors are checked at soft-limit-check. This is necessary because ancesotor's event counter may never be modified. Then, they should be checked at the same time. Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Menage Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 731af71cddc9..fcb9884df618 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -114,8 +114,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at, - struct res_counter **soft_limit_at); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released @@ -128,8 +127,7 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val, - bool *was_soft_limit_excess); +void res_counter_uncharge(struct res_counter *counter, unsigned long val); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { -- cgit v1.2.3 From 329bd4119c8a0afea95f9db6d6b402a2f2b40e84 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Oct 2009 15:23:21 +0200 Subject: initcalls: Add early_initcall() for modules Complete the early_initcall() API by making it available in modules too. To be used by the EDAC/MCE code. Signed-off-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091002132321.GC28682@aftab> Signed-off-by: Ingo Molnar --- include/linux/init.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 400adbb45414..ff8bde520d03 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -271,6 +271,7 @@ void __init parse_early_options(char *cmdline); #else /* MODULE */ /* Don't use these in modules, but some people do... */ +#define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) -- cgit v1.2.3 From 293500a23f4b0698cb04abfecfc9a954d8ab2742 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:04 +0000 Subject: connector: Keep the skb in cn_callback_data Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 47ebf416f512..05a7a14126d8 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -134,8 +134,8 @@ struct cn_callback_id { struct cn_callback_data { void (*destruct_data) (void *); void *ddata; - - void *callback_priv; + + struct sk_buff *skb; void (*callback) (struct cn_msg *); void *free; -- cgit v1.2.3 From 7069331dbe7155f23966f5944109f909fea0c7e4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:05 +0000 Subject: connector: Provide the sender's credentials to the callback Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 05a7a14126d8..545728e20b63 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -136,7 +136,7 @@ struct cn_callback_data { void *ddata; struct sk_buff *skb; - void (*callback) (struct cn_msg *); + void (*callback) (struct cn_msg *, struct netlink_skb_parms *); void *free; }; @@ -167,11 +167,11 @@ struct cn_dev { struct cn_queue_dev *cbdev; }; -int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *)); +int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, gfp_t); -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *)); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work); -- cgit v1.2.3 From f1489cfb173509a3c13444b46b6c989bad4f5b16 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 2 Oct 2009 02:40:07 +0000 Subject: connector: Removed the destruct_data callback since it is always kfree_skb() Signed-off-by: Philipp Reisner Acked-by: Lars Ellenberg Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- include/linux/connector.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 545728e20b63..3a14615fd35c 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -132,9 +132,6 @@ struct cn_callback_id { }; struct cn_callback_data { - void (*destruct_data) (void *); - void *ddata; - struct sk_buff *skb; void (*callback) (struct cn_msg *, struct netlink_skb_parms *); -- cgit v1.2.3 From 8e2967555571659d2c8a70dd120710110ed7bba4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2009 16:26:03 +0200 Subject: cfq-iosched: implement slower async initiate and queue ramp up This slowly ramps up the async queue depth based on the time passed since the sync IO, and doesn't allow async at all until a sync slice period has passed. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a03b715dfad..a7323930d2ba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1147,7 +1147,11 @@ static inline void put_dev_sector(Sector p) } struct work_struct; +struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From ac481c20ef8f6c6f2be75d581863f40c43874ef7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sat, 3 Oct 2009 20:52:01 +0200 Subject: block: Topology ioctls Not all users of the topology information want to use libblkid. Provide the topology information through bdev ioctls. Also clarify sector size comments for existing BLK ioctls. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 35 ++++++++++++++++++++++++++++++----- include/linux/fs.h | 4 ++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a7323930d2ba..25119041e034 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1081,25 +1081,37 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } +static inline int bdev_physical_block_size(struct block_device *bdev) +{ + return queue_physical_block_size(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_min(struct request_queue *q) { return q->limits.io_min; } +static inline int bdev_io_min(struct block_device *bdev) +{ + return queue_io_min(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_opt(struct request_queue *q) { return q->limits.io_opt; } +static inline int bdev_io_opt(struct block_device *bdev) +{ + return queue_io_opt(bdev_get_queue(bdev)); +} + static inline int queue_alignment_offset(struct request_queue *q) { - if (q && q->limits.misaligned) + if (q->limits.misaligned) return -1; - if (q && q->limits.alignment_offset) - return q->limits.alignment_offset; - - return 0; + return q->limits.alignment_offset; } static inline int queue_sector_alignment_offset(struct request_queue *q, @@ -1109,6 +1121,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q, & (q->limits.io_min - 1); } +static inline int bdev_alignment_offset(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q->limits.misaligned) + return -1; + + if (bdev != bdev->bd_contains) + return bdev->bd_part->alignment_offset; + + return q->limits.alignment_offset; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2adaa2529f18..883eaacfd924 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -300,6 +300,10 @@ struct inodes_stat_t { #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) #define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From 1087e9b4ff708976499b4de541d9e1d57d49b60a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 4 Oct 2009 02:20:11 +0200 Subject: HWPOISON: Clean up PR_MCE_KILL interface While writing the manpage I noticed some shortcomings in the current interface. - Define symbolic names for all the different values - Boundary check the kill mode values - For symmetry add a get interface too. This allows library code to get/set the current state. - For consistency define a PR_MCE_KILL_DEFAULT value Signed-off-by: Andi Kleen --- include/linux/prctl.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 931150566ade..a3baeb2c2161 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,6 +88,18 @@ #define PR_TASK_PERF_EVENTS_DISABLE 31 #define PR_TASK_PERF_EVENTS_ENABLE 32 +/* + * Set early/late kill mode for hwpoison memory corruption. + * This influences when the process gets killed on a memory corruption. + */ #define PR_MCE_KILL 33 +# define PR_MCE_KILL_CLEAR 0 +# define PR_MCE_KILL_SET 1 + +# define PR_MCE_KILL_LATE 0 +# define PR_MCE_KILL_EARLY 1 +# define PR_MCE_KILL_DEFAULT 2 + +#define PR_MCE_KILL_GET 34 #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 0f78ab9899e9d6acb09d5465def618704255963b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Oct 2009 21:04:38 +0200 Subject: Revert "Seperate read and write statistics of in_flight requests" This reverts commit a9327cac440be4d8333bba975cbbf76045096275. Corrado Zoccolo reports: "with 2.6.32-rc1 I started getting the following strange output from "iostat -kx 2": Linux 2.6.31bisect (et2) 04/10/2009 _i686_ (2 CPU) avg-cpu: %user %nice %system %iowait %steal %idle 10,70 0,00 3,16 15,75 0,00 70,38 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 18,22 0,00 0,67 0,01 14,77 0,02 43,94 0,01 10,53 39043915,03 2629219,87 sdb 60,89 9,68 50,79 3,04 1724,43 50,52 65,95 0,70 13,06 488437,47 2629219,87 avg-cpu: %user %nice %system %iowait %steal %idle 2,72 0,00 0,74 0,00 0,00 96,53 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 6,68 0,00 0,99 0,00 0,00 92,33 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 4,40 0,00 0,73 1,47 0,00 93,40 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 4,00 0,00 3,00 0,00 28,00 18,67 0,06 19,50 333,33 100,00 Global values for service time and utilization are garbage. For interval values, utilization is always 100%, and service time is higher than normal. I bisected it down to: [a9327cac440be4d8333bba975cbbf76045096275] Seperate read and write statistics of in_flight requests and verified that reverting just that commit indeed solves the issue on 2.6.32-rc1." So until this is debugged, revert the bad commit. Signed-off-by: Jens Axboe --- include/linux/genhd.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 297df45ffd0a..7beaa21b3880 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight[2]; + int in_flight; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,23 +322,18 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part, int rw) +static inline void part_inc_in_flight(struct hd_struct *part) { - part->in_flight[rw]++; + part->in_flight++; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]++; + part_to_disk(part)->part0.in_flight++; } -static inline void part_dec_in_flight(struct hd_struct *part, int rw) +static inline void part_dec_in_flight(struct hd_struct *part) { - part->in_flight[rw]--; + part->in_flight--; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]--; -} - -static inline int part_in_flight(struct hd_struct *part) -{ - return part->in_flight[0] + part->in_flight[1]; + part_to_disk(part)->part0.in_flight--; } /* block/blk-core.c */ @@ -551,8 +546,6 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From a99bbaf5ee6bad1aca0c88ea65ec6e5373e86184 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 4 Oct 2009 16:11:37 +0400 Subject: headers: remove sched.h from poll.h Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- include/linux/poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index fa287f25138d..6673743946f7 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -6,10 +6,10 @@ #ifdef __KERNEL__ #include +#include #include #include #include -#include #include /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating -- cgit v1.2.3 From 9c501935a3cdcf6b1d35aaee3aa11c7a7051a305 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 5 Oct 2009 00:24:36 -0700 Subject: net: Support inclusion of before The following user-space program fails to compile: #include #include int main() { return 0; } The reason is that tests __GLIBC__ to decide whether it should define various structures and macros that are now defined for user-space by , but __GLIBC__ is not defined if no libc headers have yet been included. It seems safe to drop support for libc 5 now. Signed-off-by: Ben Hutchings Signed-off-by: Bastian Blank Signed-off-by: David S. Miller --- include/linux/socket.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 3b461dffe244..3273a0c5043b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -16,7 +16,7 @@ struct __kernel_sockaddr_storage { /* _SS_MAXSIZE value minus size of ss_family */ } __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) +#ifdef __KERNEL__ #include /* arch-dependent defines */ #include /* the SIOCxxx I/O controls */ @@ -100,21 +100,6 @@ struct cmsghdr { ((mhdr)->msg_controllen - \ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) -/* - * This mess will go away with glibc - */ - -#ifdef __KERNEL__ -#define __KINLINE static inline -#elif defined(__GNUC__) -#define __KINLINE static __inline__ -#elif defined(__cplusplus) -#define __KINLINE static inline -#else -#define __KINLINE static -#endif - - /* * Get the next cmsg header * @@ -128,7 +113,7 @@ struct cmsghdr { * ancillary object DATA. --ANK (980731) */ -__KINLINE struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, +static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { struct cmsghdr * __ptr; @@ -140,7 +125,7 @@ __KINLINE struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, return __ptr; } -__KINLINE struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) +static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) { return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -- cgit v1.2.3 From 23e018a1b083ecb4b8bb2fb43d58e7c19b5d7959 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Oct 2009 08:52:35 +0200 Subject: block: get rid of kblock_schedule_delayed_work() It was briefly introduced to allow CFQ to to delayed scheduling, but we ended up removing that feature again. So lets kill the function and export, and just switch CFQ back to the normal work schedule since it is now passing in a '0' delay from all call sites. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25119041e034..221cecd86bd3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1172,11 +1172,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *work, - unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From 3d76c082907e8f83c5d5c4572f38d53ad8f00c4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Sep 2009 07:46:32 -0700 Subject: rcu: Clean up code based on review feedback from Josh Triplett, part 3 Whitespace fixes, updated comments, and trivial code movement. o Fix whitespace error in RCU_HEAD_INIT() o Move "So where is rcu_write_lock()" comment so that it does not come between the rcu_read_unlock() header comment and the rcu_read_unlock() definition. o Move the module_param statements for blimit, qhimark, and qlowmark to immediately follow the corresponding definitions. o In __rcu_offline_cpu(), move the assignment to rdp_me inside the "if" statement, given that rdp_me is not used outside of that "if" statement. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12541491931164-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 70331218e4b4..3ebd0b7bcb08 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -77,7 +77,7 @@ extern int rcu_scheduler_active; #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } +#define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT #define INIT_RCU_HEAD(ptr) do { \ (ptr)->next = NULL; (ptr)->func = NULL; \ @@ -129,12 +129,6 @@ static inline void rcu_read_lock(void) rcu_read_acquire(); } -/** - * rcu_read_unlock - marks the end of an RCU read-side critical section. - * - * See rcu_read_lock() for more information. - */ - /* * So where is rcu_write_lock()? It does not exist, as there is no * way for writers to lock out RCU readers. This is a feature, not @@ -144,6 +138,12 @@ static inline void rcu_read_lock(void) * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ + +/** + * rcu_read_unlock - marks the end of an RCU read-side critical section. + * + * See rcu_read_lock() for more information. + */ static inline void rcu_read_unlock(void) { rcu_read_release(); -- cgit v1.2.3 From f1bce7f80e3b400cf29787b0afa9c3042b959017 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:16:04 +0900 Subject: libata: cosmetic updates We're about to add more SATA_* and ATA_ACPI_FILTER_* constants. Reformat them in preparation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/ata.h | 6 +++--- include/linux/libata.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 6299a259ed19..7c5beafa972c 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -334,9 +334,9 @@ enum { SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ /* SETFEATURE Sector counts for SATA features */ - SATA_AN = 0x05, /* Asynchronous Notification */ - SATA_DIPM = 0x03, /* Device Initiated Power Management */ - SATA_FPDMA_AA = 0x02, /* DMA Setup FIS Auto-Activate */ + SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ + SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_AN = 0x05, /* Asynchronous Notification */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 76319bf03e37..5b2f7491fb26 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -418,6 +418,15 @@ enum { ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | ATA_TIMING_DMACK_HOLD | ATA_TIMING_CYCLE | ATA_TIMING_UDMA, + + /* ACPI constants */ + ATA_ACPI_FILTER_SETXFER = 1 << 0, + ATA_ACPI_FILTER_LOCK = 1 << 1, + ATA_ACPI_FILTER_DIPM = 1 << 2, + + ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | + ATA_ACPI_FILTER_LOCK | + ATA_ACPI_FILTER_DIPM, }; enum ata_xfer_mask { -- cgit v1.2.3 From fa5b561c4ea170caf9759109acc2e961a7e83bea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:17:02 +0900 Subject: libata: implement more acpi filtering options Currently libata-acpi can only filter DIPM among SATA feature enables via _GTF. This patch adds the capability to filter out FPDMA non-zero offset, in-order guarantee and auto-activation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/ata.h | 3 +++ include/linux/libata.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 7c5beafa972c..4fb357312b3b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -334,9 +334,12 @@ enum { SETFEATURES_SATA_DISABLE = 0x90, /* Disable use of SATA feature */ /* SETFEATURE Sector counts for SATA features */ + SATA_FPDMA_OFFSET = 0x01, /* FPDMA non-zero buffer offsets */ SATA_FPDMA_AA = 0x02, /* FPDMA Setup FIS Auto-Activate */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_FPDMA_IN_ORDER = 0x04, /* FPDMA in-order data delivery */ SATA_AN = 0x05, /* Asynchronous Notification */ + SATA_SSP = 0x06, /* Software Settings Preservation */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, diff --git a/include/linux/libata.h b/include/linux/libata.h index 5b2f7491fb26..aa52794d2a03 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -423,6 +423,8 @@ enum { ATA_ACPI_FILTER_SETXFER = 1 << 0, ATA_ACPI_FILTER_LOCK = 1 << 1, ATA_ACPI_FILTER_DIPM = 1 << 2, + ATA_ACPI_FILTER_FPDMA_OFFSET = 1 << 3, /* FPDMA non-zero offset */ + ATA_ACPI_FILTER_FPDMA_AA = 1 << 4, /* FPDMA auto activate */ ATA_ACPI_FILTER_DEFAULT = ATA_ACPI_FILTER_SETXFER | ATA_ACPI_FILTER_LOCK | -- cgit v1.2.3 From 110f66d25c33c2259b1125255fa7063ab07b8340 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2009 04:17:28 +0900 Subject: libata: make gtf_filter per-dev Add ->gtf_filter to ata_device and set it to ata_acpi_gtf_filter when initializing ata_link. This is to allow quirks which apply different gtf filters. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index aa52794d2a03..87698640c091 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -598,6 +598,7 @@ struct ata_device { #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; union acpi_object *gtf_cache; + unsigned int gtf_filter; #endif /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ -- cgit v1.2.3 From acf442dc560437858e6a4c904678052616f8226e Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 5 Oct 2009 21:43:44 -0700 Subject: Input: fix rx51 board keymap The original driver was written with the KEY() macro defined as (col, row) instead of (row, col) as defined by the matrix keypad infrastructure. So the keymap was defined accordingly. Since the driver that was merged upstream uses the matrix keypad infrastructure, modify the keymap accordingly. While we are at it, fix the comments in twl4030.h and define PERSISTENT_KEY as (r,c) instead of (c, r) Tested on a RX51 (N900) device. Signed-off-by: Amit Kucheria Acked-by: Tony Lindgren Signed-off-by: Dmitry Torokhov --- include/linux/i2c/twl4030.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 3fd21d7cb6bf..007572536ea9 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -305,11 +305,11 @@ struct twl4030_madc_platform_data { int irq_line; }; -/* Boards have uniqe mappings of {col, row} --> keycode. - * Column and row are 4 bits, but range only from 0..7. +/* Boards have uniqe mappings of {row, col} --> keycode. + * Column and row are 8 bits each, but range only from 0..7. * a PERSISTENT_KEY is "always on" and never reported. */ -#define PERSISTENT_KEY(c, r) KEY((c), (r), KEY_RESERVED) +#define PERSISTENT_KEY(r, c) KEY((r), (c), KEY_RESERVED) struct twl4030_keypad_data { const struct matrix_keymap_data *keymap_data; -- cgit v1.2.3 From ea2a4d3a3a929ef494952bba57a0ef1a8a877881 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Oct 2009 10:34:13 +0200 Subject: [S390] 64-bit register support for 31-bit processes From: Heiko Carstens From: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index 45a937be6d38..90a4ed0ea0e5 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -361,6 +361,7 @@ typedef struct elf64_shdr { #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_PRXSTATUS 0x300 /* s390 upper register halves */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From e13dbd7d75d1ecc315c6e3071b3c4e8fba4f6bec Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 6 Oct 2009 07:38:51 -0400 Subject: perf_events: Make ABI definitions available to userspace Signed-off-by: Chuck Ebbert LKML-Reference: <200910061138.n96BcqkJ004709@int-mx03.intmail.prod.int.phx2.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a101f266..3f384d4b163a 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -126,6 +126,7 @@ header-y += nfs_mount.h header-y += nl80211.h header-y += param.h header-y += pci_regs.h +header-y += perf_event.h header-y += pfkeyv2.h header-y += pg.h header-y += phantom.h -- cgit v1.2.3 From 906010b2134e14a2e377decbadd357b3d0ab9c6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Sep 2009 16:08:49 +0200 Subject: perf_event: Provide vmalloc() based mmap() backing Some architectures such as Sparc, ARM and MIPS (basically everything with flush_dcache_page()) need to deal with dcache aliases by carefully placing pages in both kernel and user maps. These architectures typically have to use vmalloc_user() for this. However, on other architectures, vmalloc() is not needed and has the downsides of being more restricted and slower than regular allocations. Signed-off-by: Peter Zijlstra Acked-by: David Miller Cc: Andrew Morton Cc: Jens Axboe Cc: Paul Mackerras LKML-Reference: <1254830228.21044.272.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3a9d36d1e92a..2e6d95f97419 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -442,6 +442,7 @@ enum perf_callchain_context { #include #include #include +#include #include #define PERF_MAX_STACK_DEPTH 255 @@ -513,6 +514,10 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; +#ifdef CONFIG_PERF_USE_VMALLOC + struct work_struct work; +#endif + int data_order; int nr_pages; /* nr of data pages */ int writable; /* are we writable */ int nr_locked; /* nr pages mlocked */ -- cgit v1.2.3 From 316d315bffa4026f28085f6b24ebcebede370ac7 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 6 Oct 2009 20:16:55 +0200 Subject: block: Seperate read and write statistics of in_flight requests v2 Commit a9327cac440be4d8333bba975cbbf76045096275 added seperate read and write statistics of in_flight requests. And exported the number of read and write requests in progress seperately through sysfs. But Corrado Zoccolo reported getting strange output from "iostat -kx 2". Global values for service time and utilization were garbage. For interval values, utilization was always 100%, and service time is higher than normal. So this was reverted by commit 0f78ab9899e9d6acb09d5465def618704255963b The problem was in part_round_stats_single(), I missed the following: if (now == part->stamp) return; - if (part->in_flight) { + if (part_in_flight(part)) { __part_stat_add(cpu, part, time_in_queue, part_in_flight(part) * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); With this chunk included, the reported regression gets fixed. Signed-off-by: Nikanth Karthikesan -- Signed-off-by: Jens Axboe --- include/linux/genhd.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7beaa21b3880..297df45ffd0a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight; + int in_flight[2]; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part) +static inline void part_inc_in_flight(struct hd_struct *part, int rw) { - part->in_flight++; + part->in_flight[rw]++; if (part->partno) - part_to_disk(part)->part0.in_flight++; + part_to_disk(part)->part0.in_flight[rw]++; } -static inline void part_dec_in_flight(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part, int rw) { - part->in_flight--; + part->in_flight[rw]--; if (part->partno) - part_to_disk(part)->part0.in_flight--; + part_to_disk(part)->part0.in_flight[rw]--; +} + +static inline int part_in_flight(struct hd_struct *part) +{ + return part->in_flight[0] + part->in_flight[1]; } /* block/blk-core.c */ @@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From f8d1e548931cfa5ea9a082e020c2a47d27e5d793 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 7 Oct 2009 11:13:58 +1100 Subject: futex: Fix typo in FUTEX_WAIT/WAKE_BITSET_PRIVATE definitions Looks like a typo, FUTEX_WAKE_BITS should be FUTEX_WAIT_BITSET. Signed-off-by: Anton Blanchard LKML-Reference: <20091007001358.GE16073@kryten> Signed-off-by: Thomas Gleixner --- include/linux/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdebf..78b92ec9edbd 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -38,8 +38,8 @@ union ktime; #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) -#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) -#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ -- cgit v1.2.3 From 1f56f4a2b4d12c1c348cab23024024396ec7cddc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 6 Oct 2009 09:19:45 -0500 Subject: PCI quirk: TI XIO200a erroneously reports support for fast b2b transfers This quirk will disable fast back to back transfer on the secondary bus segment of the TI Bridge. Signed-off-by: Gabe Black Signed-off-by: Jesse Barnes --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index da1fda8623e0..f490e7a7307a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -776,6 +776,7 @@ #define PCI_DEVICE_ID_TI_X515 0x8036 #define PCI_DEVICE_ID_TI_XX12 0x8039 #define PCI_DEVICE_ID_TI_XX12_FM 0x803b +#define PCI_DEVICE_ID_TI_XIO2000A 0x8231 #define PCI_DEVICE_ID_TI_1130 0xac12 #define PCI_DEVICE_ID_TI_1031 0xac13 #define PCI_DEVICE_ID_TI_1131 0xac15 -- cgit v1.2.3 From e7247a15ff3bbdab0a8b402dffa1171e5c05a8e0 Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Wed, 7 Oct 2009 19:00:35 +0200 Subject: tracing: correct module boundaries for ftrace_release When the module is about the unload we release its call records. The ftrace_release function was given wrong values representing the module core boundaries, thus not releasing its call records. Plus making ftrace_release function module specific. Signed-off-by: Jiri Olsa LKML-Reference: <1254934835-363-3-git-send-email-jolsa@redhat.com> Cc: stable@kernel.org Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index cd3d2abaf30a..0b4f97d24d7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -241,7 +241,7 @@ extern void ftrace_enable_daemon(void); # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; -- cgit v1.2.3 From d308e38fa5467fbb523fc13e4b984375c2198c3d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 7 Oct 2009 13:53:11 -0700 Subject: include/linux/netdevice.h: fix nanodoc mismatch nanodoc was missing an ndo_-prefix. Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94958c109761..812a5f3c2abe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -557,7 +557,7 @@ struct netdev_queue { * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * - * struct net_device_stats* (*get_stats)(struct net_device *dev); + * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. If not defined, the counters in dev->stats will * be used. -- cgit v1.2.3 From a4720c650b68a5fe7faed2edeb0ad12645f7ae63 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 9 Oct 2009 12:43:12 -0400 Subject: USB: serial: don't call release without attach This patch (as1295) fixes a recently-added bug in the USB serial core. If certain kinds of errors occur during probing, the core may call a serial driver's release method without previously calling the attach method. This causes some drivers (io_ti in particular) to perform an invalid memory access. The patch adds a new flag to keep track of whether or not attach has been called. Signed-off-by: Alan Stern Tested-by: Jean-Denis Girard CC: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index c17eb64d7213..ce911ebf91e8 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -150,6 +150,7 @@ struct usb_serial { struct usb_interface *interface; unsigned char disconnected:1; unsigned char suspending:1; + unsigned char attached:1; unsigned char minor; unsigned char num_ports; unsigned char num_port_pointers; -- cgit v1.2.3 From d43c36dc6b357fa1806800f18aa30123c747a6d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 7 Oct 2009 17:09:06 +0400 Subject: headers: remove sched.h from interrupt.h After m68k's task_thread_info() doesn't refer to current, it's possible to remove sched.h from interrupt.h and not break m68k! Many thanks to Heiko Carstens for allowing this. Signed-off-by: Alexey Dobriyan --- include/linux/interrupt.h | 2 +- include/linux/mmc/host.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b78cf8194957..7ca72b74eec7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -610,6 +609,7 @@ extern void debug_poll_all_shared_irqs(void); static inline void debug_poll_all_shared_irqs(void) { } #endif +struct seq_file; int show_interrupts(struct seq_file *p, void *v); struct irq_desc; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 81bb42358595..eaf36364b7d4 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -11,6 +11,7 @@ #define LINUX_MMC_HOST_H #include +#include #include -- cgit v1.2.3 From c01226c3145d173a0d38f9d5b4f229cc23d99ae2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Sep 2009 16:37:12 +0200 Subject: warn about use of uninstalled kernel headers User applications frequently hit problems when they try to use the kernel headers directly, rather than the exported headers. This adds an explicit warning for this case, and points to a URL holding an explanation of why this is wrong and what to do about it. Signed-off-by: Arnd Bergmann Signed-off-by: Sam Ravnborg --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d3cd23f30039..f4e3184fa054 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -659,6 +659,12 @@ extern int do_sysinfo(struct sysinfo *info); #endif /* __KERNEL__ */ +#ifndef __EXPORTED_HEADERS__ +#ifndef __KERNEL__ +#warning Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders +#endif /* __KERNEL__ */ +#endif /* __EXPORTED_HEADERS__ */ + #define SI_LOAD_SHIFT 16 struct sysinfo { long uptime; /* Seconds since boot */ -- cgit v1.2.3 From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:47 +0900 Subject: LSM: Add security_path_chmod() and security_path_chown(). This patch allows pathname based LSM modules to check chmod()/chown() operations. Since notify_change() does not receive "struct vfsmount *", we add security_path_chmod() and security_path_chown() to the caller of notify_change(). These hooks are used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 239e40d0450b..c8a584c26f7b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the path structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1488,6 +1500,9 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); + int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); + int (*path_chown) (struct path *path, uid_t uid, gid_t gid); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); +int security_path_chown(struct path *path, uid_t uid, gid_t gid); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir, { return 0; } + +static inline int security_path_chmod(struct dentry *dentry, + struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS -- cgit v1.2.3 From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:48 +0900 Subject: LSM: Add security_path_chroot(). This patch allows pathname based LSM modules to check chroot() operations. This hook is used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c8a584c26f7b..ed0faea60b82 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @uid contains new owner's ID. * @gid contains new group's ID. * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1503,6 +1507,7 @@ struct security_operations { int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chroot) (struct path *path); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static inline int security_path_chroot(struct path *path) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS -- cgit v1.2.3 From 799e2205ec65e174f752b558c62a92c4752df313 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 12:16:40 +0200 Subject: sched: Disable SD_PREFER_LOCAL for MC/CPU domains Yanmin reported that both tbench and hackbench were significantly hurt by trying to keep tasks local on these domains, esp on small cache machines. So disable it in order to promote spreading outside of the cache domains. Reported-by: "Zhang, Yanmin" Signed-off-by: Peter Zijlstra CC: Mike Galbraith LKML-Reference: <1255083400.8802.15.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index fc0bf3edeb67..57e63579bfdd 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -129,7 +129,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ @@ -162,7 +162,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ -- cgit v1.2.3 From 43046b606673c9c991919ff75b980b72541e9ede Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 Oct 2009 09:16:42 -0700 Subject: workqueue: add 'flush_delayed_work()' to run and wait for delayed work It basically turns a delayed work into an immediate work, and then waits for it to finish. --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7ef0c7b94f31..cf24c20de9e4 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -207,6 +207,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); +extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); -- cgit v1.2.3 From 019129d595caaa5bd0b41d128308da1be6a91869 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Oct 2009 10:15:56 -0700 Subject: rcu: Stopgap fix for synchronize_rcu_expedited() for TREE_PREEMPT_RCU For the short term, map synchronize_rcu_expedited() to synchronize_rcu() for TREE_PREEMPT_RCU and to synchronize_sched_expedited() for TREE_RCU. Longer term, there needs to be a real expedited grace period for TREE_PREEMPT_RCU, but candidate patches to date are considerably more complex and intrusive. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: npiggin@suse.de Cc: jens.axboe@oracle.com LKML-Reference: <12555405592331-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 46e9ab3ee6e1..9642c6bcb399 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -76,11 +76,7 @@ static inline void __rcu_read_unlock_bh(void) extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); - -static inline void synchronize_rcu_expedited(void) -{ - synchronize_sched_expedited(); -} +extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) { -- cgit v1.2.3 From 5deab536654f95345ea11e8ec6ed5c778df348b5 Mon Sep 17 00:00:00 2001 From: Shane Huang Date: Tue, 13 Oct 2009 11:14:00 +0800 Subject: ahci / atiixp / pci quirks: rename AMD SB900 into Hudson-2 This patch renames the code name SB900 into Hudson-2 Signed-off-by: Shane Huang Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f490e7a7307a..86257a412732 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -379,9 +379,6 @@ #define PCI_DEVICE_ID_ATI_IXP600_IDE 0x438c #define PCI_DEVICE_ID_ATI_IXP700_SATA 0x4390 #define PCI_DEVICE_ID_ATI_IXP700_IDE 0x439c -/* AMD SB Chipset */ -#define PCI_DEVICE_ID_AMD_SB900_IDE 0x780c -#define PCI_DEVICE_ID_AMD_SB900_SATA_IDE 0x7800 #define PCI_VENDOR_ID_VLSI 0x1004 #define PCI_DEVICE_ID_VLSI_82C592 0x0005 @@ -553,9 +550,10 @@ #define PCI_DEVICE_ID_AMD_CS5536_UDC 0x2096 #define PCI_DEVICE_ID_AMD_CS5536_UOC 0x2097 #define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A - #define PCI_DEVICE_ID_AMD_LX_VIDEO 0x2081 #define PCI_DEVICE_ID_AMD_LX_AES 0x2082 +#define PCI_DEVICE_ID_AMD_HUDSON2_IDE 0x780c +#define PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE 0x7800 #define PCI_VENDOR_ID_TRIDENT 0x1023 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000 -- cgit v1.2.3 From e95646c3ec33c8ec0693992da4332a6b32eb7e31 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 30 Sep 2009 11:17:21 +0200 Subject: virtio: let header files include virtio_ids.h Rusty, commit 3ca4f5ca73057a617f9444a91022d7127041970a virtio: add virtio IDs file moved all device IDs into a single file. While the change itself is a very good one, it can break userspace applications. For example if a userspace tool wanted to get the ID of virtio_net it used to include virtio_net.h. This does no longer work, since virtio_net.h does not include virtio_ids.h. This patch moves all "#include " from the C files into the header files, making the header files compatible with the old ones. In addition, this patch exports virtio_ids.h to userspace. CC: Fernando Luis Vazquez Cao Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- include/linux/Kbuild | 1 + include/linux/virtio_9p.h | 1 + include/linux/virtio_balloon.h | 1 + include/linux/virtio_blk.h | 1 + include/linux/virtio_console.h | 1 + include/linux/virtio_net.h | 1 + include/linux/virtio_rng.h | 1 + 7 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f384d4b163a..1feed71551c9 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -364,6 +364,7 @@ unifdef-y += utsname.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h +unifdef-y += virtio_ids.h unifdef-y += virtio_blk.h unifdef-y += virtio_net.h unifdef-y += virtio_9p.h diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index ea7226a45acb..095e10d148b4 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* Maximum number of virtio channels per partition (1 for now) */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 09d730085060..1418f048cb34 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* The feature bitmap for virtio balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 15cb666581d7..1e19470d2da2 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -3,6 +3,7 @@ /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ #include +#include #include /* Feature bits */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index b5f519806014..fe885174cc1f 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,7 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include +#include #include /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so * anyone can use the definitions to implement compatible drivers/servers. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1f41734bbb77..085e42298ce5 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,7 @@ /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ #include +#include #include #include diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 48121c3c434b..c4d5de896f0c 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_RNG_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include #endif /* _LINUX_VIRTIO_RNG_H */ -- cgit v1.2.3 From 3225beaba05d4f06087593f5e903ce867b6e118a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Oct 2009 16:39:28 -0600 Subject: virtio_blk: Revert serial number support This reverts "Add serial number support for virtio_blk, V4a". Turns out that virtio_pci, lguest and s/390 all have an 8 bit limit on virtio config space, so noone could ever use this. This is coming back later in a cleaner form. Signed-off-by: Rusty Russell Cc: john cooper Cc: Jens Axboe --- include/linux/virtio_blk.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 1e19470d2da2..fd294c56d571 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -14,11 +14,8 @@ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ -#define VIRTIO_BLK_F_IDENTIFY 8 /* ATA IDENTIFY supported */ #define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */ -#define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ - struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ __u64 capacity; @@ -34,7 +31,6 @@ struct virtio_blk_config { } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ __u32 blk_size; - __u8 identify[VIRTIO_BLK_ID_BYTES]; } __attribute__((packed)); /* -- cgit v1.2.3 From 721a669b7225edeeb0ca8e2bf71b83882326a71b Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Date: Tue, 15 Sep 2009 14:33:08 +0200 Subject: perf events: Fix swevent hrtimer sampling by keeping track of remaining time when enabling/disabling swevent hrtimers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the hrtimer based events work for sysprof. Whenever a swevent is scheduled out, the hrtimer is canceled. When it is scheduled back in, the timer is restarted. This happens every scheduler tick, which means the timer never expired because it was getting repeatedly restarted over and over with the same period. To fix that, save the remaining time when disabling; when reenabling, use that saved time as the period instead of the user-specified sampling period. Also, move the starting and stopping of the hrtimers to helper functions instead of duplicating the code. Signed-off-by: Søren Sandmann Pedersen LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f97419..9e7012689a84 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -471,8 +471,8 @@ struct hw_perf_event { unsigned long event_base; int idx; }; - union { /* software */ - atomic64_t count; + struct { /* software */ + s64 remaining; struct hrtimer hrtimer; }; }; -- cgit v1.2.3 From 6d3f1e12f46a2f9a1bb7e7aa433df8dd31ce5647 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 23 Oct 2009 19:36:19 -0400 Subject: tracing: Remove cpu arg from the rb_time_stamp() function The cpu argument is not used inside the rb_time_stamp() function. Plus fix a typo. Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20091023233647.118547500@goodmis.org> Signed-off-by: Ingo Molnar --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index c134dd1fe6b6..09077f6ed128 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -7,7 +7,7 @@ /* * Trace sequences are used to allow a function to call several other functions - * to create a string of data to use (up to a max of PAGE_SIZE. + * to create a string of data to use (up to a max of PAGE_SIZE). */ struct trace_seq { -- cgit v1.2.3 From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Oct 2009 11:43:32 +0200 Subject: pcmcia: drop already defined PCI_IDs Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in pci_ids.h. Move them and drop the duplicates. Successfully build-tested. Signed-off-by: Wolfram Sang Cc: Jesse Barnes Signed-off-by: Dominik Brodowski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f490e7a7307a..857cc349bf71 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1632,6 +1632,8 @@ #define PCI_DEVICE_ID_O2_6730 0x673a #define PCI_DEVICE_ID_O2_6832 0x6832 #define PCI_DEVICE_ID_O2_6836 0x6836 +#define PCI_DEVICE_ID_O2_6812 0x6872 +#define PCI_DEVICE_ID_O2_6933 0x6933 #define PCI_VENDOR_ID_3DFX 0x121a #define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 -- cgit v1.2.3 From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 26 Oct 2009 09:26:18 -0400 Subject: tpm add default function definitions Add default tpm_pcr_read/extend function definitions required by IMA/Kconfig changes. Signed-off-by: Mimi Zohar Reviewed-by: Eric Paris Signed-off-by: James Morris --- include/linux/tpm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3338b3f5c21a..8eaa8f83effb 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -31,5 +31,12 @@ extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +#else +static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { + return -ENODEV; +} +static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { + return -ENODEV; +} #endif #endif -- cgit v1.2.3 From 65afac7d80ab3bc9f81e75eafb71eeb92a3ebdef Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 08:56:16 -0600 Subject: param: fix lots of bugs with writing charp params from sysfs, by leaking mem. e180a6b7759a "param: fix charp parameters set via sysfs" fixed the case where charp parameters written via sysfs were freed, leaving drivers accessing random memory. Unfortunately, storing a flag in the kparam struct was a bad idea: it's rodata so setting it causes an oops on some archs. But that's not all: 1) module_param_array() on charp doesn't work reliably, since we use an uninitialized temporary struct kernel_param. 2) there's a fundamental race if a module uses this parameter and then it's changed: they will still access the old, freed, memory. The simplest fix (ie. for 2.6.32) is to never free the memory. This prevents all these problems, at cost of a memory leak. In practice, there are only 18 places where a charp is writable via sysfs, and all are root-only writable. Reported-by: Takashi Iwai Cc: Sitsofe Wheeler Cc: Frederic Weisbecker Cc: Christof Schmitt Signed-off-by: Rusty Russell Cc: stable@kernel.org --- include/linux/moduleparam.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 6547c3cdbc4c..82a9124f7d75 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -37,7 +37,6 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp); typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp); /* Flag bits for kernel_param.flags */ -#define KPARAM_KMALLOCED 1 #define KPARAM_ISBOOL 2 struct kernel_param { -- cgit v1.2.3 From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2009 12:26:39 -0700 Subject: tpm: fix header for modular build Fix build for TCG_TPM=m. Header file doesn't handle this and incorrectly builds stubs. drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read' include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend' include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here Repairs linux-next's commit d6ba452128178091dab7a04d54f7e66fdc32fb39 Author: Mimi Zohar Date: Mon Oct 26 09:26:18 2009 -0400 tpm add default function definitions Signed-off-by: Randy Dunlap Cc: Rajiv Andrade Cc: Mimi Zohar Cc: James Morris Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: James Morris --- include/linux/tpm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8eaa8f83effb..ac5d1c1285d9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -27,7 +27,7 @@ */ #define TPM_ANY_NUM 0xFFFF -#if defined(CONFIG_TCG_TPM) +#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); -- cgit v1.2.3 From 2eca40a8ccd4160dbfaa5cbd61038d921d0e5f13 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 26 Oct 2009 16:49:29 -0700 Subject: cpufreq: add cpufreq_get() stub for CONFIG_CPU_FREQ=n When CONFIG_CPU_FREQ is disabled, cpufreq_get() needs a stub. Used by kvm (although it looks like a bit of the kvm code could be omitted when CONFIG_CPU_FREQ is disabled). arch/x86/built-in.o: In function `kvm_arch_init': (.text+0x10de7): undefined reference to `cpufreq_get' (Needed in linux-next's KVM tree, but it's correct in 2.6.32). Signed-off-by: Randy Dunlap Tested-by: Eric Paris Cc: Jiri Slaby Cc: Avi Kivity Cc: Marcelo Tosatti Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpufreq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 44717eb47639..79a2340d83cd 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -291,8 +291,15 @@ struct global_attr { int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_update_policy(unsigned int cpu); +#ifdef CONFIG_CPU_FREQ /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ unsigned int cpufreq_get(unsigned int cpu); +#else +static inline unsigned int cpufreq_get(unsigned int cpu) +{ + return 0; +} +#endif /* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ #ifdef CONFIG_CPU_FREQ -- cgit v1.2.3 From 0a1b71b4008d332e57b5605a8228ea7aa96687e8 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 26 Oct 2009 16:49:37 -0700 Subject: strstrip(): mark as as must_check strstrip() can return a modified value of its input argument, when removing elading whitesapce. So it is surely bug for this function's return value to be ignored. The caller is probably going to use the incorrect original pointer. So mark it __must_check to prevent this frm happening (as it has before). Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 489019ef1694..b8508868d5ad 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -62,7 +62,7 @@ extern char * strnchr(const char *, size_t, int); #ifndef __HAVE_ARCH_STRRCHR extern char * strrchr(const char *,int); #endif -extern char * strstrip(char *); +extern char * __must_check strstrip(char *); #ifndef __HAVE_ARCH_STRSTR extern char * strstr(const char *,const char *); #endif -- cgit v1.2.3 From 1b62cbf2140df510a56d38b9d49df2aae95cd0d2 Mon Sep 17 00:00:00 2001 From: "Krauth.Julien" Date: Mon, 26 Oct 2009 16:50:04 -0700 Subject: serial: add ADDI-DATA GmbH PCI-Express communication cards in 8250_pci.c and pci_ids.h Add support for ADDI-DATA GmbH PCI-Express communication cards: APCIe-7300 APCIe-7420 APCIe-7500 APCIe-7800 Warning: 8250_pci.c depends on pci_ids.h. 8250_pci.c Signed-off-by: Krauth Julien Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 86257a412732..df48628d870b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2160,6 +2160,10 @@ #define PCI_DEVICE_ID_ADDIDATA_APCI7420_3 0x700D #define PCI_DEVICE_ID_ADDIDATA_APCI7300_3 0x700E #define PCI_DEVICE_ID_ADDIDATA_APCI7800_3 0x700F +#define PCI_DEVICE_ID_ADDIDATA_APCIe7300 0x7010 +#define PCI_DEVICE_ID_ADDIDATA_APCIe7420 0x7011 +#define PCI_DEVICE_ID_ADDIDATA_APCIe7500 0x7012 +#define PCI_DEVICE_ID_ADDIDATA_APCIe7800 0x7013 #define PCI_VENDOR_ID_PDC 0x15e9 -- cgit v1.2.3 From c68d2b1594548cda7f6dbac6a4d9d30a9b01558c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 26 Oct 2009 16:50:05 -0700 Subject: 8250_pci: add IBM Saturn serial card The IBM Saturn serial card has only one port. Without that fixup, the kernel thinks it has two, which confuses userland setup and admin tools as well. [akpm@linux-foundation.org: fix pci-ids.h layout] Signed-off-by: Benjamin Herrenschmidt Acked-by: Alan Cox Cc: Michael Reed Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index df48628d870b..b0f0f3851cd4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -482,6 +482,9 @@ #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 +#define PCI_SUBVENDOR_ID_IBM 0x1014 +#define PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT 0x03d4 + #define PCI_VENDOR_ID_UNISYS 0x1018 #define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C -- cgit v1.2.3 From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 29 Oct 2009 11:40:17 -0500 Subject: define convenient securebits masks for prctl users (v2) Hi James, would you mind taking the following into security-testing? The securebits are used by passing them to prctl with the PR_{S,G}ET_SECUREBITS commands. But the defines must be shifted to be used in prctl, which begs to be confused and misused by userspace. So define some more convenient values for userspace to specify. This way userspace does prctl(PR_SET_SECUREBITS, SECBIT_NOROOT); instead of prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT); (Thanks to Michael for the idea) This patch also adds include/linux/securebits to the installed headers. Then perhaps it can be included by glibc's sys/prctl.h. Changelog: Oct 29: Stephen Rothwell points out that issecure can be under __KERNEL__. Oct 14: (Suggestions by Michael Kerrisk): 1. spell out SETUID in SECBIT_NO_SETUID* 2. SECBIT_X_LOCKED does not imply SECBIT_X 3. add definitions for keepcaps Oct 14: As suggested by Michael Kerrisk, don't use SB_* as that convention is already in use. Use SECBIT_ prefix instead. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Acked-by: Michael Kerrisk Cc: Ulrich Drepper Cc: James Morris Signed-off-by: James Morris --- include/linux/Kbuild | 1 + include/linux/securebits.h | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a101f266..ffcdb9b509db 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -329,6 +329,7 @@ unifdef-y += scc.h unifdef-y += sched.h unifdef-y += screen_info.h unifdef-y += sdla.h +unifdef-y += securebits.h unifdef-y += selinux_netlink.h unifdef-y += sem.h unifdef-y += serial_core.h diff --git a/include/linux/securebits.h b/include/linux/securebits.h index d2c5ed845bcc..33406174cbe8 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -1,6 +1,15 @@ #ifndef _LINUX_SECUREBITS_H #define _LINUX_SECUREBITS_H 1 +/* Each securesetting is implemented using two bits. One bit specifies + whether the setting is on or off. The other bit specify whether the + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#ifdef __KERNEL__ +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#endif + #define SECUREBITS_DEFAULT 0x00000000 /* When set UID 0 has no special privileges. When unset, we support @@ -12,6 +21,9 @@ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ +#define SECBIT_NOROOT (issecure_mask(SECURE_NOROOT)) +#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED)) + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". When unset, to provide compatiblility with old programs relying on set*uid to gain/lose privilege, transitions to/from uid 0 cause @@ -19,6 +31,10 @@ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ +#define SECBIT_NO_SETUID_FIXUP (issecure_mask(SECURE_NO_SETUID_FIXUP)) +#define SECBIT_NO_SETUID_FIXUP_LOCKED \ + (issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)) + /* When set, a process can retain its capabilities even after transitioning to a non-root user (the set-uid fixup suppressed by bit 2). Bit-4 is cleared when a process calls exec(); setting both @@ -27,12 +43,8 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specifies - whether the setting is on or off. The other bit specify whether the - setting is locked or not. A setting which is locked cannot be - changed from user-level. */ -#define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) +#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v1.2.3 From 14d18a81b5171d4433e41129619c75748b4f4d26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Oct 2009 00:10:37 +0000 Subject: net: fix kmemcheck annotations struct sk_buff kmemcheck annotations enlarged this structure by 8/16 bytes Fix this by moving 'protocol' inside flags1 bitfield, and queue_mapping inside flags2 bitfield. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index df7b23ac66e6..6aebfceca3eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -354,8 +354,8 @@ struct sk_buff { ipvs_property:1, peeked:1, nf_trace:1; + __be16 protocol:16; kmemcheck_bitfield_end(flags1); - __be16 protocol; void (*destructor)(struct sk_buff *skb); #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -367,7 +367,6 @@ struct sk_buff { #endif int iif; - __u16 queue_mapping; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT @@ -376,6 +375,7 @@ struct sk_buff { #endif kmemcheck_bitfield_begin(flags2); + __u16 queue_mapping:16; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif -- cgit v1.2.3 From 9d410c796067686b1e032d54ce475b7055537138 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 30 Oct 2009 05:03:53 +0000 Subject: net: fix sk_forward_alloc corruption On UDP sockets, we must call skb_free_datagram() with socket locked, or risk sk_forward_alloc corruption. This requirement is not respected in SUNRPC. Add a convenient helper, skb_free_datagram_locked() and use it in SUNRPC Reported-by: Francis Moreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6aebfceca3eb..bcdd6606f468 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1757,6 +1757,8 @@ extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, int to_offset, int size); extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +extern void skb_free_datagram_locked(struct sock *sk, + struct sk_buff *skb); extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); extern __wsum skb_checksum(const struct sk_buff *skb, int offset, -- cgit v1.2.3 From 1a6f2a7512021ceae3c4201c7aab07f032e9ce91 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 12 Oct 2009 20:17:41 -0700 Subject: Driver core: allow certain drivers prohibit bind/unbind via sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform drivers registered via platform_driver_probe() can be bound to devices only once, upon registration, because discard their probe() routines to save memory. Unbinding the driver through sysfs 'unbind' leaves the device stranded and confuses users so let's not create bind and unbind attributes for such drivers. Signed-off-by: Dmitry Torokhov Cc: Éric Piel Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index aca31bf7d8ed..2ea3e4921812 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -124,7 +124,9 @@ struct device_driver { struct bus_type *bus; struct module *owner; - const char *mod_name; /* used for built-in modules */ + const char *mod_name; /* used for built-in modules */ + + bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ int (*probe) (struct device *dev); int (*remove) (struct device *dev); -- cgit v1.2.3 From 3806e94b0148350c72f9a3214274026b6ca03f49 Mon Sep 17 00:00:00 2001 From: Crane Cai Date: Sat, 7 Nov 2009 13:10:46 +0100 Subject: i2c-piix4: Modify code name SB900 to Hudson-2 Change SB900 to its formal code name Hudson-2. Signed-off-by: Crane Cai Signed-off-by: Jean Delvare --- include/linux/pci_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b0f0f3851cd4..9ceb392cb984 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -543,7 +543,7 @@ #define PCI_DEVICE_ID_AMD_8131_BRIDGE 0x7450 #define PCI_DEVICE_ID_AMD_8131_APIC 0x7451 #define PCI_DEVICE_ID_AMD_8132_BRIDGE 0x7458 -#define PCI_DEVICE_ID_AMD_SB900_SMBUS 0x780b +#define PCI_DEVICE_ID_AMD_HUDSON2_SMBUS 0x780b #define PCI_DEVICE_ID_AMD_CS5535_IDE 0x208F #define PCI_DEVICE_ID_AMD_CS5536_ISA 0x2090 #define PCI_DEVICE_ID_AMD_CS5536_FLASH 0x2091 -- cgit v1.2.3 From afa08974fe80c198b8650f73ed8ab59135ca10d0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 7 Nov 2009 13:10:46 +0100 Subject: i2c: Add an interface to lock/unlock an I2C bus segment Some drivers need to be able to prevent access to an I2C bus segment for a specific period of time. Add an interface for them to do so without twiddling with i2c-core internals. Signed-off-by: Jean Delvare Acked-by: Ben Hutchings --- include/linux/i2c.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 57d41b0abce2..7b40cda57a70 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -361,6 +361,24 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data) dev_set_drvdata(&dev->dev, data); } +/** + * i2c_lock_adapter - Prevent access to an I2C bus segment + * @adapter: Target I2C bus segment + */ +static inline void i2c_lock_adapter(struct i2c_adapter *adapter) +{ + mutex_lock(&adapter->bus_lock); +} + +/** + * i2c_unlock_adapter - Reauthorize access to an I2C bus segment + * @adapter: Target I2C bus segment + */ +static inline void i2c_unlock_adapter(struct i2c_adapter *adapter) +{ + mutex_unlock(&adapter->bus_lock); +} + /*flags for the client struct: */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ -- cgit v1.2.3 From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Nov 2009 16:35:32 +1100 Subject: security: report the module name to security_module_request For SELinux to do better filtering in userspace we send the name of the module along with the AVC denial when a program is denied module_request. Example output: type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null) type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc: denied { module_request } for pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 18 ++++++++++-------- include/linux/security.h | 7 ++++--- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 190c37854870..f78f83d7663f 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,14 +26,15 @@ /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { - char type; -#define LSM_AUDIT_DATA_FS 1 -#define LSM_AUDIT_DATA_NET 2 -#define LSM_AUDIT_DATA_CAP 3 -#define LSM_AUDIT_DATA_IPC 4 -#define LSM_AUDIT_DATA_TASK 5 -#define LSM_AUDIT_DATA_KEY 6 -#define LSM_AUDIT_NO_AUDIT 7 + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 +#define LSM_AUDIT_DATA_KMOD 8 struct task_struct *tsk; union { struct { @@ -66,6 +67,7 @@ struct common_audit_data { char *key_desc; } key_struct; #endif + char *kmod_name; } u; /* this union contains LSM specific data */ union { diff --git a/include/linux/security.h b/include/linux/security.h index ed0faea60b82..466cbadbd1ef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity @@ -1577,7 +1578,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(void); + int (*kernel_module_request)(char *kmod_name); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_module_request(void); +int security_kernel_module_request(char *kmod_name); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_module_request(void) +static inline int security_kernel_module_request(char *kmod_name) { return 0; } -- cgit v1.2.3 From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:16 +0900 Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook This changes detect_intel_iommu() to set intel_iommu_init() to iommu_init hook if detect_intel_iommu() finds the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_DMAR case ] Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256a..5de4c9e5856d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ -- cgit v1.2.3 From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:17 +0900 Subject: bootmem: Add free_bootmem_late() Add a new function for freeing bootmem after the bootmem allocator has been released and the unreserved pages given to the page allocator. This allows us to reserve bootmem and then release it if we later discover it was not needed. ( This new API will be used by the swiotlb code to recover a significant amount of RAM (64MB). ) Signed-off-by: FUJITA Tomonori Acked-by: Pekka Enberg Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: hannes@cmpxchg.org Cc: tj@kernel.org Cc: akpm@linux-foundation.org Cc: Linus Torvalds LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index dd97fb8408a8..b10ec49ee2dd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); extern void free_bootmem(unsigned long addr, unsigned long size); +extern void free_bootmem_late(unsigned long addr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, -- cgit v1.2.3 From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:18 +0900 Subject: swiotlb: Add swiotlb_free() function swiotlb_free() function frees all allocated memory for swiotlb. We need to initialize swiotlb before IOMMU initialization (x86 and powerpc needs to allocate memory from bootmem allocator). If IOMMU initialization is successful, we need to free swiotlb resource (don't want to waste 64MB). Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_SWIOTLB case ] Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 73b1f1cec423..59bafa690290 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +#ifdef CONFIG_SWIOTLB +extern void __init swiotlb_free(void); +#else +static inline void swiotlb_free(void) { } +#endif + #endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.3 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59bafa690290..eb9bdb4d4854 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -20,8 +20,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -94,4 +93,5 @@ extern void __init swiotlb_free(void); static inline void swiotlb_free(void) { } #endif +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ -- cgit v1.2.3 From bf3204cbff7d2606e758afb0994e8da6ae1c6c26 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Nov 2009 21:39:07 -0800 Subject: Input: fix locking in memoryless force-feedback devices Now that input core acquires dev->event_lock spinlock and disables interrupts when propagating input events, using spin_lock_bh() in ff-memless driver is not allowed. Actually, the timer_lock itself is not needed anymore, we should simply use dev->event_lock as well. Also do a small cleanup in force-feedback core. Reported-by: kerneloops.org Reported-by: http://www.kerneloops.org/searchweek.php?search=ml_ff_set_gain Reported-by: Arjan van de Ven Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 0ccfc30cd40f..c2b1a7d244d9 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1377,6 +1377,10 @@ extern struct class input_class; * methods; erase() is optional. set_gain() and set_autocenter() need * only be implemented if driver sets up FF_GAIN and FF_AUTOCENTER * bits. + * + * Note that playback(), set_gain() and set_autocenter() are called with + * dev->event_lock spinlock held and interrupts off and thus may not + * sleep. */ struct ff_device { int (*upload)(struct input_dev *dev, struct ff_effect *effect, -- cgit v1.2.3 From fe8bc91c4c30122b357d197117705cfd4fabaf28 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 16 Oct 2009 19:26:15 +0200 Subject: ext3: Wait for proper transaction commit on fsync We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync. Signed-off-by: Jan Kara Reviewed-by: Aneesh Kumar K.V --- include/linux/ext3_fs_i.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index ca1bfe90004f..93e7428156ba 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -137,6 +137,14 @@ struct ext3_inode_info { * by other means, so we have truncate_mutex. */ struct mutex truncate_mutex; + + /* + * Transactions that contain inode's metadata needed to complete + * fsync and fdatasync, respectively. + */ + atomic_t i_sync_tid; + atomic_t i_datasync_tid; + struct inode vfs_inode; }; -- cgit v1.2.3 From f5c15d0b37ab1cd3969b8ce7828ab41c79f36f77 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 11 Nov 2009 14:26:22 -0800 Subject: fb: remove fb_save_state() and fb_restore_state operations Remove fb_save_state() and fb_restore_state operations from frame buffer layer. They are used only in two drivers: 1. savagefb - and cause bug #11248 2. uvesafb Usage of these operations is misunderstood in both drivers so kill these operations, fix the bug #11248 and avoid confusion in the future. Tested on Savage 3D/MV card and the patch fixes the bug #11248. The frame buffer layer uses these funtions during switch between graphics and text mode of the console, but these drivers saves state before switching of the frame buffer (in the fb_open) and after releasing it (in the fb_release). This defeats the purpose of these operations. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=11248 Signed-off-by: Krzysztof Helt Reported-by: Jochen Hein Tested-by: Jochen Hein Cc: Geert Uytterhoeven Cc: Michal Januszewski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index a34bdf5a9d23..de9c722e7b90 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -669,12 +669,6 @@ struct fb_ops { /* perform fb specific mmap */ int (*fb_mmap)(struct fb_info *info, struct vm_area_struct *vma); - /* save current hardware state */ - void (*fb_save_state)(struct fb_info *info); - - /* restore saved state */ - void (*fb_restore_state)(struct fb_info *info); - /* get capability given var */ void (*fb_get_caps)(struct fb_info *info, struct fb_blit_caps *caps, struct fb_var_screeninfo *var); -- cgit v1.2.3 From b87e5e2b8ed9336566100c8c796ab6dd52436881 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 11 Nov 2009 14:26:42 -0800 Subject: serial: add support for the Lava Quattro PCI quad-port 16550A card This seems to be a different model (with a different PCI ID) than the "Quatro" card that is also in the list. Signed-off-by: Lennert Buytenhek Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9ceb392cb984..84cf1f3b7838 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1956,6 +1956,8 @@ #define PCI_DEVICE_ID_LAVA_DSERIAL 0x0100 /* 2x 16550 */ #define PCI_DEVICE_ID_LAVA_QUATRO_A 0x0101 /* 2x 16550, half of 4 port */ #define PCI_DEVICE_ID_LAVA_QUATRO_B 0x0102 /* 2x 16550, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_QUATTRO_A 0x0120 /* 2x 16550A, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_QUATTRO_B 0x0121 /* 2x 16550A, half of 4 port */ #define PCI_DEVICE_ID_LAVA_OCTO_A 0x0180 /* 4x 16550A, half of 8 port */ #define PCI_DEVICE_ID_LAVA_OCTO_B 0x0181 /* 4x 16550A, half of 8 port */ #define PCI_DEVICE_ID_LAVA_PORT_PLUS 0x0200 /* 2x 16650 */ -- cgit v1.2.3 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eb9bdb4d4854..febedcf67c7e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? -- cgit v1.2.3 From 1a7af63108f07637504300671a72432c34e10021 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Sun, 15 Nov 2009 13:49:44 +0900 Subject: nilfs2: deleted struct nilfs_dat_group_desc struct nilfs_dat_group_desc is not used both in kernel and user spaces. struct nilfs_palloc_group_desc is used instead. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi --- include/linux/nilfs2_fs.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 79fec6af3f9f..ce520402e840 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -424,15 +424,6 @@ struct nilfs_dat_entry { __le64 de_rsv; }; -/** - * struct nilfs_dat_group_desc - block group descriptor - * @dg_nfrees: number of free virtual block numbers in block group - */ -struct nilfs_dat_group_desc { - __le32 dg_nfrees; -}; - - /** * struct nilfs_snapshot_list - snapshot list * @ssl_next: next checkpoint number on snapshot list -- cgit v1.2.3 From e29d4363174949a7a4e46f670993d7ff43342c1c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 15 Nov 2009 22:23:47 -0800 Subject: Revert "isdn: isdn_ppp: Use SKB list facilities instead of home-grown implementation." This reverts commit 38783e671399b5405f1fd177d602c400a9577ae6. It causes kernel bugzilla #14594 Signed-off-by: David S. Miller --- include/linux/isdn_ppp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 4c218ee7587a..8687a7dc0632 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -157,7 +157,7 @@ typedef struct { typedef struct { int mp_mrru; /* unused */ - struct sk_buff_head frags; /* fragments sl list */ + struct sk_buff * frags; /* fragments sl list -- use skb->next */ long frames; /* number of frames in the frame list */ unsigned int seq; /* last processed packet seq #: any packets * with smaller seq # will be dropped -- cgit v1.2.3 From 6ad696d2cf535772dff659298ec7e7260e344595 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 17 Nov 2009 14:06:22 -0800 Subject: mm: allow memory hotplug and hibernation in the same kernel Allow memory hotplug and hibernation in the same kernel Memory hotplug and hibernation were exclusive in Kconfig. This is obviously a problem for distribution kernels who want to support both in the same image. After some discussions with Rafael and others the only problem is with parallel memory hotadd or removal while a hibernation operation is in process. It was also working for s390 before. This patch removes the Kconfig level exclusion, and simply makes the memory add / remove functions grab the pm_mutex to exclude against hibernation. Fixes a regression - old kernels didn't exclude memory hotadd and hibernation. Signed-off-by: Andi Kleen Cc: Gerald Schaefer Cc: KOSAKI Motohiro Cc: Yasunori Goto Acked-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cd15df6c63cd..5e781d824e6d 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -301,6 +301,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) #endif /* !CONFIG_PM_SLEEP */ +extern struct mutex pm_mutex; + #ifndef CONFIG_HIBERNATION static inline void register_nosave_region(unsigned long b, unsigned long e) { @@ -308,8 +310,23 @@ static inline void register_nosave_region(unsigned long b, unsigned long e) static inline void register_nosave_region_late(unsigned long b, unsigned long e) { } -#endif -extern struct mutex pm_mutex; +static inline void lock_system_sleep(void) {} +static inline void unlock_system_sleep(void) {} + +#else + +/* Let some subsystems like memory hotadd exclude hibernation */ + +static inline void lock_system_sleep(void) +{ + mutex_lock(&pm_mutex); +} + +static inline void unlock_system_sleep(void) +{ + mutex_unlock(&pm_mutex); +} +#endif #endif /* _LINUX_SUSPEND_H */ -- cgit v1.2.3 From 3d7a641e544e428191667e8b1f83f96fa46dbd65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:23 +0000 Subject: SLOW_WORK: Wait for outstanding work items belonging to a module to clear Wait for outstanding slow work items belonging to a module to clear when unregistering that module as a user of the facility. This prevents the put_ref code of a work item from being taken away before it returns. Signed-off-by: David Howells --- include/linux/slow-work.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b65c8881f07a..9adb2b30754f 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -24,6 +24,9 @@ struct slow_work; * The operations used to support slow work items */ struct slow_work_ops { + /* owner */ + struct module *owner; + /* get a ref on a work item * - return 0 if successful, -ve if not */ @@ -42,6 +45,7 @@ struct slow_work_ops { * queued */ struct slow_work { + struct module *owner; /* the owning module */ unsigned long flags; #define SLOW_WORK_PENDING 0 /* item pending (further) execution */ #define SLOW_WORK_EXECUTING 1 /* item currently executing */ @@ -84,8 +88,8 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); -extern int slow_work_register_user(void); -extern void slow_work_unregister_user(void); +extern int slow_work_register_user(struct module *owner); +extern void slow_work_unregister_user(struct module *owner); #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; -- cgit v1.2.3 From 0160950297c08f8233c89b9f9e7dd59cfb080809 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:43 +0000 Subject: SLOW_WORK: Add support for cancellation of slow work Add support for cancellation of queued slow work and delayed slow work items. The cancellation functions will wait for items that are pending or undergoing execution to be discarded by the slow work facility. Attempting to enqueue work that is in the process of being cancelled will result in ECANCELED. Signed-off-by: Jens Axboe Signed-off-by: David Howells --- include/linux/slow-work.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 9adb2b30754f..eef20182d5b4 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -51,6 +51,7 @@ struct slow_work { #define SLOW_WORK_EXECUTING 1 /* item currently executing */ #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ +#define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; @@ -88,6 +89,7 @@ static inline void vslow_work_init(struct slow_work *work, } extern int slow_work_enqueue(struct slow_work *work); +extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); -- cgit v1.2.3 From 6b8268b17a1ffc942bc72d7d00274e433d6b6719 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Nov 2009 18:10:47 +0000 Subject: SLOW_WORK: Add delayed_slow_work support This adds support for starting slow work with a delay, similar to the functionality we have for workqueues. Signed-off-by: Jens Axboe Signed-off-by: David Howells --- include/linux/slow-work.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index eef20182d5b4..b245b9a9cc0b 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -17,6 +17,7 @@ #ifdef CONFIG_SLOW_WORK #include +#include struct slow_work; @@ -52,10 +53,16 @@ struct slow_work { #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ #define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ +#define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ }; +struct delayed_slow_work { + struct slow_work work; + struct timer_list timer; +}; + /** * slow_work_init - Initialise a slow work item * @work: The work item to initialise @@ -71,6 +78,20 @@ static inline void slow_work_init(struct slow_work *work, INIT_LIST_HEAD(&work->link); } +/** + * slow_work_init - Initialise a delayed slow work item + * @work: The work item to initialise + * @ops: The operations to use to handle the slow work item + * + * Initialise a delayed slow work item. + */ +static inline void delayed_slow_work_init(struct delayed_slow_work *dwork, + const struct slow_work_ops *ops) +{ + init_timer(&dwork->timer); + slow_work_init(&dwork->work, ops); +} + /** * vslow_work_init - Initialise a very slow work item * @work: The work item to initialise @@ -93,6 +114,14 @@ extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); extern void slow_work_unregister_user(struct module *owner); +extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, + unsigned long delay); + +static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) +{ + slow_work_cancel(&dwork->work); +} + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif -- cgit v1.2.3 From 8fba10a42d191de612e60e7009c8f0313f90a9b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:51 +0000 Subject: SLOW_WORK: Allow the work items to be viewed through a /proc file Allow the executing and queued work items to be viewed through a /proc file for debugging purposes. The contents look something like the following: THR PID ITEM ADDR FL MARK DESC === ===== ================ == ===== ========== 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK In the 'THR' column, executing items show the thread they're occupying and queued threads indicate which queue they're on. 'PID' shows the process ID of a slow-work thread that's executing something. 'FL' shows the work item flags. 'MARK' indicates how long since an item was queued or began executing. Lastly, the 'DESC' column permits the owner of an item to give some information. Signed-off-by: David Howells --- include/linux/slow-work.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index b245b9a9cc0b..f41485145ed1 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,6 +20,9 @@ #include struct slow_work; +#ifdef CONFIG_SLOW_WORK_PROC +struct seq_file; +#endif /* * The operations used to support slow work items @@ -38,6 +41,11 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); + +#ifdef CONFIG_SLOW_WORK_PROC + /* describe a work item for /proc */ + void (*desc)(struct slow_work *work, struct seq_file *m); +#endif }; /* @@ -56,6 +64,9 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ +#ifdef CONFIG_SLOW_WORK_PROC + struct timespec mark; /* jiffies at which queued or exec begun */ +#endif }; struct delayed_slow_work { -- cgit v1.2.3 From 31ba99d304494cb28fa8671ccc769c5543e1165d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:53 +0000 Subject: SLOW_WORK: Allow the owner of a work item to determine if it is queued or not Add a function (slow_work_is_queued()) to permit the owner of a work item to determine if the item is queued or not. The work item is counted as being queued if it is actually on the queue, not just if it is pending. If it is executing and pending, then it is not on the queue, but will rather be put back on the queue when execution finishes. This permits a caller to quickly work out if it may be able to put another, dependent work item on the queue behind it, or whether it will have to wait till that is finished. This can be used by CacheFiles to work out whether the creation a new object can be immediately deferred when it has to wait for an old object to be deleted, or whether a wait must take place. If a wait is necessary, then the slow-work thread can otherwise get blocked, preventing the deletion from taking place. Signed-off-by: David Howells --- include/linux/slow-work.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index f41485145ed1..bfd3ab4c8898 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -120,6 +120,25 @@ static inline void vslow_work_init(struct slow_work *work, INIT_LIST_HEAD(&work->link); } +/** + * slow_work_is_queued - Determine if a slow work item is on the work queue + * work: The work item to test + * + * Determine if the specified slow-work item is on the work queue. This + * returns true if it is actually on the queue. + * + * If the item is executing and has been marked for requeue when execution + * finishes, then false will be returned. + * + * Anyone wishing to wait for completion of execution can wait on the + * SLOW_WORK_EXECUTING bit. + */ +static inline bool slow_work_is_queued(struct slow_work *work) +{ + unsigned long flags = work->flags; + return flags & SLOW_WORK_PENDING && !(flags & SLOW_WORK_EXECUTING); +} + extern int slow_work_enqueue(struct slow_work *work); extern void slow_work_cancel(struct slow_work *work); extern int slow_work_register_user(struct module *owner); -- cgit v1.2.3 From 3bde31a4ac225cb5805be02eff6eaaf7e0766ccd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:10:57 +0000 Subject: SLOW_WORK: Allow a requeueable work item to sleep till the thread is needed Add a function to allow a requeueable work item to sleep till the thread processing it is needed by the slow-work facility to perform other work. Sometimes a work item can't progress immediately, but must wait for the completion of another work item that's currently being processed by another slow-work thread. In some circumstances, the waiting item could instead - theoretically - put itself back on the queue and yield its thread back to the slow-work facility, thus waiting till it gets processing time again before attempting to progress. This would allow other work items processing time on that thread. However, this only works if there is something on the queue for it to queue behind - otherwise it will just get a thread again immediately, and will end up cycling between the queue and the thread, eating up valuable CPU time. So, slow_work_sleep_till_thread_needed() is provided such that an item can put itself on a wait queue that will wake it up when the event it is actually interested in occurs, then call this function in lieu of calling schedule(). This function will then sleep until either the item's event occurs or another work item appears on the queue. If another work item is queued, but the item's event hasn't occurred, then the work item should requeue itself and yield the thread back to the slow-work facility by returning. This can be used by CacheFiles for an object that is being created on one thread to wait for an object being deleted on another thread where there is nothing on the queue for the creation to go and wait behind. As soon as an item appears on the queue that could be given thread time instead, CacheFiles can stick the creating object back on the queue and return to the slow-work facility - assuming the object deletion didn't also complete. Signed-off-by: David Howells --- include/linux/slow-work.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index bfd3ab4c8898..5035a2691739 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -152,6 +152,9 @@ static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) slow_work_cancel(&dwork->work); } +extern bool slow_work_sleep_till_thread_needed(struct slow_work *work, + signed long *_timeout); + #ifdef CONFIG_SYSCTL extern ctl_table slow_work_sysctls[]; #endif -- cgit v1.2.3 From 440f0affe247e9990c8f8778f1861da4fd7d5e50 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:01 +0000 Subject: FS-Cache: Annotate slow-work runqueue proc lines for FS-Cache work items Annotate slow-work runqueue proc lines for FS-Cache work items. Objects include the object ID and the state. Operations include the object ID, the operation ID and the operation type and state. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 84d3532dd3ea..7a9847ccd192 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -102,6 +102,16 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; + +#ifdef CONFIG_SLOW_WORK_PROC + const char *name; /* operation name */ + const char *state; /* operation state */ +#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) +#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0) +#else +#define fscache_set_op_name(OP, N) do { } while(0) +#define fscache_set_op_state(OP, S) do { } while(0) +#endif }; extern atomic_t fscache_op_debug_id; @@ -125,6 +135,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->release = release; INIT_LIST_HEAD(&op->pend_link); + fscache_set_op_state(op, "Init"); } /** @@ -337,6 +348,7 @@ struct fscache_object { FSCACHE_OBJECT_RECYCLING, /* retiring object */ FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ FSCACHE_OBJECT_DEAD, /* object is now dead */ + FSCACHE_OBJECT__NSTATES } state; int debug_id; /* debugging ID */ -- cgit v1.2.3 From 4fbf4291aa15926cd4fdca0ffe9122e89d0459db Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:04 +0000 Subject: FS-Cache: Allow the current state of all objects to be dumped Allow the current state of all fscache objects to be dumped by doing: cat /proc/fs/fscache/objects By default, all objects and all fields will be shown. This can be restricted by adding a suitable key to one of the caller's keyrings (such as the session keyring): keyctl add user fscache:objlist "" @s The are: K Show hexdump of object key (don't show if not given) A Show hexdump of object aux data (don't show if not given) And paired restrictions: C Show objects that have a cookie c Show objects that don't have a cookie B Show objects that are busy b Show objects that aren't busy W Show objects that have pending writes w Show objects that don't have pending writes R Show objects that have outstanding reads r Show objects that don't have outstanding reads S Show objects that have slow work queued s Show objects that don't have slow work queued If neither side of a restriction pair is given, then both are implied. For example: keyctl add user fscache:objlist KB @s shows objects that are busy, and lists their object keys, but does not dump their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is not implied. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 7a9847ccd192..184cbdfbcc99 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -91,6 +91,8 @@ struct fscache_operation { #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ #define FSCACHE_OP_DEAD 6 /* op is now dead */ +#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -357,6 +359,7 @@ struct fscache_object { int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ int n_exclusive; /* number of exclusive ops queued */ + atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ unsigned long lookup_jif; /* time at which lookup started */ @@ -370,6 +373,7 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ +#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ @@ -385,6 +389,9 @@ struct fscache_object { struct list_head dependents; /* FIFO of dependent objects */ struct list_head dep_link; /* link in parent's dependents list */ struct list_head pending_ops; /* unstarted operations on this object */ +#ifdef CONFIG_FSCACHE_OBJECT_LIST + struct rb_node objlist_link; /* link in global object list */ +#endif pgoff_t store_limit; /* current storage limit */ }; @@ -434,6 +441,12 @@ void fscache_object_init(struct fscache_object *object, extern void fscache_object_lookup_negative(struct fscache_object *object); extern void fscache_obtained_object(struct fscache_object *object); +#ifdef CONFIG_FSCACHE_OBJECT_LIST +extern void fscache_object_destroy(struct fscache_object *object); +#else +#define fscache_object_destroy(object) do {} while(0) +#endif + /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came -- cgit v1.2.3 From 1bccf513ac49d44604ba1cddcc29f5886e70f1b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:25 +0000 Subject: FS-Cache: Fix lock misorder in fscache_write_op() FS-Cache has two structs internally for keeping track of the internal state of a cached file: the fscache_cookie struct, which represents the netfs's state, and fscache_object struct, which represents the cache's state. Each has a pointer that points to the other (when both are in existence), and each has a spinlock for pointer maintenance. Since netfs operations approach these structures from the cookie side, they get the cookie lock first, then the object lock. Cache operations, on the other hand, approach from the object side, and get the object lock first. It is not then permitted for a cache operation to get the cookie lock whilst it is holding the object lock lest deadlock occur; instead, it must do one of two things: (1) increment the cookie usage counter, drop the object lock and then get both locks in order, or (2) simply hold the object lock as certain parts of the cookie may not be altered whilst the object lock is held. It is also not permitted to follow either pointer without holding the lock at the end you start with. To break the pointers between the cookie and the object, both locks must be held. fscache_write_op(), however, violates the locking rules: It attempts to get the cookie lock without (a) checking that the cookie pointer is a valid pointer, and (b) holding the object lock to protect the cookie pointer whilst it follows it. This is so that it can access the pending page store tree without interference from __fscache_write_page(). This is fixed by splitting the cookie lock, such that the page store tracking tree is protected by its own lock, and checking that the cookie pointer is non-NULL before we attempt to follow it whilst holding the object lock. The new lock is subordinate to both the cookie lock and the object lock, and so should be taken after those. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 184cbdfbcc99..f3aa4bdafef6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -310,6 +310,7 @@ struct fscache_cookie { atomic_t usage; /* number of users of this cookie */ atomic_t n_children; /* number of children of this cookie */ spinlock_t lock; + spinlock_t stores_lock; /* lock on page store tree */ struct hlist_head backing_objects; /* object(s) backing this file/index */ const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ -- cgit v1.2.3 From 201a15428bd54f83eccec8b7c64a04b8f9431204 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:35 +0000 Subject: FS-Cache: Handle pages pending storage that get evicted under OOM conditions Handle netfs pages that the vmscan algorithm wants to evict from the pagecache under OOM conditions, but that are waiting for write to the cache. Under these conditions, vmscan calls the releasepage() function of the netfs, asking if a page can be discarded. The problem is typified by the following trace of a stuck process: kslowd005 D 0000000000000000 0 4253 2 0x00000080 ffff88001b14f370 0000000000000046 ffff880020d0d000 0000000000000007 0000000000000006 0000000000000001 ffff88001b14ffd8 ffff880020d0d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff880020d0d2a8 Call Trace: [] __fscache_wait_on_page_write+0x8b/0xa7 [fscache] [] ? autoremove_wake_function+0x0/0x34 [] ? __fscache_check_page_write+0x63/0x70 [fscache] [] nfs_fscache_release_page+0x4e/0xc4 [nfs] [] nfs_release_page+0x3c/0x41 [nfs] [] try_to_release_page+0x32/0x3b [] shrink_page_list+0x316/0x4ac [] shrink_inactive_list+0x392/0x67c [] ? __mutex_unlock_slowpath+0x100/0x10b [] ? trace_hardirqs_on_caller+0x10c/0x130 [] ? mutex_unlock+0x9/0xb [] shrink_list+0x8d/0x8f [] shrink_zone+0x278/0x33c [] ? ktime_get_ts+0xad/0xba [] try_to_free_pages+0x22e/0x392 [] ? isolate_pages_global+0x0/0x212 [] __alloc_pages_nodemask+0x3dc/0x5cf [] grab_cache_page_write_begin+0x65/0xaa [] ext3_write_begin+0x78/0x1eb [] generic_file_buffered_write+0x109/0x28c [] ? current_fs_time+0x22/0x29 [] __generic_file_aio_write+0x350/0x385 [] ? generic_file_aio_write+0x4a/0xae [] generic_file_aio_write+0x60/0xae [] do_sync_write+0xe3/0x120 [] ? autoremove_wake_function+0x0/0x34 [] ? __dentry_open+0x1a5/0x2b8 [] ? dentry_open+0x82/0x89 [] cachefiles_write_page+0x298/0x335 [cachefiles] [] fscache_write_op+0x178/0x2c2 [fscache] [] fscache_op_execute+0x7a/0xd1 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? tg_shares_up+0x171/0x227 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 In the above backtrace, the following is happening: (1) A page storage operation is being executed by a slow-work thread (fscache_write_op()). (2) FS-Cache farms the operation out to the cache to perform (cachefiles_write_page()). (3) CacheFiles is then calling Ext3 to perform the actual write, using Ext3's standard write (do_sync_write()) under KERNEL_DS directly from the netfs page. (4) However, for Ext3 to perform the write, it must allocate some memory, in particular, it must allocate at least one page cache page into which it can copy the data from the netfs page. (5) Under OOM conditions, the memory allocator can't immediately come up with a page, so it uses vmscan to find something to discard (try_to_free_pages()). (6) vmscan finds a clean netfs page it might be able to discard (possibly the one it's trying to write out). (7) The netfs is called to throw the page away (nfs_release_page()) - but it's called with __GFP_WAIT, so the netfs decides to wait for the store to complete (__fscache_wait_on_page_write()). (8) This blocks a slow-work processing thread - possibly against itself. The system ends up stuck because it can't write out any netfs pages to the cache without allocating more memory. To avoid this, we make FS-Cache cancel some writes that aren't in the middle of actually being performed. This means that some data won't make it into the cache this time. To support this, a new FS-Cache function is added fscache_maybe_release_page() that replaces what the netfs releasepage() functions used to do with respect to the cache. The decisions fscache_maybe_release_page() makes are counted and displayed through /proc/fs/fscache/stats on a line labelled "VmScan". There are four counters provided: "nos=N" - pages that weren't pending storage; "gon=N" - pages that were pending storage when we first looked, but weren't by the time we got the object lock; "bsy=N" - pages that we ignored as they were actively being written when we looked; and "can=N" - pages that we cancelled the storage of. What I'd really like to do is alter the behaviour of the cancellation heuristics, depending on how necessary it is to expel pages. If there are plenty of other pages that aren't waiting to be written to the cache that could be ejected first, then it would be nice to hold up on immediate cancellation of cache writes - but I don't see a way of doing that. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 1 + include/linux/fscache.h | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f3aa4bdafef6..4750d5fb419f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -317,6 +317,7 @@ struct fscache_cookie { void *netfs_data; /* back pointer to netfs */ struct radix_tree_root stores; /* pages to be stored on this cookie */ #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ +#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ unsigned long flags; #define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6d8ee466e0a0..595ce49288b7 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -202,6 +202,8 @@ extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); +extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, + gfp_t); /** * fscache_register_netfs - Register a filesystem as desiring caching services @@ -615,4 +617,29 @@ void fscache_wait_on_page_write(struct fscache_cookie *cookie, __fscache_wait_on_page_write(cookie, page); } +/** + * fscache_maybe_release_page - Consider releasing a page, cancelling a store + * @cookie: The cookie representing the cache object + * @page: The netfs page that is being cached. + * @gfp: The gfp flags passed to releasepage() + * + * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's + * releasepage() call. A storage request on the page may cancelled if it is + * not currently being processed. + * + * The function returns true if the page no longer has a storage request on it, + * and false if a storage request is left in place. If true is returned, the + * page will have been passed to fscache_uncache_page(). If false is returned + * the page cannot be freed yet. + */ +static inline +bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp) +{ + if (fscache_cookie_valid(cookie) && PageFsCache(page)) + return __fscache_maybe_release_page(cookie, page, gfp); + return false; +} + #endif /* _LINUX_FSCACHE_H */ -- cgit v1.2.3 From 60d543ca724be155c2b6166e36a00c80b21bd810 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:45 +0000 Subject: FS-Cache: Start processing an object's operations on that object's death Start processing an object's operations when that object moves into the DYING state as the object cannot be destroyed until all its outstanding operations have completed. Furthermore, make sure that read and allocation operations handle being woken up on a dead object. Such events are recorded in the Allocs.abt and Retrvls.abt statistics as viewable through /proc/fs/fscache/stats. The code for waiting for object activation for the read and allocation operations is also extracted into its own function as it is much the same in all cases, differing only in the stats incremented. Signed-off-by: David Howells --- include/linux/fscache-cache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 4750d5fb419f..907bb56c5888 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -404,6 +404,10 @@ extern const char *fscache_object_states[]; (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ (obj)->state < FSCACHE_OBJECT_DYING) +#define fscache_object_is_dead(obj) \ + (test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ + (obj)->state >= FSCACHE_OBJECT_DYING) + extern const struct slow_work_ops fscache_object_slow_work_ops; /** -- cgit v1.2.3 From a17754fb8c28af19cd70dcbec6d5b0773b94e0c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:11:52 +0000 Subject: CacheFiles: Don't write a full page if there's only a partial page to cache cachefiles_write_page() writes a full page to the backing file for the last page of the netfs file, even if the netfs file's last page is only a partial page. This causes the EOF on the backing file to be extended beyond the EOF of the netfs, and thus the backing file will be truncated by cachefiles_attr_changed() called from cachefiles_lookup_object(). So we need to limit the write we make to the backing file on that last page such that it doesn't push the EOF too far. Also, if a backing file that has a partial page at the end is expanded, we discard the partial page and refetch it on the basis that we then have a hole in the file with invalid data, and should the power go out... A better way to deal with this could be to record a note that the partial page contains invalid data until the correct data is written into it. This isn't a problem for netfs's that discard the whole backing file if the file size changes (such as NFS). Signed-off-by: David Howells --- include/linux/fscache-cache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 907bb56c5888..5db50002f3b5 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -395,6 +395,7 @@ struct fscache_object { struct rb_node objlist_link; /* link in global object list */ #endif pgoff_t store_limit; /* current storage limit */ + loff_t store_limit_l; /* current storage limit */ }; extern const char *fscache_object_states[]; @@ -439,6 +440,7 @@ void fscache_object_init(struct fscache_object *object, object->events = object->event_mask = 0; object->flags = 0; object->store_limit = 0; + object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; object->parent = NULL; @@ -491,6 +493,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) static inline void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) { + object->store_limit_l = i_size; object->store_limit = i_size >> PAGE_SHIFT; if (i_size & ~PAGE_MASK) object->store_limit++; -- cgit v1.2.3 From fee096deb4f33897937b974cb2c5168bab7935be Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Nov 2009 18:12:05 +0000 Subject: CacheFiles: Catch an overly long wait for an old active object Catch an overly long wait for an old, dying active object when we want to replace it with a new one. The probability is that all the slow-work threads are hogged, and the delete can't get a look in. What we do instead is: (1) if there's nothing in the slow work queue, we sleep until either the dying object has finished dying or there is something in the slow work queue behind which we can queue our object. (2) if there is something in the slow work queue, we return ETIMEDOUT to fscache_lookup_object(), which then puts us back on the slow work queue, presumably behind the deletion that we're blocked by. We are then deferred for a while until we work our way back through the queue - without blocking a slow-work thread unnecessarily. A backtrace similar to the following may appear in the log without this patch: INFO: task kslowd004:5711 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kslowd004 D 0000000000000000 0 5711 2 0x00000080 ffff88000340bb80 0000000000000046 ffff88002550d000 0000000000000000 ffff88002550d000 0000000000000007 ffff88000340bfd8 ffff88002550d2a8 000000000000ddf0 00000000000118c0 00000000000118c0 ffff88002550d2a8 Call Trace: [] ? trace_hardirqs_on+0xd/0xf [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] cachefiles_wait_bit+0x9/0xd [cachefiles] [] __wait_on_bit+0x43/0x76 [] ? ext3_xattr_get+0x1ec/0x270 [] out_of_line_wait_on_bit+0x69/0x74 [] ? cachefiles_wait_bit+0x0/0xd [cachefiles] [] ? wake_bit_function+0x0/0x2e [] cachefiles_mark_object_active+0x203/0x23b [cachefiles] [] cachefiles_walk_to_object+0x558/0x827 [cachefiles] [] cachefiles_lookup_object+0xac/0x12a [cachefiles] [] fscache_lookup_object+0x1c7/0x214 [fscache] [] fscache_object_state_machine+0xa5/0x52d [fscache] [] fscache_object_slow_work_execute+0x5f/0xa0 [fscache] [] slow_work_execute+0x18f/0x2d1 [] slow_work_thread+0x1c5/0x308 [] ? autoremove_wake_function+0x0/0x34 [] ? slow_work_thread+0x0/0x308 [] kthread+0x7a/0x82 [] child_rip+0xa/0x20 [] ? restore_args+0x0/0x30 [] ? kthread+0x0/0x82 [] ? child_rip+0x0/0x20 1 lock held by kslowd004/5711: #0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [] cachefiles_walk_to_object+0x1b3/0x827 [cachefiles] Signed-off-by: David Howells --- include/linux/fscache-cache.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 5db50002f3b5..7be0c6fbe880 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -234,8 +234,10 @@ struct fscache_cache_ops { struct fscache_object *(*alloc_object)(struct fscache_cache *cache, struct fscache_cookie *cookie); - /* look up the object for a cookie */ - void (*lookup_object)(struct fscache_object *object); + /* look up the object for a cookie + * - return -ETIMEDOUT to be requeued + */ + int (*lookup_object)(struct fscache_object *object); /* finished looking up */ void (*lookup_complete)(struct fscache_object *object); -- cgit v1.2.3 From 308efab5e231d1510cd35931d87629bf5171caae Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 19 Nov 2009 13:30:36 +0000 Subject: vt: Fix use of "new" in a struct field As this struct is exposed to user space and the API was added for this release it's a bit of a pain for the C++ world and we still have time to fix it. Rename the fields before we end up with that pain in an actual release. Signed-off-by: Alan Cox Reported-by: Olivier Goffart Signed-off-by: Linus Torvalds --- include/linux/vt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vt.h b/include/linux/vt.h index 7afca0d72139..7ffa11f06232 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -70,8 +70,8 @@ struct vt_event { #define VT_EVENT_UNBLANK 0x0004 /* Screen unblank */ #define VT_EVENT_RESIZE 0x0008 /* Resize display */ #define VT_MAX_EVENT 0x000F - unsigned int old; /* Old console */ - unsigned int new; /* New console (if changing) */ + unsigned int oldev; /* Old console */ + unsigned int newev; /* New console (if changing) */ unsigned int pad[4]; /* Padding for expansion */ }; -- cgit v1.2.3 From 4ced24c8973f79113444d1e00ee8bd9e74fbf43e Mon Sep 17 00:00:00 2001 From: Kevin Wells Date: Thu, 12 Nov 2009 00:23:00 +0100 Subject: i2c: i2c-pnx: Made buf type unsigned to prevent sign extension Made buf type unsigned to prevent sign extension Signed-off-by: Kevin Wells Signed-off-by: Ben Dooks --- include/linux/i2c-pnx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h index f13255e06406..9eb07bbc6522 100644 --- a/include/linux/i2c-pnx.h +++ b/include/linux/i2c-pnx.h @@ -21,7 +21,7 @@ struct i2c_pnx_mif { int mode; /* Interface mode */ struct completion complete; /* I/O completion */ struct timer_list timer; /* Timeout */ - char * buf; /* Data buffer */ + u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ }; -- cgit v1.2.3 From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 23 Nov 2009 16:21:30 -0600 Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option As far as I know, all distros currently ship kernels with default CONFIG_SECURITY_FILE_CAPABILITIES=y. Since having the option on leaves a 'no_file_caps' option to boot without file capabilities, the main reason to keep the option is that turning it off saves you (on my s390x partition) 5k. In particular, vmlinux sizes came to: without patch fscaps=n: 53598392 without patch fscaps=y: 53603406 with this patch applied: 53603342 with the security-next tree. Against this we must weigh the fact that there is no simple way for userspace to figure out whether file capabilities are supported, while things like per-process securebits, capability bounding sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for applications wanting to know whether they can use them and/or why something failed. It also adds another subtly different set of semantics which we must maintain at the risk of severe security regressions. So this patch removes the SECURITY_FILE_CAPABILITIES compile option. It drops the kernel size by about 50k over the stock SECURITY_FILE_CAPABILITIES=y kernel, by removing the cap_limit_ptraced_target() function. Changelog: Nov 20: remove cap_limit_ptraced_target() as it's logic was ifndef'ed. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan" Signed-off-by: James Morris --- include/linux/capability.h | 2 -- include/linux/init_task.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f70ed5..39e5ff512fbe 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -92,9 +92,7 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES extern int file_caps_enabled; -#endif typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 21a6f5d9af22..8d10aa7fd4c9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,16 +83,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Because of the reduced scope of CAP_SETPCAP when filesystem * capabilities are in effect, it is safe to allow CAP_SETPCAP to * be available in the default configuration. */ # define CAP_INIT_BSET CAP_FULL_SET -#else -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ -- cgit v1.2.3 From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 30 Nov 2009 13:24:18 +0000 Subject: mfd: Correct WM831X_MAX_ISEL_VALUE There was confusion between the array size and the highest ISEL value possible. Reported-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm831x/regulator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h index f95466343fb2..955d30fc6a27 100644 --- a/include/linux/mfd/wm831x/regulator.h +++ b/include/linux/mfd/wm831x/regulator.h @@ -1212,7 +1212,7 @@ #define WM831X_LDO1_OK_SHIFT 0 /* LDO1_OK */ #define WM831X_LDO1_OK_WIDTH 1 /* LDO1_OK */ -#define WM831X_ISINK_MAX_ISEL 56 -extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL]; +#define WM831X_ISINK_MAX_ISEL 55 +extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1]; #endif -- cgit v1.2.3 From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 15:36:11 +0000 Subject: SLOW_WORK: Move slow_work's proc file to debugfs Move slow_work's debugging proc file to debugfs. Signed-off-by: David Howells Requested-and-acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/slow-work.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a2691739..13337bf6c3f5 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; -- cgit v1.2.3 From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:27:23 +0100 Subject: VFS: Add forget_all_cached_acls() This is required for cluster filesystems which want to use cached ACLs so that they can invalidate the cache when required. Signed-off-by: Steven Whitehouse Cc: Alexander Viro Cc: Christoph Hellwig --- include/linux/posix_acl.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a3652a3ea..67608161df6b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) -- cgit v1.2.3 From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:35:17 +0100 Subject: VFS: Export dquot_send_warning Sending a message to userspace in a generic format to warn of events (e.g. quota exceeded) in the quota subsystem is a generically useful feature. This patch makes some minor changes to the send_message function from dquot.c renaming it quota_send_message, moving it to quota.c and exporting it for use by filesystems which do not use the dquot code. Signed-off-by: Steven Whitehouse --- include/linux/quota.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c48895b12a..ce9a9b2e5cd4 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ -- cgit v1.2.3 From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 16:20:51 +0000 Subject: GFS2: Tag all metadata with jid There are two spare field in the header common to all GFS2 metadata. One is just the right size to fit a journal id in it, and this patch updates the journal code so that each time a metadata block is modified, we tag it with the journal id of the node which is performing the modification. The reason for this is that it should make it much easier to debug issues which arise if we can tell which node was the last to modify a particular metadata block. Since the field is updated before the block is written into the journal, each journal should only contain metadata which is tagged with its own journal id. The one exception to this is the journal header block, which might have a different node's id in it, if that journal was recovered by another node in the cluster. Thus each journal will contain a record of which nodes recovered it, via the journal header. The other field in the metadata header could potentially be used to hold information about what kind of operation was performed, but for the time being we just zero it on each transaction so that if we use it for that in future, we'll know that the information (where it exists) is reliable. I did consider using the other field to hold the journal sequence number, however since in GFS2's journaling we write the modified data into the journal and not the original data, this gives no information as to what action caused the modification, so I think we can probably come up with a better use for those 64 bits in the future. Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88dedbbb..81f90a59cda6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* -- cgit v1.2.3 From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:50 -0800 Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2) Starting with version 4.5, GCC has a new built-in function __builtin_unreachable() that can be used in places like the kernel's BUG() where inline assembly is used to transfer control flow. This eliminated the need for an endless loop in these places. The patch adds a new macro 'unreachable()' that will expand to either __builtin_unreachable() or an endless loop depending on the compiler version. Change from v1: Simplify unreachable() for non-GCC 4.5 case. Signed-off-by: David Daney Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 14 ++++++++++++++ include/linux/compiler.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..ab3af40a53c6 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -36,4 +36,18 @@ the kernel context */ #define __cold __attribute__((__cold__)) + +#if __GNUC_MINOR__ >= 5 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() +#endif + #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..59f208926d13 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +/* Unreachable code */ +#ifndef unreachable +# define unreachable() do { } while (1) +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ -- cgit v1.2.3