From 52494535103986dbbf689b44d8c2c7efe2132b16 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Nov 2012 16:26:40 -0800 Subject: rcu: Reduce rcutorture tracing Currently, rcutorture traces every read-side access. This can be problematic because even a two-minute rcutorture run on a two-CPU system can generate 28,853,363 reads. Normally, only a failing read is of interest, so this commit adjusts rcutorture's tracing to only trace failing reads. The resulting event tracing records the time and the ->completed value captured at the beginning of the RCU read-side critical section, allowing correlation with other event-tracing messages. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett [ paulmck: Add fix to build problem located by Randy Dunlap based on diagnosis by Steven Rostedt. ] --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a353091a903..7d83f52fbade 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1008,6 +1008,7 @@ config RCU_CPU_STALL_INFO config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL + select TRACE_CLOCK help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. -- cgit v1.2.3 From 525c1f9204928649eca475e61bfb21e0b6416dbf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Jan 2013 18:54:16 -0800 Subject: lib: remove depends on CONFIG_EXPERIMENTAL The CONFIG_EXPERIMENTAL config item has not carried much meaning for a while now and is almost always enabled by default. As agreed during the Linux kernel summit, remove it from any "depends on" lines in Kconfigs. CC: Andrew Morton Acked-by: Paul E. McKenney CC: Dmitry Kasatkin CC: James Morris CC: "Michael S. 
Tsirkin" CC: Akinobu Mita CC: Ingo Molnar Cc: Greg Kroah-Hartman Signed-off-by: Kees Cook Acked-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig | 2 +- lib/Kconfig.debug | 2 +- lib/Kconfig.kgdb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 75cdb77fa49d..3958dc4389f9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -322,7 +322,7 @@ config CPUMASK_OFFSTACK config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS - depends on EXPERIMENTAL && BROKEN + depends on BROKEN config CPU_RMAP bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 67604e599384..849ecaea2b43 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -455,7 +455,7 @@ config HAVE_DEBUG_KMEMLEAK config DEBUG_KMEMLEAK bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && HAVE_DEBUG_KMEMLEAK + depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK select DEBUG_FS select STACKTRACE if STACKTRACE_SUPPORT select KALLSYMS diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 43cb93fa2651..77439eb8528d 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -5,7 +5,7 @@ config HAVE_ARCH_KGDB menuconfig KGDB bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB - depends on DEBUG_KERNEL && EXPERIMENTAL + depends on DEBUG_KERNEL help If you say Y here, it will be possible to remotely debug the kernel using gdb. It is recommended but not required, that -- cgit v1.2.3 From f657fd21e16e3ab7432c03008e19069c2ef8e150 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 5 Dec 2012 16:48:26 -0500 Subject: dynamic_debug: Fix vpr_ logging styles vpr_info_dq should be a function and vpr_info should have a do {} while (0) Add missing newlines to pr_s. Miscellaneous neatening too. braces, coalescing formats, alignments, etc... 
Signed-off-by: Joe Perches Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 118 +++++++++++++++++++++++++++------------------------- 1 file changed, 62 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 1db1fc660538..ac7d27737e42 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -59,7 +59,7 @@ struct ddebug_iter { static DEFINE_MUTEX(ddebug_lock); static LIST_HEAD(ddebug_tables); -static int verbose = 0; +static int verbose; module_param(verbose, int, 0644); /* Return the path relative to source root */ @@ -100,24 +100,32 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, return buf; } -#define vpr_info(fmt, ...) \ - if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0) - -#define vpr_info_dq(q, msg) \ +#define vpr_info(fmt, ...) \ do { \ - /* trim last char off format print */ \ - vpr_info("%s: func=\"%s\" file=\"%s\" " \ - "module=\"%s\" format=\"%.*s\" " \ - "lineno=%u-%u", \ - msg, \ - q->function ? q->function : "", \ - q->filename ? q->filename : "", \ - q->module ? q->module : "", \ - (int)(q->format ? strlen(q->format) - 1 : 0), \ - q->format ? q->format : "", \ - q->first_lineno, q->last_lineno); \ + if (verbose) \ + pr_info(fmt, ##__VA_ARGS__); \ } while (0) +static void vpr_info_dq(const struct ddebug_query *query, const char *msg) +{ + /* trim any trailing newlines */ + int fmtlen = 0; + + if (query->format) { + fmtlen = strlen(query->format); + while (fmtlen && query->format[fmtlen - 1] == '\n') + fmtlen--; + } + + vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u\n", + msg, + query->function ? query->function : "", + query->filename ? query->filename : "", + query->module ? query->module : "", + fmtlen, query->format ? 
query->format : "", + query->first_lineno, query->last_lineno); +} + /* * Search the tables for _ddebug's which match the given `query' and * apply the `flags' and `mask' to them. Returns number of matching @@ -141,7 +149,7 @@ static int ddebug_change(const struct ddebug_query *query, if (query->module && strcmp(query->module, dt->mod_name)) continue; - for (i = 0 ; i < dt->num_ddebugs ; i++) { + for (i = 0; i < dt->num_ddebugs; i++) { struct _ddebug *dp = &dt->ddebugs[i]; /* match against the source filename */ @@ -176,10 +184,10 @@ static int ddebug_change(const struct ddebug_query *query, continue; dp->flags = newflags; vpr_info("changed %s:%d [%s]%s =%s\n", - trim_prefix(dp->filename), dp->lineno, - dt->mod_name, dp->function, - ddebug_describe_flags(dp, flagbuf, - sizeof(flagbuf))); + trim_prefix(dp->filename), dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); } } mutex_unlock(&ddebug_lock); @@ -213,12 +221,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) /* find `end' of word, whitespace separated or quoted */ if (*buf == '"' || *buf == '\'') { int quote = *buf++; - for (end = buf ; *end && *end != quote ; end++) + for (end = buf; *end && *end != quote; end++) ; if (!*end) return -EINVAL; /* unclosed quote */ } else { - for (end = buf ; *end && !isspace(*end) ; end++) + for (end = buf; *end && !isspace(*end); end++) ; BUG_ON(end == buf); } @@ -235,7 +243,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) if (verbose) { int i; pr_info("split into words:"); - for (i = 0 ; i < nwords ; i++) + for (i = 0; i < nwords; i++) pr_cont(" \"%s\"", words[i]); pr_cont("\n"); } @@ -286,11 +294,11 @@ static char *unescape(char *str) in += 2; continue; } else if (isodigit(in[1]) && - isodigit(in[2]) && - isodigit(in[3])) { - *out++ = ((in[1] - '0')<<6) | - ((in[2] - '0')<<3) | - (in[3] - '0'); + isodigit(in[2]) && + isodigit(in[3])) { + *out++ = (((in[1] - '0') << 6) | + ((in[2] - 
'0') << 3) | + (in[3] - '0')); in += 4; continue; } @@ -308,8 +316,8 @@ static int check_set(const char **dest, char *src, char *name) if (*dest) { rc = -EINVAL; - pr_err("match-spec:%s val:%s overridden by %s", - name, *dest, src); + pr_err("match-spec:%s val:%s overridden by %s\n", + name, *dest, src); } *dest = src; return rc; @@ -345,17 +353,17 @@ static int ddebug_parse_query(char *words[], int nwords, /* support $modname.dyndbg= */ query->module = modname; - for (i = 0 ; i < nwords ; i += 2) { - if (!strcmp(words[i], "func")) + for (i = 0; i < nwords; i += 2) { + if (!strcmp(words[i], "func")) { rc = check_set(&query->function, words[i+1], "func"); - else if (!strcmp(words[i], "file")) + } else if (!strcmp(words[i], "file")) { rc = check_set(&query->filename, words[i+1], "file"); - else if (!strcmp(words[i], "module")) + } else if (!strcmp(words[i], "module")) { rc = check_set(&query->module, words[i+1], "module"); - else if (!strcmp(words[i], "format")) + } else if (!strcmp(words[i], "format")) { rc = check_set(&query->format, unescape(words[i+1]), - "format"); - else if (!strcmp(words[i], "line")) { + "format"); + } else if (!strcmp(words[i], "line")) { char *first = words[i+1]; char *last = strchr(first, '-'); if (query->first_lineno || query->last_lineno) { @@ -410,7 +418,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, } vpr_info("op='%c'\n", op); - for ( ; *str ; ++str) { + for (; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { if (*str == opt_array[i].opt_char) { flags |= opt_array[i].flag; @@ -459,7 +467,7 @@ static int ddebug_exec_query(char *query_string, const char *modname) /* actually go and implement the change */ nfound = ddebug_change(&query, flags, mask); - vpr_info_dq((&query), (nfound) ? "applied" : "no-match"); + vpr_info_dq(&query, nfound ? 
"applied" : "no-match"); return nfound; } @@ -488,8 +496,9 @@ static int ddebug_exec_queries(char *query, const char *modname) if (rc < 0) { errs++; exitcode = rc; - } else + } else { nfound += rc; + } i++; } vpr_info("processed %d queries, with %d matches, %d errs\n", @@ -765,7 +774,7 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) struct _ddebug *dp; vpr_info("called m=%p p=%p *pos=%lld\n", - m, p, (unsigned long long)*pos); + m, p, (unsigned long long)*pos); if (p == SEQ_START_TOKEN) dp = ddebug_iter_first(iter); @@ -791,14 +800,14 @@ static int ddebug_proc_show(struct seq_file *m, void *p) if (p == SEQ_START_TOKEN) { seq_puts(m, - "# filename:lineno [module]function flags format\n"); + "# filename:lineno [module]function flags format\n"); return 0; } seq_printf(m, "%s:%u [%s]%s =%s \"", - trim_prefix(dp->filename), dp->lineno, - iter->table->mod_name, dp->function, - ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + trim_prefix(dp->filename), dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); seq_escape(m, dp->format, "\t\r\n\""); seq_puts(m, "\"\n"); @@ -845,7 +854,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file) kfree(iter); return err; } - ((struct seq_file *) file->private_data)->private = iter; + ((struct seq_file *)file->private_data)->private = iter; return 0; } @@ -1002,8 +1011,7 @@ static int __init dynamic_debug_init(void) int verbose_bytes = 0; if (__start___verbose == __stop___verbose) { - pr_warn("_ddebug table is empty in a " - "CONFIG_DYNAMIC_DEBUG build"); + pr_warn("_ddebug table is empty in a CONFIG_DYNAMIC_DEBUG build\n"); return 1; } iter = __start___verbose; @@ -1030,18 +1038,16 @@ static int __init dynamic_debug_init(void) goto out_err; ddebug_init_success = 1; - vpr_info("%d modules, %d entries and %d bytes in ddebug tables," - " %d bytes in (readonly) verbose section\n", - modct, entries, (int)( modct * sizeof(struct 
ddebug_table)), - verbose_bytes + (int)(__stop___verbose - __start___verbose)); + vpr_info("%d modules, %d entries and %d bytes in ddebug tables, %d bytes in (readonly) verbose section\n", + modct, entries, (int)(modct * sizeof(struct ddebug_table)), + verbose_bytes + (int)(__stop___verbose - __start___verbose)); /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { - pr_warn("ddebug_query param name is deprecated," - " change it to dyndbg\n"); + pr_warn("ddebug_query param name is deprecated, change it to dyndbg\n"); ret = ddebug_exec_queries(ddebug_setup_string, NULL); if (ret < 0) - pr_warn("Invalid ddebug boot param %s", + pr_warn("Invalid ddebug boot param %s\n", ddebug_setup_string); else pr_info("%d changes by ddebug_query\n", ret); -- cgit v1.2.3 From 7a555613eb77c69eb6e48b61bc5f72dd42fa1780 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Wed, 5 Dec 2012 16:48:27 -0500 Subject: dynamic_debug: dynamic hex dump Introduce print_hex_dump_debug() that can be dynamically controlled, similar to pr_debug. Also, make print_hex_dump_bytes() dynamically controlled Implement only 'p' flag (_DPRINTK_FLAGS_PRINT) to keep it simple since hex dump prints multiple lines and long prefix would impact readability. To provide line/file etc. 
information, use pr_debug or similar before/after print_hex_dump_debug() Signed-off-by: Vladimir Kondratiev Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/hexdump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/hexdump.c b/lib/hexdump.c index 6540d657dca4..3f0494c9d57a 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -227,6 +227,7 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, } EXPORT_SYMBOL(print_hex_dump); +#if !defined(CONFIG_DYNAMIC_DEBUG) /** * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params * @prefix_str: string to prefix each line with; @@ -246,4 +247,5 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, buf, len, true); } EXPORT_SYMBOL(print_hex_dump_bytes); -#endif +#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */ +#endif /* defined(CONFIG_PRINTK) */ -- cgit v1.2.3 From 18c216c53b29f15b17c4c05a46395fc90ebb6f0c Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Wed, 5 Dec 2012 16:48:27 -0500 Subject: dynamic_debug: add pr_errs before -EINVALs Ma noted that dynamic-debug is silent about many query errors, so add pr_err()s to explain those errors, and tweak a few others. Also parse flags 1st, so that match-spec errs are slightly clearer. 
CC: Jianpeng Ma CC: Joe Perches Signed-off-by: Jim Cromie Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index ac7d27737e42..5276b99ca650 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -223,8 +223,10 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) int quote = *buf++; for (end = buf; *end && *end != quote; end++) ; - if (!*end) + if (!*end) { + pr_err("unclosed quote: %s\n", buf); return -EINVAL; /* unclosed quote */ + } } else { for (end = buf; *end && !isspace(*end); end++) ; @@ -232,8 +234,10 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) } /* `buf' is start of word, `end' is one past its end */ - if (nwords == maxwords) + if (nwords == maxwords) { + pr_err("too many words, legal max <=%d\n", maxwords); return -EINVAL; /* ran out of words[] before bytes */ + } if (*end) *end++ = '\0'; /* terminate the word */ words[nwords++] = buf; @@ -265,7 +269,11 @@ static inline int parse_lineno(const char *str, unsigned int *val) return 0; } *val = simple_strtoul(str, &end, 10); - return end == NULL || end == str || *end != '\0' ? 
-EINVAL : 0; + if (end == NULL || end == str || *end != '\0') { + pr_err("bad line-number: %s\n", str); + return -EINVAL; + } + return 0; } /* @@ -345,8 +353,10 @@ static int ddebug_parse_query(char *words[], int nwords, int rc; /* check we have an even number of words */ - if (nwords % 2 != 0) + if (nwords % 2 != 0) { + pr_err("expecting pairs of match-spec \n"); return -EINVAL; + } memset(query, 0, sizeof(*query)); if (modname) @@ -367,18 +377,22 @@ static int ddebug_parse_query(char *words[], int nwords, char *first = words[i+1]; char *last = strchr(first, '-'); if (query->first_lineno || query->last_lineno) { - pr_err("match-spec:line given 2 times\n"); + pr_err("match-spec: line used 2x\n"); return -EINVAL; } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) + if (parse_lineno(first, &query->first_lineno) < 0) { + pr_err("line-number is <0\n"); return -EINVAL; + } if (last) { /* range - */ if (parse_lineno(last, &query->last_lineno) < query->first_lineno) { - pr_err("last-line < 1st-line\n"); + pr_err("last-line:%d < 1st-line:%d\n", + query->last_lineno, + query->first_lineno); return -EINVAL; } } else { @@ -414,6 +428,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, op = *str++; break; default: + pr_err("bad flag-op %c, at start of %s\n", *str, str); return -EINVAL; } vpr_info("op='%c'\n", op); @@ -425,8 +440,10 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, break; } } - if (i < 0) + if (i < 0) { + pr_err("unknown flag '%c' in \"%s\"\n", *str, str); return -EINVAL; + } } vpr_info("flags=0x%x\n", flags); @@ -458,13 +475,19 @@ static int ddebug_exec_query(char *query_string, const char *modname) char *words[MAXWORDS]; nwords = ddebug_tokenize(query_string, words, MAXWORDS); - if (nwords <= 0) + if (nwords <= 0) { + pr_err("tokenize failed\n"); return -EINVAL; - if (ddebug_parse_query(words, nwords-1, &query, modname)) + } + /* check flags 1st (last arg) so query is pairs of spec,val 
*/ + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) { + pr_err("flags parse failed\n"); return -EINVAL; - if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + } + if (ddebug_parse_query(words, nwords-1, &query, modname)) { + pr_err("query parse failed\n"); return -EINVAL; - + } /* actually go and implement the change */ nfound = ddebug_change(&query, flags, mask); vpr_info_dq(&query, nfound ? "applied" : "no-match"); -- cgit v1.2.3 From 4f73bc4dd3e8563ef4109f293a092820dff66d92 Mon Sep 17 00:00:00 2001 From: Joe Millenbach Date: Thu, 17 Jan 2013 22:44:22 -0800 Subject: tty: Added a CONFIG_TTY option to allow removal of TTY The option allows you to remove TTY and compile without errors. This saves space on systems that won't support TTY interfaces anyway. bloat-o-meter output is below. The bulk of this patch consists of Kconfig changes adding "depends on TTY" to various serial devices and similar drivers that require the TTY layer. Ideally, these dependencies would occur on a common intermediate symbol such as SERIO, but most drivers "select SERIO" rather than "depends on SERIO", and "select" does not respect dependencies. bloat-o-meter output comparing our previous minimal to new minimal by removing TTY. The list is filtered to not show removed entries with awk '$3 != "-"' as the list was very long. 
add/remove: 0/226 grow/shrink: 2/14 up/down: 6/-35356 (-35350) function old new delta chr_dev_init 166 170 +4 allow_signal 80 82 +2 static.__warned 143 142 -1 disallow_signal 63 62 -1 __set_special_pids 95 94 -1 unregister_console 126 121 -5 start_kernel 546 541 -5 register_console 593 588 -5 copy_from_user 45 40 -5 sys_setsid 128 120 -8 sys_vhangup 32 19 -13 do_exit 1543 1526 -17 bitmap_zero 60 40 -20 arch_local_irq_save 137 117 -20 release_task 674 652 -22 static.spin_unlock_irqrestore 308 260 -48 Signed-off-by: Joe Millenbach Reviewed-by: Jamey Sharp Reviewed-by: Josh Triplett Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.kgdb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 43cb93fa2651..30894fab84d6 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -22,6 +22,7 @@ config KGDB_SERIAL_CONSOLE tristate "KGDB: use kgdb over the serial console" select CONSOLE_POLL select MAGIC_SYSRQ + depends on TTY default y help Share a serial console with kgdb. Sysrq-g must be used -- cgit v1.2.3 From 373d4d099761cb1f637bed488ab3871945882273 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 21 Jan 2013 17:17:39 +1030 Subject: taint: add explicit flag to show whether lock dep is still OK. Fix up all callers as they were before, with one change: an unsigned module taints the kernel, but doesn't turn off lockdep. Signed-off-by: Rusty Russell --- lib/bug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index d0cdf14c651a..168603477f02 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -166,7 +166,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) print_modules(); show_regs(regs); print_oops_end_marker(); - add_taint(BUG_GET_TAINT(bug)); + /* Just a warning, don't kill lockdep. 
*/ + add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK); return BUG_TRAP_TYPE_WARN; } -- cgit v1.2.3 From 75096579c3ac39ddc2f8b0d9a8924eba31f4d920 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 21 Jan 2013 11:08:54 +0100 Subject: lib: devres: Introduce devm_ioremap_resource() The devm_request_and_ioremap() function is very useful and helps avoid a whole lot of boilerplate. However, one issue that keeps popping up is its lack of a specific error code to determine which of the steps that it performs failed. Furthermore, while the function gives an example and suggests what error code to return on failure, a wide variety of error codes are used throughout the tree. In an attempt to fix these problems, this patch adds a new function that drivers can transition to. The devm_ioremap_resource() function returns a pointer to the remapped I/O memory on success or an ERR_PTR() encoded error code on failure. Callers can check for failure using IS_ERR() and determine its cause by extracting the error code using PTR_ERR(). devm_request_and_ioremap() is implemented as a wrapper around the new API and returns NULL on failure as before. This ensures that backwards compatibility is maintained until all users have been converted to the new API, at which point the old devm_request_and_ioremap() function should be removed. A semantic patch is included which can be used to convert from the old devm_request_and_ioremap() API to the new devm_ioremap_resource() API. Some non-trivial cases may require manual intervention, though. 
Signed-off-by: Thierry Reding Cc: Arnd Bergmann Acked-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- lib/devres.c | 57 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 80b9c76d436a..9c76b3a9cc72 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -86,22 +86,24 @@ void devm_iounmap(struct device *dev, void __iomem *addr) EXPORT_SYMBOL(devm_iounmap); /** - * devm_request_and_ioremap() - Check, request region, and ioremap resource - * @dev: Generic device to handle the resource for + * devm_ioremap_resource() - check, request region, and ioremap resource + * @dev: generic device to handle the resource for * @res: resource to be handled * - * Takes all necessary steps to ioremap a mem resource. Uses managed device, so - * everything is undone on driver detach. Checks arguments, so you can feed - * it the result from e.g. platform_get_resource() directly. Returns the - * remapped pointer or NULL on error. Usage example: + * Checks that a resource is a valid memory region, requests the memory region + * and ioremaps it either as cacheable or as non-cacheable memory depending on + * the resource's flags. All operations are managed and will be undone on + * driver detach. + * + * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code + * on failure. 
Usage example: * * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - * base = devm_request_and_ioremap(&pdev->dev, res); - * if (!base) - * return -EADDRNOTAVAIL; + * base = devm_ioremap_resource(&pdev->dev, res); + * if (IS_ERR(base)) + * return PTR_ERR(base); */ -void __iomem *devm_request_and_ioremap(struct device *dev, - struct resource *res) +void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) { resource_size_t size; const char *name; @@ -111,7 +113,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!res || resource_type(res) != IORESOURCE_MEM) { dev_err(dev, "invalid resource\n"); - return NULL; + return ERR_PTR(-EINVAL); } size = resource_size(res); @@ -119,7 +121,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!devm_request_mem_region(dev, res->start, size, name)) { dev_err(dev, "can't request region for resource %pR\n", res); - return NULL; + return ERR_PTR(-EBUSY); } if (res->flags & IORESOURCE_CACHEABLE) @@ -130,10 +132,39 @@ void __iomem *devm_request_and_ioremap(struct device *dev, if (!dest_ptr) { dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); + dest_ptr = ERR_PTR(-ENOMEM); } return dest_ptr; } +EXPORT_SYMBOL(devm_ioremap_resource); + +/** + * devm_request_and_ioremap() - Check, request region, and ioremap resource + * @dev: Generic device to handle the resource for + * @res: resource to be handled + * + * Takes all necessary steps to ioremap a mem resource. Uses managed device, so + * everything is undone on driver detach. Checks arguments, so you can feed + * it the result from e.g. platform_get_resource() directly. Returns the + * remapped pointer or NULL on error. 
Usage example: + * + * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + * base = devm_request_and_ioremap(&pdev->dev, res); + * if (!base) + * return -EADDRNOTAVAIL; + */ +void __iomem *devm_request_and_ioremap(struct device *device, + struct resource *res) +{ + void __iomem *dest_ptr; + + dest_ptr = devm_ioremap_resource(device, res); + if (IS_ERR(dest_ptr)) + return NULL; + + return dest_ptr; +} EXPORT_SYMBOL(devm_request_and_ioremap); #ifdef CONFIG_HAS_IOPORT -- cgit v1.2.3 From f4a18312f46a6c6e0ba7b81776b01fc36edea9fc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 22 Jan 2013 22:24:46 +0100 Subject: lib: devres: Fix build breakage The ERR_PTR() and IS_ERR() macros used by the devm_ioremap_resource() function are defined in the linux/err.h header. On ARM this seems to be pulled in by one of the other headers but the build fails at least on OpenRISC. Signed-off-by: Thierry Reding Reported-by: kbuild test robot Signed-off-by: Greg Kroah-Hartman --- lib/devres.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 9c76b3a9cc72..88ad75952a76 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit v1.2.3 From 2f03e3ca74a7f8b17ce626d337d321163dce2dad Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 7 Jan 2013 08:19:23 -0800 Subject: rcu: Consolidate debugging Kconfig options The RCU-related debugging Kconfig options are in two different places, and consume too much screen real estate. This commit therefore consolidates them into their own menu. Signed-off-by: Dave Hansen Signed-off-by: Paul E. 
McKenney --- lib/Kconfig.debug | 114 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 55 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a353091a903..122db3d323c8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -605,61 +605,6 @@ config PROVE_LOCKING For more details, see Documentation/lockdep-design.txt. -config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. - -config PROVE_RCU_REPEATEDLY - bool "RCU debugging: don't disable PROVE_RCU on first splat" - depends on PROVE_RCU - default n - help - By itself, PROVE_RCU will disable checking upon issuing the - first warning (or "splat"). This feature prevents such - disabling, allowing multiple RCU-lockdep warnings to be printed - on a single reboot. - - Say Y to allow multiple RCU-lockdep warnings per boot. - - Say N if you are unsure. - -config PROVE_RCU_DELAY - bool "RCU debugging: preemptible RCU race provocation" - depends on DEBUG_KERNEL && PREEMPT_RCU - default n - help - There is a class of races that involve an unlikely preemption - of __rcu_read_unlock() just after ->rcu_read_lock_nesting has - been set to INT_MIN. This feature inserts a delay at that - point to increase the probability of these races. - - Say Y to increase probability of preemption of __rcu_read_unlock(). - - Say N if you are unsure. - -config SPARSE_RCU_POINTER - bool "RCU debugging: sparse-based checks for pointer usage" - default n - help - This feature enables the __rcu sparse annotation for - RCU-protected pointers. This annotation will cause sparse - to flag any non-RCU used of annotated pointers. This can be - helpful when debugging RCU usage. 
Please note that this feature - is not intended to enforce code cleanliness; it is instead merely - a debugging aid. - - Say Y to make sparse flag questionable use of RCU-protected pointers - - Say N if you are unsure. - config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -937,6 +882,63 @@ config BOOT_PRINTK_DELAY BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect what it believes to be lockup conditions. +menu "RCU Debugging" + +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say Y to allow multiple RCU-lockdep warnings per boot. + + Say N if you are unsure. + +config PROVE_RCU_DELAY + bool "RCU debugging: preemptible RCU race provocation" + depends on DEBUG_KERNEL && PREEMPT_RCU + default n + help + There is a class of races that involve an unlikely preemption + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has + been set to INT_MIN. This feature inserts a delay at that + point to increase the probability of these races. + + Say Y to increase probability of preemption of __rcu_read_unlock(). + + Say N if you are unsure. + +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. 
This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + + Say N if you are unsure. + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL @@ -1015,6 +1017,8 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. +endmenu # "RCU Debugging" + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v1.2.3 From 6bfc09e2327dfbffc312004c16188dbf8dfb0297 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 19 Oct 2012 12:49:17 -0700 Subject: rcu: Provide RCU CPU stall warnings for tiny RCU Tiny RCU has historically omitted RCU CPU stall warnings in order to reduce memory requirements, however, lack of these warnings caused Thomas Gleixner some debugging pain recently. Therefore, this commit adds RCU CPU stall warnings to tiny RCU if RCU_TRACE=y. This keeps the memory footprint small, while still enabling CPU stall warnings in kernels built to enable them. Updated to include Josh Triplett's suggested use of RCU_STALL_COMMON config variable to simplify #if expressions. Reported-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3a353091a903..f7329b3d4c8d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -970,7 +970,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" - depends on TREE_RCU || TREE_PREEMPT_RCU + depends on RCU_STALL_COMMON range 3 300 default 21 help -- cgit v1.2.3 From ac2cbab21f318e19bc176a7f38a120cec835220f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:16 -0800 Subject: x86: Don't panic if can not alloc buffer for swiotlb Normal boot path on system with iommu support: swiotlb buffer will be allocated early at first and then try to initialize iommu, if iommu for intel or AMD could setup properly, swiotlb buffer will be freed. The early allocating is with bootmem, and could panic when we try to use kdump with buffer above 4G only, or with memmap to limit mem under 4G. for example: memmap=4095M$1M to remove memory under 4G. According to Eric, add _nopanic version and no_iotlb_memory to fail map single later if swiotlb is still needed. -v2: don't pass nopanic, and use -ENOMEM return value according to Eric. panic early instead of using swiotlb_full to panic...according to Eric/Konrad. -v3: make swiotlb_init to be notpanic, but will affect: arm64, ia64, powerpc, tile, unicore32, x86. -v4: cleanup swiotlb_init by removing swiotlb_init_with_default_size. Suggested-by: Eric W. 
Biederman Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-36-git-send-email-yinghai@kernel.org Reviewed-and-tested-by: Konrad Rzeszutek Wilk Cc: Joerg Roedel Cc: Ralf Baechle Cc: Jeremy Fitzhardinge Cc: Kyungmin Park Cc: Marek Szyprowski Cc: Arnd Bergmann Cc: Andrzej Pietrasiewicz Cc: linux-mips@linux-mips.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: Shuah Khan Signed-off-by: H. Peter Anvin --- lib/swiotlb.c | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 196b06984dec..bfe02b8fc55b 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -122,11 +122,18 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } +static bool no_iotlb_memory; + void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; unsigned char *vstart, *vend; + if (no_iotlb_memory) { + pr_warn("software IO TLB: No low mem\n"); + return; + } + vstart = phys_to_virt(io_tlb_start); vend = phys_to_virt(io_tlb_end); @@ -136,7 +143,7 @@ void swiotlb_print_info(void) bytes >> 20, vstart, vend - 1); } -void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { void *v_overflow_buffer; unsigned long i, bytes; @@ -150,9 +157,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = alloc_bootmem_low_pages_nopanic( + PAGE_ALIGN(io_tlb_overflow)); if (!v_overflow_buffer) - panic("Cannot allocate SWIOTLB overflow buffer!\n"); + return -ENOMEM; io_tlb_overflow_buffer = __pa(v_overflow_buffer); @@ -169,15 +177,19 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, 
int verbose) if (verbose) swiotlb_print_info(); + + return 0; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -static void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init +swiotlb_init(int verbose) { + /* default to 64MB */ + size_t default_size = 64UL<<20; unsigned char *vstart; unsigned long bytes; @@ -188,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); - if (!vstart) - panic("Cannot allocate SWIOTLB buffer"); - - swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose); -} + /* Get IO TLB memory from the low pages */ + vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + return; -void __init -swiotlb_init(int verbose) -{ - swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ + if (io_tlb_start) + free_bootmem(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + pr_warn("Cannot allocate SWIOTLB buffer"); + no_iotlb_memory = true; } /* @@ -405,6 +413,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, unsigned long offset_slots; unsigned long max_slots; + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + mask = dma_get_seg_boundary(hwdev); tbl_dma_addr &= mask; -- cgit v1.2.3 From 26d438457ed1b99b6cb26d8f694e8d3de336f9d8 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Wed, 30 Jan 2013 11:30:05 +0200 Subject: digsig: remove unnecessary memory allocation and copying In existing use case, copying of the decoded data is unnecessary in pkcs_1_v1_5_decode_emsa. It is just enough to get pointer to the message. Removing copying and extra buffer allocation. 
Signed-off-by: Dmitry Kasatkin Signed-off-by: James Morris --- lib/digsig.c | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/digsig.c b/lib/digsig.c index 8c0e62975c88..0103c5b9b802 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -30,11 +30,10 @@ static struct crypto_shash *shash; -static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, - unsigned long msglen, - unsigned long modulus_bitlen, - unsigned char *out, - unsigned long *outlen) +static const char *pkcs_1_v1_5_decode_emsa(const unsigned char *msg, + unsigned long msglen, + unsigned long modulus_bitlen, + unsigned long *outlen) { unsigned long modulus_len, ps_len, i; @@ -42,11 +41,11 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, /* test message size */ if ((msglen > modulus_len) || (modulus_len < 11)) - return -EINVAL; + return NULL; /* separate encoded message */ - if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) - return -EINVAL; + if (msg[0] != 0x00 || msg[1] != 0x01) + return NULL; for (i = 2; i < modulus_len - 1; i++) if (msg[i] != 0xFF) @@ -56,19 +55,13 @@ static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, if (msg[i] != 0) /* There was no octet with hexadecimal value 0x00 to separate ps from m. 
*/ - return -EINVAL; + return NULL; ps_len = i - 2; - if (*outlen < (msglen - (2 + ps_len + 1))) { - *outlen = msglen - (2 + ps_len + 1); - return -EOVERFLOW; - } - *outlen = (msglen - (2 + ps_len + 1)); - memcpy(out, &msg[2 + ps_len + 1], *outlen); - return 0; + return msg + 2 + ps_len + 1; } /* @@ -83,7 +76,8 @@ static int digsig_verify_rsa(struct key *key, unsigned long mlen, mblen; unsigned nret, l; int head, i; - unsigned char *out1 = NULL, *out2 = NULL; + unsigned char *out1 = NULL; + const char *m; MPI in = NULL, res = NULL, pkey[2]; uint8_t *p, *datap, *endp; struct user_key_payload *ukp; @@ -120,7 +114,7 @@ static int digsig_verify_rsa(struct key *key, } mblen = mpi_get_nbits(pkey[0]); - mlen = (mblen + 7)/8; + mlen = DIV_ROUND_UP(mblen, 8); if (mlen == 0) goto err; @@ -129,10 +123,6 @@ static int digsig_verify_rsa(struct key *key, if (!out1) goto err; - out2 = kzalloc(mlen, GFP_KERNEL); - if (!out2) - goto err; - nret = siglen; in = mpi_read_from_buffer(sig, &nret); if (!in) @@ -162,18 +152,15 @@ static int digsig_verify_rsa(struct key *key, memset(out1, 0, head); memcpy(out1 + head, p, l); - err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len); - if (err) - goto err; + m = pkcs_1_v1_5_decode_emsa(out1, len, mblen, &len); - if (len != hlen || memcmp(out2, h, hlen)) + if (!m || len != hlen || memcmp(m, h, hlen)) err = -EINVAL; err: mpi_free(in); mpi_free(res); kfree(out1); - kfree(out2); while (--i >= 0) mpi_free(pkey[i]); err1: -- cgit v1.2.3 From 0d2a1b2d03dfd5ee79e7ebc59635690c8f08810f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Jan 2013 11:30:06 +0200 Subject: mpilib: use DIV_ROUND_UP and remove unused macros Remove MIN, MAX and ABS macros that are duplicates kernel's native implementation. 
Signed-off-by: Andy Shevchenko Signed-off-by: James Morris --- lib/mpi/mpi-internal.h | 4 ---- lib/mpi/mpicoder.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 77adcf6bc257..60cf765628e9 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -65,10 +65,6 @@ typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ -#define ABS(x) (x >= 0 ? x : -x) -#define MIN(l, o) ((l) < (o) ? (l) : (o)) -#define MAX(h, i) ((h) > (i) ? (h) : (i)) - static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) { if (a->alloced < b) diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 3962b7f7fe3f..5f9c44cdf1f5 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -52,7 +52,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) else nbits = 0; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; @@ -96,8 +96,8 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) buffer += 2; nread = 2; - nbytes = (nbits + 7) / 8; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nbytes = DIV_ROUND_UP(nbits, 8); + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; @@ -193,7 +193,7 @@ int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) int nlimbs; int i; - nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); if (RESIZE_IF_NEEDED(a, nlimbs) < 0) return -ENOMEM; a->sign = sign; -- cgit v1.2.3 From 64e69073c35439fa19c2ad2a4a18834e0314f071 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:16 +0530 Subject: asm-generic headers: Allow yet more arch overrides in checksum.h arches can have more efficient implementation of these 
routines Acked-by: Arnd Bergmann Signed-off-by: Vineet Gupta --- lib/checksum.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index 12dceb27ff20..129775eb6de6 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -102,6 +102,7 @@ out: } #endif +#ifndef ip_fast_csum /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. @@ -111,6 +112,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) return (__force __sum16)~do_csum(iph, ihl*4); } EXPORT_SYMBOL(ip_fast_csum); +#endif /* * computes the checksum of a memory block at buff, length len, -- cgit v1.2.3 From ce6711f3d196f09ca0ed29a24dfad42d83912b20 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 5 Feb 2013 21:11:55 +0800 Subject: rwsem: Implement writer lock-stealing for better scalability Commit 5a505085f043 ("mm/rmap: Convert the struct anon_vma::mutex to an rwsem") changed struct anon_vma::mutex to an rwsem, which caused aim7 fork_test performance to drop by 50%. Yuanhan Liu did the following excellent analysis: https://lkml.org/lkml/2013/1/29/84 and found that the regression is caused by strict, serialized, FIFO sequential write-ownership of rwsems. Ingo suggested implementing opportunistic lock-stealing for the front writer task in the waitqueue. Yuanhan Liu implemented lock-stealing for spinlock-rwsems, which indeed recovered much of the regression - confirming the analysis that the main factor in the regression was the FIFO writer-fairness of rwsems. In this patch we allow lock-stealing to happen when the first waiter is also writer. With that change in place the aim7 fork_test performance is fully recovered on my Intel NHM EP, NHM EX, SNB EP 2S and 4S test-machines. 
Reported-by: lkp@linux.intel.com Reported-by: Yuanhan Liu Signed-off-by: Alex Shi Cc: David Howells Cc: Michel Lespinasse Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Anton Blanchard Cc: Arjan van de Ven Cc: paul.gortmaker@windriver.com Link: https://lkml.org/lkml/2013/1/29/84 Link: http://lkml.kernel.org/r/1360069915-31619-1-git-send-email-alex.shi@intel.com [ Small stylistic fixes, updated changelog. ] Signed-off-by: Ingo Molnar --- lib/rwsem.c | 75 +++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/rwsem.c b/lib/rwsem.c index 8337e1b9bb8d..ad5e0df16ab4 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -2,6 +2,8 @@ * * Written by David Howells (dhowells@redhat.com). * Derived from arch/i386/kernel/semaphore.c + * + * Writer lock-stealing by Alex Shi */ #include #include @@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop, adjustment; + signed long woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) @@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) */ goto out; - /* There's a writer at the front of the queue - try to grant it the - * write lock. However, we only wake this writer if we can transition - * the active part of the count from 0 -> 1 - */ - adjustment = RWSEM_ACTIVE_WRITE_BIAS; - if (waiter->list.next == &sem->wait_list) - adjustment -= RWSEM_WAITING_BIAS; - - try_again_write: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (oldcount & RWSEM_ACTIVE_MASK) - /* Someone grabbed the sem already */ - goto undo_write; - - /* We must be careful not to touch 'waiter' after we set ->task = NULL. 
- * It is an allocated on the waiter's stack and may become invalid at - * any time after that point (due to a wakeup from another source). - */ - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + /* Wake up the writing waiter and let the task grab the sem: */ + wake_up_process(waiter->task); goto out; readers_only: @@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type) out: return sem; +} + +/* Try to get write sem, caller holds sem->wait_lock: */ +static int try_get_writer_sem(struct rw_semaphore *sem, + struct rwsem_waiter *waiter) +{ + struct rwsem_waiter *fwaiter; + long oldcount, adjustment; - /* undo the change to the active count, but check for a transition - * 1->0 */ - undo_write: + /* only steal when first waiter is writing */ + fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE)) + return 0; + + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + /* Only one waiter in the queue: */ + if (fwaiter == waiter && waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + +try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (!(oldcount & RWSEM_ACTIVE_MASK)) { + /* No active lock: */ + struct task_struct *tsk = waiter->task; + + list_del(&waiter->list); + smp_mb(); + put_task_struct(tsk); + tsk->state = TASK_RUNNING; + return 1; + } + /* some one grabbed the sem already */ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) - goto out; + return 0; goto try_again_write; } @@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem, for (;;) { if (!waiter.task) break; + + raw_spin_lock_irq(&sem->wait_lock); + /* Try to get the writer sem, may steal from the head writer: */ + if (flags == RWSEM_WAITING_FOR_WRITE) + if (try_get_writer_sem(sem, &waiter)) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + 
raw_spin_unlock_irq(&sem->wait_lock); schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); } -- cgit v1.2.3 From 9fb1b90ce0a847a8cc9492a6c1f347b5be1f33ff Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Mon, 16 Apr 2012 15:01:55 +0800 Subject: lockdep: Selftest: convert spinlock to raw spinlock To make the lockdep selftest working on RT we need to convert the spinlock tests to a raw spinlock. Otherwise we cannot run the irq context checks. For mainline this is just annotational as spinlocks are mapped to raw_spinlocks anyway. Signed-off-by: Yong Zhang Link: http://lkml.kernel.org/r/1334559716-18447-2-git-send-email-yong.zhang0@gmail.com Signed-off-by: Thomas Gleixner --- lib/locking-selftest.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7aae0f2a5e0a..c3eb261a7df3 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); * Normal standalone locks, for the circular and irq-context * dependency tests: */ -static DEFINE_SPINLOCK(lock_A); -static DEFINE_SPINLOCK(lock_B); -static DEFINE_SPINLOCK(lock_C); -static DEFINE_SPINLOCK(lock_D); +static DEFINE_RAW_SPINLOCK(lock_A); +static DEFINE_RAW_SPINLOCK(lock_B); +static DEFINE_RAW_SPINLOCK(lock_C); +static DEFINE_RAW_SPINLOCK(lock_D); static DEFINE_RWLOCK(rwlock_A); static DEFINE_RWLOCK(rwlock_B); @@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D); * but X* and Y* are different classes. 
We do this so that * we do not trigger a real lockup: */ -static DEFINE_SPINLOCK(lock_X1); -static DEFINE_SPINLOCK(lock_X2); -static DEFINE_SPINLOCK(lock_Y1); -static DEFINE_SPINLOCK(lock_Y2); -static DEFINE_SPINLOCK(lock_Z1); -static DEFINE_SPINLOCK(lock_Z2); +static DEFINE_RAW_SPINLOCK(lock_X1); +static DEFINE_RAW_SPINLOCK(lock_X2); +static DEFINE_RAW_SPINLOCK(lock_Y1); +static DEFINE_RAW_SPINLOCK(lock_Y2); +static DEFINE_RAW_SPINLOCK(lock_Z1); +static DEFINE_RAW_SPINLOCK(lock_Z2); static DEFINE_RWLOCK(rwlock_X1); static DEFINE_RWLOCK(rwlock_X2); @@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2); */ #define INIT_CLASS_FUNC(class) \ static noinline void \ -init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ - struct rw_semaphore *rwsem) \ +init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \ + struct mutex *mutex, struct rw_semaphore *rwsem)\ { \ - spin_lock_init(lock); \ + raw_spin_lock_init(lock); \ rwlock_init(rwlock); \ mutex_init(mutex); \ init_rwsem(rwsem); \ @@ -168,10 +168,10 @@ static void init_shared_classes(void) * Shortcuts for lock/unlock API variants, to keep * the testcases compact: */ -#define L(x) spin_lock(&lock_##x) -#define U(x) spin_unlock(&lock_##x) +#define L(x) raw_spin_lock(&lock_##x) +#define U(x) raw_spin_unlock(&lock_##x) #define LU(x) L(x); U(x) -#define SI(x) spin_lock_init(&lock_##x) +#define SI(x) raw_spin_lock_init(&lock_##x) #define WL(x) write_lock(&rwlock_##x) #define WU(x) write_unlock(&rwlock_##x) @@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #define I2(x) \ do { \ - spin_lock_init(&lock_##x); \ + raw_spin_lock_init(&lock_##x); \ rwlock_init(&rwlock_##x); \ mutex_init(&mutex_##x); \ init_rwsem(&rwsem_##x); \ -- cgit v1.2.3 From 41ef8f826692c8f65882bec0a8211bd4d1d2d19a Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 1 Feb 2013 18:59:16 +0800 Subject: rwsem-spinlock: Implement writer lock-stealing for better scalability We (Linux Kernel Performance project) 
found a regression introduced by commit: 5a505085f043 mm/rmap: Convert the struct anon_vma::mutex to an rwsem which converted all anon_vma::mutex locks to rwsem write locks. The semantics are the same, but the behavioral difference is quite huge in some cases. After investigating it we found the root cause: mutexes support lock stealing while rwsems don't. Here is the link for the detailed regression report: https://lkml.org/lkml/2013/1/29/84 Ingo suggested adding write lock stealing to rwsems: "I think we should allow lock-steal between rwsem writers - that will not hurt fairness as most rwsem fairness concerns relate to reader vs. writer fairness" And here is the rwsem-spinlock version. With this patch, we got a double performance increase in one test box with the following aim7 workfile: FILESIZE: 1M POOLSIZE: 10M 10 fork_test /usr/bin/time output w/o patch /usr/bin/time_output with patch -- Percent of CPU this job got: 369% Percent of CPU this job got: 537% Voluntary context switches: 640595016 Voluntary context switches: 157915561 We got a 45% increase in CPU usage and saved about 3/4 of the voluntary context switches. 
Reported-by: LKP project Suggested-by: Ingo Molnar Signed-off-by: Yuanhan Liu Cc: Alex Shi Cc: David Howells Cc: Michel Lespinasse Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Anton Blanchard Cc: Arjan van de Ven Cc: paul.gortmaker@windriver.com Link: http://lkml.kernel.org/r/1359716356-23865-1-git-send-email-yuanhan.liu@linux.intel.com Signed-off-by: Ingo Molnar --- lib/rwsem-spinlock.c | 69 ++++++++++++++++++---------------------------------- 1 file changed, 24 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 7e0d6a58fc83..7542afbb22b3 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) goto dont_wake_writers; } - /* if we are allowed to wake writers try to grant a single write lock - * if there's a writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential - * contention + /* + * as we support write lock stealing, we can't set sem->activity + * to -1 here to indicate we get the lock. Instead, we wake it up + * to let it go get it again. 
*/ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { - sem->activity = -1; - list_del(&waiter->list); - tsk = waiter->task; - /* Don't touch waiter after ->task has been NULLed */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + wake_up_process(waiter->task); goto out; } @@ -121,18 +114,10 @@ static inline struct rw_semaphore * __rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; - struct task_struct *tsk; - - sem->activity = -1; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - list_del(&waiter->list); + wake_up_process(waiter->task); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); return sem; } @@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore - * - we increment the waiting count anyway to indicate an exclusive lock */ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) { @@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity = -1; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ + tsk = current; waiter.task = tsk; waiter.flags = RWSEM_WAITING_FOR_WRITE; - get_task_struct(tsk); - list_add_tail(&waiter.list, &sem->wait_list); - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ + /* wait for someone to release the lock */ for (;;) { - if (!waiter.task) + /* + * That is the key to support write lock stealing: allows the + * task already on CPU to get the lock soon rather than put + * itself into sleep and waiting for system woke it or someone + * else in 
the head of the wait list up. + */ + if (sem->activity == 0) break; - schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + schedule(); + raw_spin_lock_irqsave(&sem->wait_lock, flags); } + /* got the lock */ + sem->activity = -1; + list_del(&waiter.list); - tsk->state = TASK_RUNNING; - out: - ; + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } void __sched __down_write(struct rw_semaphore *sem) @@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0 && list_empty(&sem->wait_list)) { - /* granted */ + if (sem->activity == 0) { + /* got the lock */ sem->activity = -1; ret = 1; } -- cgit v1.2.3 From b6bec26cea948148a9420e7a0ac337f925de49e7 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Mon, 13 Aug 2012 17:24:24 +0200 Subject: lib/lzo: Rename lzo1x_decompress.c to lzo1x_decompress_safe.c Rename the source file to match the function name and thereby also make room for a possible future even slightly faster "non-safe" decompressor version. Signed-off-by: Markus F.X.J. 
Oberhumer --- lib/decompress_unlzo.c | 2 +- lib/lzo/Makefile | 2 +- lib/lzo/lzo1x_decompress.c | 255 ---------------------------------------- lib/lzo/lzo1x_decompress_safe.c | 255 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 257 insertions(+), 257 deletions(-) delete mode 100644 lib/lzo/lzo1x_decompress.c create mode 100644 lib/lzo/lzo1x_decompress_safe.c (limited to 'lib') diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 4531294fa62f..960183d4258f 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -31,7 +31,7 @@ */ #ifdef STATIC -#include "lzo/lzo1x_decompress.c" +#include "lzo/lzo1x_decompress_safe.c" #else #include #endif diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile index e764116ea12d..f0f7d7ca2b83 100644 --- a/lib/lzo/Makefile +++ b/lib/lzo/Makefile @@ -1,5 +1,5 @@ lzo_compress-objs := lzo1x_compress.o -lzo_decompress-objs := lzo1x_decompress.o +lzo_decompress-objs := lzo1x_decompress_safe.o obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c deleted file mode 100644 index f2fd09850223..000000000000 --- a/lib/lzo/lzo1x_decompress.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * LZO1X Decompressor from MiniLZO - * - * Copyright (C) 1996-2005 Markus F.X.J. 
Oberhumer - * - * The full LZO package can be found at: - * http://www.oberhumer.com/opensource/lzo/ - * - * Changed for kernel use by: - * Nitin Gupta - * Richard Purdie - */ - -#ifndef STATIC -#include -#include -#endif - -#include -#include -#include "lzodefs.h" - -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) - -#define COPY4(dst, src) \ - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) - -int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len) -{ - const unsigned char * const ip_end = in + in_len; - unsigned char * const op_end = out + *out_len; - const unsigned char *ip = in, *m_pos; - unsigned char *op = out; - size_t t; - - *out_len = 0; - - if (*ip > 17) { - t = *ip++ - 17; - if (t < 4) - goto match_next; - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - do { - *op++ = *ip++; - } while (--t > 0); - goto first_literal_run; - } - - while ((ip < ip_end)) { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 15 + *ip++; - } - if (HAVE_OP(t + 3, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 4, ip_end, ip)) - goto input_overrun; - - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while (t >= 4); - if (t > 0) { - do { - *op++ = *ip++; - } while (--t > 0); - } - } else { - do { - *op++ = *ip++; - } while (--t > 0); - } - } - -first_literal_run: - t = *ip++; - if (t >= 16) - goto match; - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - - if (HAVE_OP(3, op_end, op)) - 
goto output_overrun; - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; - - goto match_done; - - do { -match: - if (t >= 64) { - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - goto copy_match; - } else if (t >= 32) { - t &= 31; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 31 + *ip++; - } - m_pos = op - 1; - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - } else if (t >= 16) { - m_pos = op; - m_pos -= (t & 8) << 11; - - t &= 7; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 7 + *ip++; - } - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; - } else { - m_pos = op - 1; - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(2, op_end, op)) - goto output_overrun; - - *op++ = *m_pos++; - *op++ = *m_pos; - goto match_done; - } - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4 - (3 - 1); - do { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do { - *op++ = *m_pos++; - } while (--t > 0); - } else { -copy_match: - *op++ = *m_pos++; - *op++ = *m_pos++; - do { - *op++ = *m_pos++; - } while (--t > 0); - } -match_done: - t = ip[-2] & 3; - if (t == 0) - break; -match_next: - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - - *op++ = *ip++; - if (t > 1) { - *op++ = *ip++; - if (t 
> 2) - *op++ = *ip++; - } - - t = *ip++; - } while (ip < ip_end); - } - - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; - -eof_found: - *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); -input_overrun: - *out_len = op - out; - return LZO_E_INPUT_OVERRUN; - -output_overrun: - *out_len = op - out; - return LZO_E_OUTPUT_OVERRUN; - -lookbehind_overrun: - *out_len = op - out; - return LZO_E_LOOKBEHIND_OVERRUN; -} -#ifndef STATIC -EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("LZO1X Decompressor"); - -#endif diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c new file mode 100644 index 000000000000..f2fd09850223 --- /dev/null +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -0,0 +1,255 @@ +/* + * LZO1X Decompressor from MiniLZO + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta + * Richard Purdie + */ + +#ifndef STATIC +#include +#include +#endif + +#include +#include +#include "lzodefs.h" + +#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) +#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) +#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) + +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len) +{ + const unsigned char * const ip_end = in + in_len; + unsigned char * const op_end = out + *out_len; + const unsigned char *ip = in, *m_pos; + unsigned char *op = out; + size_t t; + + *out_len = 0; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + do { + *op++ = *ip++; + } while (--t > 0); 
+ goto first_literal_run; + } + + while ((ip < ip_end)) { + t = *ip++; + if (t >= 16) + goto match; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 15 + *ip++; + } + if (HAVE_OP(t + 3, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 4, ip_end, ip)) + goto input_overrun; + + COPY4(op, ip); + op += 4; + ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; + ip += 4; + t -= 4; + } while (t >= 4); + if (t > 0) { + do { + *op++ = *ip++; + } while (--t > 0); + } + } else { + do { + *op++ = *ip++; + } while (--t > 0); + } + } + +first_literal_run: + t = *ip++; + if (t >= 16) + goto match; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + + if (HAVE_OP(3, op_end, op)) + goto output_overrun; + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; + + goto match_done; + + do { +match: + if (t >= 64) { + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + goto copy_match; + } else if (t >= 32) { + t &= 31; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 31 + *ip++; + } + m_pos = op - 1; + m_pos -= get_unaligned_le16(ip) >> 2; + ip += 2; + } else if (t >= 16) { + m_pos = op; + m_pos -= (t & 8) << 11; + + t &= 7; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 7 + *ip++; + } + m_pos -= get_unaligned_le16(ip) >> 2; + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } else { + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ 
<< 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(2, op_end, op)) + goto output_overrun; + + *op++ = *m_pos++; + *op++ = *m_pos; + goto match_done; + } + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4 - (3 - 1); + do { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4; + } while (t >= 4); + if (t > 0) + do { + *op++ = *m_pos++; + } while (--t > 0); + } else { +copy_match: + *op++ = *m_pos++; + *op++ = *m_pos++; + do { + *op++ = *m_pos++; + } while (--t > 0); + } +match_done: + t = ip[-2] & 3; + if (t == 0) + break; +match_next: + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } + + t = *ip++; + } while (ip < ip_end); + } + + *out_len = op - out; + return LZO_E_EOF_NOT_FOUND; + +eof_found: + *out_len = op - out; + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; + +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; + +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Decompressor"); + +#endif -- cgit v1.2.3 From 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. 
Oberhumer" Date: Mon, 13 Aug 2012 17:25:44 +0200 Subject: lib/lzo: Update LZO compression to current upstream version This commit updates the kernel LZO code to the current upstream version which features a significant speed improvement - benchmarking the Calgary and Silesia test corpora typically shows a doubled performance in both compression and decompression on modern i386/x86_64/powerpc machines. Signed-off-by: Markus F.X.J. Oberhumer --- lib/lzo/lzo1x_compress.c | 335 ++++++++++++++++++++++---------------- lib/lzo/lzo1x_decompress_safe.c | 350 +++++++++++++++++++--------------------- lib/lzo/lzodefs.h | 38 +++-- 3 files changed, 387 insertions(+), 336 deletions(-) (limited to 'lib') diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index a6040990a62e..236eb21167b5 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -1,194 +1,243 @@ /* - * LZO1X Compressor from MiniLZO + * LZO1X Compressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ #include #include -#include #include +#include #include "lzodefs.h" static noinline size_t -_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len, void *wrkmem) +lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + size_t ti, void *wrkmem) { + const unsigned char *ip; + unsigned char *op; const unsigned char * const in_end = in + in_len; - const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; - const unsigned char ** const dict = wrkmem; - const unsigned char *ip = in, *ii = ip; - const unsigned char *end, *m, *m_pos; - size_t m_off, m_len, dindex; - unsigned char *op = out; + const unsigned char * const ip_end = in + in_len - 20; + const unsigned char *ii; + lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; - ip += 4; + op = out; + ip = in; + ii = ip; + ip += ti < 4 ? 
4 - ti : 0; for (;;) { - dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); - m_pos = dict[dindex]; - - if (m_pos < in) - goto literal; - - if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) - goto literal; - - m_off = ip - m_pos; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - - goto literal; - -try_match: - if (get_unaligned((const unsigned short *)m_pos) - == get_unaligned((const unsigned short *)ip)) { - if (likely(m_pos[2] == ip[2])) - goto match; - } - + const unsigned char *m_pos; + size_t t, m_len, m_off; + u32 dv; literal: - dict[dindex] = ip; - ++ip; + ip += 1 + ((ip - ii) >> 5); +next: if (unlikely(ip >= ip_end)) break; - continue; - -match: - dict[dindex] = ip; - if (ip != ii) { - size_t t = ip - ii; + dv = get_unaligned_le32(ip); + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + ii -= ti; + ti = 0; + t = ip - ii; + if (t != 0) { if (t <= 3) { op[-2] |= t; - } else if (t <= 18) { + COPY4(op, ii); + op += t; + } else if (t <= 16) { *op++ = (t - 3); + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += t; } else { - size_t tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; + if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; *op++ = 0; + while (unlikely(tt > 255)) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; } - *op++ = tt; + do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { + *op++ = *ii++; + } while (--t > 0); } - do { - *op++ = *ii++; - } while (--t > 0); } - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != 
*ip++ - || m_pos[5] != *ip++ || m_pos[6] != *ip++ - || m_pos[7] != *ip++ || m_pos[8] != *ip++) { - --ip; - m_len = ip - ii; + m_len = 4; + { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) + u64 v; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 8; + v = get_unaligned((const u64 *) (ip + m_len)) ^ + get_unaligned((const u64 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctzll(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clzll(v) / 8; +# else +# error "missing endian definition" +# endif +#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) + u32 v; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(v == 0)) { + do { + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (v != 0) + break; + m_len += 4; + v = get_unaligned((const u32 *) (ip + m_len)) ^ + get_unaligned((const u32 *) (m_pos + m_len)); + if (unlikely(ip + m_len >= ip_end)) + goto m_len_done; + } while (v == 0); + } +# if defined(__LITTLE_ENDIAN) + m_len += (unsigned) __builtin_ctz(v) / 8; +# elif defined(__BIG_ENDIAN) + m_len += (unsigned) __builtin_clz(v) / 8; +# else +# error "missing endian definition" +# endif +#else + if (unlikely(ip[m_len] == m_pos[m_len])) { + do { + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (ip[m_len] != m_pos[m_len]) + break; + m_len += 1; + if (unlikely(ip + 
m_len >= ip_end)) + goto m_len_done; + } while (ip[m_len] == m_pos[m_len]); + } +#endif + } +m_len_done: - if (m_off <= M2_MAX_OFFSET) { - m_off -= 1; - *op++ = (((m_len - 1) << 5) - | ((m_off & 7) << 2)); - *op++ = (m_off >> 3); - } else if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; + m_off = ip - m_pos; + ip += m_len; + ii = ip; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= M3_MAX_LEN) *op++ = (M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else { - m_off -= 0x4000; - - *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) - | (m_len - 2)); - goto m3_m4_offset; + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; + } + *op++ = (m_len); } + *op++ = (m_off << 2); + *op++ = (m_off >> 6); } else { - end = in_end; - m = m_pos + M2_MAX_LEN + 1; - - while (ip < end && *m == *ip) { - m++; - ip++; - } - m_len = ip - ii; - - if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - if (m_len <= 33) { - *op++ = (M3_MARKER | (m_len - 2)); - } else { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } else { - m_off -= 0x4000; - if (m_len <= M4_MAX_LEN) { - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11) + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) + *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); - } else { - m_len -= M4_MAX_LEN; - *op++ = (M4_MARKER - | ((m_off & 0x4000) >> 11)); -m3_m4_len: - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - - *op++ = (m_len); + else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER | ((m_off >> 11) & 8)); + while (unlikely(m_len > 255)) { + m_len -= 255; + *op++ = 0; } + *op++ = (m_len); } -m3_m4_offset: - *op++ = ((m_off & 63) << 2); + *op++ = (m_off << 2); *op++ = (m_off >> 6); } - - ii = ip; - if (unlikely(ip >= ip_end)) - break; + goto next; } - *out_len = op - out; - return in_end - ii; + 
return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, - size_t *out_len, void *wrkmem) +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) { - const unsigned char *ii; + const unsigned char *ip = in; unsigned char *op = out; - size_t t; + size_t l = in_len; + size_t t = 0; - if (unlikely(in_len <= M2_MAX_LEN + 5)) { - t = in_len; - } else { - t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + while (l > 20) { + size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + uintptr_t ll_end = (uintptr_t) ip + ll; + if ((ll_end + ((t + ll) >> 5)) <= ll_end) + break; + BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); + memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + ip += ll; op += *out_len; + l -= ll; } + t += l; if (t > 0) { - ii = in + in_len - t; + const unsigned char *ii = in + in_len - t; if (op == out && t <= 238) { *op++ = (17 + t); @@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, *op++ = (t - 3); } else { size_t tt = t - 18; - *op++ = 0; while (tt > 255) { tt -= 255; *op++ = 0; } - *op++ = tt; } - do { + if (t >= 16) do { + COPY8(op, ii); + COPY8(op + 8, ii + 8); + op += 16; + ii += 16; + t -= 16; + } while (t >= 16); + if (t > 0) do { *op++ = *ii++; } while (--t > 0); } @@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); - diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index f2fd09850223..569985d522d5 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -1,12 +1,12 @@ /* - * LZO1X Decompressor from MiniLZO + * LZO1X Decompressor from LZO * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ @@ -15,225 +15,207 @@ #include #include #endif - #include #include #include "lzodefs.h" -#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) -#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) -#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) - -#define COPY4(dst, src) \ - put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) +#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) +#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun +#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, - unsigned char *out, size_t *out_len) + unsigned char *out, size_t *out_len) { + unsigned char *op; + const unsigned char *ip; + size_t t, next; + size_t state = 0; + const unsigned char *m_pos; const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; - const unsigned char *ip = in, *m_pos; - unsigned char *op = out; - size_t t; - *out_len = 0; + op = out; + ip = in; + if (unlikely(in_len < 3)) + goto input_overrun; if (*ip > 17) { t = *ip++ - 17; - if (t < 4) + if (t < 4) { + next = t; goto match_next; - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - do { - *op++ = *ip++; - } while (--t > 0); - goto first_literal_run; - } - - while ((ip < ip_end)) { - t = *ip++; - if (t >= 16) - goto match; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 15 + *ip++; - } - if (HAVE_OP(t + 3, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 
4, ip_end, ip)) - goto input_overrun; - - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while (t >= 4); - if (t > 0) { - do { - *op++ = *ip++; - } while (--t > 0); - } - } else { - do { - *op++ = *ip++; - } while (--t > 0); - } } + goto copy_literal_run; + } -first_literal_run: + for (;;) { t = *ip++; - if (t >= 16) - goto match; - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - - if (HAVE_OP(3, op_end, op)) - goto output_overrun; - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; - - goto match_done; - - do { -match: - if (t >= 64) { - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - goto copy_match; - } else if (t >= 32) { - t &= 31; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { + if (t < 16) { + if (likely(state == 0)) { + if (unlikely(t == 0)) { + while (unlikely(*ip == 0)) { t += 255; ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; + NEED_IP(1); } - t += 31 + *ip++; + t += 15 + *ip++; } - m_pos = op - 1; - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - } else if (t >= 16) { - m_pos = op; - m_pos -= (t & 8) << 11; - - t &= 7; - if (t == 0) { - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - while (*ip == 0) { - t += 255; - ip++; - if (HAVE_IP(1, ip_end, ip)) - goto input_overrun; - } - t += 7 + *ip++; + t += 3; +copy_literal_run: +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { + const unsigned char *ie = ip + t; + unsigned char *oe = op + t; + do { + COPY8(op, ip); + op += 8; + ip += 8; + COPY8(op, ip); + op += 8; + ip += 8; + } while (ip < ie); + ip = ie; + op = oe; + } else +#endif + { + NEED_OP(t); + NEED_IP(t + 
3); + do { + *op++ = *ip++; + } while (--t > 0); } - m_pos -= get_unaligned_le16(ip) >> 2; - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; - } else { + state = 4; + continue; + } else if (state != 4) { + next = t & 3; m_pos = op - 1; m_pos -= t >> 2; m_pos -= *ip++ << 2; - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(2, op_end, op)) - goto output_overrun; - - *op++ = *m_pos++; - *op++ = *m_pos; - goto match_done; + TEST_LB(m_pos); + NEED_OP(2); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + goto match_next; + } else { + next = t & 3; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + t = 3; } - - if (HAVE_LB(m_pos, out, op)) - goto lookbehind_overrun; - if (HAVE_OP(t + 3 - 1, op_end, op)) - goto output_overrun; - - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4 - (3 - 1); + } else if (t >= 64) { + next = t & 3; + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1 + (3 - 1); + } else if (t >= 32) { + t = (t & 31) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + NEED_IP(2); + } + m_pos = op - 1; + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + while (unlikely(*ip == 0)) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + NEED_IP(2); + } + next = get_unaligned_le16(ip); + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } + TEST_LB(m_pos); +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (op - m_pos >= 8) { + unsigned char *oe = op + t; + if (likely(HAVE_OP(t + 15))) { do { - COPY4(op, m_pos); - op += 4; - m_pos += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do { - *op++ = *m_pos++; - } while (--t > 0); + COPY8(op, 
m_pos); + op += 8; + m_pos += 8; + COPY8(op, m_pos); + op += 8; + m_pos += 8; + } while (op < oe); + op = oe; + if (HAVE_IP(6)) { + state = next; + COPY4(op, ip); + op += next; + ip += next; + continue; + } } else { -copy_match: - *op++ = *m_pos++; - *op++ = *m_pos++; + NEED_OP(t); do { *op++ = *m_pos++; - } while (--t > 0); + } while (op < oe); } -match_done: - t = ip[-2] & 3; - if (t == 0) - break; + } else +#endif + { + unsigned char *oe = op + t; + NEED_OP(t); + op[0] = m_pos[0]; + op[1] = m_pos[1]; + op += 2; + m_pos += 2; + do { + *op++ = *m_pos++; + } while (op < oe); + } match_next: - if (HAVE_OP(t, op_end, op)) - goto output_overrun; - if (HAVE_IP(t + 1, ip_end, ip)) - goto input_overrun; - - *op++ = *ip++; - if (t > 1) { + state = next; + t = next; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + if (likely(HAVE_IP(6) && HAVE_OP(4))) { + COPY4(op, ip); + op += t; + ip += t; + } else +#endif + { + NEED_IP(t + 3); + NEED_OP(t); + while (t > 0) { *op++ = *ip++; - if (t > 2) - *op++ = *ip++; + t--; } - - t = *ip++; - } while (ip < ip_end); + } } - *out_len = op - out; - return LZO_E_EOF_NOT_FOUND; - eof_found: *out_len = op - out; - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + return (t != 3 ? LZO_E_ERROR : + ip == ip_end ? LZO_E_OK : + ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN); + input_overrun: *out_len = op - out; return LZO_E_INPUT_OVERRUN; diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index b6d482c492ef..6710b83ce72e 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -1,19 +1,37 @@ /* * lzodefs.h -- architecture, OS and compiler specific defines * - * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer + * Copyright (C) 1996-2012 Markus F.X.J. 
Oberhumer * * The full LZO package can be found at: * http://www.oberhumer.com/opensource/lzo/ * - * Changed for kernel use by: + * Changed for Linux kernel use by: * Nitin Gupta * Richard Purdie */ -#define LZO_VERSION 0x2020 -#define LZO_VERSION_STRING "2.02" -#define LZO_VERSION_DATE "Oct 17 2005" + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +#if defined(__x86_64__) +#define COPY8(dst, src) \ + put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) +#else +#define COPY8(dst, src) \ + COPY4(dst, src); COPY4((dst) + 4, (src) + 4) +#endif + +#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) +#error "conflicting endian definitions" +#elif defined(__x86_64__) +#define LZO_USE_CTZ64 1 +#define LZO_USE_CTZ32 1 +#elif defined(__i386__) || defined(__powerpc__) +#define LZO_USE_CTZ32 1 +#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#define LZO_USE_CTZ32 1 +#endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 @@ -34,10 +52,8 @@ #define M3_MARKER 32 #define M4_MARKER 16 -#define D_BITS 14 -#define D_MASK ((1u << D_BITS) - 1) +#define lzo_dict_t unsigned short +#define D_BITS 13 +#define D_SIZE (1u << D_BITS) +#define D_MASK (D_SIZE - 1) #define D_HIGH ((D_MASK >> 1) + 1) - -#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ - << (s1)) ^ (p)[0]) -#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) -- cgit v1.2.3 From 76e8402619cf777a3adae9cd70e56a848a9bf952 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Thu, 21 Feb 2013 16:43:04 -0800 Subject: lib/Kconfig.debug: unhide CONFIG_PANIC_ON_OOPS CONFIG_EXPERT doesn't really make sense, and hides it unintentionally. Remove superfluous "default n" pointed out by Ingo as well. 
Signed-off-by: Kyle McMartin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1714c897e3f..be767fd3d22b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -243,8 +243,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE default 1 if BOOTPARAM_SOFTLOCKUP_PANIC config PANIC_ON_OOPS - bool "Panic on Oops" if EXPERT - default n + bool "Panic on Oops" help Say Y here to enable the kernel to panic when it oopses. This has the same effect as setting oops=panic on the kernel command -- cgit v1.2.3 From 7d7992108d02aa92ad4c77e5d9ce14088c942e75 Mon Sep 17 00:00:00 2001 From: Stepan Moskovchenko Date: Thu, 21 Feb 2013 16:43:09 -0800 Subject: lib/vsprintf.c: add %pa format specifier for phys_addr_t types Add the %pa format specifier for printing a phys_addr_t type and its derivative types (such as resource_size_t), since the physical address size on some platforms can vary based on build options, regardless of the native integer type. Signed-off-by: Stepan Moskovchenko Cc: Rob Landley Cc: George Spelvin Cc: Andy Shevchenko Cc: Stephen Boyd Cc: Andrei Emeltchenko Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index fab33a9c5318..0d62fd700f68 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1030,6 +1030,7 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. 
+ * - 'a' For a phys_addr_t type and its derivative types (passed by reference) * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1120,6 +1121,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return netdev_feature_string(buf, end, ptr, spec); } break; + case 'a': + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + spec.base = 16; + return number(buf, end, + (unsigned long long) *((phys_addr_t *)ptr), spec); } spec.flags |= SMALL; if (spec.field_width == -1) { -- cgit v1.2.3 From 53769627b93d5b1d04178fd1fb2558d933ee9e81 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 21 Feb 2013 16:44:08 -0800 Subject: lib/parser.c: fix up comments for valid return values from match_number match_number() has return values of -ENOMEM, -EINVAL and -ERANGE. So, for all the functions calling match_number, the return value should include these values. Fix up the comments to reflect the correct values. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/parser.c b/lib/parser.c index 52cfa69f73df..807b2aaa33fa 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -157,7 +157,7 @@ static int match_number(substring_t *s, int *result, int base) * * Description: Attempts to parse the &substring_t @s as a decimal integer. On * success, sets @result to the integer represented by the string and returns 0. - * Returns either -ENOMEM or -EINVAL on failure. + * Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_int(substring_t *s, int *result) { @@ -171,7 +171,7 @@ int match_int(substring_t *s, int *result) * * Description: Attempts to parse the &substring_t @s as an octal integer. 
On * success, sets @result to the integer represented by the string and returns - * 0. Returns either -ENOMEM or -EINVAL on failure. + * 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_octal(substring_t *s, int *result) { @@ -185,7 +185,7 @@ int match_octal(substring_t *s, int *result) * * Description: Attempts to parse the &substring_t @s as a hexadecimal integer. * On success, sets @result to the integer represented by the string and - * returns 0. Returns either -ENOMEM or -EINVAL on failure. + * returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure. */ int match_hex(substring_t *s, int *result) { -- cgit v1.2.3 From 9d7496296590d57e0745286711aa31ed1b828917 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Feb 2013 16:44:10 -0800 Subject: decompressors: group XZ_DEC_* symbols under an if XZ_BCJ / endif Group all architecture-specific BCJ filter configuration symbols under an if XZ_BCJ / endif statement. Signed-off-by: Florian Fainelli Acked-by: Lasse Collin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/xz/Kconfig | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 60a6088d0e5e..12d2d777f36b 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -6,42 +6,40 @@ config XZ_DEC the .xz file format as the container. For integrity checking, CRC32 is supported. See Documentation/xz.txt for more information. 
+if XZ_DEC + config XZ_DEC_X86 bool "x86 BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_POWERPC bool "PowerPC BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_IA64 bool "IA-64 BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_ARM bool "ARM BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB bool "ARM-Thumb BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ config XZ_DEC_SPARC bool "SPARC BCJ filter decoder" if EXPERT default y - depends on XZ_DEC select XZ_DEC_BCJ +endif + config XZ_DEC_BCJ bool default n -- cgit v1.2.3 From 64dbfb444c150f5b64979323a197dedc2ec3e02c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Feb 2013 16:44:11 -0800 Subject: decompressors: drop dependency on CONFIG_EXPERT Remove the XZ_DEC_* dependency on CONFIG_EXPERT as recommended by Lasse Collin. Signed-off-by: Florian Fainelli Acked-by: Lasse Collin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/xz/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 12d2d777f36b..8d464706d4eb 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -9,32 +9,32 @@ config XZ_DEC if XZ_DEC config XZ_DEC_X86 - bool "x86 BCJ filter decoder" if EXPERT + bool "x86 BCJ filter decoder" default y select XZ_DEC_BCJ config XZ_DEC_POWERPC - bool "PowerPC BCJ filter decoder" if EXPERT + bool "PowerPC BCJ filter decoder" default y select XZ_DEC_BCJ config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" if EXPERT + bool "IA-64 BCJ filter decoder" default y select XZ_DEC_BCJ config XZ_DEC_ARM - bool "ARM BCJ filter decoder" if EXPERT + bool "ARM BCJ filter decoder" default y select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB - bool "ARM-Thumb BCJ filter decoder" if EXPERT + bool "ARM-Thumb BCJ filter decoder" 
default y select XZ_DEC_BCJ config XZ_DEC_SPARC - bool "SPARC BCJ filter decoder" if EXPERT + bool "SPARC BCJ filter decoder" default y select XZ_DEC_BCJ -- cgit v1.2.3 From 5dc49c75a26b99e86a18441e0b64c1f7c7c6a500 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Feb 2013 16:44:12 -0800 Subject: decompressors: make the default XZ_DEC_* config match the selected architecture Change the default XZ_DEC_* config symbol to match the configured architecture. It is perfectly legitimate to support multiple XZ BCJ filters for different architectures (e.g.: to mount foreign squashfs/xz compressed filesystems); it is, however, more natural not to select them all by default, but only the one matching the configured architecture. Signed-off-by: Florian Fainelli Acked-by: Lasse Collin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/xz/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 8d464706d4eb..82a04d7ba99e 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -10,32 +10,32 @@ if XZ_DEC config XZ_DEC_X86 bool "x86 BCJ filter decoder" - default y + default y if X86 select XZ_DEC_BCJ config XZ_DEC_POWERPC bool "PowerPC BCJ filter decoder" - default y + default y if POWERPC select XZ_DEC_BCJ config XZ_DEC_IA64 bool "IA-64 BCJ filter decoder" - default y + default y if IA64 select XZ_DEC_BCJ config XZ_DEC_ARM bool "ARM BCJ filter decoder" - default y + default y if ARM select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB bool "ARM-Thumb BCJ filter decoder" - default y + default y if (ARM && ARM_THUMB) select XZ_DEC_BCJ config XZ_DEC_SPARC bool "SPARC BCJ filter decoder" - default y + default y if SPARC select XZ_DEC_BCJ endif -- cgit v1.2.3 From 9ed8a30f3471347c1b763bd062fa78ae80f18eae Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 27 Feb 2013 17:02:42 -0800 Subject: lib/devres.c: fix misplaced #endif A misplaced #endif causes link errors related to pcim_*() 
functions. This is because pcim_*() functions are related to the CONFIG_PCI option; however, they are not related to the CONFIG_HAS_IOPORT option. Therefore, when CONFIG_PCI is enabled and CONFIG_HAS_IOPORT is not enabled, link errors related to pcim_*() functions occur, as below: drivers/ata/libata-sff.c:3233: undefined reference to `pcim_iomap_regions' drivers/ata/libata-sff.c:3238: undefined reference to `pcim_iomap_table' drivers/built-in.o: In function `ata_pci_sff_init_host': drivers/ata/libata-sff.c:2318: undefined reference to `pcim_iomap_regions' drivers/ata/libata-sff.c:2329: undefined reference to `pcim_iomap_table' Signed-off-by: Jingoo Han Cc: Greg KH Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 88ad75952a76..823533138fa0 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -227,6 +227,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) devm_ioport_map_match, (void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); +#endif /* CONFIG_HAS_IOPORT */ #ifdef CONFIG_PCI /* @@ -432,4 +433,3 @@ void pcim_iounmap_regions(struct pci_dev *pdev, int mask) } EXPORT_SYMBOL(pcim_iounmap_regions); #endif /* CONFIG_PCI */ -#endif /* CONFIG_HAS_IOPORT */ -- cgit v1.2.3 From a321e91b6d73ed011ffceed384c40d2785cf723b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:56 -0800 Subject: lib/scatterlist: add simple page iterator Add an iterator to walk through a scatter list a page at a time starting at a specific page offset. As opposed to the mapping iterator this is meant to be small, performing well even in simple loops like collecting all pages on the scatterlist into an array or setting up an iommu table based on the pages' DMA address. 
Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Tested-by: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7874b01e816e..a1d15647d7db 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -394,6 +394,44 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset) +{ + piter->__pg_advance = 0; + piter->__nents = nents; + + piter->page = NULL; + piter->sg = sglist; + piter->sg_pgoffset = pgoffset; +} +EXPORT_SYMBOL(__sg_page_iter_start); + +static int sg_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT; +} + +bool __sg_page_iter_next(struct sg_page_iter *piter) +{ + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset); + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started -- cgit v1.2.3 From 4225fc8555a992c7f91d174ef424384d6781e144 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 27 Feb 2013 17:02:57 -0800 Subject: lib/scatterlist: use page iterator in the mapping iterator For better code reuse use the newly added page iterator to iterate through the pages. The offset, length within the page is still calculated by the mapping iterator as well as the actual mapping. 
Idea from Tejun Heo. Signed-off-by: Imre Deak Cc: Maxim Levitsky Cc: Tejun Heo Cc: Daniel Vetter Cc: James Hogan Cc: Stephen Warren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a1d15647d7db..b83c144d731f 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -449,9 +449,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, { memset(miter, 0, sizeof(struct sg_mapping_iter)); - miter->__sg = sgl; - miter->__nents = nents; - miter->__offset = 0; + __sg_page_iter_start(&miter->piter, sgl, nents, 0); WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); miter->__flags = flags; } @@ -476,36 +474,35 @@ EXPORT_SYMBOL(sg_miter_start); */ bool sg_miter_next(struct sg_mapping_iter *miter) { - unsigned int off, len; - - /* check for end and drop resources from the last iteration */ - if (!miter->__nents) - return false; - sg_miter_stop(miter); - /* get to the next sg if necessary. __offset is adjusted by stop */ - while (miter->__offset == miter->__sg->length) { - if (--miter->__nents) { - miter->__sg = sg_next(miter->__sg); - miter->__offset = 0; - } else + /* + * Get to the next page if necessary. + * __remaining, __offset is adjusted by sg_miter_stop + */ + if (!miter->__remaining) { + struct scatterlist *sg; + unsigned long pgoffset; + + if (!__sg_page_iter_next(&miter->piter)) return false; - } - /* map the next page */ - off = miter->__sg->offset + miter->__offset; - len = miter->__sg->length - miter->__offset; + sg = miter->piter.sg; + pgoffset = miter->piter.sg_pgoffset; - miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT); - off &= ~PAGE_MASK; - miter->length = min_t(unsigned int, len, PAGE_SIZE - off); - miter->consumed = miter->length; + miter->__offset = pgoffset ? 
0 : sg->offset; + miter->__remaining = sg->offset + sg->length - + (pgoffset << PAGE_SHIFT) - miter->__offset; + miter->__remaining = min_t(unsigned long, miter->__remaining, + PAGE_SIZE - miter->__offset); + } + miter->page = miter->piter.page; + miter->consumed = miter->length = miter->__remaining; if (miter->__flags & SG_MITER_ATOMIC) - miter->addr = kmap_atomic(miter->page) + off; + miter->addr = kmap_atomic(miter->page) + miter->__offset; else - miter->addr = kmap(miter->page) + off; + miter->addr = kmap(miter->page) + miter->__offset; return true; } @@ -532,6 +529,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) /* drop resources from the last iteration */ if (miter->addr) { miter->__offset += miter->consumed; + miter->__remaining -= miter->consumed; if (miter->__flags & SG_MITER_TO_SG) flush_kernel_dcache_page(miter->page); -- cgit v1.2.3 From 6cdae7416a1c45c2ce105a78187d9b7e8feb9e24 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:34 -0800 Subject: idr: fix a subtle bug in idr_get_next() The iteration logic of idr_get_next() is borrowed mostly verbatim from idr_for_each(). It walks down the tree looking for the slot matching the current ID. If the matching slot is not found, the ID is incremented by the distance of single slot at the given level and repeats. The implementation assumes that during the whole iteration id is aligned to the layer boundaries of the level closest to the leaf, which is true for all iterations starting from zero or an existing element and thus is fine for idr_for_each(). However, idr_get_next() may be given any point and if the starting id hits in the middle of a non-existent layer, increment to the next layer will end up skipping the same offset into it. For example, an IDR with IDs filled between [64, 127] would look like the following. [ 0 64 ... ] /----/ | | | NULL [ 64 ... 127 ] If idr_get_next() is called with 63 as the starting point, it will try to follow down the pointer from 0. 
As it is NULL, it will then try to proceed to the next slot in the same level by adding the slot distance at that level which is 64 - making the next try 127. It goes around the loop and finds and returns 127 skipping [64, 126]. Note that this bug also triggers in idr_for_each_entry() loop which deletes during iteration as deletions can make layers go away leaving the iteration with unaligned ID into missing layers. Fix it by ensuring proceeding to the next slot doesn't carry over the unaligned offset - i.e. use round_up(id + 1, slot_distance) instead of id += slot_distance. Signed-off-by: Tejun Heo Reported-by: David Teigland Cc: KAMEZAWA Hiroyuki Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 648239079dd2..ca5aa000d6c3 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -625,7 +625,14 @@ void *idr_get_next(struct idr *idp, int *nextidp) return p; } - id += 1 << n; + /* + * Proceed to the next layer at the current level. Unlike + * idr_for_each(), @id isn't guaranteed to be aligned to + * layer boundary at this point and adding 1 << n may + * incorrectly skip IDs. Make sure we jump to the + * beginning of the next layer using round_up(). + */ + id = round_up(id + 1, 1 << n); while (n < fls(id)) { n += IDR_BITS; p = *--paa; -- cgit v1.2.3 From 9bb26bc1ffa32ec983860a5a66b6f291a875e39d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:35 -0800 Subject: idr: make idr_destroy() imply idr_remove_all() idr is silly in quite a few ways, one of which is how it's supposed to be destroyed - idr_destroy() doesn't release IDs and doesn't even whine if the idr isn't empty. If the caller forgets idr_remove_all(), it simply leaks memory. Even ida gets this wrong and leaks memory on destruction. There is absolutely no reason not to call idr_remove_all() from idr_destroy().
Nobody is abusing idr_destroy() for shrinking free layer buffer and continues to use idr after idr_destroy(), so it's safe to do remove_all from destroy. In the whole kernel, there is only one place where idr_remove_all() is legitimately used without following idr_destroy() while there are quite a few places where the caller forgets either idr_remove_all() or idr_destroy() leaking memory. This patch makes idr_destroy() call idr_remove_all() and updates the function description accordingly. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index ca5aa000d6c3..b8602e0b30da 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -436,15 +436,6 @@ EXPORT_SYMBOL(idr_remove); /** * idr_remove_all - remove all ids from the given idr tree * @idp: idr handle - * - * idr_destroy() only frees up unused, cached idp_layers, but this - * function will remove all id mappings and leave all idp_layers - * unused. - * - * A typical clean-up sequence for objects stored in an idr tree will - * use idr_for_each() to free all objects, if necessay, then - * idr_remove_all() to remove all ids, and idr_destroy() to free - * up the cached idr_layers. */ void idr_remove_all(struct idr *idp) { @@ -484,9 +475,20 @@ EXPORT_SYMBOL(idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree * @idp: idr handle + * + * Free all id mappings and all idp_layers. After this function, @idp is + * completely unused and can be freed / recycled. The caller is + * responsible for ensuring that no one else accesses @idp during or after + * idr_destroy(). + * + * A typical clean-up sequence for objects stored in an idr tree will use + * idr_for_each() to free all objects, if necessay, then idr_destroy() to + * free up the id mappings and cached idr_layers.
*/ void idr_destroy(struct idr *idp) { + idr_remove_all(idp); + while (idp->id_free_cnt) { struct idr_layer *p = get_from_free_list(idp); kmem_cache_free(idr_layer_cache, p); -- cgit v1.2.3 From fe6e24ec90b753392c3f9ec1fbca196c4e88e511 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:50 -0800 Subject: idr: deprecate idr_remove_all() There was only one legitimate use of idr_remove_all() and a lot more of incorrect uses (or lack of it). Now that idr_destroy() implies idr_remove_all() and all the in-kernel users updated not to use it, there's no reason to keep it around. Mark it deprecated so that we can later unexport it. idr_remove_all() is made an inline function calling __idr_remove_all() to avoid triggering deprecated warning on EXPORT_SYMBOL(). Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index b8602e0b30da..814c53ce0d41 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -433,11 +433,7 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); -/** - * idr_remove_all - remove all ids from the given idr tree - * @idp: idr handle - */ -void idr_remove_all(struct idr *idp) +void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -470,7 +466,7 @@ void idr_remove_all(struct idr *idp) } idp->layers = 0; } -EXPORT_SYMBOL(idr_remove_all); +EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree @@ -487,7 +483,7 @@ EXPORT_SYMBOL(idr_remove_all); */ void idr_destroy(struct idr *idp) { - idr_remove_all(idp); + __idr_remove_all(idp); while (idp->id_free_cnt) { struct idr_layer *p = get_from_free_list(idp); -- cgit v1.2.3 From 49038ef4fbe2842bd4d8338f89ec5c9ba71b0ae1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:52 -0800 Subject: idr: relocate idr_for_each_entry() and reorganize id[r|a]_get_new() * 
Move idr_for_each_entry() definition next to other idr related definitions. * Make id[r|a]_get_new() inline wrappers of id[r|a]_get_new_above(). This changes the implementation of idr_get_new() but the new implementation is trivial. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 49 ------------------------------------------------- 1 file changed, 49 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 814c53ce0d41..282841b5a561 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -317,36 +317,6 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) } EXPORT_SYMBOL(idr_get_new_above); -/** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle - * - * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill - * IDR's preallocation and then retry the idr_get_new_above() call. - * - * If the idr is full idr_get_new_above() will return %-ENOSPC. - * - * @id returns a value in the range %0 ... %0x7fffffff - */ -int idr_get_new(struct idr *idp, void *ptr, int *id) -{ - int rv; - - rv = idr_get_new_above_int(idp, ptr, 0); - /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. - */ - if (rv < 0) - return _idr_rc_to_errno(rv); - *id = rv; - return 0; -} -EXPORT_SYMBOL(idr_get_new); - static void idr_remove_warning(int id) { printk(KERN_WARNING @@ -856,25 +826,6 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) } EXPORT_SYMBOL(ida_get_new_above); -/** - * ida_get_new - allocate new ID - * @ida: idr handle - * @p_id: pointer to the allocated handle - * - * Allocate new ID. It should be called with any required locks. 
- * - * If memory is required, it will return %-EAGAIN, you should unlock - * and go back to the idr_pre_get() call. If the idr is full, it will - * return %-ENOSPC. - * - * @p_id returns a value in the range %0 ... %0x7fffffff. - */ -int ida_get_new(struct ida *ida, int *p_id) -{ - return ida_get_new_above(ida, 0, p_id); -} -EXPORT_SYMBOL(ida_get_new); - /** * ida_remove - remove the given ID * @ida: ida handle -- cgit v1.2.3 From 12d1b4393e0d8df36b2646a5e512f0513fb532d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:53 -0800 Subject: idr: remove _idr_rc_to_errno() hack idr uses -1, IDR_NEED_TO_GROW and IDR_NOMORE_SPACE to communicate exception conditions internally. The return value is later translated to errno values using _idr_rc_to_errno(). This is confusing. Drop the custom ones and consistently use -EAGAIN for "tree needs to grow", -ENOMEM for "need more memory" and -ENOSPC for "ran out of ID space". Due to the weird memory preloading mechanism, [ra]_get_new*() return -EAGAIN on memory shortage, so we need to substitute -ENOMEM w/ -EAGAIN on those interface functions. They'll eventually be cleaned up and the translations will go away. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 282841b5a561..bde6eecb0e87 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -133,6 +133,21 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) } EXPORT_SYMBOL(idr_pre_get); +/** + * sub_alloc - try to allocate an id without growing the tree depth + * @idp: idr handle + * @starting_id: id to start search at + * @id: pointer to the allocated handle + * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer + * + * Allocate an id in range [@starting_id, INT_MAX] from @idp without + * growing its depth. 
Returns + * + * the allocated id >= 0 if successful, + * -EAGAIN if the tree needs to grow for allocation to succeed, + * -ENOSPC if the id space is exhausted, + * -ENOMEM if more idr_layers need to be allocated. + */ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) { int n, m, sh; @@ -161,7 +176,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) /* if already at the top layer, we need to grow */ if (id >= 1 << (idp->layers * IDR_BITS)) { *starting_id = id; - return IDR_NEED_TO_GROW; + return -EAGAIN; } p = pa[l]; BUG_ON(!p); @@ -180,7 +195,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = ((id >> sh) ^ n ^ m) << sh; } if ((id >= MAX_IDR_BIT) || (id < 0)) - return IDR_NOMORE_SPACE; + return -ENOSPC; if (l == 0) break; /* @@ -189,7 +204,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) if (!p->ary[m]) { new = get_from_free_list(idp); if (!new) - return -1; + return -ENOMEM; new->layer = l-1; rcu_assign_pointer(p->ary[m], new); p->count++; @@ -215,7 +230,7 @@ build_up: layers = idp->layers; if (unlikely(!p)) { if (!(p = get_from_free_list(idp))) - return -1; + return -ENOMEM; p->layer = 0; layers = 1; } @@ -246,7 +261,7 @@ build_up: __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); - return -1; + return -ENOMEM; } new->ary[0] = p; new->count = 1; @@ -258,7 +273,7 @@ build_up: rcu_assign_pointer(idp->top, p); idp->layers = layers; v = sub_alloc(idp, &id, pa); - if (v == IDR_NEED_TO_GROW) + if (v == -EAGAIN) goto build_up; return(v); } @@ -306,12 +321,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) int rv; rv = idr_get_new_above_int(idp, ptr, starting_id); - /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. - */ if (rv < 0) - return _idr_rc_to_errno(rv); + return rv == -ENOMEM ? 
-EAGAIN : rv; *id = rv; return 0; } @@ -766,7 +777,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) /* get vacant slot */ t = idr_get_empty_slot(&ida->idr, idr_id, pa); if (t < 0) - return _idr_rc_to_errno(t); + return t == -ENOMEM ? -EAGAIN : t; if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) return -ENOSPC; -- cgit v1.2.3 From 3594eb2894f571c9b9a497159b1e4d84fdac5688 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:54 -0800 Subject: idr: refactor idr_get_new_above() Move slot filling to idr_fill_slot() from idr_get_new_above_int() and make idr_get_new_above() directly call it. idr_get_new_above_int() is no longer needed and removed. This will be used to implement a new ID allocation interface. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index bde6eecb0e87..b13aae5bdc81 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -278,24 +278,15 @@ build_up: return(v); } -static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) +/* + * @id and @pa are from a successful allocation from idr_get_empty_slot(). + * Install the user pointer @ptr and mark the slot full. + */ +static void idr_fill_slot(void *ptr, int id, struct idr_layer **pa) { - struct idr_layer *pa[MAX_IDR_LEVEL]; - int id; - - id = idr_get_empty_slot(idp, starting_id, pa); - if (id >= 0) { - /* - * Successfully found an empty slot. Install the user - * pointer and mark the slot full. 
- */ - rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], - (struct idr_layer *)ptr); - pa[0]->count++; - idr_mark_full(pa, id); - } - - return id; + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); + pa[0]->count++; + idr_mark_full(pa, id); } /** @@ -318,11 +309,14 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { + struct idr_layer *pa[MAX_IDR_LEVEL]; int rv; - rv = idr_get_new_above_int(idp, ptr, starting_id); + rv = idr_get_empty_slot(idp, starting_id, pa); if (rv < 0) return rv == -ENOMEM ? -EAGAIN : rv; + + idr_fill_slot(ptr, rv, pa); *id = rv; return 0; } -- cgit v1.2.3 From d5c7409f79e14db49d00785692334657592c07ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:03:55 -0800 Subject: idr: implement idr_preload[_end]() and idr_alloc() The current idr interface is very cumbersome. * For all allocations, two function calls - idr_pre_get() and idr_get_new*() - should be made. * idr_pre_get() doesn't guarantee that the following idr_get_new*() will not fail from memory shortage. If idr_get_new*() returns -EAGAIN, the caller is expected to retry pre_get and allocation. * idr_get_new*() can't enforce upper limit. Upper limit can only be enforced by allocating and then freeing if above limit. * idr_layer buffer is unnecessarily per-idr. Each idr ends up keeping around MAX_IDR_FREE idr_layers. The memory consumed per idr is under two pages but it makes it difficult to make idr_layer larger. This patch implements the following new set of allocation functions. * idr_preload[_end]() - Similar to radix preload but doesn't fail. The first idr_alloc() inside preload section can be treated as if it were called with @gfp_mask used for idr_preload(). * idr_alloc() - Allocate an ID w/ lower and upper limits. Takes @gfp_flags and can be used w/o preloading. 
When used inside preloaded section, the allocation mask of preloading can be assumed. If idr_alloc() can be called from a context which allows sufficiently relaxed @gfp_mask, it can be used by itself. If, for example, idr_alloc() is called inside spinlock protected region, preloading can be used like the following. idr_preload(GFP_KERNEL); spin_lock(lock); id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); spin_unlock(lock); idr_preload_end(); if (id < 0) error; which is much simpler and less error-prone than idr_pre_get and idr_get_new*() loop. The new interface uses per-cpu idr_layer buffer and thus the number of idr's in the system doesn't affect the amount of memory used for preloading. idr_layer_alloc() is introduced to handle idr_layer allocations for both old and new ID allocation paths. This is a bit hairy now but the new interface is expected to replace the old and the internal implementation eventually will become simpler. Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 166 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index b13aae5bdc81..2d016f5c410e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -35,8 +35,12 @@ #include #include #include +#include +#include static struct kmem_cache *idr_layer_cache; +static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); +static DEFINE_PER_CPU(int, idr_preload_cnt); static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) @@ -54,6 +58,50 @@ static struct idr_layer *get_from_free_list(struct idr *idp) return(p); } +/** + * idr_layer_alloc - allocate a new idr_layer + * @gfp_mask: allocation mask + * @layer_idr: optional idr to allocate from + * + * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch + * one from the per-cpu preload buffer.
If @layer_idr is not %NULL, fetch + * an idr_layer from @idr->id_free. + * + * @layer_idr is to maintain backward compatibility with the old alloc + * interface - idr_pre_get() and idr_get_new*() - and will be removed + * together with per-pool preload buffer. + */ +static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) +{ + struct idr_layer *new; + + /* this is the old path, bypass to get_from_free_list() */ + if (layer_idr) + return get_from_free_list(layer_idr); + + /* try to allocate directly from kmem_cache */ + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + if (new) + return new; + + /* + * Try to fetch one from the per-cpu preload buffer if in process + * context. See idr_preload() for details. + */ + if (in_interrupt()) + return NULL; + + preempt_disable(); + new = __this_cpu_read(idr_preload_head); + if (new) { + __this_cpu_write(idr_preload_head, new->ary[0]); + __this_cpu_dec(idr_preload_cnt); + new->ary[0] = NULL; + } + preempt_enable(); + return new; +} + static void idr_layer_rcu_free(struct rcu_head *head) { struct idr_layer *layer; @@ -139,6 +187,8 @@ EXPORT_SYMBOL(idr_pre_get); * @starting_id: id to start search at * @id: pointer to the allocated handle * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer + * @gfp_mask: allocation mask for idr_layer_alloc() + * @layer_idr: optional idr passed to idr_layer_alloc() * * Allocate an id in range [@starting_id, INT_MAX] from @idp without * growing its depth. Returns @@ -148,7 +198,8 @@ EXPORT_SYMBOL(idr_pre_get); * -ENOSPC if the id space is exhausted, * -ENOMEM if more idr_layers need to be allocated. 
*/ -static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) +static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, + gfp_t gfp_mask, struct idr *layer_idr) { int n, m, sh; struct idr_layer *p, *new; @@ -202,7 +253,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) * Create the layer below if it is missing. */ if (!p->ary[m]) { - new = get_from_free_list(idp); + new = idr_layer_alloc(gfp_mask, layer_idr); if (!new) return -ENOMEM; new->layer = l-1; @@ -218,7 +269,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) } static int idr_get_empty_slot(struct idr *idp, int starting_id, - struct idr_layer **pa) + struct idr_layer **pa, gfp_t gfp_mask, + struct idr *layer_idr) { struct idr_layer *p, *new; int layers, v, id; @@ -229,7 +281,7 @@ build_up: p = idp->top; layers = idp->layers; if (unlikely(!p)) { - if (!(p = get_from_free_list(idp))) + if (!(p = idr_layer_alloc(gfp_mask, layer_idr))) return -ENOMEM; p->layer = 0; layers = 1; @@ -248,7 +300,7 @@ build_up: p->layer++; continue; } - if (!(new = get_from_free_list(idp))) { + if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { /* * The allocation failed. If we built part of * the structure tear it down. @@ -272,7 +324,7 @@ build_up: } rcu_assign_pointer(idp->top, p); idp->layers = layers; - v = sub_alloc(idp, &id, pa); + v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr); if (v == -EAGAIN) goto build_up; return(v); @@ -312,7 +364,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) struct idr_layer *pa[MAX_IDR_LEVEL]; int rv; - rv = idr_get_empty_slot(idp, starting_id, pa); + rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); if (rv < 0) return rv == -ENOMEM ? 
-EAGAIN : rv; @@ -322,6 +374,112 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) } EXPORT_SYMBOL(idr_get_new_above); +/** + * idr_preload - preload for idr_alloc() + * @gfp_mask: allocation mask to use for preloading + * + * Preload per-cpu layer buffer for idr_alloc(). Can only be used from + * process context and each idr_preload() invocation should be matched with + * idr_preload_end(). Note that preemption is disabled while preloaded. + * + * The first idr_alloc() in the preloaded section can be treated as if it + * were invoked with @gfp_mask used for preloading. This allows using more + * permissive allocation masks for idrs protected by spinlocks. + * + * For example, if idr_alloc() below fails, the failure can be treated as + * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT. + * + * idr_preload(GFP_KERNEL); + * spin_lock(lock); + * + * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); + * + * spin_unlock(lock); + * idr_preload_end(); + * if (id < 0) + * error; + */ +void idr_preload(gfp_t gfp_mask) +{ + /* + * Consuming preload buffer from non-process context breaks preload + * allocation guarantee. Disallow usage from those contexts. + */ + WARN_ON_ONCE(in_interrupt()); + might_sleep_if(gfp_mask & __GFP_WAIT); + + preempt_disable(); + + /* + * idr_alloc() is likely to succeed w/o full idr_layer buffer and + * return value from idr_alloc() needs to be checked for failure + * anyway. Silently give up if allocation fails. The caller can + * treat failures from idr_alloc() as if idr_alloc() were called + * with @gfp_mask which should be enough. 
+ */ + while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { + struct idr_layer *new; + + preempt_enable(); + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + preempt_disable(); + if (!new) + break; + + /* link the new one to per-cpu preload list */ + new->ary[0] = __this_cpu_read(idr_preload_head); + __this_cpu_write(idr_preload_head, new); + __this_cpu_inc(idr_preload_cnt); + } +} +EXPORT_SYMBOL(idr_preload); + +/** + * idr_alloc - allocate new idr entry + * @idr: the (initialized) idr + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive, <= 0 for max) + * @gfp_mask: memory allocation flags + * + * Allocate an id in [start, end) and associate it with @ptr. If no ID is + * available in the specified range, returns -ENOSPC. On memory allocation + * failure, returns -ENOMEM. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * The user is responsible for exclusively synchronizing all operations + * which may modify @idr. However, read-only accesses such as idr_find() + * or iteration can be performed under RCU read lock provided the user + * destroys @ptr in RCU-safe way after removal from idr. + */ +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) +{ + int max = end > 0 ? 
end - 1 : INT_MAX; /* inclusive upper limit */ + struct idr_layer *pa[MAX_IDR_LEVEL]; + int id; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + /* sanity checks */ + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + if (unlikely(max < start)) + return -ENOSPC; + + /* allocate id */ + id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); + if (unlikely(id < 0)) + return id; + if (unlikely(id > max)) + return -ENOSPC; + + idr_fill_slot(ptr, id, pa); + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); + static void idr_remove_warning(int id) { printk(KERN_WARNING @@ -769,7 +927,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) restart: /* get vacant slot */ - t = idr_get_empty_slot(&ida->idr, idr_id, pa); + t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); if (t < 0) return t == -ENOMEM ? -EAGAIN : t; -- cgit v1.2.3 From 326cf0f0f308933c10236280a322031f0097205d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:02 -0800 Subject: idr: fix top layer handling Most functions in idr fail to deal with the high bits when the idr tree grows to the maximum height. * idr_get_empty_slot() stops growing idr tree once the depth reaches MAX_IDR_LEVEL - 1, which is one depth shallower than necessary to cover the whole range. The function doesn't even notice that it didn't grow the tree enough and ends up allocating the wrong ID given sufficiently high @starting_id. For example, on 64 bit, if the starting id is 0x7fffff01, idr_get_empty_slot() will grow the tree 5 layer deep, which only covers the 30 bits and then proceed to allocate as if the bit 30 wasn't specified. It ends up allocating 0x3fffff01 without the bit 30 but still returns 0x7fffff01. * __idr_remove_all() will not remove anything if the tree is fully grown. * idr_find() can't find anything if the tree is fully grown. * idr_for_each() and idr_get_next() can't iterate anything if the tree is fully grown. 
Fix it by introducing idr_max() which returns the maximum possible ID given the depth of tree and replacing the id limit checks in all affected places. As the idr_layer pointer array pa[] needs to be 1 larger than the maximum depth, enlarge pa[] arrays by one. While this plugs the discovered issues, the whole code base is horrible and in desperate need of rewrite. It's fragile like hell. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 2d016f5c410e..63dda62131b3 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -43,6 +43,14 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); static DEFINE_PER_CPU(int, idr_preload_cnt); static DEFINE_SPINLOCK(simple_ida_lock); +/* the maximum ID which can be allocated given idr->layers */ +static int idr_max(int layers) +{ + int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT); + + return (1 << bits) - 1; +} + static struct idr_layer *get_from_free_list(struct idr *idp) { struct idr_layer *p; @@ -290,7 +298,7 @@ build_up: * Add a new layer to the top of the tree if the requested * id is larger than the currently allocated space. */ - while ((layers < (MAX_IDR_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) { + while (id > idr_max(layers)) { layers++; if (!p->count) { /* special case: if the tree is currently empty, @@ -361,7 +369,7 @@ static void idr_fill_slot(void *ptr, int id, struct idr_layer **pa) */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int rv; rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); @@ -457,7 +465,7 @@ EXPORT_SYMBOL(idr_preload); int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) { int max = end > 0 ?
end - 1 : INT_MAX; /* inclusive upper limit */ - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int id; might_sleep_if(gfp_mask & __GFP_WAIT); @@ -490,7 +498,7 @@ static void idr_remove_warning(int id) static void sub_remove(struct idr *idp, int shift, int id) { struct idr_layer *p = idp->top; - struct idr_layer **pa[MAX_IDR_LEVEL]; + struct idr_layer **pa[MAX_IDR_LEVEL + 1]; struct idr_layer ***paa = &pa[0]; struct idr_layer *to_free; int n; @@ -571,16 +579,16 @@ void __idr_remove_all(struct idr *idp) int n, id, max; int bt_mask; struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = idp->top; rcu_assign_pointer(idp->top, NULL); - max = 1 << n; + max = idr_max(idp->layers); id = 0; - while (id < max) { + while (id >= 0 && id <= max) { while (n > IDR_BITS && p) { n -= IDR_BITS; *paa++ = p; @@ -650,7 +658,7 @@ void *idr_find(struct idr *idp, int id) /* Mask off upper bits we don't use for the search. 
*/ id &= MAX_IDR_MASK; - if (id >= (1 << n)) + if (id > idr_max(p->layer + 1)) return NULL; BUG_ON(n == 0); @@ -686,15 +694,15 @@ int idr_for_each(struct idr *idp, { int n, id, max, error = 0; struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = rcu_dereference_raw(idp->top); - max = 1 << n; + max = idr_max(idp->layers); id = 0; - while (id < max) { + while (id >= 0 && id <= max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; @@ -732,7 +740,7 @@ EXPORT_SYMBOL(idr_for_each); */ void *idr_get_next(struct idr *idp, int *nextidp) { - struct idr_layer *p, *pa[MAX_IDR_LEVEL]; + struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; int id = *nextidp; int n, max; @@ -742,9 +750,9 @@ void *idr_get_next(struct idr *idp, int *nextidp) if (!p) return NULL; n = (p->layer + 1) * IDR_BITS; - max = 1 << n; + max = idr_max(p->layer + 1); - while (id < max) { + while (id >= 0 && id <= max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; @@ -918,7 +926,7 @@ EXPORT_SYMBOL(ida_pre_get); */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct ida_bitmap *bitmap; unsigned long flags; int idr_id = starting_id / IDA_BITMAP_BITS; -- cgit v1.2.3 From e8c8d1bc063bc88cfa1356266027b5075d3a82d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:04 -0800 Subject: idr: remove MAX_IDR_MASK and move left MAX_IDR_* into idr.c MAX_IDR_MASK is another weirdness in the idr interface. As idr covers whole positive integer range, it's defined as 0x7fffffff or INT_MAX. Its usage in idr_find(), idr_replace() and idr_remove() is bizarre. 
They basically mask off the sign bit and operate on the rest, so if the caller, by accident, passes in a negative number, the sign bit will be masked off and the remaining part will be used as if that was the input, which is worse than crashing. The constant is visible in idr.h and there are several users in the kernel. * drivers/i2c/i2c-core.c:i2c_add_numbered_adapter() Basically used to test if adap->nr is a negative number which isn't -1 and returns -EINVAL if so. idr_alloc() already has negative @start checking (w/ WARN_ON_ONCE), so this can go away. * drivers/infiniband/core/cm.c:cm_alloc_id() drivers/infiniband/hw/mlx4/cm.c:id_map_alloc() Used to wrap cyclic @start. Can be replaced with max(next, 0). Note that this type of cyclic allocation using idr is buggy. These are prone to spurious -ENOSPC failure after the first wraparound. * fs/super.c:get_anon_bdev() The ID allocated from ida is masked off before being tested whether it's inside valid range. ida allocated ID can never be a negative number and the masking is unnecessary. Update idr_*() functions to fail with -EINVAL when negative @id is specified and update other MAX_IDR_MASK users as described above. This leaves MAX_IDR_MASK without any user, remove it and relocate other MAX_IDR_* constants to lib/idr.c. 
Signed-off-by: Tejun Heo Cc: Jean Delvare Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: "Marciniszyn, Mike" Cc: Jack Morgenstein Cc: Or Gerlitz Cc: Al Viro Acked-by: Wolfram Sang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 63dda62131b3..e2b799989ab0 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -38,6 +38,15 @@ #include #include +#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) +#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) + +/* Leave the possibility of an incomplete final layer */ +#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) + +/* Number of id_layer structs to leave in free list */ +#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) + static struct kmem_cache *idr_layer_cache; static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); static DEFINE_PER_CPU(int, idr_preload_cnt); @@ -542,8 +551,8 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; - /* Mask off upper bits we don't use for the search. */ - id &= MAX_IDR_MASK; + if (WARN_ON_ONCE(id < 0)) + return; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && @@ -650,14 +659,14 @@ void *idr_find(struct idr *idp, int id) int n; struct idr_layer *p; + if (WARN_ON_ONCE(id < 0)) + return NULL; + p = rcu_dereference_raw(idp->top); if (!p) return NULL; n = (p->layer+1) * IDR_BITS; - /* Mask off upper bits we don't use for the search. 
*/ - id &= MAX_IDR_MASK; - if (id > idr_max(p->layer + 1)) return NULL; BUG_ON(n == 0); @@ -799,14 +808,15 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; + if (WARN_ON_ONCE(id < 0)) + return ERR_PTR(-EINVAL); + p = idp->top; if (!p) return ERR_PTR(-EINVAL); n = (p->layer+1) * IDR_BITS; - id &= MAX_IDR_MASK; - if (id >= (1 << n)) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 1d9b2e1e663719d406e3a770979a19ba4233bba0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:05 -0800 Subject: idr: remove length restriction from idr_layer->bitmap Currently, idr->bitmap is declared as an unsigned long which restricts the number of bits an idr_layer can contain. All bitops can handle arbitrary positive integer bit number and there's no reason for this restriction. Declare idr_layer->bitmap using DECLARE_BITMAP() instead of a single unsigned long. * idr_layer->bitmap is now an array. '&' dropped from params to bitops. * Replaced "== IDR_FULL" tests with bitmap_full() and removed IDR_FULL. * Replaced find_next_bit() on ~bitmap with find_next_zero_bit(). * Replaced "bitmap = 0" with bitmap_clear(). This patch doesn't (or at least shouldn't) introduce any behavior changes. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index e2b799989ab0..d66e75bfc1a0 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -157,18 +157,18 @@ static void idr_mark_full(struct idr_layer **pa, int id) struct idr_layer *p = pa[0]; int l = 0; - __set_bit(id & IDR_MASK, &p->bitmap); + __set_bit(id & IDR_MASK, p->bitmap); /* * If this layer is full mark the bit in the layer above to * show that this part of the radix tree is full. This may * complete the layer above and require walking up the radix * tree. 
*/ - while (p->bitmap == IDR_FULL) { + while (bitmap_full(p->bitmap, IDR_SIZE)) { if (!(p = pa[++l])) break; id = id >> IDR_BITS; - __set_bit((id & IDR_MASK), &p->bitmap); + __set_bit((id & IDR_MASK), p->bitmap); } } @@ -221,7 +221,6 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, int n, m, sh; struct idr_layer *p, *new; int l, id, oid; - unsigned long bm; id = *starting_id; restart: @@ -233,8 +232,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, * We run around this while until we reach the leaf node... */ n = (id >> (IDR_BITS*l)) & IDR_MASK; - bm = ~p->bitmap; - m = find_next_bit(&bm, IDR_SIZE, n); + m = find_next_zero_bit(p->bitmap, IDR_SIZE, n); if (m == IDR_SIZE) { /* no space available go back to previous layer. */ l++; @@ -326,7 +324,8 @@ build_up: for (new = p; p && p != idp->top; new = p) { p = p->ary[0]; new->ary[0] = NULL; - new->bitmap = new->count = 0; + new->count = 0; + bitmap_clear(new->bitmap, 0, IDR_SIZE); __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); @@ -335,8 +334,8 @@ build_up: new->ary[0] = p; new->count = 1; new->layer = layers-1; - if (p->bitmap == IDR_FULL) - __set_bit(0, &new->bitmap); + if (bitmap_full(p->bitmap, IDR_SIZE)) + __set_bit(0, new->bitmap); p = new; } rcu_assign_pointer(idp->top, p); @@ -517,14 +516,14 @@ static void sub_remove(struct idr *idp, int shift, int id) while ((shift > 0) && p) { n = (id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); *++paa = &p->ary[n]; p = p->ary[n]; shift -= IDR_BITS; } n = id & IDR_MASK; - if (likely(p != NULL && test_bit(n, &p->bitmap))){ - __clear_bit(n, &p->bitmap); + if (likely(p != NULL && test_bit(n, p->bitmap))) { + __clear_bit(n, p->bitmap); rcu_assign_pointer(p->ary[n], NULL); to_free = NULL; while(*paa && ! 
--((**paa)->count)){ @@ -567,7 +566,8 @@ void idr_remove(struct idr *idp, int id) p = idp->top->ary[0]; rcu_assign_pointer(idp->top, p); --idp->layers; - to_free->bitmap = to_free->count = 0; + to_free->count = 0; + bitmap_clear(to_free->bitmap, 0, IDR_SIZE); free_layer(to_free); } while (idp->id_free_cnt >= MAX_IDR_FREE) { @@ -827,7 +827,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } n = id & IDR_MASK; - if (unlikely(p == NULL || !test_bit(n, &p->bitmap))) + if (unlikely(p == NULL || !test_bit(n, p->bitmap))) return ERR_PTR(-ENOENT); old_p = p->ary[n]; @@ -1024,7 +1024,7 @@ void ida_remove(struct ida *ida, int id) /* clear full bits while looking up the leaf idr_layer */ while ((shift > 0) && p) { n = (idr_id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); p = p->ary[n]; shift -= IDR_BITS; } @@ -1033,7 +1033,7 @@ void ida_remove(struct ida *ida, int id) goto err; n = idr_id & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); bitmap = (void *)p->ary[n]; if (!test_bit(offset, bitmap->bitmap)) @@ -1042,7 +1042,7 @@ void ida_remove(struct ida *ida, int id) /* update bitmap and remove it if empty */ __clear_bit(offset, bitmap->bitmap); if (--bitmap->nr_busy == 0) { - __set_bit(n, &p->bitmap); /* to please idr_remove() */ + __set_bit(n, p->bitmap); /* to please idr_remove() */ idr_remove(&ida->idr, idr_id); free_bitmap(ida, bitmap); } -- cgit v1.2.3 From 54616283c2948812a44240858ced610e7cacbde1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:07 -0800 Subject: idr: add idr_layer->prefix Add a field which carries the prefix of ID the idr_layer covers. This will be used to implement lookup hint. This patch doesn't make use of the new field and doesn't introduce any behavior difference. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index d66e75bfc1a0..5cd602936645 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -60,6 +60,16 @@ static int idr_max(int layers) return (1 << bits) - 1; } +/* + * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is + * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and + * so on. + */ +static int idr_layer_prefix_mask(int layer) +{ + return ~idr_max(layer + 1); +} + static struct idr_layer *get_from_free_list(struct idr *idp) { struct idr_layer *p; @@ -272,6 +282,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, if (!new) return -ENOMEM; new->layer = l-1; + new->prefix = id & idr_layer_prefix_mask(new->layer); rcu_assign_pointer(p->ary[m], new); p->count++; } @@ -313,6 +324,7 @@ build_up: * upwards. */ p->layer++; + WARN_ON_ONCE(p->prefix); continue; } if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { @@ -334,6 +346,7 @@ build_up: new->ary[0] = p; new->count = 1; new->layer = layers-1; + new->prefix = id & idr_layer_prefix_mask(new->layer); if (bitmap_full(p->bitmap, IDR_SIZE)) __set_bit(0, new->bitmap); p = new; -- cgit v1.2.3 From 0ffc2a9c8072969253a20821c2c733a2cbb4c7c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:08 -0800 Subject: idr: implement lookup hint While idr lookup isn't a particularly heavy operation, it still is too substantial to use in hot paths without worrying about the performance implications. With recent changes, each idr_layer covers 256 slots which should be enough to cover most use cases with single idr_layer making lookup hint very attractive. 
This patch adds idr->hint which points to the idr_layer which allocated an ID most recently and the fast path lookup becomes if (look up target's prefix matches that of the hinted layer) return hint->ary[ID's offset in the leaf layer]; which can be inlined. idr->hint is set to the leaf node on idr_fill_slot() and cleared from free_layer(). [andriy.shevchenko@linux.intel.com: always do slow path when hint is uninitialized] Signed-off-by: Tejun Heo Cc: Kirill A. Shutemov Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 5cd602936645..1a30272066c6 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -137,8 +137,10 @@ static void idr_layer_rcu_free(struct rcu_head *head) kmem_cache_free(idr_layer_cache, layer); } -static inline void free_layer(struct idr_layer *p) +static inline void free_layer(struct idr *idr, struct idr_layer *p) { + if (idr->hint && idr->hint == p) + RCU_INIT_POINTER(idr->hint, NULL); call_rcu(&p->rcu_head, idr_layer_rcu_free); } @@ -363,8 +365,12 @@ build_up: * @id and @pa are from a successful allocation from idr_get_empty_slot(). * Install the user pointer @ptr and mark the slot full. */ -static void idr_fill_slot(void *ptr, int id, struct idr_layer **pa) +static void idr_fill_slot(struct idr *idr, void *ptr, int id, + struct idr_layer **pa) { + /* update hint used for lookup, cleared from free_layer() */ + rcu_assign_pointer(idr->hint, pa[0]); + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); pa[0]->count++; idr_mark_full(pa, id); @@ -397,7 +403,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) if (rv < 0) return rv == -ENOMEM ? 
-EAGAIN : rv; - idr_fill_slot(ptr, rv, pa); + idr_fill_slot(idp, ptr, rv, pa); *id = rv; return 0; } @@ -504,7 +510,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) if (unlikely(id > max)) return -ENOSPC; - idr_fill_slot(ptr, id, pa); + idr_fill_slot(idr, ptr, id, pa); return id; } EXPORT_SYMBOL_GPL(idr_alloc); @@ -541,14 +547,14 @@ static void sub_remove(struct idr *idp, int shift, int id) to_free = NULL; while(*paa && ! --((**paa)->count)){ if (to_free) - free_layer(to_free); + free_layer(idp, to_free); to_free = **paa; **paa-- = NULL; } if (!*paa) idp->layers = 0; if (to_free) - free_layer(to_free); + free_layer(idp, to_free); } else idr_remove_warning(id); } @@ -581,7 +587,7 @@ void idr_remove(struct idr *idp, int id) --idp->layers; to_free->count = 0; bitmap_clear(to_free->bitmap, 0, IDR_SIZE); - free_layer(to_free); + free_layer(idp, to_free); } while (idp->id_free_cnt >= MAX_IDR_FREE) { p = get_from_free_list(idp); @@ -622,7 +628,7 @@ void __idr_remove_all(struct idr *idp) /* Get the highest bit that the above add changed from 0->1. */ while (n < fls(id ^ bt_mask)) { if (p) - free_layer(p); + free_layer(idp, p); n += IDR_BITS; p = *--paa; } @@ -655,19 +661,7 @@ void idr_destroy(struct idr *idp) } EXPORT_SYMBOL(idr_destroy); -/** - * idr_find - return pointer for given id - * @idp: idr handle - * @id: lookup key - * - * Return the pointer given the id it has been registered with. A %NULL - * return indicates that @id is not valid or you passed %NULL in - * idr_get_new(). - * - * This function can be called under rcu_read_lock(), given that the leaf - * pointers lifetimes are correctly managed. 
- */ -void *idr_find(struct idr *idp, int id) +void *idr_find_slowpath(struct idr *idp, int id) { int n; struct idr_layer *p; @@ -691,7 +685,7 @@ void *idr_find(struct idr *idp, int id) } return((void *)p); } -EXPORT_SYMBOL(idr_find); +EXPORT_SYMBOL(idr_find_slowpath); /** * idr_for_each - iterate through all stored pointers -- cgit v1.2.3 From 7175c61cc6b8e701441e79ef048c11ae97293463 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Feb 2013 17:05:10 -0800 Subject: idr: explain WARN_ON_ONCE() on negative IDs out-of-range ID Until recently, when a negative ID is specified, idr functions used to ignore the sign bit and proceed with the operation with the rest of bits, which is bizarre and error-prone. The behavior recently got changed so that negative IDs are treated as invalid but we're triggering WARN_ON_ONCE() on negative IDs just in case somebody was depending on the sign bit being ignored, so that those can be detected and fixed easily. We only need this for a while. Explain why WARN_ON_ONCE()s are there and that they can be removed later. Signed-off-by: Tejun Heo Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 1a30272066c6..73f4d53c02f3 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -569,6 +569,7 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; + /* see comment in idr_find_slowpath() */ if (WARN_ON_ONCE(id < 0)) return; @@ -666,6 +667,14 @@ void *idr_find_slowpath(struct idr *idp, int id) int n; struct idr_layer *p; + /* + * If @id is negative, idr_find() used to ignore the sign bit and + * performed lookup with the rest of bits, which is weird and can + * lead to very obscure bugs. 
We're now returning NULL for all + * negative IDs but just in case somebody was depending on the sign + * bit being ignored, let's trigger WARN_ON_ONCE() so that they can + * be detected and fixed. WARN_ON_ONCE() can later be removed. + */ if (WARN_ON_ONCE(id < 0)) return NULL; @@ -815,6 +824,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; + /* see comment in idr_find_slowpath() */ if (WARN_ON_ONCE(id < 0)) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From c759b35e6469fe7519e9fe45d5285d49f12cb657 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 27 Feb 2013 17:05:50 -0800 Subject: kfifo: move kfifo.c from kernel/ to lib/ Move kfifo.c from kernel/ to lib/ Signed-off-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 2 +- lib/kfifo.c | 609 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 610 insertions(+), 1 deletion(-) create mode 100644 lib/kfifo.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 02ed6c04cd7d..d7946ff75b2e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o + bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/kfifo.c b/lib/kfifo.c new file mode 100644 index 000000000000..59dcf5b81d24 --- /dev/null +++ b/lib/kfifo.c @@ -0,0 +1,609 @@ +/* + * A generic kernel FIFO implementation + * + * Copyright (C) 2009/2010 Stefani Seibold + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either 
version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * internal helper to calculate the unused elements in a fifo + */ +static inline unsigned int kfifo_unused(struct __kfifo *fifo) +{ + return (fifo->mask + 1) - (fifo->in - fifo->out); +} + +int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, + size_t esize, gfp_t gfp_mask) +{ + /* + * round down to the next power of 2, since our 'let the indices + * wrap' technique works only in this case. 
+ */ + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); + + fifo->in = 0; + fifo->out = 0; + fifo->esize = esize; + + if (size < 2) { + fifo->data = NULL; + fifo->mask = 0; + return -EINVAL; + } + + fifo->data = kmalloc(size * esize, gfp_mask); + + if (!fifo->data) { + fifo->mask = 0; + return -ENOMEM; + } + fifo->mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(__kfifo_alloc); + +void __kfifo_free(struct __kfifo *fifo) +{ + kfree(fifo->data); + fifo->in = 0; + fifo->out = 0; + fifo->esize = 0; + fifo->data = NULL; + fifo->mask = 0; +} +EXPORT_SYMBOL(__kfifo_free); + +int __kfifo_init(struct __kfifo *fifo, void *buffer, + unsigned int size, size_t esize) +{ + size /= esize; + + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); + + fifo->in = 0; + fifo->out = 0; + fifo->esize = esize; + fifo->data = buffer; + + if (size < 2) { + fifo->mask = 0; + return -EINVAL; + } + fifo->mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(__kfifo_init); + +static void kfifo_copy_in(struct __kfifo *fifo, const void *src, + unsigned int len, unsigned int off) +{ + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + unsigned int l; + + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + memcpy(fifo->data + off, src, l); + memcpy(fifo->data, src + l, len - l); + /* + * make sure that the data in the fifo is up to date before + * incrementing the fifo->in index counter + */ + smp_wmb(); +} + +unsigned int __kfifo_in(struct __kfifo *fifo, + const void *buf, unsigned int len) +{ + unsigned int l; + + l = kfifo_unused(fifo); + if (len > l) + len = l; + + kfifo_copy_in(fifo, buf, len, fifo->in); + fifo->in += len; + return len; +} +EXPORT_SYMBOL(__kfifo_in); + +static void kfifo_copy_out(struct __kfifo *fifo, void *dst, + unsigned int len, unsigned int off) +{ + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + unsigned int l; + + off &= fifo->mask; 
+ if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + memcpy(dst, fifo->data + off, l); + memcpy(dst + l, fifo->data, len - l); + /* + * make sure that the data is copied before + * incrementing the fifo->out index counter + */ + smp_wmb(); +} + +unsigned int __kfifo_out_peek(struct __kfifo *fifo, + void *buf, unsigned int len) +{ + unsigned int l; + + l = fifo->in - fifo->out; + if (len > l) + len = l; + + kfifo_copy_out(fifo, buf, len, fifo->out); + return len; +} +EXPORT_SYMBOL(__kfifo_out_peek); + +unsigned int __kfifo_out(struct __kfifo *fifo, + void *buf, unsigned int len) +{ + len = __kfifo_out_peek(fifo, buf, len); + fifo->out += len; + return len; +} +EXPORT_SYMBOL(__kfifo_out); + +static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off, + unsigned int *copied) +{ + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + unsigned int l; + unsigned long ret; + + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + ret = copy_from_user(fifo->data + off, from, l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret + len - l, esize); + else { + ret = copy_from_user(fifo->data, from + l, len - l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret, esize); + } + /* + * make sure that the data in the fifo is up to date before + * incrementing the fifo->in index counter + */ + smp_wmb(); + *copied = len - ret; + /* return the number of elements which are not copied */ + return ret; +} + +int __kfifo_from_user(struct __kfifo *fifo, const void __user *from, + unsigned long len, unsigned int *copied) +{ + unsigned int l; + unsigned long ret; + unsigned int esize = fifo->esize; + int err; + + if (esize != 1) + len /= esize; + + l = kfifo_unused(fifo); + if (len > l) + len = l; + + ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied); + if (unlikely(ret)) { + len 
-= ret; + err = -EFAULT; + } else + err = 0; + fifo->in += len; + return err; +} +EXPORT_SYMBOL(__kfifo_from_user); + +static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, + unsigned int len, unsigned int off, unsigned int *copied) +{ + unsigned int l; + unsigned long ret; + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + ret = copy_to_user(to, fifo->data + off, l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret + len - l, esize); + else { + ret = copy_to_user(to + l, fifo->data, len - l); + if (unlikely(ret)) + ret = DIV_ROUND_UP(ret, esize); + } + /* + * make sure that the data is copied before + * incrementing the fifo->out index counter + */ + smp_wmb(); + *copied = len - ret; + /* return the number of elements which are not copied */ + return ret; +} + +int __kfifo_to_user(struct __kfifo *fifo, void __user *to, + unsigned long len, unsigned int *copied) +{ + unsigned int l; + unsigned long ret; + unsigned int esize = fifo->esize; + int err; + + if (esize != 1) + len /= esize; + + l = fifo->in - fifo->out; + if (len > l) + len = l; + ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); + if (unlikely(ret)) { + len -= ret; + err = -EFAULT; + } else + err = 0; + fifo->out += len; + return err; +} +EXPORT_SYMBOL(__kfifo_to_user); + +static int setup_sgl_buf(struct scatterlist *sgl, void *buf, + int nents, unsigned int len) +{ + int n; + unsigned int l; + unsigned int off; + struct page *page; + + if (!nents) + return 0; + + if (!len) + return 0; + + n = 0; + page = virt_to_page(buf); + off = offset_in_page(buf); + l = 0; + + while (len >= l + PAGE_SIZE - off) { + struct page *npage; + + l += PAGE_SIZE; + buf += PAGE_SIZE; + npage = virt_to_page(buf); + if (page_to_phys(page) != page_to_phys(npage) - l) { + sg_set_page(sgl, page, l - off, off); + sgl = sg_next(sgl); + if (++n == nents 
|| sgl == NULL) + return n; + page = npage; + len -= l - off; + l = off = 0; + } + } + sg_set_page(sgl, page, len, off); + return n + 1; +} + +static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, + int nents, unsigned int len, unsigned int off) +{ + unsigned int size = fifo->mask + 1; + unsigned int esize = fifo->esize; + unsigned int l; + unsigned int n; + + off &= fifo->mask; + if (esize != 1) { + off *= esize; + size *= esize; + len *= esize; + } + l = min(len, size - off); + + n = setup_sgl_buf(sgl, fifo->data + off, nents, l); + n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); + + return n; +} + +unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len) +{ + unsigned int l; + + l = kfifo_unused(fifo); + if (len > l) + len = l; + + return setup_sgl(fifo, sgl, nents, len, fifo->in); +} +EXPORT_SYMBOL(__kfifo_dma_in_prepare); + +unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len) +{ + unsigned int l; + + l = fifo->in - fifo->out; + if (len > l) + len = l; + + return setup_sgl(fifo, sgl, nents, len, fifo->out); +} +EXPORT_SYMBOL(__kfifo_dma_out_prepare); + +unsigned int __kfifo_max_r(unsigned int len, size_t recsize) +{ + unsigned int max = (1 << (recsize << 3)) - 1; + + if (len > max) + return max; + return len; +} +EXPORT_SYMBOL(__kfifo_max_r); + +#define __KFIFO_PEEK(data, out, mask) \ + ((data)[(out) & (mask)]) +/* + * __kfifo_peek_n internal helper function for determinate the length of + * the next record in the fifo + */ +static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize) +{ + unsigned int l; + unsigned int mask = fifo->mask; + unsigned char *data = fifo->data; + + l = __KFIFO_PEEK(data, fifo->out, mask); + + if (--recsize) + l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; + + return l; +} + +#define __KFIFO_POKE(data, in, mask, val) \ + ( \ + (data)[(in) & (mask)] = (unsigned 
char)(val) \ + ) + +/* + * __kfifo_poke_n internal helper function for storeing the length of + * the record into the fifo + */ +static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) +{ + unsigned int mask = fifo->mask; + unsigned char *data = fifo->data; + + __KFIFO_POKE(data, fifo->in, mask, n); + + if (recsize > 1) + __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); +} + +unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) +{ + return __kfifo_peek_n(fifo, recsize); +} +EXPORT_SYMBOL(__kfifo_len_r); + +unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, + unsigned int len, size_t recsize) +{ + if (len + recsize > kfifo_unused(fifo)) + return 0; + + __kfifo_poke_n(fifo, len, recsize); + + kfifo_copy_in(fifo, buf, len, fifo->in + recsize); + fifo->in += len + recsize; + return len; +} +EXPORT_SYMBOL(__kfifo_in_r); + +static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, + void *buf, unsigned int len, size_t recsize, unsigned int *n) +{ + *n = __kfifo_peek_n(fifo, recsize); + + if (len > *n) + len = *n; + + kfifo_copy_out(fifo, buf, len, fifo->out + recsize); + return len; +} + +unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, + unsigned int len, size_t recsize) +{ + unsigned int n; + + if (fifo->in == fifo->out) + return 0; + + return kfifo_out_copy_r(fifo, buf, len, recsize, &n); +} +EXPORT_SYMBOL(__kfifo_out_peek_r); + +unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, + unsigned int len, size_t recsize) +{ + unsigned int n; + + if (fifo->in == fifo->out) + return 0; + + len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); + fifo->out += n + recsize; + return len; +} +EXPORT_SYMBOL(__kfifo_out_r); + +void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int n; + + n = __kfifo_peek_n(fifo, recsize); + fifo->out += n + recsize; +} +EXPORT_SYMBOL(__kfifo_skip_r); + +int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, + unsigned long len, unsigned 
int *copied, size_t recsize) +{ + unsigned long ret; + + len = __kfifo_max_r(len, recsize); + + if (len + recsize > kfifo_unused(fifo)) { + *copied = 0; + return 0; + } + + __kfifo_poke_n(fifo, len, recsize); + + ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); + if (unlikely(ret)) { + *copied = 0; + return -EFAULT; + } + fifo->in += len + recsize; + return 0; +} +EXPORT_SYMBOL(__kfifo_from_user_r); + +int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, + unsigned long len, unsigned int *copied, size_t recsize) +{ + unsigned long ret; + unsigned int n; + + if (fifo->in == fifo->out) { + *copied = 0; + return 0; + } + + n = __kfifo_peek_n(fifo, recsize); + if (len > n) + len = n; + + ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); + if (unlikely(ret)) { + *copied = 0; + return -EFAULT; + } + fifo->out += n + recsize; + return 0; +} +EXPORT_SYMBOL(__kfifo_to_user_r); + +unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) +{ + if (!nents) + BUG(); + + len = __kfifo_max_r(len, recsize); + + if (len + recsize > kfifo_unused(fifo)) + return 0; + + return setup_sgl(fifo, sgl, nents, len, fifo->in + recsize); +} +EXPORT_SYMBOL(__kfifo_dma_in_prepare_r); + +void __kfifo_dma_in_finish_r(struct __kfifo *fifo, + unsigned int len, size_t recsize) +{ + len = __kfifo_max_r(len, recsize); + __kfifo_poke_n(fifo, len, recsize); + fifo->in += len + recsize; +} +EXPORT_SYMBOL(__kfifo_dma_in_finish_r); + +unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) +{ + if (!nents) + BUG(); + + len = __kfifo_max_r(len, recsize); + + if (len + recsize > fifo->in - fifo->out) + return 0; + + return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize); +} +EXPORT_SYMBOL(__kfifo_dma_out_prepare_r); + +void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int 
len; + + len = __kfifo_peek_n(fifo, recsize); + fifo->out += len + recsize; +} +EXPORT_SYMBOL(__kfifo_dma_out_finish_r); -- cgit v1.2.3 From dfe2a77fd243ac3bf204fd0624e4d0b2134aaa6d Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 27 Feb 2013 17:05:51 -0800 Subject: kfifo: fix kfifo_alloc() and kfifo_init() Fix kfifo_alloc() and kfifo_init() to alloc at least the requested number of elements. Since the kfifo operates on power of 2 the request size will be rounded up to the next power of two. Signed-off-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kfifo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/kfifo.c b/lib/kfifo.c index 59dcf5b81d24..7b7f83027b7b 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -42,8 +42,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, * round down to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ - if (!is_power_of_2(size)) - size = rounddown_pow_of_two(size); + size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; @@ -83,8 +82,7 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer, { size /= esize; - if (!is_power_of_2(size)) - size = rounddown_pow_of_two(size); + size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; -- cgit v1.2.3 From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Feb 2013 17:06:00 -0800 Subject: hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. 
Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... 
when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foundation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin Acked-by: Paul E.
McKenney Signed-off-by: Sasha Levin Cc: Wu Fengguang Cc: Marcelo Tosatti Cc: Gleb Natapov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/debugobjects.c | 21 ++++++++++----------- lib/lru_cache.c | 3 +-- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index d11808ca4bc4..37061ede8b81 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -109,11 +109,10 @@ static void fill_pool(void) */ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) { - struct hlist_node *node; struct debug_obj *obj; int cnt = 0; - hlist_for_each_entry(obj, node, &b->list, node) { + hlist_for_each_entry(obj, &b->list, node) { cnt++; if (obj->object == addr) return obj; @@ -213,7 +212,7 @@ static void free_object(struct debug_obj *obj) static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj *obj; unsigned long flags; @@ -227,7 +226,7 @@ static void debug_objects_oom(void) raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -658,7 +657,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr, static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj_descr *descr; enum debug_obj_state state; @@ -678,7 +677,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) repeat: cnt = 0; raw_spin_lock_irqsave(&db->lock, flags); - hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + hlist_for_each_entry_safe(obj, tmp, &db->list, node) { cnt++; oaddr = (unsigned long) 
obj->object; if (oaddr < saddr || oaddr >= eaddr) @@ -702,7 +701,7 @@ repeat: raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -1013,7 +1012,7 @@ void __init debug_objects_early_init(void) static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct debug_obj *obj, *new; HLIST_HEAD(objects); int i, cnt = 0; @@ -1033,7 +1032,7 @@ static int __init debug_objects_replace_static_objects(void) local_irq_disable(); /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) + hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) hlist_del(&obj->node); /* Move the allocated objects to the pool */ hlist_move_list(&objects, &obj_pool); @@ -1042,7 +1041,7 @@ static int __init debug_objects_replace_static_objects(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); - hlist_for_each_entry(obj, node, &objects, node) { + hlist_for_each_entry(obj, &objects, node) { new = hlist_entry(obj_pool.first, typeof(*obj), node); hlist_del(&new->node); /* copy object data */ @@ -1057,7 +1056,7 @@ static int __init debug_objects_replace_static_objects(void) obj_pool_used); return 0; free: - hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { + hlist_for_each_entry_safe(obj, tmp, &objects, node) { hlist_del(&obj->node); kmem_cache_free(obj_cache, obj); } diff --git a/lib/lru_cache.c b/lib/lru_cache.c index d71d89498943..8335d39d2ccd 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -262,12 +262,11 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, bool include_changing) { - struct hlist_node *n; 
struct lc_element *e; BUG_ON(!lc); BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { /* "about to be changed" elements, pending transaction commit, * are hashed by their "new number". "Normal" elements have * lc_number == lc_new_number. */ -- cgit v1.2.3 From 3b0eb71ec9e22c6735c84f800d6523c96839efe6 Mon Sep 17 00:00:00 2001 From: Robert Obermeier Date: Sun, 16 Dec 2012 05:59:36 +0100 Subject: Fixed dead ifdef block by adding missing Kconfig option. Added the missing Kconfig option KDB_CONTINUE_CATASTROPHIC, whose absence led to a dead ifdef block in kernel/debug/kdb/kdb_main.c:73-75. The code using KDB_CONTINUE_CATASTROPHIC was originally introduced in commit '5d5314d6795f3c1c0f415348ff8c51f7de042b77' by Jason Wessel. This patchset ("kdb: core for kgdb back end (1 of 2)") added platform independent part of kdb to the linux kernel. The Kernel option however, even though it had the same options and behaviour on all supported architectures, was part of the x86 and ia64 patchset of KDB and therefore not pulled into the mainline kernel tree. I actually took the originally written Kconfig by Keith Owens (2003-06-20 according to KDB changelog) and changed it to reflect the correct behaviour, as the KDUMP patchset is not part of the kernel and the expected functionality is missing from it.
Signed-off-by: Robert Obermeier Signed-off-by: Jason Wessel --- lib/Kconfig.kgdb | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 43cb93fa2651..960fa2ecd6e0 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -79,4 +79,22 @@ config KDB_KEYBOARD help KDB can use a PS/2 type keyboard for an input device +config KDB_CONTINUE_CATASTROPHIC + int "KDB: continue after catastrophic errors" + depends on KGDB_KDB + default "0" + help + This integer controls the behaviour of kdb when the kernel gets a + catastrophic error, i.e. for a panic or oops. + When KDB is active and a catastrophic error occurs, nothing extra + will happen until you type 'go'. + CONFIG_KDB_CONTINUE_CATASTROPHIC == 0 (default). The first time + you type 'go', you will be warned by kdb. The secend time you type + 'go', KDB tries to continue. No guarantees that the + kernel is still usable in this situation. + CONFIG_KDB_CONTINUE_CATASTROPHIC == 1. KDB tries to continue. + No guarantees that the kernel is still usable in this situation. + CONFIG_KDB_CONTINUE_CATASTROPHIC == 2. KDB forces a reboot. + If you are not sure, say 0. + endif # KGDB -- cgit v1.2.3 From 79f83c02944ccb3dffbc5852e63e966ea34ed63e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 5 Dec 2012 11:34:13 +0000 Subject: Kconfig.debug: add METAG to dependency lists Add [!]METAG to a couple of Kconfig dependencies in lib/Kconfig.debug. Don't allow stack utilization instrumentation on metag, and allow building with frame pointers. Signed-off-by: James Hogan Cc: Andrew Morton Cc: "Paul E. 
McKenney" Cc: Akinobu Mita Cc: Michel Lespinasse Cc: Catalin Marinas --- lib/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 67604e599384..dddee00b4ac5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -730,7 +730,7 @@ config STACKTRACE config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 && !PARISC + depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -911,7 +911,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - AVR32 || SUPERH || BLACKFIN || MN10300) || \ + AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help -- cgit v1.2.3