diff options
Diffstat (limited to 'lib')
85 files changed, 5647 insertions, 1467 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 991c98bc4a3f..54cf309a92a5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -51,6 +51,9 @@ config PERCPU_RWSEM config ARCH_USE_CMPXCHG_LOCKREF bool +config ARCH_HAS_FAST_MULTIPLIER + bool + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -182,6 +185,15 @@ config AUDIT_GENERIC depends on AUDIT && !AUDIT_ARCH default y +config AUDIT_ARCH_COMPAT_GENERIC + bool + default n + +config AUDIT_COMPAT_GENERIC + bool + depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT + default y + config RANDOM32_SELFTEST bool "PRNG perform self test on init" default n @@ -322,6 +334,20 @@ config TEXTSEARCH_FSM config BTREE boolean +config INTERVAL_TREE + boolean + help + Simple, embeddable, interval-tree. Can find the start of an + overlapping range in log(n) time and then iterate over all + overlapping nodes. The algorithm is implemented as an + augmented rbtree. + + See: + + Documentation/rbtree.txt + + for more information. + config ASSOCIATIVE_ARRAY bool help @@ -342,9 +368,9 @@ config HAS_IOMEM select GENERIC_IO default y -config HAS_IOPORT +config HAS_IOPORT_MAP boolean - depends on HAS_IOMEM && !NO_IOPORT + depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA @@ -373,6 +399,39 @@ config CPU_RMAP config DQL bool +config GLOB + bool +# This actually supports modular compilation, but the module overhead +# is ridiculous for the amount of code involved. Until an out-of-tree +# driver asks for it, we'll just link it directly into the kernel +# when required. Since we're ignoring out-of-tree users, there's also +# no need to bother prompting for a manual decision: +# prompt "glob_match() function" + help + This option provides a glob_match function for performing + simple text pattern matching. It originated in the ATA code + to blacklist particular drive models, but other device drivers + may need similar functionality. 
+ + All drivers in the Linux kernel tree that require this function + should automatically select this option. Say N unless you + are compiling an out-of tree driver which tells you that it + depends on this. + +config GLOB_SELFTEST + bool "glob self-test on init" + default n + depends on GLOB + help + This option enables a simple self-test of the glob_match + function on startup. It is primarily useful for people + working on the code to ensure they haven't introduced any + regressions. + + It only adds a little bit of code and slows kernel boot (or + module load) by a small amount, so you're welcome to play with + it, but you probably don't need it. + # # Netlink attribute parsing support is select'ed if needed # @@ -428,7 +487,8 @@ config MPILIB config SIGNATURE tristate - depends on KEYS && CRYPTO + depends on KEYS + select CRYPTO select CRYPTO_SHA1 select MPILIB help @@ -451,4 +511,11 @@ config UCS2_STRING source "lib/fonts/Kconfig" +# +# sg chaining option +# + +config ARCH_HAS_SG_CHAIN + def_bool n + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index db25707aa41b..3ac43f34437b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -15,7 +15,7 @@ config PRINTK_TIME The behavior is also controlled by the kernel command line parameter printk.time=1. See Documentation/kernel-parameters.txt -config DEFAULT_MESSAGE_LOGLEVEL +config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" range 1 7 default "4" @@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options" config DEBUG_INFO bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !COMPILE_TEST help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. @@ -143,6 +143,30 @@ config DEBUG_INFO_REDUCED DEBUG_INFO build and compile times are reduced too. Only works with newer gcc versions. 
+config DEBUG_INFO_SPLIT + bool "Produce split debuginfo in .dwo files" + depends on DEBUG_INFO + help + Generate debug info into separate .dwo files. This significantly + reduces the build directory size for builds with DEBUG_INFO, + because it stores the information only once on disk in .dwo + files instead of multiple times in object files and executables. + In addition the debug information is also compressed. + + Requires recent gcc (4.7+) and recent gdb/binutils. + Any tool that packages or reads debug information would need + to know about the .dwo files and include them. + Incompatible with older versions of ccache. + +config DEBUG_INFO_DWARF4 + bool "Generate dwarf4 debuginfo" + depends on DEBUG_INFO + help + Generate dwarf4 debug info. This requires recent versions + of gcc and gdb. It makes the debug information larger. + But it significantly improves the success of resolving + variables in gdb on optimized code. + config ENABLE_WARN_DEPRECATED bool "Enable __deprecated logic" default y @@ -501,12 +525,21 @@ config DEBUG_VM If unsure, say N. +config DEBUG_VM_VMACACHE + bool "Debug VMA caching" + depends on DEBUG_VM + help + Enable this to turn on VMA caching debug information. Doing so + can cause significant overhead, so only enable it in non-production + environments. + + If unsure, say N. + config DEBUG_VM_RB bool "Debug VM red-black trees" depends on DEBUG_VM help - Enable this to turn on more extended checks in the virtual-memory - system that may impact performance. + Enable VM red-black tree debugging information and extra validations. If unsure, say N. @@ -576,8 +609,8 @@ config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM help - This options enables addition error checking for high memory systems. - Disable for production systems. + This option enables additional error checking for high memory + systems. Disable for production systems. 
 config HAVE_DEBUG_STACKOVERFLOW bool @@ -761,6 +794,15 @@ config PANIC_ON_OOPS_VALUE default 0 if !PANIC_ON_OOPS default 1 if PANIC_ON_OOPS +config PANIC_TIMEOUT + int "panic timeout" + default 0 + help + Set the timeout value (in seconds) until a reboot occurs when the + kernel panics. If n = 0, then we wait forever. A timeout + value n > 0 will wait n seconds before rebooting, while a timeout + value n < 0 will reboot immediately. + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS @@ -815,14 +857,9 @@ config DEBUG_RT_MUTEXES This allows rt mutex semantics violations and rt mutex related deadlocks (lockups) to be detected and reported automatically. -config DEBUG_PI_LIST - bool - default y - depends on DEBUG_RT_MUTEXES - config RT_MUTEX_TESTER bool "Built-in scriptable tester for rt-mutexes" - depends on DEBUG_KERNEL && RT_MUTEXES + depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN help This option enables a rt-mutex tester. @@ -855,6 +892,10 @@ config DEBUG_WW_MUTEX_SLOWPATH the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this will test all possible w/w mutex interface abuse with the exception of simply not acquiring all the required locks. + Note that this feature can introduce significant overhead, so + it really should not be enabled in a production or distro kernel, + even a debug kernel. If you are a driver writer, enable it. If + you are a distro, do not. 
config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" @@ -917,7 +958,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE select KALLSYMS select KALLSYMS_ALL @@ -971,6 +1012,21 @@ config DEBUG_LOCKING_API_SELFTESTS The following locking APIs are covered: spinlocks, rwlocks, mutexes and rwsems. +config LOCK_TORTURE_TEST + tristate "torture tests for locking" + depends on DEBUG_KERNEL + select TORTURE_TEST + default n + help + This option provides a kernel module that runs torture tests + on kernel locking primitives. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want kernel locking-primitive torture tests + to be built into the kernel. + Say M if you want these torture tests to build as a module. + Say N if you are unsure. + endmenu # lock debugging config TRACE_IRQFLAGS @@ -980,8 +1036,13 @@ config TRACE_IRQFLAGS either tracing or lock debugging. config STACKTRACE - bool + bool "Stack backtrace support" depends on STACKTRACE_SUPPORT + help + This option causes the kernel to create a /proc/pid/stack for + every process, showing its current stack trace. + It is also used by various kernel debugging features that require + stack trace generation. config DEBUG_KOBJECT bool "kobject debugging" @@ -1021,22 +1082,22 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" +config DEBUG_LIST + bool "Debug linked list manipulation" depends on DEBUG_KERNEL help - Enable this to catch wrong use of the writers count in struct - vfsmount. 
This will increase the size of each file struct by - 32 bits. + Enable this to turn on extended checks in the linked-list + walking routines. If unsure, say N. -config DEBUG_LIST - bool "Debug linked list manipulation" +config DEBUG_PI_LIST + bool "Debug priority linked list manipulation" depends on DEBUG_KERNEL help - Enable this to turn on extended checks in the linked-list - walking routines. + Enable this to turn on extended checks in the priority-ordered + linked-list (plist) walking routines. This checks the entire + list multiple times during each manipulation. If unsure, say N. @@ -1103,20 +1164,6 @@ config PROVE_RCU_REPEATEDLY Say N if you are unsure. -config PROVE_RCU_DELAY - bool "RCU debugging: preemptible RCU race provocation" - depends on DEBUG_KERNEL && PREEMPT_RCU - default n - help - There is a class of races that involve an unlikely preemption - of __rcu_read_unlock() just after ->rcu_read_lock_nesting has - been set to INT_MIN. This feature inserts a delay at that - point to increase the probability of these races. - - Say Y to increase probability of preemption of __rcu_read_unlock(). - - Say N if you are unsure. - config SPARSE_RCU_POINTER bool "RCU debugging: sparse-based checks for pointer usage" default n @@ -1132,9 +1179,14 @@ config SPARSE_RCU_POINTER Say N if you are unsure. 
+config TORTURE_TEST + tristate + default n + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL + select TORTURE_TEST default n help This option provides a kernel module that runs torture tests @@ -1375,7 +1427,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE help Provide stacktrace filter for fault-injection capabilities @@ -1478,6 +1530,7 @@ config RBTREE_TEST config INTERVAL_TREE_TEST tristate "Interval tree test" depends on m && DEBUG_KERNEL + select INTERVAL_TREE help A benchmark measuring the performance of the interval tree library @@ -1516,6 +1569,14 @@ config TEST_STRING_HELPERS config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" +config TEST_RHASHTABLE + bool "Perform selftest on resizable hash table" + default n + help + Enable this option to test the rhashtable functions at boot. + + If unsure, say N. + endmenu # runtime tests config PROVIDE_OHCI1394_DMA_INIT @@ -1547,17 +1608,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config FIREWIRE_OHCI_REMOTE_DMA - bool "Remote debugging over FireWire with firewire-ohci" - depends on FIREWIRE_OHCI - help - This option lets you use the FireWire bus for remote debugging - with help of the firewire-ohci driver. It enables unfiltered - remote DMA in firewire-ohci. - See Documentation/debugging-via-ohci1394.txt for more information. - - If unsure, say N. 
- config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK @@ -1575,8 +1625,79 @@ config DMA_API_DEBUG With this option you will be able to detect common bugs in device drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This option causes a performance degredation. Use only if you want - to debug device drivers. If unsure, say N. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. + +config TEST_MODULE + tristate "Test module loading with 'hello world' module" + default n + depends on m + help + This builds the "test_module" module that emits "Hello, world" + on printk when loaded. It is designed to be used for basic + evaluation of the module loading subsystem (for example when + validating module verification). It lacks any extra dependencies, + and will not normally be loaded by the system unless explicitly + requested by name. + + If unsure, say N. + +config TEST_USER_COPY + tristate "Test user/kernel boundary protections" + default n + depends on m + help + This builds the "test_user_copy" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + If unsure, say N. + +config TEST_BPF + tristate "Test BPF filter functionality" + default n + depends on m && NET + help + This builds the "test_bpf" module that runs various test vectors + against the BPF interpreter or BPF JIT compiler depending on the + current setting. 
This is in particular useful for BPF JIT compiler + development, but also to run regression tests against changes in + the interpreter code. It also enables test stubs for eBPF maps and + verifier used by user space verifier testsuite. + + If unsure, say N. + +config TEST_FIRMWARE + tristate "Test firmware loading via userspace interface" + default n + depends on FW_LOADER + help + This builds the "test_firmware" module that creates a userspace + interface for testing firmware loading. This can be used to + control the triggering of firmware loading without needing an + actual firmware-using device. The contents can be rechecked by + userspace. + + If unsure, say N. + +config TEST_UDELAY + tristate "udelay test driver" + default n + help + This builds the "udelay_test" module that helps to make sure + that udelay() is working properly. + + If unsure, say N. source "samples/Kconfig" diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..d6b4bc496408 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,11 +26,15 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o + percpu-refcount.o percpu_ida.o hash.o rhashtable.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_MODULE) += test_module.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o +obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -43,10 +47,12 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +GCOV_PROFILE_hweight.o := n 
CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o @@ -93,6 +99,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o +obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o @@ -130,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o +obj-$(CONFIG_GLOB) += glob.o + obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o @@ -144,7 +153,8 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o -libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o +libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ + fdt_empty_tree.o $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) @@ -152,8 +162,6 @@ lib-$(CONFIG_LIBFDT) += $(libfdt_files) obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o -interval_tree_test-objs := interval_tree_test_main.o interval_tree.o - obj-$(CONFIG_PERCPU_TEST) += percpu_test.o obj-$(CONFIG_ASN1) += asn1_decoder.o diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 11b9b01fda6b..1a000bb050f9 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -140,7 +140,7 @@ error: * @decoder: The decoder definition (produced by asn1_compiler) * @context: The caller's context (to be passed to the action functions) * @data: The encoded data - * @datasize: The size of the encoded data + * @datalen: The size of the encoded data * * Decode BER/DER/CER 
encoded ASN.1 data according to a bytecode pattern * produced by asn1_compiler. Action functions are called on marked tags to diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 1b6a44f1ec3e..2404d03e251a 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -157,7 +157,7 @@ enum assoc_array_walk_status { assoc_array_walk_tree_empty, assoc_array_walk_found_terminal_node, assoc_array_walk_found_wrong_shortcut, -} status; +}; struct assoc_array_walk_result { struct { @@ -1723,11 +1723,13 @@ ascend_old_tree: shortcut = assoc_array_ptr_to_shortcut(ptr); slot = shortcut->parent_slot; cursor = shortcut->back_pointer; + if (!cursor) + goto gc_complete; } else { slot = node->parent_slot; cursor = ptr; } - BUG_ON(!ptr); + BUG_ON(!cursor); node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; @@ -1735,7 +1737,7 @@ ascend_old_tree: gc_complete: edit->set[0].to = new_root; assoc_array_apply_edit(edit); - edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + array->nr_leaves_on_tree = nr_leaves_on_tree; return 0; enomem: diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 00bca223d1e1..0211d30d8c39 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -8,6 +8,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/bug.h> #include <linux/kernel.h> @@ -146,18 +149,18 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); #ifdef CONFIG_X86 - printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", + pr_info("passed for %s platform %s CX8 and %s SSE\n", #ifdef CONFIG_X86_64 - "x86-64", + "x86-64", #elif defined(CONFIG_X86_CMPXCHG64) - "i586+", + "i586+", #else - "i386+", + "i386+", #endif boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", boot_cpu_has(X86_FEATURE_XMM) ? 
"with" : "without"); #else - printk(KERN_INFO "atomic64 test passed\n"); + pr_info("passed\n"); #endif return 0; diff --git a/lib/audit.c b/lib/audit.c index 76bbed4a20e5..1d726a22565b 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -30,11 +30,17 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { - return 0; + if (audit_is_compat(arch)) + return 1; + else + return 0; } int audit_classify_syscall(int abi, unsigned syscall) { + if (audit_is_compat(abi)) + return audit_classify_compat_syscall(abi, syscall); + switch(syscall) { #ifdef __NR_open case __NR_open: @@ -57,6 +63,13 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { +#ifdef CONFIG_AUDIT_COMPAT_GENERIC + audit_register_class(AUDIT_CLASS_WRITE_32, compat_write_class); + audit_register_class(AUDIT_CLASS_READ_32, compat_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, compat_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, compat_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, compat_signal_class); +#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/lib/average.c b/lib/average.c index 99a67e662b3c..114d1beae0c7 100644 --- a/lib/average.c +++ b/lib/average.c @@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init); */ struct ewma *ewma_add(struct ewma *avg, unsigned long val) { - avg->internal = avg->internal ? - (((avg->internal << avg->weight) - avg->internal) + + unsigned long internal = ACCESS_ONCE(avg->internal); + + ACCESS_ONCE(avg->internal) = internal ? + (((internal << avg->weight) - internal) + (val << avg->factor)) >> avg->weight : (val << avg->factor); return avg; diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4fe8d2d..cd250a2e14cb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -40,9 +40,9 @@ * for the best explanations of this ordering. 
*/ -int __bitmap_empty(const unsigned long *bitmap, int bits) +int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap[k]) return 0; @@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_empty); -int __bitmap_full(const unsigned long *bitmap, int bits) +int __bitmap_full(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (~bitmap[k]) return 0; @@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits) EXPORT_SYMBOL(__bitmap_full); int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) return 0; @@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_equal); -void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) dst[k] = ~src[k]; if (bits % BITS_PER_LONG) - dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); + dst[k] = ~src[k]; } EXPORT_SYMBOL(__bitmap_complement); @@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst, EXPORT_SYMBOL(__bitmap_shift_left); int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; 
k++) result |= (dst[k] = bitmap1[k] & bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_and); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] | bitmap2[k]; @@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_or); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] ^ bitmap2[k]; @@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_xor); int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return 1; @@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned 
long *bitmap1, EXPORT_SYMBOL(__bitmap_intersects); int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & ~bitmap2[k]) return 0; @@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_subset); -int __bitmap_weight(const unsigned long *bitmap, int bits) +int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) { - int k, w = 0, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; + int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bitmap[k]); @@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); -void bitmap_set(unsigned long *map, int start, int nr) +void bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (len - bits_to_set >= 0) { *p |= mask_to_set; - nr -= bits_to_set; + len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; p++; } - if (nr) { + if (len) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *p |= mask_to_set; } } EXPORT_SYMBOL(bitmap_set); -void bitmap_clear(unsigned long *map, int start, int nr) +void bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; - nr -= bits_to_clear; + len -= 
bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; p++; } - if (nr) { + if (len) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *p &= ~mask_to_clear; } @@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) { - char *nl = strchr(bp, '\n'); - int len; - - if (nl) - len = nl - bp; - else - len = strlen(bp); + char *nl = strchrnul(bp, '\n'); + int len = nl - bp; return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); } @@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user); * * If for example, just bits 4 through 7 are set in @buf, then @pos * values 4 through 7 will get mapped to 0 through 3, respectively, - * and other @pos values will get mapped to 0. When @pos value 7 + * and other @pos values will get mapped to -1. When @pos value 7 * gets mapped to (returns) @ord value 3 in this example, that means * that bit 7 is the 3rd (starting with 0th) set bit in @buf. * @@ -882,7 +884,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * read it, you're overqualified for your current job.) * * In other words, @orig is mapped onto (surjectively) @dst, - * using the the map { <n, m> | the n-th bit of @relmap is the + * using the map { <n, m> | the n-th bit of @relmap is the * m-th set bit of @relmap }. 
* * Any set bits in @orig above bit number W, where W is the @@ -930,7 +932,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * * Further lets say we use the following code, invoking * bitmap_fold() then bitmap_onto, as suggested above to - * avoid the possitility of an empty @dst result: + * avoid the possibility of an empty @dst result: * * unsigned long *tmp; // a temporary bitmap's bits * @@ -1046,7 +1048,7 @@ enum { REG_OP_RELEASE, /* clear all bits in region */ }; -static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) +static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op) { int nbits_reg; /* number of bits in region */ int index; /* index first long of region in bitmap */ @@ -1112,11 +1114,11 @@ done: * Return the bit offset in bitmap of the allocated region, * or -errno on failure. */ -int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { - int pos, end; /* scans bitmap by regions of size order */ + unsigned int pos, end; /* scans bitmap by regions of size order */ - for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { + for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) continue; __reg_op(bitmap, pos, order, REG_OP_ALLOC); @@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region); * * No return value. */ -void bitmap_release_region(unsigned long *bitmap, int pos, int order) +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { __reg_op(bitmap, pos, order, REG_OP_RELEASE); } @@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region); * Return 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). 
*/ -int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) return -EBUSY; - __reg_op(bitmap, pos, order, REG_OP_ALLOC); - return 0; + return __reg_op(bitmap, pos, order, REG_OP_ALLOC); } EXPORT_SYMBOL(bitmap_allocate_region); diff --git a/lib/btree.c b/lib/btree.c index f9a484676cb6..4264871ea1a0 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init); void btree_destroy(struct btree_head *head) { + mempool_free(head->node, head->mempool); mempool_destroy(head->mempool); head->mempool = NULL; } diff --git a/lib/bug.c b/lib/bug.c index 168603477f02..d1d7c7878900 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -37,6 +37,9 @@ Jeremy Fitzhardinge <jeremy@goop.org> 2006 */ + +#define pr_fmt(fmt) fmt + #include <linux/list.h> #include <linux/module.h> #include <linux/kernel.h> @@ -153,15 +156,13 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_WARNING "------------[ cut here ]------------\n"); + pr_warn("------------[ cut here ]------------\n"); if (file) - printk(KERN_WARNING "WARNING: at %s:%u\n", - file, line); + pr_warn("WARNING: at %s:%u\n", file, line); else - printk(KERN_WARNING "WARNING: at %p " - "[verbose debug info unavailable]\n", - (void *)bugaddr); + pr_warn("WARNING: at %p [verbose debug info unavailable]\n", + (void *)bugaddr); print_modules(); show_regs(regs); @@ -174,12 +175,10 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) printk(KERN_DEFAULT "------------[ cut here ]------------\n"); if (file) - printk(KERN_CRIT "kernel BUG at %s:%u!\n", - file, line); + pr_crit("kernel BUG at %s:%u!\n", file, line); else - printk(KERN_CRIT "Kernel BUG at %p " - "[verbose debug info unavailable]\n", - (void *)bugaddr); + pr_crit("Kernel BUG at %p [verbose 
debug info unavailable]\n", + (void *)bugaddr); return BUG_TRAP_TYPE_BUG; } diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c index a8f8379eb49f..2e11e48446ab 100644 --- a/lib/clz_ctz.c +++ b/lib/clz_ctz.c @@ -6,6 +6,9 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * The functions in this file aren't called directly, but are required by + * GCC builtins such as __builtin_ctz, and therefore they can't be removed + * despite appearing unreferenced in kernel source. * * __c[lt]z[sd]i2 can be overridden by linking arch-specific versions. */ @@ -13,18 +16,22 @@ #include <linux/export.h> #include <linux/kernel.h> +int __weak __ctzsi2(int val); int __weak __ctzsi2(int val) { return __ffs(val); } EXPORT_SYMBOL(__ctzsi2); +int __weak __clzsi2(int val); int __weak __clzsi2(int val) { return 32 - fls(val); } EXPORT_SYMBOL(__clzsi2); +int __weak __clzdi2(long val); +int __weak __ctzdi2(long val); #if BITS_PER_LONG == 32 int __weak __clzdi2(long val) diff --git a/lib/cmdline.c b/lib/cmdline.c index eb6791188cf5..76a712e6e20e 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,13 +49,13 @@ static int get_range(char **str, int *pint) * 3 - hyphen found to denote a range */ -int get_option (char **str, int *pint) +int get_option(char **str, int *pint) { char *cur = *str; if (!cur || !(*cur)) return 0; - *pint = simple_strtol (cur, str, 0); + *pint = simple_strtol(cur, str, 0); if (cur == *str) return 0; if (**str == ',') { @@ -67,6 +67,7 @@ int get_option (char **str, int *pint) return 1; } +EXPORT_SYMBOL(get_option); /** * get_options - Parse a string into a list of integers @@ -84,13 +85,13 @@ int get_option (char **str, int *pint) * the parse to end (typically a null terminator, if @str is * completely parseable). 
*/ - + char *get_options(const char *str, int nints, int *ints) { int res, i = 1; while (i < nints) { - res = get_option ((char **)&str, ints + i); + res = get_option((char **)&str, ints + i); if (res == 0) break; if (res == 3) { @@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints) ints[0] = i - 1; return (char *)str; } +EXPORT_SYMBOL(get_options); /** * memparse - parse a string with mem suffixes into a number @@ -119,11 +121,7 @@ char *get_options(const char *str, int nints, int *ints) * @retptr: (output) Optional pointer to next char after parse completes * * Parses a string into a number. The number stored at @ptr is - * potentially suffixed with %K (for kilobytes, or 1024 bytes), - * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or - * 1073741824). If the number is suffixed with K, M, or G, then - * the return value is the number multiplied by one kilobyte, one - * megabyte, or one gigabyte, respectively. + * potentially suffixed with K, M, G, T, P, E. 
*/ unsigned long long memparse(const char *ptr, char **retptr) @@ -133,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr) unsigned long long ret = simple_strtoull(ptr, &endptr, 0); switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; case 'G': case 'g': ret <<= 10; @@ -152,8 +159,4 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - - EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); diff --git a/lib/compat_audit.c b/lib/compat_audit.c new file mode 100644 index 000000000000..873f75b640ab --- /dev/null +++ b/lib/compat_audit.c @@ -0,0 +1,50 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <asm/unistd32.h> + +unsigned compat_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned compat_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned compat_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned compat_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned compat_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_compat_syscall(int abi, unsigned syscall) +{ + switch (syscall) { +#ifdef __NR_open + case __NR_open: + return 2; +#endif +#ifdef __NR_openat + case __NR_openat: + return 3; +#endif +#ifdef __NR_socketcall + case __NR_socketcall: + return 4; +#endif + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/lib/cpumask.c b/lib/cpumask.c index d327b87c99b7..b6513a9f2892 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = alloc_bootmem(cpumask_size()); + *mask = memblock_virt_alloc(cpumask_size(), 0); } /** @@ -161,6 +161,69 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t 
mask) { - free_bootmem(__pa(mask), cpumask_size()); + memblock_free_early(__pa(mask), cpumask_size()); } #endif + +/** + * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first + * + * @i: index number + * @numa_node: local numa_node + * @dstp: cpumask with the relevant cpu bit set according to the policy + * + * This function sets the cpumask according to a numa aware policy. + * cpumask could be used as an affinity hint for the IRQ related to a + * queue. When the policy is to spread queues across cores - local cores + * first. + * + * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set + * the cpu bit and need to re-call the function. + */ +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + cpumask_var_t mask; + int cpu; + int ret = 0; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + i %= num_online_cpus(); + + if (numa_node == -1 || !cpumask_of_node(numa_node)) { + /* Use all online cpu's for non numa aware system */ + cpumask_copy(mask, cpu_online_mask); + } else { + int n; + + cpumask_and(mask, + cpumask_of_node(numa_node), cpu_online_mask); + + n = cpumask_weight(mask); + if (i >= n) { + i -= n; + + /* If index > number of local cpu's, mask out local + * cpu's + */ + cpumask_andnot(mask, cpu_online_mask, mask); + } + } + + for_each_cpu(cpu, mask) { + if (--i < 0) + goto out; + } + + ret = -EAGAIN; + +out: + free_cpumask_var(mask); + + if (!ret) + cpumask_set_cpu(cpu, dstp); + + return ret; +} +EXPORT_SYMBOL(cpumask_set_cpu_local_first); diff --git a/lib/crc32.c b/lib/crc32.c index 70f00ca5ef1e..9a907d489d95 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -33,13 +33,13 @@ #include "crc32defs.h" #if CRC_LE_BITS > 8 -# define tole(x) ((__force u32) __constant_cpu_to_le32(x)) +# define tole(x) ((__force u32) cpu_to_le32(x)) #else # define tole(x) (x) #endif #if CRC_BE_BITS > 8 -# define tobe(x) ((__force u32) __constant_cpu_to_be32(x)) +# define tobe(x) ((__force u32) 
cpu_to_be32(x)) #else # define tobe(x) (x) #endif @@ -50,34 +50,10 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#define GF2_DIM 32 - -static u32 gf2_matrix_times(u32 *mat, u32 vec) -{ - u32 sum = 0; - - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - - return sum; -} - -static void gf2_matrix_square(u32 *square, u32 *mat) -{ - int i; - - for (i = 0; i < GF2_DIM; i++) - square[i] = gf2_matrix_times(mat, mat[i]); -} - #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ -static inline u32 +static inline u32 __pure crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN @@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif -/* For conditions of distribution and use, see copyright notice in zlib.h */ -static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, - u32 polynomial) -{ - u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ - u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ - u32 row; - int i; - - if (len2 <= 0) - return crc1; - - /* Put operator for one zero bit in odd */ - odd[0] = polynomial; - row = 1; - for (i = 1; i < GF2_DIM; i++) { - odd[i] = row; - row <<= 1; - } - - gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ - gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ - - /* Apply len2 zeros to crc1 (first square will put the operator for one - * zero byte, eight zero bits, in even). 
- */ - do { - /* Apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - /* If no more bits set, then done */ - if (len2 == 0) - break; - /* Another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - } while (len2 != 0); - - crc1 ^= crc2; - return crc1; -} /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II @@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif -u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(__crc32c_le); + +/* + * This multiplies the polynomials x and y modulo the given modulus. + * This follows the "little-endian" CRC convention that the lsbit + * represents the highest power of x, and the msbit represents x^0. + */ +static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) { - return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); + u32 product = x & 1 ? y : 0; + int i; + + for (i = 0; i < 31; i++) { + product = (product >> 1) ^ (product & 1 ? modulus : 0); + x >>= 1; + product ^= x & 1 ? y : 0; + } + + return product; } -u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +/** + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient) + * @len: The number of bytes. @crc is multiplied by x^(8*@len) + * @polynomial: The modulus used to reduce the result to 32 bits. + * + * It's possible to parallelize CRC computations by computing a CRC + * over separate ranges of a buffer, then summing them. + * This shifts the given CRC by 8*len bits (i.e. produces the same effect + * as appending len bytes of zero to the data), in time proportional + * to log(len). 
+ */ +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, + u32 polynomial) { - return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); + u32 power = polynomial; /* CRC of x^32 */ + int i; + + /* Shift up to 32 bits in the simple linear way */ + for (i = 0; i < 8 * (int)(len & 3); i++) + crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0); + + len >>= 2; + if (!len) + return crc; + + for (;;) { + /* "power" is x^(2^i), modulo the polynomial */ + if (len & 1) + crc = gf2_multiply(crc, power, polynomial); + + len >>= 1; + if (!len) + break; + + /* Square power, advancing to x^(2^(i+1)) */ + power = gf2_multiply(power, power, polynomial); + } + + return crc; } -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(crc32_le_combine); -EXPORT_SYMBOL(__crc32c_le); -EXPORT_SYMBOL(__crc32c_le_combine); + +u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRCPOLY_LE); +} + +u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRC32C_POLY_LE); +} +EXPORT_SYMBOL(crc32_le_shift); +EXPORT_SYMBOL(__crc32c_le_shift); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 @@ -351,7 +344,7 @@ EXPORT_SYMBOL(crc32_be); #ifdef CONFIG_CRC32_SELFTEST /* 4096 random bytes */ -static u8 __attribute__((__aligned__(8))) test_buf[] = +static u8 const __aligned(8) test_buf[] __initconst = { 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, @@ -875,7 +868,7 @@ static struct crc_test { u32 crc_le; /* expected crc32_le result */ u32 crc_be; /* expected crc32_be result */ u32 crc32c_le; /* expected crc32c_le result */ -} test[] = +} const test[] __initconst = { {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, diff --git a/lib/crc7.c b/lib/crc7.c index f1c3a144cec1..bf6255e23919 100644 --- a/lib/crc7.c +++ b/lib/crc7.c 
@@ -10,42 +10,47 @@ #include <linux/crc7.h> -/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */ -const u8 crc7_syndrome_table[256] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, - 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, - 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e, - 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, - 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, - 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, - 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c, - 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, - 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13, - 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, - 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a, - 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, - 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, - 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, - 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38, - 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, - 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36, - 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, - 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f, - 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, - 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, - 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d, - 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, - 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52, - 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, - 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b, - 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, - 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60, - 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, - 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79 +/* + * Table for CRC-7 (polynomial x^7 + x^3 + 1). + * This is a big-endian CRC (msbit is highest power of x), + * aligned so the msbit of the byte is the x^6 coefficient + * and the lsbit is not used. 
+ */ +const u8 crc7_be_syndrome_table[256] = { + 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, + 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, + 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, + 0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, + 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28, + 0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8, + 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, + 0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84, + 0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14, + 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, + 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42, + 0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0, + 0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70, + 0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc, + 0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c, + 0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce, + 0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e, + 0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98, + 0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08, + 0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa, + 0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a, + 0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34, + 0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4, + 0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06, + 0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96, + 0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50, + 0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0, + 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, + 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 }; -EXPORT_SYMBOL(crc7_syndrome_table); +EXPORT_SYMBOL(crc7_be_syndrome_table); /** * crc7 - update the CRC7 for the data buffer @@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table); * Context: any * * Returns the updated CRC7 value. 
+ * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that + * makes the computation easier, and all callers want it in that form. + * */ -u8 crc7(u8 crc, const u8 *buffer, size_t len) +u8 crc7_be(u8 crc, const u8 *buffer, size_t len) { while (len--) - crc = crc7_byte(crc, *buffer++); + crc = crc7_be_byte(crc, *buffer++); return crc; } -EXPORT_SYMBOL(crc7); +EXPORT_SYMBOL(crc7_be); MODULE_DESCRIPTION("CRC7 calculations"); MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index e0731c3db706..547f7f923dbc 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,6 +7,9 @@ * * For licencing details see kernel-base/COPYING */ + +#define pr_fmt(fmt) "ODEBUG: " fmt + #include <linux/debugobjects.h> #include <linux/interrupt.h> #include <linux/sched.h> @@ -218,7 +221,7 @@ static void debug_objects_oom(void) unsigned long flags; int i; - printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); + pr_warn("Out of memory. ODEBUG disabled\n"); for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { raw_spin_lock_irqsave(&db->lock, flags); @@ -292,11 +295,9 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - printk(KERN_WARNING - "ODEBUG: object is on stack, but not annotated\n"); + pr_warn("object is on stack, but not annotated\n"); else - printk(KERN_WARNING - "ODEBUG: object is not on stack, but annotated\n"); + pr_warn("object is not on stack, but annotated\n"); WARN_ON(1); } @@ -985,7 +986,7 @@ static void __init debug_objects_selftest(void) if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings)) goto out; #endif - printk(KERN_INFO "ODEBUG: selftest passed\n"); + pr_info("selftest passed\n"); out: debug_objects_fixups = oldfixups; @@ -1060,8 +1061,8 @@ static int __init debug_objects_replace_static_objects(void) } local_irq_enable(); - printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, - obj_pool_used); + pr_debug("%d of %d active objects replaced\n", + cnt, 
obj_pool_used); return 0; free: hlist_for_each_entry_safe(obj, tmp, &objects, node) { @@ -1090,7 +1091,7 @@ void __init debug_objects_mem_init(void) debug_objects_enabled = 0; if (obj_cache) kmem_cache_destroy(obj_cache); - printk(KERN_WARNING "ODEBUG: out of memory.\n"); + pr_warn("out of memory.\n"); } else debug_objects_selftest(); } diff --git a/lib/decompress.c b/lib/decompress.c index 4d1cd0397aab..37f3c786348f 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/printk.h> #ifndef CONFIG_DECOMPRESS_GZIP # define gunzip NULL @@ -53,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = { { {0, 0}, NULL, NULL } }; -decompress_fn __init decompress_method(const unsigned char *inbuf, int len, +decompress_fn __init decompress_method(const unsigned char *inbuf, long len, const char **name) { const struct compress_format *cf; @@ -61,6 +62,8 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, int len, if (len < 2) return NULL; /* Need at least this much... */ + pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]); + for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2)) break; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 31c5f7675fbf..8290e0bef7ea 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -92,8 +92,8 @@ struct bunzip_data { /* State for interrupting output loop */ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; /* I/O tracking data (file handles, buffers, positions, etc.) 
*/ - int (*fill)(void*, unsigned int); - int inbufCount, inbufPos /*, outbufPos*/; + long (*fill)(void*, unsigned long); + long inbufCount, inbufPos /*, outbufPos*/; unsigned char *inbuf /*,*outbuf*/; unsigned int inbufBitCount, inbufBits; /* The CRC values stored in the block header and calculated from the @@ -617,7 +617,7 @@ decode_next_byte: goto decode_next_byte; } -static int INIT nofill(void *buf, unsigned int len) +static long INIT nofill(void *buf, unsigned long len) { return -1; } @@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len) /* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are ignored, and data is read from file handle into temporary buffer. */ -static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, - int (*fill)(void*, unsigned int)) +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, + long (*fill)(void*, unsigned long)) { struct bunzip_data *bd; unsigned int i, j, c; @@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, /* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, not end of file.) 
*/ -STATIC int INIT bunzip2(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT bunzip2(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { struct bunzip_data *bd; @@ -743,11 +743,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d619b28c456f..d4c7891635ec 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -19,6 +19,7 @@ #include "zlib_inflate/inflate.h" #include "zlib_inflate/infutil.h" +#include <linux/decompress/inflate.h> #endif /* STATIC */ @@ -26,17 +27,17 @@ #define GZIP_IOBUF_SIZE (16*1024) -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } /* Included from initramfs et al code */ -STATIC int INIT gunzip(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT gunzip(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *out_buf, - int *pos, + long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; @@ -141,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, /* Write any data generated */ if (flush && strm->next_out > out_buf) { - int l = strm->next_out - out_buf; + long l = strm->next_out - out_buf; if (l != flush(out_buf, l)) { rc = -1; error("write error"); diff --git 
a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3e67cfad16ad..40f66ebe57b7 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -31,10 +31,10 @@ #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) #define ARCHIVE_MAGICNUMBER 0x184C2102 -STATIC inline int INIT unlz4(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, int *posp, +STATIC inline int INIT unlz4(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + u8 *output, long *posp, void (*error) (char *x)) { int ret = -1; @@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, u8 *inp; u8 *inp_start; u8 *outp; - int size = in_len; + long size = in_len; #ifdef PREBOOT size_t out_len = get_unaligned_le32(input + in_len); #endif @@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, if (posp) *posp = 0; - if (fill) - fill(inp, 4); + if (fill) { + size = fill(inp, 4); + if (size < 4) { + error("data corrupted"); + goto exit_2; + } + } chunksize = get_unaligned_le32(inp); if (chunksize == ARCHIVE_MAGICNUMBER) { - inp += 4; - size -= 4; + if (!fill) { + inp += 4; + size -= 4; + } } else { error("invalid header"); goto exit_2; @@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, for (;;) { - if (fill) - fill(inp, 4); + if (fill) { + size = fill(inp, 4); + if (size == 0) + break; + if (size < 4) { + error("data corrupted"); + goto exit_2; + } + } chunksize = get_unaligned_le32(inp); if (chunksize == ARCHIVE_MAGICNUMBER) { - inp += 4; - size -= 4; + if (!fill) { + inp += 4; + size -= 4; + } if (posp) *posp += 4; continue; } - inp += 4; - size -= 4; + if (posp) *posp += 4; - if (fill) { + if (!fill) { + inp += 4; + size -= 4; + } else { if (chunksize > lz4_compressbound(uncomp_chunksize)) { error("chunk length is longer than allocated"); goto exit_2; } - fill(inp, chunksize); + size = fill(inp, chunksize); + if (size < chunksize) { + 
error("data corrupted"); + goto exit_2; + } } #ifdef PREBOOT if (out_len >= uncomp_chunksize) { @@ -141,6 +163,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, goto exit_2; } + ret = -1; if (flush && flush(outp, dest_len) != dest_len) goto exit_2; if (output) @@ -148,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, if (posp) *posp += chunksize; - size -= chunksize; + if (!fill) { + size -= chunksize; - if (size == 0) - break; - else if (size < 0) { - error("data corrupted"); - goto exit_2; + if (size == 0) + break; + else if (size < 0) { + error("data corrupted"); + goto exit_2; + } + inp += chunksize; } - - inp += chunksize; - if (fill) - inp = inp_start; } ret = 0; @@ -174,11 +196,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 32adb73a9038..0be83af62b88 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size) #define LZMA_IOBUF_SIZE 0x10000 struct rc { - int (*fill)(void*, unsigned int); + long (*fill)(void*, unsigned long); uint8_t *ptr; uint8_t *buffer; uint8_t *buffer_end; - int buffer_size; + long buffer_size; uint32_t code; uint32_t range; uint32_t bound; @@ -82,7 +82,7 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } @@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc) /* Called once */ static inline void INIT rc_init(struct rc *rc, - int (*fill)(void*, unsigned int), - char *buffer, int buffer_size) + long (*fill)(void*, 
unsigned long), + char *buffer, long buffer_size) { if (fill) rc->fill = fill; @@ -280,7 +280,7 @@ struct writer { size_t buffer_pos; int bufsize; size_t global_pos; - int(*flush)(void*, unsigned int); + long (*flush)(void*, unsigned long); struct lzma_header *header; }; @@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc, -STATIC inline int INIT unlzma(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC inline int INIT unlzma(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { @@ -667,11 +667,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 960183d4258f..b94a31bdd87d 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = { #define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4) #define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4) -STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) +STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len) { int l; u8 *parse = input; @@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) return 1; } -STATIC inline int INIT unlzo(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, int *posp, +STATIC int INIT unlzo(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + 
u8 *output, long *posp, void (*error) (char *x)) { u8 r = 0; - int skip = 0; + long skip = 0; u32 src_len, dst_len; size_t tmp; u8 *in_buf, *in_buf_save, *out_buf; diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 9f34eb56854d..b07a78340e9d 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size) * both input and output buffers are available as a single chunk, i.e. when * fill() and flush() won't be used. */ -STATIC int INIT unxz(unsigned char *in, int in_size, - int (*fill)(void *dest, unsigned int size), - int (*flush)(void *src, unsigned int size), - unsigned char *out, int *in_used, +STATIC int INIT unxz(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long *in_used, void (*error)(char *x)) { struct xz_buf b; @@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size, * returned by xz_dec_run(), but probably * it's not too bad. 
*/ - if (flush(b.out, b.out_pos) != (int)b.out_pos) + if (flush(b.out, b.out_pos) != (long)b.out_pos) ret = XZ_BUF_ERROR; b.out_pos = 0; diff --git a/lib/devres.c b/lib/devres.c index 823533138fa0..f4a195a6efe4 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -81,7 +81,7 @@ EXPORT_SYMBOL(devm_ioremap_nocache); void devm_iounmap(struct device *dev, void __iomem *addr) { WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, - (void *)addr)); + (__force void *)addr)); iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); @@ -114,7 +114,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!res || resource_type(res) != IORESOURCE_MEM) { dev_err(dev, "invalid resource\n"); - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } size = resource_size(res); @@ -122,7 +122,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!devm_request_mem_region(dev, res->start, size, name)) { dev_err(dev, "can't request region for resource %pR\n", res); - return ERR_PTR(-EBUSY); + return IOMEM_ERR_PTR(-EBUSY); } if (res->flags & IORESOURCE_CACHEABLE) @@ -133,42 +133,14 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!dest_ptr) { dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); - dest_ptr = ERR_PTR(-ENOMEM); + dest_ptr = IOMEM_ERR_PTR(-ENOMEM); } return dest_ptr; } EXPORT_SYMBOL(devm_ioremap_resource); -/** - * devm_request_and_ioremap() - Check, request region, and ioremap resource - * @dev: Generic device to handle the resource for - * @res: resource to be handled - * - * Takes all necessary steps to ioremap a mem resource. Uses managed device, so - * everything is undone on driver detach. Checks arguments, so you can feed - * it the result from e.g. platform_get_resource() directly. Returns the - * remapped pointer or NULL on error. 
Usage example: - * - * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - * base = devm_request_and_ioremap(&pdev->dev, res); - * if (!base) - * return -EADDRNOTAVAIL; - */ -void __iomem *devm_request_and_ioremap(struct device *device, - struct resource *res) -{ - void __iomem *dest_ptr; - - dest_ptr = devm_ioremap_resource(device, res); - if (IS_ERR(dest_ptr)) - return NULL; - - return dest_ptr; -} -EXPORT_SYMBOL(devm_request_and_ioremap); - -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres */ @@ -192,7 +164,7 @@ static int devm_ioport_map_match(struct device *dev, void *res, * Managed ioport_map(). Map is automatically unmapped on driver * detach. */ -void __iomem * devm_ioport_map(struct device *dev, unsigned long port, +void __iomem *devm_ioport_map(struct device *dev, unsigned long port, unsigned int nr) { void __iomem **ptr, *addr; @@ -224,10 +196,10 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) { ioport_unmap(addr); WARN_ON(devres_destroy(dev, devm_ioport_map_release, - devm_ioport_map_match, (void *)addr)); + devm_ioport_map_match, (__force void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* @@ -263,7 +235,7 @@ static void pcim_iomap_release(struct device *gendev, void *res) * be safely called without context and guaranteed to succed once * allocated. */ -void __iomem * const * pcim_iomap_table(struct pci_dev *pdev) +void __iomem * const *pcim_iomap_table(struct pci_dev *pdev) { struct pcim_iomap_devres *dr, *new_dr; @@ -288,7 +260,7 @@ EXPORT_SYMBOL(pcim_iomap_table); * Managed pci_iomap(). Map is automatically unmapped on driver * detach. 
*/ -void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) +void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) { void __iomem **tbl; diff --git a/lib/digsig.c b/lib/digsig.c index 8793aeda30ca..ae05ea393fc8 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -175,10 +175,11 @@ err1: * digsig_verify() - digital signature verification with public key * @keyring: keyring to search key in * @sig: digital signature - * @sigen: length of the signature + * @siglen: length of the signature * @data: data * @datalen: length of the data - * @return: 0 on success, -EINVAL otherwise + * + * Returns 0 on success, -EINVAL otherwise * * Verifies data integrity against digital signature. * Currently only RSA is supported. diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d87a17a819d0..add80cc02dbe 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -53,11 +53,26 @@ enum map_err_types { #define DMA_DEBUG_STACKTRACE_ENTRIES 5 +/** + * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping + * @list: node on pre-allocated free_entries list + * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @type: single, page, sg, coherent + * @pfn: page frame of the start address + * @offset: offset of mapping relative to pfn + * @size: length of the mapping + * @direction: enum dma_data_direction + * @sg_call_ents: 'nents' from dma_map_sg + * @sg_mapped_ents: 'mapped_ents' from dma_map_sg + * @map_err_type: track whether dma_mapping_error() was checked + * @stacktrace: support backtraces when a violation is detected + */ struct dma_debug_entry { struct list_head list; struct device *dev; int type; - phys_addr_t paddr; + unsigned long pfn; + size_t offset; u64 dev_addr; u64 size; int direction; @@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } +static unsigned long long phys_addr(struct dma_debug_entry *entry) +{ + return 
page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; +} + /* * Dump mapping entries for debugging purposes */ @@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", + "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, - (unsigned long long)entry->paddr, + phys_addr(entry), entry->pfn, entry->dev_addr, entry->size, dir2name[entry->direction], maperr2str[entry->map_err_type]); @@ -404,6 +424,176 @@ void debug_dma_dump_mappings(struct device *dev) EXPORT_SYMBOL(debug_dma_dump_mappings); /* + * For each mapping (initial cacheline in the case of + * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a + * scatterlist, or the cacheline specified in dma_map_single) insert + * into this tree using the cacheline as the key. At + * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If + * the entry already exists at insertion time add a tag as a reference + * count for the overlapping mappings. For now, the overlap tracking + * just ensures that 'unmaps' balance 'maps' before marking the + * cacheline idle, but we should also be flagging overlaps as an API + * violation. + * + * Memory usage is mostly constrained by the maximum number of available + * dma-debug entries in that we need a free dma_debug_entry before + * inserting into the tree. In the case of dma_map_page and + * dma_alloc_coherent there is only one dma_debug_entry and one + * dma_active_cacheline entry to track per event. dma_map_sg(), on the + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. + * + * At any time debug_dma_assert_idle() can be called to trigger a + * warning if any cachelines in the given page are in the active set. 
+ */ +static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static DEFINE_SPINLOCK(radix_lock); +#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) + +static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) +{ + return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + + (entry->offset >> L1_CACHE_SHIFT); +} + +static int active_cacheline_read_overlap(phys_addr_t cln) +{ + int overlap = 0, i; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) + overlap |= 1 << i; + return overlap; +} + +static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) +{ + int i; + + if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) + return overlap; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (overlap & 1 << i) + radix_tree_tag_set(&dma_active_cacheline, cln, i); + else + radix_tree_tag_clear(&dma_active_cacheline, cln, i); + + return overlap; +} + +static void active_cacheline_inc_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + overlap = active_cacheline_set_overlap(cln, ++overlap); + + /* If we overflowed the overlap counter then we're potentially + * leaking dma-mappings. Otherwise, if maps and unmaps are + * balanced then this overflow may cause false negatives in + * debug_dma_assert_idle() as the cacheline may be marked idle + * prematurely. 
+ */ + WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + ACTIVE_CACHELINE_MAX_OVERLAP, &cln); +} + +static int active_cacheline_dec_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + return active_cacheline_set_overlap(cln, --overlap); +} + +static int active_cacheline_insert(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + int rc; + + /* If the device is not writing memory then we don't have any + * concerns about the cpu consuming stale data. This mitigates + * legitimate usages of overlapping mappings. + */ + if (entry->direction == DMA_TO_DEVICE) + return 0; + + spin_lock_irqsave(&radix_lock, flags); + rc = radix_tree_insert(&dma_active_cacheline, cln, entry); + if (rc == -EEXIST) + active_cacheline_inc_overlap(cln); + spin_unlock_irqrestore(&radix_lock, flags); + + return rc; +} + +static void active_cacheline_remove(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + + /* ...mirror the insert case */ + if (entry->direction == DMA_TO_DEVICE) + return; + + spin_lock_irqsave(&radix_lock, flags); + /* since we are counting overlaps the final put of the + * cacheline will occur when the overlap count is 0. + * active_cacheline_dec_overlap() returns -1 in that case + */ + if (active_cacheline_dec_overlap(cln) < 0) + radix_tree_delete(&dma_active_cacheline, cln); + spin_unlock_irqrestore(&radix_lock, flags); +} + +/** + * debug_dma_assert_idle() - assert that a page is not undergoing dma + * @page: page to lookup in the dma_active_cacheline tree + * + * Place a call to this routine in cases where the cpu touching the page + * before the dma completes (page is dma_unmapped) will lead to data + * corruption. 
+ */ +void debug_dma_assert_idle(struct page *page) +{ + static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; + struct dma_debug_entry *entry = NULL; + void **results = (void **) &ents; + unsigned int nents, i; + unsigned long flags; + phys_addr_t cln; + + if (!page) + return; + + cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; + spin_lock_irqsave(&radix_lock, flags); + nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, + CACHELINES_PER_PAGE); + for (i = 0; i < nents; i++) { + phys_addr_t ent_cln = to_cacheline_number(ents[i]); + + if (ent_cln == cln) { + entry = ents[i]; + break; + } else if (ent_cln >= cln + CACHELINES_PER_PAGE) + break; + } + spin_unlock_irqrestore(&radix_lock, flags); + + if (!entry) + return; + + cln = to_cacheline_number(entry); + err_printk(entry->dev, entry, + "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + &cln); +} + +/* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ @@ -411,10 +601,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) { struct hash_bucket *bucket; unsigned long flags; + int rc; bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); put_hash_bucket(bucket, &flags); + + rc = active_cacheline_insert(entry); + if (rc == -ENOMEM) { + pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } + + /* TODO: report -EEXIST errors here as overlapping mappings are + * not supported by the DMA API + */ } static struct dma_debug_entry *__dma_entry_alloc(void) @@ -469,6 +670,8 @@ static void dma_entry_free(struct dma_debug_entry *entry) { unsigned long flags; + active_cacheline_remove(entry); + /* * add to beginning of the list - this way the entries are * more likely cache hot when they are reallocated. 
@@ -895,15 +1098,15 @@ static void check_unmap(struct dma_debug_entry *ref) ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && - (ref->paddr != entry->paddr)) { + (phys_addr(ref) != phys_addr(entry))) { err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=0x%016llx] " "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (unsigned long long)entry->paddr, - (unsigned long long)ref->paddr); + phys_addr(entry), + phys_addr(ref)); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -946,7 +1149,7 @@ static void check_unmap(struct dma_debug_entry *ref) static void check_for_stack(struct device *dev, void *addr) { if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from" + err_printk(dev, NULL, "DMA-API: device driver maps memory from " "stack [addr=%p]\n", addr); } @@ -1052,7 +1255,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_page; - entry->paddr = page_to_phys(page) + offset; + entry->pfn = page_to_pfn(page); + entry->offset = offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1148,7 +1352,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->paddr = sg_phys(s); + entry->pfn = page_to_pfn(sg_page(s)); + entry->offset = s->offset, entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1198,7 +1403,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1233,7 +1439,8 
@@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->paddr = virt_to_phys(virt); + entry->pfn = page_to_pfn(virt_to_page(virt)); + entry->offset = (size_t) virt & PAGE_MASK; entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; @@ -1248,7 +1455,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .paddr = virt_to_phys(virt), + .pfn = page_to_pfn(virt_to_page(virt)), + .offset = (size_t) virt & PAGE_MASK, .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1356,7 +1564,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1388,7 +1597,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, diff --git a/lib/dump_stack.c b/lib/dump_stack.c index f23b63f0a1c3..6745c6230db3 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -23,7 +23,7 @@ static void __dump_stack(void) #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { int was_locked; int old; @@ -55,7 +55,7 @@ retry: preempt_enable(); } #else -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { __dump_stack(); } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c37aeacd7651..31fe79e31ab8 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -8,6 +8,7 @@ * By Greg Banks 
<gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. + * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -24,6 +25,7 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/parser.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> @@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query, list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module && strcmp(query->module, dt->mod_name)) + if (query->module && + !match_wildcard(query->module, dt->mod_name)) continue; for (i = 0; i < dt->num_ddebugs; i++) { @@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && - strcmp(query->filename, dp->filename) && - strcmp(query->filename, kbasename(dp->filename)) && - strcmp(query->filename, trim_prefix(dp->filename))) + !match_wildcard(query->filename, dp->filename) && + !match_wildcard(query->filename, + kbasename(dp->filename)) && + !match_wildcard(query->filename, + trim_prefix(dp->filename))) continue; /* match against the function */ if (query->function && - strcmp(query->function, dp->function)) + !match_wildcard(query->function, dp->function)) continue; /* match against the format */ @@ -263,14 +268,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) */ static inline int parse_lineno(const char *str, unsigned int *val) { - char *end = NULL; BUG_ON(str == NULL); if (*str == '\0') { *val = 0; return 0; } - *val = simple_strtoul(str, &end, 10); - if (end == NULL || end == str || *end != '\0') { + if (kstrtouint(str, 10, val) < 0) { pr_err("bad line-number: %s\n", str); return -EINVAL; } @@ -343,14 +346,14 @@ static int ddebug_parse_query(char *words[], 
int nwords, } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) { - pr_err("line-number is <0\n"); + if (parse_lineno(first, &query->first_lineno) < 0) return -EINVAL; - } if (last) { /* range <first>-<last> */ - if (parse_lineno(last, &query->last_lineno) - < query->first_lineno) { + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, query->first_lineno); @@ -534,10 +537,9 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) return buf; } -int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) +void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) { va_list args; - int res; struct va_format vaf; char buf[PREFIX_SIZE]; @@ -549,21 +551,17 @@ int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - res = printk(KERN_DEBUG "%s%pV", - dynamic_emit_prefix(descriptor, buf), &vaf); + printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf); va_end(args); - - return res; } EXPORT_SYMBOL(__dynamic_pr_debug); -int __dynamic_dev_dbg(struct _ddebug *descriptor, +void __dynamic_dev_dbg(struct _ddebug *descriptor, const struct device *dev, const char *fmt, ...) 
{ struct va_format vaf; va_list args; - int res; BUG_ON(!descriptor); BUG_ON(!fmt); @@ -574,30 +572,27 @@ int __dynamic_dev_dbg(struct _ddebug *descriptor, vaf.va = &args; if (!dev) { - res = printk(KERN_DEBUG "(NULL device *): %pV", &vaf); + printk(KERN_DEBUG "(NULL device *): %pV", &vaf); } else { char buf[PREFIX_SIZE]; - res = dev_printk_emit(7, dev, "%s%s %s: %pV", - dynamic_emit_prefix(descriptor, buf), - dev_driver_string(dev), dev_name(dev), - &vaf); + dev_printk_emit(7, dev, "%s%s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(dev), dev_name(dev), + &vaf); } va_end(args); - - return res; } EXPORT_SYMBOL(__dynamic_dev_dbg); #ifdef CONFIG_NET -int __dynamic_netdev_dbg(struct _ddebug *descriptor, - const struct net_device *dev, const char *fmt, ...) +void __dynamic_netdev_dbg(struct _ddebug *descriptor, + const struct net_device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; - int res; BUG_ON(!descriptor); BUG_ON(!fmt); @@ -610,21 +605,21 @@ int __dynamic_netdev_dbg(struct _ddebug *descriptor, if (dev && dev->dev.parent) { char buf[PREFIX_SIZE]; - res = dev_printk_emit(7, dev->dev.parent, - "%s%s %s %s: %pV", - dynamic_emit_prefix(descriptor, buf), - dev_driver_string(dev->dev.parent), - dev_name(dev->dev.parent), - netdev_name(dev), &vaf); + dev_printk_emit(7, dev->dev.parent, + "%s%s %s %s%s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(dev->dev.parent), + dev_name(dev->dev.parent), + netdev_name(dev), netdev_reg_state(dev), + &vaf); } else if (dev) { - res = printk(KERN_DEBUG "%s: %pV", netdev_name(dev), &vaf); + printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev), + netdev_reg_state(dev), &vaf); } else { - res = printk(KERN_DEBUG "(NULL net_device): %pV", &vaf); + printk(KERN_DEBUG "(NULL net_device): %pV", &vaf); } va_end(args); - - return res; } EXPORT_SYMBOL(__dynamic_netdev_dbg); diff --git a/lib/fdt_empty_tree.c b/lib/fdt_empty_tree.c new file mode 100644 index 000000000000..5d30c58150ad --- 
/dev/null +++ b/lib/fdt_empty_tree.c @@ -0,0 +1,2 @@ +#include <linux/libfdt_env.h> +#include "../scripts/dtc/libfdt/fdt_empty_tree.c" diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc4..2eed22fa507c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index ebf3bac460b0..8f25652f40d4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include <linux/flex_proportions.h> -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/genalloc.c b/lib/genalloc.c index dda31168844f..cce4dd68c40d 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -316,7 +316,7 @@ EXPORT_SYMBOL(gen_pool_alloc); * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage * @pool: pool to allocate from * @size: number of bytes to allocate from the pool - * @dma: dma-view physical address + * @dma: dma-view physical address return value. Use NULL if unneeded. * * Allocate the requested number of bytes from the specified pool. * Uses the pool allocation function (with first-fit algorithm by default). @@ -334,7 +334,8 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) if (!vaddr) return NULL; - *dma = gen_pool_virt_to_phys(pool, vaddr); + if (dma) + *dma = gen_pool_virt_to_phys(pool, vaddr); return (void *)vaddr; } @@ -402,6 +403,35 @@ void gen_pool_for_each_chunk(struct gen_pool *pool, EXPORT_SYMBOL(gen_pool_for_each_chunk); /** + * addr_in_gen_pool - checks if an address falls within the range of a pool + * @pool: the generic memory pool + * @start: start address + * @size: size of the region + * + * Check if the range of addresses falls within the specified pool. Returns + * true if the entire range is contained in the pool and false otherwise. 
+ */ +bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, + size_t size) +{ + bool found = false; + unsigned long end = start + size; + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) { + if (start >= chunk->start_addr && start <= chunk->end_addr) { + if (end <= chunk->end_addr) { + found = true; + break; + } + } + } + rcu_read_unlock(); + return found; +} + +/** * gen_pool_avail - get available free space of the pool * @pool: pool to get available free space * @@ -480,6 +510,26 @@ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, EXPORT_SYMBOL(gen_pool_first_fit); /** + * gen_pool_first_fit_order_align - find the first available region + * of memory matching the size requirement. The region will be aligned + * to the order of the size specified. + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @data: additional data - unused + */ +unsigned long gen_pool_first_fit_order_align(unsigned long *map, + unsigned long size, unsigned long start, + unsigned int nr, void *data) +{ + unsigned long align_mask = roundup_pow_of_two(nr) - 1; + + return bitmap_find_next_zero_area(map, size, start, nr, align_mask); +} +EXPORT_SYMBOL(gen_pool_first_fit_order_align); + +/** * gen_pool_best_fit - find the best fitting region of memory * macthing the size requirement (no alignment constraint) * @map: The address to base the search on @@ -587,6 +637,7 @@ struct gen_pool *of_get_named_gen_pool(struct device_node *np, if (!np_pool) return NULL; pdev = of_find_device_by_node(np_pool); + of_node_put(np_pool); if (!pdev) return NULL; return dev_get_gen_pool(&pdev->dev); diff --git a/lib/glob.c b/lib/glob.c new file mode 100644 index 000000000000..500fc80d23e1 --- /dev/null +++ b/lib/glob.c @@ -0,0 +1,287 @@ +#include <linux/module.h> +#include <linux/glob.h> 
+ +/* + * The only reason this code can be compiled as a module is because the + * ATA code that depends on it can be as well. In practice, they're + * both usually compiled in and the module overhead goes away. + */ +MODULE_DESCRIPTION("glob(7) matching"); +MODULE_LICENSE("Dual MIT/GPL"); + +/** + * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) + * @pat: Shell-style pattern to match, e.g. "*.[ch]". + * @str: String to match. The pattern must match the entire string. + * + * Perform shell-style glob matching, returning true (1) if the match + * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). + * + * Pattern metacharacters are ?, *, [ and \. + * (And, inside character classes, !, - and ].) + * + * This is small and simple implementation intended for device blacklists + * where a string is matched against a number of patterns. Thus, it + * does not preprocess the patterns. It is non-recursive, and run-time + * is at most quadratic: strlen(@str)*strlen(@pat). + * + * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); + * it takes 6 passes over the pattern before matching the string. + * + * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT + * treat / or leading . specially; it isn't actually used for pathnames. + * + * Note that according to glob(7) (and unlike bash), character classes + * are complemented by a leading !; this does not support the regex-style + * [^a-z] syntax. + * + * An opening bracket without a matching close is matched literally. + */ +bool __pure glob_match(char const *pat, char const *str) +{ + /* + * Backtrack to previous * on mismatch and retry starting one + * character later in the string. Because * matches all characters + * (no exception for /), it can be easily proved that there's + * never a need to backtrack multiple levels. 
+ */ + char const *back_pat = NULL, *back_str = back_str; + + /* + * Loop over each token (character or class) in pat, matching + * it against the remaining unmatched tail of str. Return false + * on mismatch, or true after matching the trailing nul bytes. + */ + for (;;) { + unsigned char c = *str++; + unsigned char d = *pat++; + + switch (d) { + case '?': /* Wildcard: anything but nul */ + if (c == '\0') + return false; + break; + case '*': /* Any-length wildcard */ + if (*pat == '\0') /* Optimize trailing * case */ + return true; + back_pat = pat; + back_str = --str; /* Allow zero-length match */ + break; + case '[': { /* Character class */ + bool match = false, inverted = (*pat == '!'); + char const *class = pat + inverted; + unsigned char a = *class++; + + /* + * Iterate over each span in the character class. + * A span is either a single character a, or a + * range a-b. The first span may begin with ']'. + */ + do { + unsigned char b = a; + + if (a == '\0') /* Malformed */ + goto literal; + + if (class[0] == '-' && class[1] != ']') { + b = class[1]; + + if (b == '\0') + goto literal; + + class += 2; + /* Any special action if a > b? */ + } + match |= (a <= c && c <= b); + } while ((a = *class++) != ']'); + + if (match == inverted) + goto backtrack; + pat = class; + } + break; + case '\\': + d = *pat++; + /*FALLTHROUGH*/ + default: /* Literal character */ +literal: + if (c == d) { + if (d == '\0') + return true; + break; + } +backtrack: + if (c == '\0' || !back_pat) + return false; /* No point continuing */ + /* Try again from last *, one character later in str. 
*/ + pat = back_pat; + str = ++back_str; + break; + } + } +} +EXPORT_SYMBOL(glob_match); + + +#ifdef CONFIG_GLOB_SELFTEST + +#include <linux/printk.h> +#include <linux/moduleparam.h> + +/* Boot with "glob.verbose=1" to show successful tests, too */ +static bool verbose = false; +module_param(verbose, bool, 0); + +struct glob_test { + char const *pat, *str; + bool expected; +}; + +static bool __pure __init test(char const *pat, char const *str, bool expected) +{ + bool match = glob_match(pat, str); + bool success = match == expected; + + /* Can't get string literals into a particular section, so... */ + static char const msg_error[] __initconst = + KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; + static char const msg_ok[] __initconst = + KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; + static char const mismatch[] __initconst = "mismatch"; + char const *message; + + if (!success) + message = msg_error; + else if (verbose) + message = msg_ok; + else + return success; + + printk(message, pat, str, mismatch + 3*match); + return success; +} + +/* + * The tests are all jammed together in one array to make it simpler + * to place that array in the .init.rodata section. The obvious + * "array of structures containing char *" has no way to force the + * pointed-to strings to be in a particular section. + * + * Anyway, a test consists of: + * 1. Expected glob_match result: '1' or '0'. + * 2. Pattern to match: null-terminated string + * 3. String to match against: null-terminated string + * + * The list of tests is terminated with a final '\0' instead of + * a glob_match result character. 
+ */ +static char const glob_tests[] __initconst = + /* Some basic tests */ + "1" "a\0" "a\0" + "0" "a\0" "b\0" + "0" "a\0" "aa\0" + "0" "a\0" "\0" + "1" "\0" "\0" + "0" "\0" "a\0" + /* Simple character class tests */ + "1" "[a]\0" "a\0" + "0" "[a]\0" "b\0" + "0" "[!a]\0" "a\0" + "1" "[!a]\0" "b\0" + "1" "[ab]\0" "a\0" + "1" "[ab]\0" "b\0" + "0" "[ab]\0" "c\0" + "1" "[!ab]\0" "c\0" + "1" "[a-c]\0" "b\0" + "0" "[a-c]\0" "d\0" + /* Corner cases in character class parsing */ + "1" "[a-c-e-g]\0" "-\0" + "0" "[a-c-e-g]\0" "d\0" + "1" "[a-c-e-g]\0" "f\0" + "1" "[]a-ceg-ik[]\0" "a\0" + "1" "[]a-ceg-ik[]\0" "]\0" + "1" "[]a-ceg-ik[]\0" "[\0" + "1" "[]a-ceg-ik[]\0" "h\0" + "0" "[]a-ceg-ik[]\0" "f\0" + "0" "[!]a-ceg-ik[]\0" "h\0" + "0" "[!]a-ceg-ik[]\0" "]\0" + "1" "[!]a-ceg-ik[]\0" "f\0" + /* Simple wild cards */ + "1" "?\0" "a\0" + "0" "?\0" "aa\0" + "0" "??\0" "a\0" + "1" "?x?\0" "axb\0" + "0" "?x?\0" "abx\0" + "0" "?x?\0" "xab\0" + /* Asterisk wild cards (backtracking) */ + "0" "*??\0" "a\0" + "1" "*??\0" "ab\0" + "1" "*??\0" "abc\0" + "1" "*??\0" "abcd\0" + "0" "??*\0" "a\0" + "1" "??*\0" "ab\0" + "1" "??*\0" "abc\0" + "1" "??*\0" "abcd\0" + "0" "?*?\0" "a\0" + "1" "?*?\0" "ab\0" + "1" "?*?\0" "abc\0" + "1" "?*?\0" "abcd\0" + "1" "*b\0" "b\0" + "1" "*b\0" "ab\0" + "0" "*b\0" "ba\0" + "1" "*b\0" "bb\0" + "1" "*b\0" "abb\0" + "1" "*b\0" "bab\0" + "1" "*bc\0" "abbc\0" + "1" "*bc\0" "bc\0" + "1" "*bc\0" "bbc\0" + "1" "*bc\0" "bcbc\0" + /* Multiple asterisks (complex backtracking) */ + "1" "*ac*\0" "abacadaeafag\0" + "1" "*ac*ae*ag*\0" "abacadaeafag\0" + "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" + "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" + "1" "*abcd*\0" "abcabcabcabcdefg\0" + "1" "*ab*cd*\0" "abcabcabcabcdefg\0" + "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" + "0" "*abcd*\0" "abcabcabcabcefg\0" + "0" "*ab*cd*\0" "abcabcabcabcefg\0"; + +static int __init glob_init(void) +{ + unsigned successes = 0; + unsigned n = 0; + char const *p = glob_tests; + static char const 
message[] __initconst = + KERN_INFO "glob: %u self-tests passed, %u failed\n"; + + /* + * Tests are jammed together in a string. The first byte is '1' + * or '0' to indicate the expected outcome, or '\0' to indicate the + * end of the tests. Then come two null-terminated strings: the + * pattern and the string to match it against. + */ + while (*p) { + bool expected = *p++ & 1; + char const *pat = p; + + p += strlen(p) + 1; + successes += test(pat, p, expected); + p += strlen(p) + 1; + n++; + } + + n -= successes; + printk(message, successes, n); + + /* What's the errno for "kernel bug detected"? Guess... */ + return n ? -ECANCELED : 0; +} + +/* We need a dummy exit function to allow unload */ +static void __exit glob_fini(void) { } + +module_init(glob_init); +module_exit(glob_fini); + +#endif /* CONFIG_GLOB_SELFTEST */ diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 000000000000..fea973f4bd57 --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,39 @@ +/* General purpose hashing library + * + * That's a start of a kernel hashing library, which can be extended + * with further algorithms in future. arch_fast_hash{2,}() will + * eventually resolve to an architecture optimized implementation. 
+ * + * Copyright 2013 Francesco Fusco <ffusco@redhat.com> + * Copyright 2013 Daniel Borkmann <dborkman@redhat.com> + * Copyright 2013 Thomas Graf <tgraf@redhat.com> + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include <linux/jhash.h> +#include <linux/hash.h> +#include <linux/cache.h> + +static struct fast_hash_ops arch_hash_ops __read_mostly = { + .hash = jhash, + .hash2 = jhash2, +}; + +u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash); + +u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash2(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash2); + +static int __init hashlib_init(void) +{ + setup_arch_fast_hash(&arch_hash_ops); + return 0; +} +early_initcall(hashlib_init); diff --git a/lib/hweight.c b/lib/hweight.c index b7d81ba143d1..9a5c1f221558 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -11,7 +11,7 @@ unsigned int __sw_hweight32(unsigned int w) { -#ifdef ARCH_HAS_FAST_MULTIPLIER +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; w = (w & 0x33333333) + ((w >> 2) & 0x33333333); w = (w + (w >> 4)) & 0x0f0f0f0f; @@ -49,7 +49,7 @@ unsigned long __sw_hweight64(__u64 w) return __sw_hweight32((unsigned int)(w >> 32)) + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 -#ifdef ARCH_HAS_FAST_MULTIPLIER +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; diff --git a/lib/idr.c b/lib/idr.c index bfe4db4e165f..e654aebd5f80 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -18,12 +18,6 @@ * pointer or what ever, we treat it as a (void *). You can pass this * id to a user for him to pass back at a later time. You then pass * that id to this code and it returns your pointer. - - * You can release ids at any time. 
When all ids are released, most of - * the memory is returned (we keep MAX_IDR_FREE) in a local pool so we - * don't need to go to the memory "store" during an id allocate, just - * so you don't need to be too concerned about locking and conflicts - * with the slab allocator. */ #ifndef TEST // to test in user space... @@ -151,7 +145,7 @@ static void idr_layer_rcu_free(struct rcu_head *head) static inline void free_layer(struct idr *idr, struct idr_layer *p) { - if (idr->hint && idr->hint == p) + if (idr->hint == p) RCU_INIT_POINTER(idr->hint, NULL); call_rcu(&p->rcu_head, idr_layer_rcu_free); } @@ -196,7 +190,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } } -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) +static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; @@ -207,7 +201,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) } return 1; } -EXPORT_SYMBOL(__idr_pre_get); /** * sub_alloc - try to allocate an id without growing the tree depth @@ -250,7 +243,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; /* if already at the top layer, we need to grow */ - if (id >= 1 << (idp->layers * IDR_BITS)) { + if (id > idr_max(idp->layers)) { *starting_id = id; return -EAGAIN; } @@ -374,20 +367,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, idr_mark_full(pa, id); } -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) -{ - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - int rv; - - rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); - if (rv < 0) - return rv == -ENOMEM ? 
-EAGAIN : rv; - - idr_fill_slot(idp, ptr, rv, pa); - *id = rv; - return 0; -} -EXPORT_SYMBOL(__idr_get_new_above); /** * idr_preload - preload for idr_alloc() @@ -548,7 +527,7 @@ static void sub_remove(struct idr *idp, int shift, int id) n = id & IDR_MASK; if (likely(p != NULL && test_bit(n, p->bitmap))) { __clear_bit(n, p->bitmap); - rcu_assign_pointer(p->ary[n], NULL); + RCU_INIT_POINTER(p->ary[n], NULL); to_free = NULL; while(*paa && ! --((**paa)->count)){ if (to_free) @@ -577,6 +556,11 @@ void idr_remove(struct idr *idp, int id) if (id < 0) return; + if (id > idr_max(idp->layers)) { + idr_remove_warning(id); + return; + } + sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && idp->top->ary[0]) { @@ -594,20 +578,10 @@ void idr_remove(struct idr *idp, int id) bitmap_clear(to_free->bitmap, 0, IDR_SIZE); free_layer(idp, to_free); } - while (idp->id_free_cnt >= MAX_IDR_FREE) { - p = get_from_free_list(idp); - /* - * Note: we don't call the rcu callback here, since the only - * layers that fall into the freelist are those that have been - * preallocated. - */ - kmem_cache_free(idr_layer_cache, p); - } - return; } EXPORT_SYMBOL(idr_remove); -void __idr_remove_all(struct idr *idp) +static void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -616,31 +590,31 @@ void __idr_remove_all(struct idr *idp) struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; - p = idp->top; - rcu_assign_pointer(idp->top, NULL); + *paa = idp->top; + RCU_INIT_POINTER(idp->top, NULL); max = idr_max(idp->layers); id = 0; while (id >= 0 && id <= max) { + p = *paa; while (n > IDR_BITS && p) { n -= IDR_BITS; - *paa++ = p; p = p->ary[(id >> n) & IDR_MASK]; + *++paa = p; } bt_mask = id; id += 1 << n; /* Get the highest bit that the above add changed from 0->1. 
*/ while (n < fls(id ^ bt_mask)) { - if (p) - free_layer(idp, p); + if (*paa) + free_layer(idp, *paa); n += IDR_BITS; - p = *--paa; + --paa; } } idp->layers = 0; } -EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree @@ -652,7 +626,7 @@ EXPORT_SYMBOL(__idr_remove_all); * idr_destroy(). * * A typical clean-up sequence for objects stored in an idr tree will use - * idr_for_each() to free all objects, if necessay, then idr_destroy() to + * idr_for_each() to free all objects, if necessary, then idr_destroy() to * free up the id mappings and cached idr_layers. */ void idr_destroy(struct idr *idp) @@ -719,15 +693,16 @@ int idr_for_each(struct idr *idp, struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; - p = rcu_dereference_raw(idp->top); + *paa = rcu_dereference_raw(idp->top); max = idr_max(idp->layers); id = 0; while (id >= 0 && id <= max) { + p = *paa; while (n > 0 && p) { n -= IDR_BITS; - *paa++ = p; p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + *++paa = p; } if (p) { @@ -739,7 +714,7 @@ int idr_for_each(struct idr *idp, id += 1 << n; while (n < fls(id)) { n += IDR_BITS; - p = *--paa; + --paa; } } @@ -767,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp) int n, max; /* find first ent */ - p = rcu_dereference_raw(idp->top); + p = *paa = rcu_dereference_raw(idp->top); if (!p) return NULL; n = (p->layer + 1) * IDR_BITS; max = idr_max(p->layer + 1); while (id >= 0 && id <= max) { + p = *paa; while (n > 0 && p) { n -= IDR_BITS; - *paa++ = p; p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + *++paa = p; } if (p) { @@ -795,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) id = round_up(id + 1, 1 << n); while (n < fls(id)) { n += IDR_BITS; - p = *--paa; + --paa; } } return NULL; @@ -825,14 +801,12 @@ void *idr_replace(struct idr *idp, void *ptr, int id) p = idp->top; if (!p) - return ERR_PTR(-EINVAL); - - n = (p->layer+1) * IDR_BITS; + return ERR_PTR(-ENOENT); - if (id >= (1 
<< n)) - return ERR_PTR(-EINVAL); + if (id > idr_max(p->layer + 1)) + return ERR_PTR(-ENOENT); - n -= IDR_BITS; + n = p->layer * IDR_BITS; while ((n > 0) && p) { p = p->ary[(id >> n) & IDR_MASK]; n -= IDR_BITS; @@ -869,6 +843,16 @@ void idr_init(struct idr *idp) } EXPORT_SYMBOL(idr_init); +static int idr_has_entry(int id, void *p, void *data) +{ + return 1; +} + +bool idr_is_empty(struct idr *idp) +{ + return !idr_for_each(idp, idr_has_entry, NULL); +} +EXPORT_SYMBOL(idr_is_empty); /** * DOC: IDA description @@ -1033,6 +1017,9 @@ void ida_remove(struct ida *ida, int id) int n; struct ida_bitmap *bitmap; + if (idr_id > idr_max(ida->idr.layers)) + goto err; + /* clear full bits while looking up the leaf idr_layer */ while ((shift > 0) && p) { n = (idr_id >> shift) & IDR_MASK; @@ -1048,7 +1035,7 @@ void ida_remove(struct ida *ida, int id) __clear_bit(n, p->bitmap); bitmap = (void *)p->ary[n]; - if (!test_bit(offset, bitmap->bitmap)) + if (!bitmap || !test_bit(offset, bitmap->bitmap)) goto err; /* update bitmap and remove it if empty */ diff --git a/lib/interval_tree.c b/lib/interval_tree.c index e6eb406f2d65..f367f9ad544c 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,6 +1,7 @@ #include <linux/init.h> #include <linux/interval_tree.h> #include <linux/interval_tree_generic.h> +#include <linux/module.h> #define START(node) ((node)->start) #define LAST(node) ((node)->last) @@ -8,3 +9,8 @@ INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, unsigned long, __subtree_last, START, LAST,, interval_tree) + +EXPORT_SYMBOL_GPL(interval_tree_insert); +EXPORT_SYMBOL_GPL(interval_tree_remove); +EXPORT_SYMBOL_GPL(interval_tree_iter_first); +EXPORT_SYMBOL_GPL(interval_tree_iter_next); diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test.c index 245900b98c8e..245900b98c8e 100644 --- a/lib/interval_tree_test_main.c +++ b/lib/interval_tree_test.c diff --git a/lib/iomap.c b/lib/iomap.c index 2c08f36862eb..fc3dcb4b238e 100644 --- a/lib/iomap.c +++ 
b/lib/iomap.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr) } EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* Hide the details if this is a MMIO or PIO address space and just do what diff --git a/lib/iovec.c b/lib/iovec.c index 454baa88bf27..df3abd1eaa4a 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -51,3 +51,62 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) return 0; } EXPORT_SYMBOL(memcpy_toiovec); + +/* + * Copy kernel to iovec. Returns -EFAULT on error. + */ + +int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, + int offset, int len) +{ + int copy; + for (; len > 0; ++iov) { + /* Skip over the finished iovecs */ + if (unlikely(offset >= iov->iov_len)) { + offset -= iov->iov_len; + continue; + } + copy = min_t(unsigned int, iov->iov_len - offset, len); + if (copy_to_user(iov->iov_base + offset, kdata, copy)) + return -EFAULT; + offset = 0; + kdata += copy; + len -= copy; + } + + return 0; +} +EXPORT_SYMBOL(memcpy_toiovecend); + +/* + * Copy iovec to kernel. Returns -EFAULT on error. + */ + +int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) +{ + /* No data? Done! 
*/ + if (len == 0) + return 0; + + /* Skip over the finished iovecs */ + while (offset >= iov->iov_len) { + offset -= iov->iov_len; + iov++; + } + + while (len > 0) { + u8 __user *base = iov->iov_base + offset; + int copy = min_t(unsigned int, len, iov->iov_len - offset); + + offset = 0; + if (copy_from_user(kdata, base, copy)) + return -EFAULT; + len -= copy; + kdata += copy; + iov++; + } + + return 0; +} +EXPORT_SYMBOL(memcpy_fromiovecend); diff --git a/lib/kfifo.c b/lib/kfifo.c index d79b9d222065..90ba1eb1df06 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r); unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) { - if (!nents) - BUG(); + BUG_ON(!nents); len = __kfifo_max_r(len, recsize); @@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r); unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize) { - if (!nents) - BUG(); + BUG_ON(!nents); len = __kfifo_max_r(len, recsize); diff --git a/lib/klist.c b/lib/klist.c index 358a368a2947..89b485a2a58d 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k) EXPORT_SYMBOL_GPL(klist_add_tail); /** - * klist_add_after - Init a klist_node and add it after an existing node + * klist_add_behind - Init a klist_node and add it after an existing node * @n: node we're adding. 
* @pos: node to put @n after */ -void klist_add_after(struct klist_node *n, struct klist_node *pos) +void klist_add_behind(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); @@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos) list_add(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } -EXPORT_SYMBOL_GPL(klist_add_after); +EXPORT_SYMBOL_GPL(klist_add_behind); /** * klist_add_before - Init a klist_node and add it before an existing node diff --git a/lib/kobject.c b/lib/kobject.c index 5b4b8886435e..58751bb80a7c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -13,11 +13,11 @@ */ #include <linux/kobject.h> -#include <linux/kobj_completion.h> #include <linux/string.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> +#include <linux/random.h> /** * kobject_namespace - return @kobj's namespace tag @@ -65,13 +65,17 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); + if (error) + return error; + + error = populate_dir(kobj); + if (error) { + sysfs_remove_dir(kobj); + return error; } /* @@ -80,7 +84,20 @@ static int create_dir(struct kobject *kobj) */ sysfs_get(kobj->sd); - return error; + /* + * If @kobj has ns_ops, its children need to be filtered based on + * their namespace tags. Enable namespace support on @kobj->sd. 
+ */ + ops = kobj_child_ns_ops(kobj); + if (ops) { + BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE); + BUG_ON(ops->type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(ops->type)); + + sysfs_enable_ns(kobj->sd); + } + + return 0; } static int get_kobj_path_length(struct kobject *kobj) @@ -247,8 +264,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, return 0; kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); - if (!kobj->name) + if (!kobj->name) { + kobj->name = old_name; return -ENOMEM; + } /* ewww... some of these buggers have '/' in the name ... */ while ((s = strchr(kobj->name, '/'))) @@ -346,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the - * kobject associted with the kset assigned to this kobject. If no kset + * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. 
* @@ -536,7 +555,7 @@ out: */ void kobject_del(struct kobject *kobj) { - struct sysfs_dirent *sd; + struct kernfs_node *sd; if (!kobj) return; @@ -625,10 +644,12 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", - kobject_name(kobj), kobj, __func__, kobj->parent); + unsigned long delay = HZ + HZ * (get_random_int() & 0x3); + pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", + kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); - schedule_delayed_work(&kobj->release, HZ); + + schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif @@ -758,55 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; - -/** - * kobj_completion_init - initialize a kobj_completion object. - * @kc: kobj_completion - * @ktype: type of kobject to initialize - * - * kobj_completion structures can be embedded within structures with different - * lifetime rules. During the release of the enclosing object, we can - * wait on the release of the kobject so that we don't free it while it's - * still busy. - */ -void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype) -{ - init_completion(&kc->kc_unregister); - kobject_init(&kc->kc_kobj, ktype); -} -EXPORT_SYMBOL_GPL(kobj_completion_init); - -/** - * kobj_completion_release - release a kobj_completion object - * @kobj: kobject embedded in kobj_completion - * - * Used with kobject_release to notify waiters that the kobject has been - * released. 
- */ -void kobj_completion_release(struct kobject *kobj) -{ - struct kobj_completion *kc = kobj_to_kobj_completion(kobj); - complete(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_release); - -/** - * kobj_completion_del_and_wait - release the kobject and wait for it - * @kc: kobj_completion object to release - * - * Delete the kobject from sysfs and drop the reference count. Then wait - * until any other outstanding references are also dropped. This routine - * is only necessary once other references may have been taken on the - * kobject. Typically this happens when the kobject has been published - * to sysfs via kobject_add. - */ -void kobj_completion_del_and_wait(struct kobj_completion *kc) -{ - kobject_del(&kc->kc_kobj); - kobject_put(&kc->kc_kobj); - wait_for_completion(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait); +EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register - initialize and add a kset. @@ -835,6 +808,7 @@ void kset_unregister(struct kset *k) { if (!k) return; + kobject_del(&k->kobj); kobject_put(&k->kobj); } diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 52e5abbc41db..9ebf9e20de53 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -29,7 +29,9 @@ u64 uevent_seqnum; +#ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; +#endif #ifdef CONFIG_NET struct uevent_sock { struct list_head list; @@ -88,11 +90,17 @@ out: #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { - struct kobject *kobj = data; + struct kobject *kobj = data, *ksobj; const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); - if (ops) { + if (!ops && kobj->kset) { + ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + if (ops && ops->netlink_ns && kobj->ktype->namespace) { const void *sock_ns, *ns; ns = kobj->ktype->namespace(kobj); sock_ns = ops->netlink_ns(dsk); @@ 
-103,6 +111,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) } #endif +#ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; @@ -118,6 +127,31 @@ static int kobj_usermode_filter(struct kobject *kobj) return 0; } +static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) +{ + int len; + + len = strlcpy(&env->buf[env->buflen], subsystem, + sizeof(env->buf) - env->buflen); + if (len >= (sizeof(env->buf) - env->buflen)) { + WARN(1, KERN_ERR "init_uevent_argv: buffer size too small\n"); + return -ENOMEM; + } + + env->argv[0] = uevent_helper; + env->argv[1] = &env->buf[env->buflen]; + env->argv[2] = NULL; + + env->buflen += len + 1; + return 0; +} + +static void cleanup_uevent_env(struct subprocess_info *info) +{ + kfree(info->data); +} +#endif + /** * kobject_uevent_env - send an uevent with environmental data * @@ -293,13 +327,11 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #endif mutex_unlock(&uevent_sock_mutex); +#ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { - char *argv [3]; + struct subprocess_info *info; - argv [0] = uevent_helper; - argv [1] = (char *)subsystem; - argv [2] = NULL; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; @@ -307,10 +339,20 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; + retval = init_uevent_argv(env, subsystem); + if (retval) + goto exit; - retval = call_usermodehelper(argv[0], argv, - env->envp, UMH_WAIT_EXEC); + retval = -ENOMEM; + info = call_usermodehelper_setup(env->argv[0], env->argv, + env->envp, GFP_KERNEL, + NULL, cleanup_uevent_env, env); + if (info) { + retval = call_usermodehelper_exec(info, UMH_NO_WAIT); + env = NULL; /* freed by cleanup_uevent_env */ + } } 
+#endif exit: kfree(devpath); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index f78ae0c0c4e2..ec8da78df9be 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; - rv &= ~KSTRTOX_OVERFLOW; if (rv == 0) return -EINVAL; s += rv; diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 244f5480c898..b3131f5cf8a2 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -62,10 +62,7 @@ EXPORT_SYMBOL(crc32c); static int __init libcrc32c_mod_init(void) { tfm = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - return 0; + return PTR_ERR_OR_ZERO(tfm); } static void __exit libcrc32c_mod_fini(void) diff --git a/lib/list_sort.c b/lib/list_sort.c index 1183fa70a44d..12bcba1c8612 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -1,3 +1,6 @@ + +#define pr_fmt(fmt) "list_sort_test: " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/list_sort.h> @@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv, struct list_head *a, struct list_head *b) { struct list_head *tail = head; + u8 count = 0; while (a && b) { /* if equal, take 'a' -- important for sort stability */ @@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv, * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. 
*/ - (*cmp)(priv, tail->next, tail->next); + if (unlikely(!(++count))) + (*cmp)(priv, tail->next, tail->next); tail->next->prev = tail; tail = tail->next; @@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head, } if (lev > max_lev) { if (unlikely(lev >= ARRAY_SIZE(part)-1)) { - printk_once(KERN_DEBUG "list passed to" - " list_sort() too long for" - " efficiency\n"); + printk_once(KERN_DEBUG "list too long for efficiency\n"); lev--; } max_lev = lev; @@ -168,27 +171,25 @@ static struct debug_el **elts __initdata; static int __init check(struct debug_el *ela, struct debug_el *elb) { if (ela->serial >= TEST_LIST_LEN) { - printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", - ela->serial); + pr_err("error: incorrect serial %d\n", ela->serial); return -EINVAL; } if (elb->serial >= TEST_LIST_LEN) { - printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", - elb->serial); + pr_err("error: incorrect serial %d\n", elb->serial); return -EINVAL; } if (elts[ela->serial] != ela || elts[elb->serial] != elb) { - printk(KERN_ERR "list_sort_test: error: phantom element\n"); + pr_err("error: phantom element\n"); return -EINVAL; } if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { - printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", - ela->poison1, ela->poison2); + pr_err("error: bad poison: %#x/%#x\n", + ela->poison1, ela->poison2); return -EINVAL; } if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { - printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", - elb->poison1, elb->poison2); + pr_err("error: bad poison: %#x/%#x\n", + elb->poison1, elb->poison2); return -EINVAL; } return 0; @@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b) static int __init list_sort_test(void) { - int i, count = 1, err = -EINVAL; + int i, count = 1, err = -ENOMEM; struct debug_el *el; - struct list_head *cur, *tmp; + struct list_head *cur; LIST_HEAD(head); - 
printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); + pr_debug("start testing list_sort()\n"); - elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); + elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); if (!elts) { - printk(KERN_ERR "list_sort_test: error: cannot allocate " - "memory\n"); - goto exit; + pr_err("error: cannot allocate memory\n"); + return err; } for (i = 0; i < TEST_LIST_LEN; i++) { el = kmalloc(sizeof(*el), GFP_KERNEL); if (!el) { - printk(KERN_ERR "list_sort_test: error: cannot " - "allocate memory\n"); + pr_err("error: cannot allocate memory\n"); goto exit; } /* force some equivalencies */ @@ -239,52 +238,52 @@ static int __init list_sort_test(void) list_sort(NULL, &head, cmp); + err = -EINVAL; for (cur = head.next; cur->next != &head; cur = cur->next) { struct debug_el *el1; int cmp_result; if (cur->next->prev != cur) { - printk(KERN_ERR "list_sort_test: error: list is " - "corrupted\n"); + pr_err("error: list is corrupted\n"); goto exit; } cmp_result = cmp(NULL, cur, cur->next); if (cmp_result > 0) { - printk(KERN_ERR "list_sort_test: error: list is not " - "sorted\n"); + pr_err("error: list is not sorted\n"); goto exit; } el = container_of(cur, struct debug_el, list); el1 = container_of(cur->next, struct debug_el, list); if (cmp_result == 0 && el->serial >= el1->serial) { - printk(KERN_ERR "list_sort_test: error: order of " - "equivalent elements not preserved\n"); + pr_err("error: order of equivalent elements not " + "preserved\n"); goto exit; } if (check(el, el1)) { - printk(KERN_ERR "list_sort_test: error: element check " - "failed\n"); + pr_err("error: element check failed\n"); goto exit; } count++; } + if (head.prev != cur) { + pr_err("error: list is corrupted\n"); + goto exit; + } + if (count != TEST_LIST_LEN) { - printk(KERN_ERR "list_sort_test: error: bad list length %d", - count); + pr_err("error: bad list length %d", count); goto exit; } err = 0; exit: + for (i = 0; i < TEST_LIST_LEN; i++) + 
kfree(elts[i]); kfree(elts); - list_for_each_safe(cur, tmp, &head) { - list_del(cur); - kfree(container_of(cur, struct debug_el, list)); - } return err; } module_init(list_sort_test); diff --git a/lib/lockref.c b/lib/lockref.c index f07a40d33871..d2233de9a86e 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,6 +1,5 @@ #include <linux/export.h> #include <linux/lockref.h> -#include <linux/mutex.h> #if USE_CMPXCHG_LOCKREF @@ -29,7 +28,7 @@ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ - arch_mutex_cpu_relax(); \ + cpu_relax_lowlatency(); \ } \ } while (0) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 4a83ecd03650..852c81e3ba9a 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -169,7 +169,7 @@ out_fail: return NULL; } -void lc_free_by_index(struct lru_cache *lc, unsigned i) +static void lc_free_by_index(struct lru_cache *lc, unsigned i) { void *p = lc->lc_element[i]; WARN_ON(!p); @@ -643,9 +643,10 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index) * lc_dump - Dump a complete LRU cache to seq in textual form. * @lc: the lru cache to operate on * @seq: the &struct seq_file pointer to seq_printf into - * @utext: user supplied "heading" or other info + * @utext: user supplied additional "heading" or other info * @detail: function pointer the user may provide to dump further details - * of the object the lc_element is embedded in. + * of the object the lc_element is embedded in. May be NULL. + * Note: a leading space ' ' and trailing newline '\n' is implied. 
*/ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, void (*detail) (struct seq_file *, struct lc_element *)) @@ -654,16 +655,18 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext struct lc_element *e; int i; - seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext); for (i = 0; i < nr_elements; i++) { e = lc_element_by_index(lc, i); - if (e->lc_number == LC_FREE) { - seq_printf(seq, "\t%2d: FREE\n", i); - } else { - seq_printf(seq, "\t%2d: %4u %4u ", i, - e->lc_number, e->refcnt); + if (e->lc_number != e->lc_new_number) + seq_printf(seq, "\t%5d: %6d %8d %6d ", + i, e->lc_number, e->lc_new_number, e->refcnt); + else + seq_printf(seq, "\t%5d: %6d %-8s %6d ", + i, e->lc_number, "-\"-", e->refcnt); + if (detail) detail(seq, e); - } + seq_putc(seq, '\n'); } } diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index df6839e3ce08..7a85967060a5 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -72,6 +72,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) len = *ip++; for (; len == 255; length += 255) len = *ip++; + if (unlikely(length > (size_t)(length + len))) + goto _output_error; length += len; } @@ -106,6 +108,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) if (length == ML_MASK) { for (; *ip == 255; length += 255) ip++; + if (unlikely(length > (size_t)(length + *ip))) + goto _output_error; length += *ip++; } @@ -155,7 +159,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* write overflow error detected */ _output_error: - return (int) (-(((char *)ip) - source)); + return -1; } static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, @@ -188,6 +192,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, int s = 255; while ((ip < iend) && (s == 255)) { s = *ip++; + if (unlikely(length > 
(size_t)(length + s))) + goto _output_error; length += s; } } @@ -228,6 +234,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, if (length == ML_MASK) { while (ip < iend) { int s = *ip++; + if (unlikely(length > (size_t)(length + s))) + goto _output_error; length += s; if (s == 255) continue; @@ -280,7 +288,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, /* write overflow error detected */ _output_error: - return (int) (-(((char *) ip) - source)); + return -1; } int lz4_decompress(const unsigned char *src, size_t *src_len, diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 569985d522d5..a1c387f6afba 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -25,6 +25,16 @@ #define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun #define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun +/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base + * count without overflowing an integer. The multiply will overflow when + * multiplying 255 by more than MAXINT/255. The sum will overflow earlier + * depending on the base count. Since the base count is taken from a u8 + * and a few bits, it is safe to assume that it will always be lower than + * or equal to 2*255, thus we can always prevent any overflow by accepting + * two less 255 steps. See Documentation/lzo.txt for more information. 
+ */ +#define MAX_255_COUNT ((((size_t)~0) / 255) - 2) + int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len) { @@ -55,12 +65,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (t < 16) { if (likely(state == 0)) { if (unlikely(t == 0)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 15 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 15 + *ip++; } t += 3; copy_literal_run: @@ -116,12 +133,19 @@ copy_literal_run: } else if (t >= 32) { t = (t & 31) + (3 - 1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 31 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 31 + *ip++; NEED_IP(2); } m_pos = op - 1; @@ -134,12 +158,19 @@ copy_literal_run: m_pos -= (t & 8) << 11; t = (t & 7) + (3 - 1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 7 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; NEED_IP(2); } next = get_unaligned_le16(ip); diff --git a/lib/net_utils.c b/lib/net_utils.c index 2e3c52c8d050..148fc6e99ef6 100644 --- a/lib/net_utils.c +++ b/lib/net_utils.c @@ -3,24 +3,24 @@ #include <linux/ctype.h> #include <linux/kernel.h> -int mac_pton(const char *s, u8 *mac) +bool mac_pton(const char *s, u8 *mac) { int i; /* XX:XX:XX:XX:XX:XX */ if (strlen(s) < 3 * ETH_ALEN - 1) - return 0; + return false; /* Don't dirty result unless string is valid MAC. 
*/ for (i = 0; i < ETH_ALEN; i++) { if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) - return 0; + return false; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') - return 0; + return false; } for (i = 0; i < ETH_ALEN; i++) { mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); } - return 1; + return true; } EXPORT_SYMBOL(mac_pton); diff --git a/lib/nlattr.c b/lib/nlattr.c index 18eca7809b08..9c3e85ff0a6c 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -136,6 +136,7 @@ int nla_validate(const struct nlattr *head, int len, int maxtype, errout: return err; } +EXPORT_SYMBOL(nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -162,6 +163,7 @@ nla_policy_len(const struct nla_policy *p, int n) return len; } +EXPORT_SYMBOL(nla_policy_len); /** * nla_parse - Parse a stream of attributes into a tb buffer @@ -201,13 +203,14 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, } if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); err = 0; errout: return err; } +EXPORT_SYMBOL(nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes @@ -228,6 +231,7 @@ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) return NULL; } +EXPORT_SYMBOL(nla_find); /** * nla_strlcpy - Copy string attribute payload into a sized buffer @@ -258,6 +262,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) return srclen; } +EXPORT_SYMBOL(nla_strlcpy); /** * nla_memcpy - Copy a netlink attribute into another memory area @@ -278,6 +283,7 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) return minlen; } +EXPORT_SYMBOL(nla_memcpy); /** * nla_memcmp - Compare an attribute with sized memory area @@ -295,6 +301,7 @@ int nla_memcmp(const struct nlattr *nla, const void *data, return d; } 
+EXPORT_SYMBOL(nla_memcmp); /** * nla_strcmp - Compare a string attribute against a string @@ -303,14 +310,21 @@ int nla_memcmp(const struct nlattr *nla, const void *data, */ int nla_strcmp(const struct nlattr *nla, const char *str) { - int len = strlen(str) + 1; - int d = nla_len(nla) - len; + int len = strlen(str); + char *buf = nla_data(nla); + int attrlen = nla_len(nla); + int d; + if (attrlen > 0 && buf[attrlen - 1] == '\0') + attrlen--; + + d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); return d; } +EXPORT_SYMBOL(nla_strcmp); #ifdef CONFIG_NET /** @@ -496,12 +510,3 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data) } EXPORT_SYMBOL(nla_append); #endif - -EXPORT_SYMBOL(nla_validate); -EXPORT_SYMBOL(nla_policy_len); -EXPORT_SYMBOL(nla_parse); -EXPORT_SYMBOL(nla_find); -EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(nla_memcpy); -EXPORT_SYMBOL(nla_memcmp); -EXPORT_SYMBOL(nla_strcmp); diff --git a/lib/parser.c b/lib/parser.c index 807b2aaa33fa..b6d11631231b 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[]) return p->token; } +EXPORT_SYMBOL(match_token); /** * match_number: scan a number in the given base from a substring_t @@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } +EXPORT_SYMBOL(match_int); /** * match_octal: - scan an octal representation of an integer from a substring_t @@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } +EXPORT_SYMBOL(match_octal); /** * match_hex: - scan a hex representation of an integer from a substring_t @@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } +EXPORT_SYMBOL(match_hex); + +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string 
@str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. + */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} +EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer @@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) } return ret; } +EXPORT_SYMBOL(match_strlcpy); /** * match_strdup: - allocate a new string with the contents of a substring_t @@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s) match_strlcpy(p, s, sz); return p; } - -EXPORT_SYMBOL(match_token); -EXPORT_SYMBOL(match_int); -EXPORT_SYMBOL(match_octal); -EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 1a53d497a8c5..6111bcb28376 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -1,6 +1,8 @@ #define pr_fmt(fmt) "%s: " fmt "\n", __func__ #include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> #include <linux/percpu-refcount.h> /* @@ -11,8 +13,8 @@ * percpu counters will all sum to the correct value * * (More precisely: because moduler arithmatic is commutative the sum of all the - * pcpu_count vars will be equal to what it would have been if all the gets and - * puts were done to a single integer, even if some of the percpu integers + * percpu_count vars will be equal to what it 
would have been if all the gets + * and puts were done to a single integer, even if some of the percpu integers * overflow or underflow). * * The real trick to implementing percpu refcounts is shutdown. We can't detect @@ -25,85 +27,110 @@ * works. * * Converting to non percpu mode is done with some RCUish stuff in - * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t - * can't hit 0 before we've added up all the percpu refs. + * percpu_ref_kill. Additionally, we need a bias value so that the + * atomic_long_t can't hit 0 before we've added up all the percpu refs. */ -#define PCPU_COUNT_BIAS (1U << 31) +#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1)) + +static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq); + +static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref) +{ + return (unsigned long __percpu *) + (ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD); +} /** * percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @flags: PERCPU_REF_INIT_* flags + * @gfp: allocation mask to use * - * Initializes the refcount in single atomic counter mode with a refcount of 1; - * analagous to atomic_set(ref, 1). + * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a + * refcount of 1; analagous to atomic_long_set(ref, 1). See the + * definitions of PERCPU_REF_INIT_* flags for flag behaviors. * * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). 
*/ -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + unsigned int flags, gfp_t gfp) { - atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); + size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS, + __alignof__(unsigned long)); + unsigned long start_count = 0; - ref->pcpu_count = alloc_percpu(unsigned); - if (!ref->pcpu_count) + ref->percpu_count_ptr = (unsigned long) + __alloc_percpu_gfp(sizeof(unsigned long), align, gfp); + if (!ref->percpu_count_ptr) return -ENOMEM; + ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; + + if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + else + start_count += PERCPU_COUNT_BIAS; + + if (flags & PERCPU_REF_INIT_DEAD) + ref->percpu_count_ptr |= __PERCPU_REF_DEAD; + else + start_count++; + + atomic_long_set(&ref->count, start_count); + ref->release = release; return 0; } EXPORT_SYMBOL_GPL(percpu_ref_init); /** - * percpu_ref_cancel_init - cancel percpu_ref_init() - * @ref: percpu_ref to cancel init for - * - * Once a percpu_ref is initialized, its destruction is initiated by - * percpu_ref_kill() and completes asynchronously, which can be painful to - * do when destroying a half-constructed object in init failure path. + * percpu_ref_exit - undo percpu_ref_init() + * @ref: percpu_ref to exit * - * This function destroys @ref without invoking @ref->release and the - * memory area containing it can be freed immediately on return. To - * prevent accidental misuse, it's required that @ref has finished - * percpu_ref_init(), whether successful or not, but never used. - * - * The weird name and usage restriction are to prevent people from using - * this function by mistake for normal shutdown instead of - * percpu_ref_kill(). + * This function exits @ref. The caller is responsible for ensuring that + * @ref is no longer in active use. 
The usual places to invoke this + * function from are the @ref->release() callback or in init failure path + * where percpu_ref_init() succeeded but other parts of the initialization + * of the embedding object failed. */ -void percpu_ref_cancel_init(struct percpu_ref *ref) +void percpu_ref_exit(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count = ref->pcpu_count; - int cpu; - - WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); - if (pcpu_count) { - for_each_possible_cpu(cpu) - WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); - free_percpu(ref->pcpu_count); + if (percpu_count) { + free_percpu(percpu_count); + ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD; } } -EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); +EXPORT_SYMBOL_GPL(percpu_ref_exit); -static void percpu_ref_kill_rcu(struct rcu_head *rcu) +static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); - unsigned __percpu *pcpu_count = ref->pcpu_count; - unsigned count = 0; - int cpu; - /* Mask out PCPU_REF_DEAD */ - pcpu_count = (unsigned __percpu *) - (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); + ref->confirm_switch(ref); + ref->confirm_switch = NULL; + wake_up_all(&percpu_ref_switch_waitq); - for_each_possible_cpu(cpu) - count += *per_cpu_ptr(pcpu_count, cpu); + /* drop ref from percpu_ref_switch_to_atomic() */ + percpu_ref_put(ref); +} + +static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) +{ + struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); + unsigned long count = 0; + int cpu; - free_percpu(pcpu_count); + for_each_possible_cpu(cpu) + count += *per_cpu_ptr(percpu_count, cpu); - pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); + pr_debug("global %ld percpu %ld", + atomic_long_read(&ref->count), (long)count); /* * It's crucial that we 
sum the percpu counters _before_ adding the sum @@ -117,18 +144,137 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * reaching 0 before we add the percpu counts. But doing it at the same * time is equivalent and saves us atomic operations: */ + atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); + + WARN_ONCE(atomic_long_read(&ref->count) <= 0, + "percpu ref (%pf) <= 0 (%ld) after switching to atomic", + ref->release, atomic_long_read(&ref->count)); + + /* @ref is viewed as dead on all CPUs, send out switch confirmation */ + percpu_ref_call_confirm_rcu(rcu); +} + +static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref) +{ +} - atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); +static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch) +{ + if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) { + /* switching from percpu to atomic */ + ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; + + /* + * Non-NULL ->confirm_switch is used to indicate that + * switching is in progress. Use noop one if unspecified. + */ + WARN_ON_ONCE(ref->confirm_switch); + ref->confirm_switch = + confirm_switch ?: percpu_ref_noop_confirm_switch; + + percpu_ref_get(ref); /* put after confirmation */ + call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + } else if (confirm_switch) { + /* + * Somebody already set ATOMIC. Switching may still be in + * progress. @confirm_switch must be invoked after the + * switching is complete and a full sched RCU grace period + * has passed. Wait synchronously for the previous + * switching and schedule @confirm_switch invocation. 
+ */ + wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); + ref->confirm_switch = confirm_switch; - /* @ref is viewed as dead on all CPUs, send out kill confirmation */ - if (ref->confirm_kill) - ref->confirm_kill(ref); + percpu_ref_get(ref); /* put after confirmation */ + call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu); + } +} + +/** + * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode + * @ref: percpu_ref to switch to atomic mode + * @confirm_switch: optional confirmation callback + * + * There's no reason to use this function for the usual reference counting. + * Use percpu_ref_kill[_and_confirm](). + * + * Schedule switching of @ref to atomic mode. All its percpu counts will + * be collected to the main atomic counter. On completion, when all CPUs + * are guaraneed to be in atomic mode, @confirm_switch, which may not + * block, is invoked. This function may be invoked concurrently with all + * the get/put operations and can safely be mixed with kill and reinit + * operations. Note that @ref will stay in atomic mode across kill/reinit + * cycles until percpu_ref_switch_to_percpu() is called. + * + * This function normally doesn't block and can be called from any context + * but it may block if @confirm_kill is specified and @ref is already in + * the process of switching to atomic mode. In such cases, @confirm_switch + * will be invoked after the switching is complete. + * + * Due to the way percpu_ref is implemented, @confirm_switch will be called + * after at least one full sched RCU grace period has passed but this is an + * implementation detail and must not be depended upon. 
+ */ +void percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch) +{ + ref->force_atomic = true; + __percpu_ref_switch_to_atomic(ref, confirm_switch); +} + +static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) +{ + unsigned long __percpu *percpu_count = percpu_count_ptr(ref); + int cpu; + + BUG_ON(!percpu_count); + + if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) + return; + + wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); + + atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* - * Now we're in single atomic_t mode with a consistent refcount, so it's - * safe to drop our initial ref: + * Restore per-cpu operation. smp_store_release() is paired with + * smp_read_barrier_depends() in __ref_is_percpu() and guarantees + * that the zeroing is visible to all percpu accesses which can see + * the following __PERCPU_REF_ATOMIC clearing. */ - percpu_ref_put(ref); + for_each_possible_cpu(cpu) + *per_cpu_ptr(percpu_count, cpu) = 0; + + smp_store_release(&ref->percpu_count_ptr, + ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC); +} + +/** + * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode + * @ref: percpu_ref to switch to percpu mode + * + * There's no reason to use this function for the usual reference counting. + * To re-use an expired ref, use percpu_ref_reinit(). + * + * Switch @ref to percpu mode. This function may be invoked concurrently + * with all the get/put operations and can safely be mixed with kill and + * reinit operations. This function reverses the sticky atomic state set + * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is + * dying or dead, the actual switching takes place on the following + * percpu_ref_reinit(). + * + * This function normally doesn't block and can be called from any context + * but it may block if @ref is in the process of switching to atomic mode + * by percpu_ref_switch_atomic(). 
+ */ +void percpu_ref_switch_to_percpu(struct percpu_ref *ref) +{ + ref->force_atomic = false; + + /* a dying or dead ref can't be switched to percpu mode w/o reinit */ + if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) + __percpu_ref_switch_to_percpu(ref); } /** @@ -138,24 +284,48 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) * * Equivalent to percpu_ref_kill() but also schedules kill confirmation if * @confirm_kill is not NULL. @confirm_kill, which may not block, will be - * called after @ref is seen as dead from all CPUs - all further - * invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget() - * for more details. + * called after @ref is seen as dead from all CPUs at which point all + * further invocations of percpu_ref_tryget_live() will fail. See + * percpu_ref_tryget_live() for details. + * + * This function normally doesn't block and can be called from any context + * but it may block if @confirm_kill is specified and @ref is in the + * process of switching to atomic mode by percpu_ref_switch_atomic(). * - * Due to the way percpu_ref is implemented, @confirm_kill will be called - * after at least one full RCU grace period has passed but this is an - * implementation detail and callers must not depend on it. + * Due to the way percpu_ref is implemented, @confirm_switch will be called + * after at least one full sched RCU grace period has passed but this is an + * implementation detail and must not be depended upon. 
*/ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, - "percpu_ref_kill() called more than once!\n"); + WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, + "%s called more than once on %pf!", __func__, ref->release); - ref->pcpu_count = (unsigned __percpu *) - (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); - ref->confirm_kill = confirm_kill; - - call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); + ref->percpu_count_ptr |= __PERCPU_REF_DEAD; + __percpu_ref_switch_to_atomic(ref, confirm_kill); + percpu_ref_put(ref); } EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); + +/** + * percpu_ref_reinit - re-initialize a percpu refcount + * @ref: perpcu_ref to re-initialize + * + * Re-initialize @ref so that it's in the same state as when it finished + * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been + * initialized successfully and reached 0 but not exited. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. 
+ */ +void percpu_ref_reinit(struct percpu_ref *ref) +{ + WARN_ON_ONCE(!percpu_ref_is_zero(ref)); + + ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD; + percpu_ref_get(ref); + if (!ref->force_atomic) + __percpu_ref_switch_to_percpu(ref); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 7473ee3b4ee7..48144cdae819 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -82,10 +82,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; + __this_cpu_sub(*fbc->counters, count - amount); raw_spin_unlock_irqrestore(&fbc->lock, flags); - __this_cpu_write(*fbc->counters, 0); } else { - __this_cpu_write(*fbc->counters, count); + this_cpu_add(*fbc->counters, amount); } preempt_enable(); } @@ -112,13 +112,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { + unsigned long flags __maybe_unused; + raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; @@ -126,9 +128,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc->list); - spin_lock(&percpu_counters_lock); + spin_lock_irqsave(&percpu_counters_lock, flags); list_add(&fbc->list, &percpu_counters); - spin_unlock(&percpu_counters_lock); + spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } @@ -136,15 +138,17 @@ EXPORT_SYMBOL(__percpu_counter_init); void percpu_counter_destroy(struct percpu_counter *fbc) { + unsigned long flags __maybe_unused; + if (!fbc->counters) return; debug_percpu_counter_deactivate(fbc); 
#ifdef CONFIG_HOTPLUG_CPU - spin_lock(&percpu_counters_lock); + spin_lock_irqsave(&percpu_counters_lock, flags); list_del(&fbc->list); - spin_unlock(&percpu_counters_lock); + spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc->counters); fbc->counters = NULL; @@ -169,11 +173,11 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, struct percpu_counter *fbc; compute_batch_value(); - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; cpu = (unsigned long)hcpu; - spin_lock(&percpu_counters_lock); + spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; unsigned long flags; @@ -184,7 +188,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, *pcount = 0; raw_spin_unlock_irqrestore(&fbc->lock, flags); } - spin_unlock(&percpu_counters_lock); + spin_unlock_irq(&percpu_counters_lock); #endif return NOTIFY_OK; } diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 9d054bf91d0f..93d145e5539c 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr, /* * Try to steal tags from a remote cpu's percpu freelist. * - * We first check how many percpu freelists have tags - we don't steal tags - * unless enough percpu freelists have tags on them that it's possible more than - * half the total tags could be stuck on remote percpu freelists. 
+ * We first check how many percpu freelists have tags * * Then we iterate through the cpus until we find some tags - we don't attempt * to find the "best" cpu to steal from, to keep cacheline bouncing to a @@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool, struct percpu_ida_cpu *remote; for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; - cpus_have_tags--) { + cpus_have_tags; cpus_have_tags--) { cpu = cpumask_next(cpu, &pool->cpus_have_tags); if (cpu >= nr_cpu_ids) { @@ -132,22 +129,22 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from - * @gfp: gfp flags + * @state: task state for prepare_to_wait * * Returns a tag - an integer in the range [0..nr_tags) (passed to * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed - * __GFP_WAIT, of course). + * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * - * Will not fail if passed __GFP_WAIT. + * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. 
*/ -int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +int percpu_ida_alloc(struct percpu_ida *pool, int state) { DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; @@ -174,7 +171,8 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) * * global lock held and irqs disabled, don't need percpu lock */ - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + if (state != TASK_RUNNING) + prepare_to_wait(&pool->wait, &wait, state); if (!tags->nr_free) alloc_global_tags(pool, tags); @@ -191,16 +189,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) spin_unlock(&pool->lock); local_irq_restore(flags); - if (tag >= 0 || !(gfp & __GFP_WAIT)) + if (tag >= 0 || state == TASK_RUNNING) break; + if (signal_pending_state(state, current)) { + tag = -ERESTARTSYS; + break; + } + schedule(); local_irq_save(flags); tags = this_cpu_ptr(pool->tag_cpu); } + if (state != TASK_RUNNING) + finish_wait(&pool->wait, &wait); - finish_wait(&pool->wait, &wait); return tag; } EXPORT_SYMBOL_GPL(percpu_ida_alloc); diff --git a/lib/plist.c b/lib/plist.c index 1ebc95f7a46f..d408e774b746 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head) plist_check_head(head); } +/** + * plist_requeue - Requeue @node at end of same-prio entries. + * + * This is essentially an optimized plist_del() followed by + * plist_add(). It moves an entry already in the plist to + * after any other same-priority entries. 
+ * + * @node: &struct plist_node pointer - entry to be moved + * @head: &struct plist_head pointer - list head + */ +void plist_requeue(struct plist_node *node, struct plist_head *head) +{ + struct plist_node *iter; + struct list_head *node_next = &head->node_list; + + plist_check_head(head); + BUG_ON(plist_head_empty(head)); + BUG_ON(plist_node_empty(node)); + + if (node == plist_last(head)) + return; + + iter = plist_next(node); + + if (node->prio != iter->prio) + return; + + plist_del(node, head); + + plist_for_each_continue(iter, head) { + if (node->prio != iter->prio) { + node_next = &iter->node_list; + break; + } + } + list_add_tail(&node->node_list, node_next); + + plist_check_head(head); +} + #ifdef CONFIG_DEBUG_PI_LIST #include <linux/sched.h> #include <linux/module.h> @@ -170,12 +210,20 @@ static void __init plist_test_check(int nr_expect) BUG_ON(prio_pos->prio_list.next != &first->prio_list); } +static void __init plist_test_requeue(struct plist_node *node) +{ + plist_requeue(node, &test_head); + + if (node != plist_last(&test_head)) + BUG_ON(node->prio == plist_next(node)->prio); +} + static int __init plist_test(void) { int nr_expect = 0, i, loop; unsigned int r = local_clock(); - pr_debug("start plist test\n"); + printk(KERN_DEBUG "start plist test\n"); plist_head_init(&test_head); for (i = 0; i < ARRAY_SIZE(test_node); i++) plist_node_init(test_node + i, 0); @@ -193,6 +241,10 @@ static int __init plist_test(void) nr_expect--; } plist_test_check(nr_expect); + if (!plist_node_empty(test_node + i)) { + plist_test_requeue(test_node + i); + plist_test_check(nr_expect); + } } for (i = 0; i < ARRAY_SIZE(test_node); i++) { @@ -203,7 +255,7 @@ static int __init plist_test(void) plist_test_check(nr_expect); } - pr_debug("end plist test\n"); + printk(KERN_DEBUG "end plist test\n"); return 0; } diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b56..6f724298f67a 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include 
<linux/proportions.h> #include <linux/rcupdate.h> -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7811ed3b4e70..3291a8e37490 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -27,6 +27,7 @@ #include <linux/radix-tree.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <linux/kmemleak.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/string.h> @@ -35,33 +36,6 @@ #include <linux/hardirq.h> /* in_interrupt() */ -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 
4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) - -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int height; /* Height from the bottom */ - unsigned int count; - union { - struct radix_tree_node *parent; /* Used when ascending tree */ - struct rcu_head rcu_head; /* Used when freeing node */ - }; - void __rcu *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ - RADIX_TREE_MAP_SHIFT)) - /* * The height_to_maxindex array needs to be one deeper than the maximum * path as height 0 holds only 1 entry. @@ -221,12 +195,17 @@ radix_tree_node_alloc(struct radix_tree_root *root) * succeed in getting a node here (and never reach * kmem_cache_alloc) */ - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes[rtp->nr - 1]; rtp->nodes[rtp->nr - 1] = NULL; rtp->nr--; } + /* + * Update the allocation stack trace as this is more useful + * for debugging. 
+ */ + kmemleak_update_trace(ret); } if (ret == NULL) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); @@ -277,14 +256,14 @@ static int __radix_tree_preload(gfp_t gfp_mask) int ret = -ENOMEM; preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { preempt_enable(); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < ARRAY_SIZE(rtp->nodes)) rtp->nodes[rtp->nr++] = node; else @@ -369,7 +348,8 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) /* Increase the height. */ newheight = root->height+1; - node->height = newheight; + BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK); + node->path = newheight; node->count = 1; node->parent = NULL; slot = root->rnode; @@ -387,23 +367,28 @@ out: } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key - * @item: item to insert + * @nodep: returns node + * @slotp: returns slot * - * Insert an item into the radix tree at position @index. + * Create, if necessary, and return the node and slot for an item + * at position @index in the radix tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. + * + * Returns -ENOMEM, or 0 for success. 
*/ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_create(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; + unsigned int height, shift, offset; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); - /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -422,11 +407,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; - slot->height = height; + slot->path = height; slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], slot); node->count++; + slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); } @@ -439,16 +425,42 @@ int radix_tree_insert(struct radix_tree_root *root, height--; } - if (slot != NULL) + if (nodep) + *nodep = node; + if (slotp) + *slotp = node ? node->slots + offset : (void **)&root->rnode; + return 0; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. 
+ */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node; + void **slot; + int error; + + BUG_ON(radix_tree_is_indirect_ptr(item)); + + error = __radix_tree_create(root, index, &node, &slot); + if (error) + return error; + if (*slot != NULL) return -EEXIST; + rcu_assign_pointer(*slot, item); if (node) { node->count++; - rcu_assign_pointer(node->slots[offset], item); - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); } else { - rcu_assign_pointer(root->rnode, item); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -457,15 +469,26 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -/* - * is_slot == 1 : search for the slot. - * is_slot == 0 : search for the node. +/** + * __radix_tree_lookup - lookup an item in a radix tree + * @root: radix tree root + * @index: index key + * @nodep: returns node + * @slotp: returns slot + * + * Lookup and return the item at position @index in the radix + * tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. */ -static void *radix_tree_lookup_element(struct radix_tree_root *root, - unsigned long index, int is_slot) +void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { + struct radix_tree_node *node, *parent; unsigned int height, shift; - struct radix_tree_node *node, **slot; + void **slot; node = rcu_dereference_raw(root->rnode); if (node == NULL) @@ -474,19 +497,24 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(node)) { if (index > 0) return NULL; - return is_slot ? 
(void *)&root->rnode : node; + + if (nodep) + *nodep = NULL; + if (slotp) + *slotp = (void **)&root->rnode; + return node; } node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; do { - slot = (struct radix_tree_node **) - (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + parent = node; + slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK); node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; @@ -495,7 +523,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, height--; } while (height > 0); - return is_slot ? (void *)slot : indirect_to_ptr(node); + if (nodep) + *nodep = parent; + if (slotp) + *slotp = slot; + return node; } /** @@ -513,7 +545,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, */ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { - return (void **)radix_tree_lookup_element(root, index, 1); + void **slot; + + if (!__radix_tree_lookup(root, index, NULL, &slot)) + return NULL; + return slot; } EXPORT_SYMBOL(radix_tree_lookup_slot); @@ -531,7 +567,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - return radix_tree_lookup_element(root, index, 0); + return __radix_tree_lookup(root, index, NULL, NULL); } EXPORT_SYMBOL(radix_tree_lookup); @@ -676,7 +712,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, return (index == 0); node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return 0; @@ -713,7 +749,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset; + unsigned long index, offset, height; if ((flags 
& RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -744,7 +780,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT; + height = rnode->path & RADIX_TREE_HEIGHT_MASK; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; offset = index >> shift; /* Index outside of the tree */ @@ -946,81 +983,6 @@ next: } EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); - -/** - * radix_tree_next_hole - find the next hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest - * indexed hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'return - index >= max_scan' - * will be true). In rare cases of index wrap-around, 0 will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 5, then subsequently a hole is created at index 10, - * radix_tree_next_hole covering both indexes may return 10 if called - * under rcu_read_lock. - */ -unsigned long radix_tree_next_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index++; - if (index == 0) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_next_hole); - -/** - * radix_tree_prev_hole - find the prev hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search backwards in the range [max(index-max_scan+1, 0), index] - * for the first hole. 
- * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 10, then subsequently a hole is created at index 5, - * radix_tree_prev_hole covering both indexes may return 5 if called under - * rcu_read_lock. - */ -unsigned long radix_tree_prev_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index--; - if (index == ULONG_MAX) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_prev_hole); - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root @@ -1189,7 +1151,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned int shift, height; unsigned long i; - height = slot->height; + height = slot->path & RADIX_TREE_HEIGHT_MASK; shift = (height-1) * RADIX_TREE_MAP_SHIFT; for ( ; height > 1; height--) { @@ -1252,9 +1214,12 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->height); - if (cur_index > max_index) + max_index = radix_tree_maxindex(node->path & + RADIX_TREE_HEIGHT_MASK); + if (cur_index > max_index) { + rcu_read_unlock(); break; + } cur_index = __locate(node, item, cur_index, &found_index); rcu_read_unlock(); @@ -1335,48 +1300,89 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) } /** - * radix_tree_delete - delete an item from a radix tree + * __radix_tree_delete_node - try to free node after clearing a slot + * @root: 
radix tree root + * @node: node containing @index + * + * After clearing the slot at @index in @node from radix tree + * rooted at @root, call this function to attempt freeing the + * node and shrinking the tree. + * + * Returns %true if @node was freed, %false otherwise. + */ +bool __radix_tree_delete_node(struct radix_tree_root *root, + struct radix_tree_node *node) +{ + bool deleted = false; + + do { + struct radix_tree_node *parent; + + if (node->count) { + if (node == indirect_to_ptr(root->rnode)) { + radix_tree_shrink(root); + if (root->height == 0) + deleted = true; + } + return deleted; + } + + parent = node->parent; + if (parent) { + unsigned int offset; + + offset = node->path >> RADIX_TREE_HEIGHT_SHIFT; + parent->slots[offset] = NULL; + parent->count--; + } else { + root_tag_clear_all(root); + root->height = 0; + root->rnode = NULL; + } + + radix_tree_node_free(node); + deleted = true; + + node = parent; + } while (node); + + return deleted; +} + +/** + * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root * @index: index key + * @item: expected item * - * Remove the item at @index from the radix tree rooted at @root. + * Remove @item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns the address of the deleted item, or NULL if it was not present + * or the entry at the given @index was not @item. 
*/ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +void *radix_tree_delete_item(struct radix_tree_root *root, + unsigned long index, void *item) { - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot = NULL; - struct radix_tree_node *to_free; - unsigned int height, shift; + struct radix_tree_node *node; + unsigned int offset; + void **slot; + void *entry; int tag; - int uninitialized_var(offset); - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; + entry = __radix_tree_lookup(root, index, &node, &slot); + if (!entry) + return NULL; - slot = root->rnode; - if (height == 0) { + if (item && entry != item) + return NULL; + + if (!node) { root_tag_clear_all(root); root->rnode = NULL; - goto out; + return entry; } - slot = indirect_to_ptr(slot); - shift = height * RADIX_TREE_MAP_SHIFT; - do { - if (slot == NULL) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = slot->slots[offset]; - } while (shift); - - if (slot == NULL) - goto out; + offset = index & RADIX_TREE_MAP_MASK; /* * Clear all tags associated with the item to be deleted. @@ -1387,40 +1393,27 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } - to_free = NULL; - /* Now free the nodes we do not need anymore */ - while (node) { - node->slots[offset] = NULL; - node->count--; - /* - * Queue the node for deferred freeing after the - * last reference to it disappears (set NULL, above). 
- */ - if (to_free) - radix_tree_node_free(to_free); - - if (node->count) { - if (node == indirect_to_ptr(root->rnode)) - radix_tree_shrink(root); - goto out; - } - - /* Node with zero slots in use so free it */ - to_free = node; + node->slots[offset] = NULL; + node->count--; - index >>= RADIX_TREE_MAP_SHIFT; - offset = index & RADIX_TREE_MAP_MASK; - node = node->parent; - } + __radix_tree_delete_node(root, node); - root_tag_clear_all(root); - root->height = 0; - root->rnode = NULL; - if (to_free) - radix_tree_node_free(to_free); + return entry; +} +EXPORT_SYMBOL(radix_tree_delete_item); -out: - return slot; +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_delete_item(root, index, NULL); } EXPORT_SYMBOL(radix_tree_delete); @@ -1436,9 +1429,12 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node) +radix_tree_node_ctor(void *arg) { - memset(node, 0, sizeof(struct radix_tree_node)); + struct radix_tree_node *node = arg; + + memset(node, 0, sizeof(*node)); + INIT_LIST_HEAD(&node->private_list); } static __init unsigned long __maxindex(unsigned int height) diff --git a/lib/random32.c b/lib/random32.c index 1e5b2df44291..0bee183fa18f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -1,37 +1,35 @@ /* - This is a maximally equidistributed combined Tausworthe generator - based on code from GNU Scientific Library 1.5 (30 Jun 2004) - - lfsr113 version: - - x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) - - s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) - s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) - s3_{n+1} = (((s3_n & 
4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) - s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) - - The period of this generator is about 2^113 (see erratum paper). - - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps - ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps - - There is an erratum in the paper "Tables of Maximally - Equidistributed Combined LFSR Generators", Mathematics of - Computation, 68, 225 (1999), 261--269: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps - - ... the k_j most significant bits of z_j must be non- - zero, for each j. (Note: this restriction also applies to the - computer code given in [4], but was mistakenly not mentioned in - that paper.) - - This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15, s4 > 127. - -*/ + * This is a maximally equidistributed combined Tausworthe generator + * based on code from GNU Scientific Library 1.5 (30 Jun 2004) + * + * lfsr113 version: + * + * x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) + * + * s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) + * s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) + * s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) + * s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) + * + * The period of this generator is about 2^113 (see erratum paper). + * + * From: P. 
L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + * Generators", Mathematics of Computation, 65, 213 (1996), 203--213: + * http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + * ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + * + * There is an erratum in the paper "Tables of Maximally Equidistributed + * Combined LFSR Generators", Mathematics of Computation, 68, 225 (1999), + * 261--269: http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + * + * ... the k_j most significant bits of z_j must be non-zero, + * for each j. (Note: this restriction also applies to the + * computer code given in [4], but was mistakenly not mentioned + * in that paper.) + * + * This affects the seeding procedure by imposing the requirement + * s1 > 1, s2 > 7, s3 > 15, s4 > 127. + */ #include <linux/types.h> #include <linux/percpu.h> @@ -39,9 +37,14 @@ #include <linux/jiffies.h> #include <linux/random.h> #include <linux/sched.h> +#include <asm/unaligned.h> #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); +#else +static inline void prandom_state_selftest(void) +{ +} #endif static DEFINE_PER_CPU(struct rnd_state, net_rand_state); @@ -55,8 +58,7 @@ static DEFINE_PER_CPU(struct rnd_state, net_rand_state); */ u32 prandom_u32_state(struct rnd_state *state) { -#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) - +#define TAUSWORTHE(s, a, b, c, d) ((s & c) << d) ^ (((s << a) ^ s) >> b) state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); @@ -75,15 +77,17 @@ EXPORT_SYMBOL(prandom_u32_state); */ u32 prandom_u32(void) { - unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = prandom_u32_state(state); + u32 res; + + res = prandom_u32_state(state); put_cpu_var(state); - return r; + + return res; } EXPORT_SYMBOL(prandom_u32); -/* +/** * prandom_bytes_state - 
get the requested number of pseudo-random bytes * * @state: pointer to state structure holding seeded state. @@ -93,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32); * This is used for pseudo-randomness with no outside seeding. * For more random results, use prandom_bytes(). */ -void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { - unsigned char *p = buf; - int i; + u8 *ptr = buf; - for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { - u32 random = prandom_u32_state(state); - int j; - - for (j = 0; j < sizeof(u32); j++) { - p[i + j] = random; - random >>= BITS_PER_BYTE; - } + while (bytes >= sizeof(u32)) { + put_unaligned(prandom_u32_state(state), (u32 *) ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); } - if (i < bytes) { - u32 random = prandom_u32_state(state); - for (; i < bytes; i++) { - p[i] = random; - random >>= BITS_PER_BYTE; - } + if (bytes > 0) { + u32 rem = prandom_u32_state(state); + do { + *ptr++ = (u8) rem; + bytes--; + rem >>= BITS_PER_BYTE; + } while (bytes > 0); } } EXPORT_SYMBOL(prandom_bytes_state); @@ -123,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state); * @buf: where to copy the pseudo-random bytes to * @bytes: the requested number of bytes */ -void prandom_bytes(void *buf, int bytes) +void prandom_bytes(void *buf, size_t bytes) { struct rnd_state *state = &get_cpu_var(net_rand_state); @@ -134,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes); static void prandom_warmup(struct rnd_state *state) { - /* Calling RNG ten times to satify recurrence condition */ + /* Calling RNG ten times to satisfy recurrence condition */ prandom_u32_state(state); prandom_u32_state(state); prandom_u32_state(state); @@ -147,21 +147,25 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static void prandom_seed_very_weak(struct rnd_state *state, u32 seed) +static u32 __extract_hwseed(void) { - /* Note: This sort of seeding is ONLY used in 
test cases and - * during boot at the time from core_initcall until late_initcall - * as we don't have a stronger entropy source available yet. - * After late_initcall, we reseed entire state, we have to (!), - * otherwise an attacker just needs to search 32 bit space to - * probe for our internal 128 bit state if he knows a couple - * of prandom32 outputs! - */ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ - state->s1 = __seed(LCG(seed), 2U); - state->s2 = __seed(LCG(state->s1), 8U); - state->s3 = __seed(LCG(state->s2), 16U); - state->s4 = __seed(LCG(state->s3), 128U); + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); } /** @@ -194,21 +198,22 @@ static int __init prandom_init(void) { int i; -#ifdef CONFIG_RANDOM32_SELFTEST prandom_state_selftest(); -#endif for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); + u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy()); + prandom_seed_early(state, weak_seed, true); prandom_warmup(state); } + return 0; } core_initcall(prandom_init); static void __prandom_timer(unsigned long dontcare); + static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0); static void __prandom_timer(unsigned long dontcare) @@ -220,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare) prandom_seed(entropy); /* reseed every ~60 seconds, in [40 .. 
80) interval with slack */ - expires = 40 + (prandom_u32() % 40); + expires = 40 + prandom_u32_max(40); seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); add_timer(&seed_timer); @@ -244,10 +249,22 @@ static void __prandom_reseed(bool late) static bool latch = false; static DEFINE_SPINLOCK(lock); + /* Asking for random bytes might result in bytes getting + * moved into the nonblocking pool and thus marking it + * as initialized. In this case we would double back into + * this function and attempt to do a late reseed. + * Ignore the pointless attempt to reseed again if we're + * already waiting for bytes when the nonblocking pool + * got initialized. + */ + /* only allow initial seeding (late == false) once */ - spin_lock_irqsave(&lock, flags); + if (!spin_trylock_irqsave(&lock, flags)) + return; + if (latch && !late) goto out; + latch = true; for_each_possible_cpu(i) { @@ -406,7 +423,7 @@ static void __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test1); i++) { struct rnd_state state; - prandom_seed_very_weak(&state, test1[i].seed); + prandom_seed_early(&state, test1[i].seed, false); prandom_warmup(&state); if (test1[i].result != prandom_u32_state(&state)) @@ -421,7 +438,7 @@ static void __init prandom_state_selftest(void) for (i = 0; i < ARRAY_SIZE(test2); i++) { struct rnd_state state; - prandom_seed_very_weak(&state, test2[i].seed); + prandom_seed_early(&state, test2[i].seed, false); prandom_warmup(&state); for (j = 0; j < test2[i].iteration - 1; j++) diff --git a/lib/rbtree.c b/lib/rbtree.c index 65f4effd117f..c16c81a3d430 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -101,7 +101,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ / \ * p u --> P U * / / - * n N + * n n * * However, since g's parent might be red, and * 4) does not allow this, we need to recurse diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 31dd4ccd3baa..8b3c9dc88262 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -8,8 
+8,8 @@ #define CHECK_LOOPS 100 struct test_node { - struct rb_node rb; u32 key; + struct rb_node rb; /* following fields used for testing augmented rbtree functionality */ u32 val; @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder_foreach(int nr_nodes) +{ + struct test_node *cur, *n; + int count = 0; + rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check_postorder(int nr_nodes) { struct rb_node *rb; @@ -148,6 +158,7 @@ static void check(int nr_nodes) WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); check_postorder(nr_nodes); + check_postorder_foreach(nr_nodes); } static void check_augmented(int nr_nodes) diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d0..464152410c51 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include <linux/kernel.h> #include <asm/div64.h> #include <linux/reciprocal_div.h> #include <linux/export.h> -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); diff --git a/lib/rhashtable.c b/lib/rhashtable.c new file mode 100644 index 000000000000..081be3ba9ea8 --- /dev/null +++ b/lib/rhashtable.c @@ -0,0 +1,794 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * Based on the following paper: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * 
Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/log2.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/hash.h> +#include <linux/random.h> +#include <linux/rhashtable.h> + +#define HASH_DEFAULT_SIZE 64UL +#define HASH_MIN_SIZE 4UL + +#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +{ + return ht->p.mutex_is_held(); +} +EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); +#endif + +static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) +{ + return (void *) he - ht->p.head_offset; +} + +static u32 __hashfn(const struct rhashtable *ht, const void *key, + u32 len, u32 hsize) +{ + u32 h; + + h = ht->p.hashfn(key, len, ht->p.hash_rnd); + + return h & (hsize - 1); +} + +/** + * rhashtable_hashfn - compute hash for key of given length + * @ht: hash table to compute for + * @key: pointer to key + * @len: length of key + * + * Computes the hash value using the hash function provided in the 'hashfn' + * of struct rhashtable_params. The returned value is guaranteed to be + * smaller than the number of buckets in the hash table. 
+ */ +u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return __hashfn(ht, key, len, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_hashfn); + +static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +{ + if (unlikely(!ht->p.key_len)) { + u32 h; + + h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + + return h & (hsize - 1); + } + + return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); +} + +/** + * rhashtable_obj_hashfn - compute hash for hashed object + * @ht: hash table to compute for + * @ptr: pointer to hashed object + * + * Computes the hash value using the hash function `hashfn` respectively + * 'obj_hashfn' depending on whether the hash table is set up to work with + * a fixed length key. The returned value is guaranteed to be smaller than + * the number of buckets in the hash table. + */ +u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return obj_hashfn(ht, ptr, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); + +static u32 head_hashfn(const struct rhashtable *ht, + const struct rhash_head *he, u32 hsize) +{ + return obj_hashfn(ht, rht_obj(ht, he), hsize); +} + +static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags) +{ + struct bucket_table *tbl; + size_t size; + + size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); + tbl = kzalloc(size, flags); + if (tbl == NULL) + tbl = vzalloc(size); + + if (tbl == NULL) + return NULL; + + tbl->size = nbuckets; + + return tbl; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + kvfree(tbl); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +{ + /* Expand table when exceeding 75% load */ + return 
ht->nelems > (new_size / 4 * 3); +} +EXPORT_SYMBOL_GPL(rht_grow_above_75); + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +{ + /* Shrink table beneath 30% load */ + return ht->nelems < (new_size * 3 / 10); +} +EXPORT_SYMBOL_GPL(rht_shrink_below_30); + +static void hashtable_chain_unzip(const struct rhashtable *ht, + const struct bucket_table *new_tbl, + struct bucket_table *old_tbl, size_t n) +{ + struct rhash_head *he, *p, *next; + unsigned int h; + + /* Old bucket empty, no work needed. */ + p = rht_dereference(old_tbl->buckets[n], ht); + if (!p) + return; + + /* Advance the old bucket pointer one or more times until it + * reaches a node that doesn't hash to the same bucket as the + * previous node p. Call the previous node p; + */ + h = head_hashfn(ht, p, new_tbl->size); + rht_for_each(he, p->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) != h) + break; + p = he; + } + RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + + /* Find the subsequent node which does hash to the same + * bucket as node P, or NULL if no such node exists. + */ + next = NULL; + if (he) { + rht_for_each(he, he->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) == h) { + next = he; + break; + } + } + } + + /* Set p's next pointer to that subsequent node pointer, + * bypassing the nodes which do not hash to p's bucket + */ + RCU_INIT_POINTER(p->next, next); +} + +/** + * rhashtable_expand - Expand hash table while allowing concurrent lookups + * @ht: the hash table to expand + * @flags: allocation flags + * + * A secondary bucket array is allocated and the hash entries are migrated + * while keeping them on both lists until the end of the RCU grace period. + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. 
+ * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. + */ +int rhashtable_expand(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct rhash_head *he; + unsigned int i, h; + bool complete; + + ASSERT_RHT_MUTEX(ht); + + if (ht->p.max_shift && ht->shift >= ht->p.max_shift) + return 0; + + new_tbl = bucket_table_alloc(old_tbl->size * 2, flags); + if (new_tbl == NULL) + return -ENOMEM; + + ht->shift++; + + /* For each new bucket, search the corresponding old bucket + * for the first entry that hashes to the new bucket, and + * link the new bucket to that entry. Since all the entries + * which will end up in the new bucket appear in the same + * old bucket, this constructs an entirely valid new hash + * table, but with multiple buckets "zipped" together into a + * single imprecise chain. + */ + for (i = 0; i < new_tbl->size; i++) { + h = i & (old_tbl->size - 1); + rht_for_each(he, old_tbl->buckets[h], ht) { + if (head_hashfn(ht, he, new_tbl->size) == i) { + RCU_INIT_POINTER(new_tbl->buckets[i], he); + break; + } + } + } + + /* Publish the new table pointer. Lookups may now traverse + * the new table, but they will not benefit from any + * additional efficiency until later steps unzip the buckets. + */ + rcu_assign_pointer(ht->tbl, new_tbl); + + /* Unzip interleaved hash chains */ + do { + /* Wait for readers. All new readers will see the new + * table, and thus no references to the old table will + * remain. + */ + synchronize_rcu(); + + /* For each bucket in the old table (each of which + * contains items from multiple buckets of the new + * table): ... 
+ */ + complete = true; + for (i = 0; i < old_tbl->size; i++) { + hashtable_chain_unzip(ht, new_tbl, old_tbl, i); + if (old_tbl->buckets[i] != NULL) + complete = false; + } + } while (!complete); + + bucket_table_free(old_tbl); + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_expand); + +/** + * rhashtable_shrink - Shrink hash table while allowing concurrent lookups + * @ht: the hash table to shrink + * @flags: allocation flags + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. + */ +int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + unsigned int i; + + ASSERT_RHT_MUTEX(ht); + + if (ht->shift <= ht->p.min_shift) + return 0; + + ntbl = bucket_table_alloc(tbl->size / 2, flags); + if (ntbl == NULL) + return -ENOMEM; + + ht->shift--; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (i = 0; i < ntbl->size; i++) { + ntbl->buckets[i] = tbl->buckets[i]; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (pprev = &ntbl->buckets[i]; *pprev != NULL; + pprev = &rht_dereference(*pprev, ht)->next) + ; + RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + } + + /* Publish the new, valid hash table */ + rcu_assign_pointer(ht->tbl, ntbl); + + /* Wait for readers. No new readers will have references to the + * old hash table. 
+ */ + synchronize_rcu(); + + bucket_table_free(tbl); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_shrink); + +/** + * rhashtable_insert - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Will automatically grow the table via rhashtable_expand() if the + * grow_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + u32 hash; + + ASSERT_RHT_MUTEX(ht); + + hash = head_hashfn(ht, obj, tbl->size); + RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); + rcu_assign_pointer(tbl->buckets[hash], obj); + ht->nelems++; + + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + rhashtable_expand(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_insert); + +/** + * rhashtable_remove_pprev - remove object from hash table given previous element + * @ht: hash table + * @obj: pointer to hash head inside object + * @pprev: pointer to previous element + * @flags: allocation flags (table expansion) + * + * Identical to rhashtable_remove() but caller is already aware of the element + * in front of the element to be deleted. This is in particular useful for + * deletion when combined with walking or lookup.
+ */ +void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, + struct rhash_head __rcu **pprev, gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + + ASSERT_RHT_MUTEX(ht); + + RCU_INIT_POINTER(*pprev, obj->next); + ht->nelems--; + + if (ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); + +/** + * rhashtable_remove - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Since the hash chain is singly linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerably slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_shrink() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + struct rhash_head *he; + u32 h; + + ASSERT_RHT_MUTEX(ht); + + h = head_hashfn(ht, obj, tbl->size); + + pprev = &tbl->buckets[h]; + rht_for_each(he, tbl->buckets[h], ht) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rhashtable_remove_pprev(ht, he, pprev, flags); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(rhashtable_remove); + +/** + * rhashtable_lookup - lookup key in hash table + * @ht: hash table + * @key: pointer to key + * + * Computes the hash value for the key and traverses the bucket chain looking + * for an entry with an identical key. The first matching entry is returned.
+ * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + */ +void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + u32 h; + + BUG_ON(!ht->p.key_len); + + h = __hashfn(ht, key, ht->p.key_len, tbl->size); + rht_for_each_rcu(he, tbl->buckets[h], ht) { + if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, + ht->p.key_len)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup); + +/** + * rhashtable_lookup_compare - search hash table with compare function + * @ht: hash table + * @hash: hash value of desired entry + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Traverses the bucket chain behind the provided hash value and calls the + * specified compare function for each entry. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + * + * Returns the first entry on which the compare function returned true.
+ */ +void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, + bool (*compare)(void *, void *), void *arg) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + + if (unlikely(hash >= tbl->size)) + return NULL; + + rht_for_each_rcu(he, tbl->buckets[hash], ht) { + if (!compare(rht_obj(ht, he), arg)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); + +static size_t rounded_hashtable_size(struct rhashtable_params *params) +{ + return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + 1UL << params->min_shift); +} + +/** + * rhashtable_init - initialize a new hash table + * @ht: hash table to be initialized + * @params: configuration parameters + * + * Initializes a new hash table based on the provided configuration + * parameters. A table can be configured either with a variable or + * fixed length key: + * + * Configuration Example 1: Fixed length keys + * struct test_obj { + * int key; + * void * my_member; + * struct rhash_head node; + * }; + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .key_offset = offsetof(struct test_obj, key), + * .key_len = sizeof(int), + * .hashfn = arch_fast_hash, + * .mutex_is_held = &my_mutex_is_held, + * }; + * + * Configuration Example 2: Variable length keys + * struct test_obj { + * [...] + * struct rhash_head node; + * }; + * + * u32 my_hash_fn(const void *data, u32 seed) + * { + * struct test_obj *obj = data; + * + * return [... 
hash ...]; + * } + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .hashfn = arch_fast_hash, + * .obj_hashfn = my_hash_fn, + * .mutex_is_held = &my_mutex_is_held, + * }; + */ +int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +{ + struct bucket_table *tbl; + size_t size; + + size = HASH_DEFAULT_SIZE; + + if ((params->key_len && !params->hashfn) || + (!params->key_len && !params->obj_hashfn)) + return -EINVAL; + + params->min_shift = max_t(size_t, params->min_shift, + ilog2(HASH_MIN_SIZE)); + + if (params->nelem_hint) + size = rounded_hashtable_size(params); + + tbl = bucket_table_alloc(size, GFP_KERNEL); + if (tbl == NULL) + return -ENOMEM; + + memset(ht, 0, sizeof(*ht)); + ht->shift = ilog2(tbl->size); + memcpy(&ht->p, params, sizeof(*params)); + RCU_INIT_POINTER(ht->tbl, tbl); + + if (!ht->p.hash_rnd) + get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_init); + +/** + * rhashtable_destroy - destroy hash table + * @ht: the hash table to destroy + * + * Frees the bucket array. This function is not rcu safe, therefore the caller + * has to make sure that no resizing may happen by unpublishing the hashtable + * and waiting for the quiescent cycle before releasing the bucket array. 
+ */ +void rhashtable_destroy(const struct rhashtable *ht) +{ + bucket_table_free(ht->tbl); +} +EXPORT_SYMBOL_GPL(rhashtable_destroy); + +/************************************************************************** + * Self Test + **************************************************************************/ + +#ifdef CONFIG_TEST_RHASHTABLE + +#define TEST_HT_SIZE 8 +#define TEST_ENTRIES 2048 +#define TEST_PTR ((void *) 0xdeadbeef) +#define TEST_NEXPANDS 4 + +static int test_mutex_is_held(void) +{ + return 1; +} + +struct test_obj { + void *ptr; + int value; + struct rhash_head node; +}; + +static int __init test_rht_lookup(struct rhashtable *ht) +{ + unsigned int i; + + for (i = 0; i < TEST_ENTRIES * 2; i++) { + struct test_obj *obj; + bool expected = !(i % 2); + u32 key = i; + + obj = rhashtable_lookup(ht, &key); + + if (expected && !obj) { + pr_warn("Test failed: Could not find key %u\n", key); + return -ENOENT; + } else if (!expected && obj) { + pr_warn("Test failed: Unexpected entry found for key %u\n", + key); + return -EEXIST; + } else if (expected && obj) { + if (obj->ptr != TEST_PTR || obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", + obj->ptr, TEST_PTR, obj->value, i); + return -EINVAL; + } + } + } + + return 0; +} + +static void test_bucket_stats(struct rhashtable *ht, + struct bucket_table *tbl, + bool quiet) +{ + unsigned int cnt, i, total = 0; + struct test_obj *obj; + + for (i = 0; i < tbl->size; i++) { + cnt = 0; + + if (!quiet) + pr_info(" [%#4x/%zu]", i, tbl->size); + + rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { + cnt++; + total++; + if (!quiet) + pr_cont(" [%p],", obj); + } + + if (!quiet) + pr_cont("\n [%#x] first element: %p, chain length: %u\n", + i, tbl->buckets[i], cnt); + } + + pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", + total, ht->nelems, TEST_ENTRIES); +} + +static int __init test_rhashtable(struct rhashtable *ht) +{ + struct bucket_table *tbl; + struct test_obj *obj, 
*next; + int err; + unsigned int i; + + /* + * Insertion Test: + * Insert TEST_ENTRIES into table with all keys even numbers + */ + pr_info(" Adding %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + struct test_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + err = -ENOMEM; + goto error; + } + + obj->ptr = TEST_PTR; + obj->value = i * 2; + + rhashtable_insert(ht, &obj->node, GFP_KERNEL); + } + + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->tbl, ht); + test_bucket_stats(ht, tbl, true); + test_rht_lookup(ht); + rcu_read_unlock(); + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table expansion iteration %u...\n", i); + rhashtable_expand(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table shrinkage iteration %u...\n", i); + rhashtable_shrink(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + pr_info(" Deleting %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + u32 key = i * 2; + + obj = rhashtable_lookup(ht, &key); + BUG_ON(!obj); + + rhashtable_remove(ht, &obj->node, GFP_KERNEL); + kfree(obj); + } + + return 0; + +error: + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) + kfree(obj); + + return err; +} + +static int __init test_rht_init(void) +{ + struct rhashtable ht; + struct rhashtable_params params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = arch_fast_hash, + .mutex_is_held = &test_mutex_is_held, + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; + int err; + + pr_info("Running resizable hashtable tests...\n"); + + err = rhashtable_init(&ht, 
&params); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + return err; + } + + err = test_rhashtable(&ht); + + rhashtable_destroy(&ht); + + return err; +} + +subsys_initcall(test_rht_init); + +#endif /* CONFIG_TEST_RHASHTABLE */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d16fa295ae1d..9cdf62f8accd 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents); **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) { -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN struct scatterlist *ret = &sgl[nents - 1]; #else struct scatterlist *sg, *ret = NULL; @@ -165,6 +165,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * __sg_free_table - Free a previously mapped sg table * @table: The sg table header to use * @max_ents: The maximum number of entries per single scatterlist + * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk * @free_fn: Free function * * Description: @@ -174,7 +175,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * **/ void __sg_free_table(struct sg_table *table, unsigned int max_ents, - sg_free_fn *free_fn) + bool skip_first_chunk, sg_free_fn *free_fn) { struct scatterlist *sgl, *next; @@ -202,7 +203,10 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, } table->orig_nents -= sg_size; - free_fn(sgl, alloc_size); + if (!skip_first_chunk) { + free_fn(sgl, alloc_size); + skip_first_chunk = false; + } sgl = next; } @@ -217,7 +221,7 @@ EXPORT_SYMBOL(__sg_free_table); **/ void sg_free_table(struct sg_table *table) { - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); } EXPORT_SYMBOL(sg_free_table); @@ -241,8 +245,8 @@ EXPORT_SYMBOL(sg_free_table); * **/ int __sg_alloc_table(struct sg_table *table, unsigned int nents, - unsigned int max_ents, gfp_t gfp_mask, - sg_alloc_fn *alloc_fn) + unsigned
int max_ents, struct scatterlist *first_chunk, + gfp_t gfp_mask, sg_alloc_fn *alloc_fn) { struct scatterlist *sg, *prv; unsigned int left; @@ -251,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (nents == 0) return -EINVAL; -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif @@ -269,7 +273,12 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, left -= sg_size; - sg = alloc_fn(alloc_size, gfp_mask); + if (first_chunk) { + sg = first_chunk; + first_chunk = NULL; + } else { + sg = alloc_fn(alloc_size, gfp_mask); + } if (unlikely(!sg)) { /* * Adjust entry count to reflect that the last @@ -324,9 +333,9 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) int ret; ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, - gfp_mask, sg_kmalloc); + NULL, gfp_mask, sg_kmalloc); if (unlikely(ret)) - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); return ret; } @@ -495,7 +504,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * true if @miter contains the valid mapping. false if end of sg * list is reached. 
*/ -static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) +bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) { sg_miter_stop(miter); @@ -513,6 +522,7 @@ static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) return true; } +EXPORT_SYMBOL(sg_miter_skip); /** * sg_miter_next - proceed mapping iterator to the next mapping diff --git a/lib/show_mem.c b/lib/show_mem.c index 5847a4921b8e..09225796991a 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -17,9 +17,6 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_online_pgdat(pgdat) { unsigned long flags; int zoneid; @@ -46,4 +43,7 @@ void show_mem(unsigned int filter) printk("%lu pages in pagetable cache\n", quicklist_total_size()); #endif +#ifdef CONFIG_MEMORY_FAILURE + printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); +#endif } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 04abe53f12a1..1afec32de6f2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,7 +7,8 @@ #include <linux/kallsyms.h> #include <linux/sched.h> -notrace unsigned int debug_smp_processor_id(void) +notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -38,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void) if (!printk_ratelimit()) goto out_enable; - printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " - "code: %s/%d\n", - preempt_count() - 1, current->comm, current->pid); + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", + what1, what2, preempt_count() - 1, current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); dump_stack(); @@ -50,5 +51,14 @@ out: return this_cpu; } +notrace unsigned int debug_smp_processor_id(void) +{ + return 
check_preemption_disabled("smp_processor_id", ""); +} EXPORT_SYMBOL(debug_smp_processor_id); +notrace void __this_cpu_preempt_check(const char *op) +{ + check_preemption_disabled("__this_cpu_", op); +} +EXPORT_SYMBOL(__this_cpu_preempt_check); diff --git a/lib/string.c b/lib/string.c index e5878de4f101..f3c6ff596414 100644 --- a/lib/string.c +++ b/lib/string.c @@ -107,7 +107,7 @@ EXPORT_SYMBOL(strcpy); #ifndef __HAVE_ARCH_STRNCPY /** - * strncpy - Copy a length-limited, %NUL-terminated string + * strncpy - Copy a length-limited, C-string * @dest: Where to copy the string to * @src: Where to copy the string from * @count: The maximum number of bytes to copy @@ -136,7 +136,7 @@ EXPORT_SYMBOL(strncpy); #ifndef __HAVE_ARCH_STRLCPY /** - * strlcpy - Copy a %NUL terminated string into a sized buffer + * strlcpy - Copy a C-string into a sized buffer * @dest: Where to copy the string to * @src: Where to copy the string from * @size: size of destination buffer @@ -182,7 +182,7 @@ EXPORT_SYMBOL(strcat); #ifndef __HAVE_ARCH_STRNCAT /** - * strncat - Append a length-limited, %NUL-terminated string to another + * strncat - Append a length-limited, C-string to another * @dest: The string to be appended to * @src: The string to append to it * @count: The maximum numbers of bytes to copy @@ -211,7 +211,7 @@ EXPORT_SYMBOL(strncat); #ifndef __HAVE_ARCH_STRLCAT /** - * strlcat - Append a length-limited, %NUL-terminated string to another + * strlcat - Append a length-limited, C-string to another * @dest: The string to be appended to * @src: The string to append to it * @count: The size of the destination buffer. @@ -301,6 +301,24 @@ char *strchr(const char *s, int c) EXPORT_SYMBOL(strchr); #endif +#ifndef __HAVE_ARCH_STRCHRNUL +/** + * strchrnul - Find and return a character in a string, or end of string + * @s: The string to be searched + * @c: The character to search for + * + * Returns pointer to first occurrence of 'c' in s. 
If c is not found, then + * return a pointer to the null byte at the end of s. + */ +char *strchrnul(const char *s, int c) +{ + while (*s && *s != (char)c) + s++; + return (char *)s; +} +EXPORT_SYMBOL(strchrnul); +#endif + #ifndef __HAVE_ARCH_STRRCHR /** * strrchr - Find the last occurrence of a character in a string @@ -648,7 +666,7 @@ EXPORT_SYMBOL(memmove); * @count: The size of the area. */ #undef memcmp -int memcmp(const void *cs, const void *ct, size_t count) +__visible int memcmp(const void *cs, const void *ct, size_t count) { const unsigned char *su1, *su2; int res = 0; @@ -789,9 +807,9 @@ void *memchr_inv(const void *start, int c, size_t bytes) return check_bytes8(start, value, bytes); value64 = value; -#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 +#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 value64 *= 0x0101010101010101; -#elif defined(ARCH_HAS_FAST_MULTIPLIER) +#elif defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) value64 *= 0x01010101; value64 |= value64 << 32; #else diff --git a/lib/string_helpers.c b/lib/string_helpers.c index ed5c1454dd62..29033f319aea 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -25,12 +25,15 @@ int string_get_size(u64 size, const enum string_size_units units, char *buf, int len) { - static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", - "EB", "ZB", "YB", NULL}; - static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", - "EiB", "ZiB", "YiB", NULL }; - static const char **units_str[] = { - [STRING_UNITS_10] = units_10, + static const char *const units_10[] = { + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL + }; + static const char *const units_2[] = { + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", + NULL + }; + static const char *const *const units_str[] = { + [STRING_UNITS_10] = units_10, [STRING_UNITS_2] = units_2, }; static const unsigned int divisor[] = { diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 
e4399fa65ad6..4abda074ea45 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -86,6 +86,7 @@ static unsigned int io_tlb_index; * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) static phys_addr_t *io_tlb_orig_addr; /* @@ -172,8 +173,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages_nopanic( - PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = memblock_virt_alloc_low_nopanic( + PAGE_ALIGN(io_tlb_overflow), + PAGE_SIZE); if (!v_overflow_buffer) return -ENOMEM; @@ -184,11 +186,17 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. */ - io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), + PAGE_SIZE); + io_tlb_orig_addr = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), + PAGE_SIZE); + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); if (verbose) swiotlb_print_info(); @@ -215,13 +223,13 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; if (io_tlb_start) - free_bootmem(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << 
IO_TLB_SHIFT)); + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); pr_warn("Cannot allocate SWIOTLB buffer"); no_iotlb_memory = true; } @@ -308,10 +316,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!io_tlb_list) goto cleanup3; - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - io_tlb_orig_addr = (phys_addr_t *) __get_free_pages(GFP_KERNEL, get_order(io_tlb_nslabs * @@ -319,7 +323,11 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!io_tlb_orig_addr) goto cleanup4; - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; swiotlb_print_info(); @@ -357,19 +365,19 @@ void __init swiotlb_free(void) free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - free_bootmem_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - free_bootmem_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - free_bootmem_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - free_bootmem_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_late(io_tlb_overflow_buffer, + PAGE_ALIGN(io_tlb_overflow)); + memblock_free_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + memblock_free_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + memblock_free_late(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; } -static int is_swiotlb_buffer(phys_addr_t paddr) +int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= io_tlb_start && paddr < io_tlb_end; } @@ -505,7 +513,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: 
spin_unlock_irqrestore(&io_tlb_lock, flags); - dev_warn(hwdev, "swiotlb buffer is full\n"); + if (printk_ratelimit()) + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); @@ -550,7 +559,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, /* * First, sync the memory before unmapping the entry */ - if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + if (orig_addr != INVALID_PHYS_ADDR && + ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); /* @@ -567,8 +577,10 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * Step 1: return the slots to the free list, merging the * slots with superceeding slots */ - for (i = index + nslots - 1; i >= index; i--) + for (i = index + nslots - 1; i >= index; i--) { io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } /* * Step 2: merge the returned slots with the preceding slots, * if available (non zero) @@ -587,6 +599,8 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = io_tlb_orig_addr[index]; + if (orig_addr == INVALID_PHYS_ADDR) + return; orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); switch (target) { diff --git a/lib/syscall.c b/lib/syscall.c index 58710eefeac8..e30e03932480 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -72,4 +72,3 @@ int task_current_syscall(struct task_struct *target, long *callno, return 0; } -EXPORT_SYMBOL_GPL(task_current_syscall); diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index bea3f3fa3f02..4137bca5f8e8 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -3,7 +3,7 @@ #include <linux/module.h> #define for_each_test(i, test) \ - for (i = 0; i < sizeof(test) / sizeof(test[0]); i++) + for (i = 0; i < 
ARRAY_SIZE(test); i++) struct test_fail { const char *str; diff --git a/lib/test_bpf.c b/lib/test_bpf.c new file mode 100644 index 000000000000..23e070bcf72d --- /dev/null +++ b/lib/test_bpf.c @@ -0,0 +1,1988 @@ +/* + * Testsuite for BPF interpreter and BPF JIT compiler + * + * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/filter.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> + +/* General test specific settings */ +#define MAX_SUBTESTS 3 +#define MAX_TESTRUNS 10000 +#define MAX_DATA 128 +#define MAX_INSNS 512 +#define MAX_K 0xffffFFFF + +/* Few constants used to init test 'skb' */ +#define SKB_TYPE 3 +#define SKB_MARK 0x1234aaaa +#define SKB_HASH 0x1234aaab +#define SKB_QUEUE_MAP 123 +#define SKB_VLAN_TCI 0xffff +#define SKB_DEV_IFINDEX 577 +#define SKB_DEV_TYPE 588 + +/* Redefine REGs to make tests less verbose */ +#define R0 BPF_REG_0 +#define R1 BPF_REG_1 +#define R2 BPF_REG_2 +#define R3 BPF_REG_3 +#define R4 BPF_REG_4 +#define R5 BPF_REG_5 +#define R6 BPF_REG_6 +#define R7 BPF_REG_7 +#define R8 BPF_REG_8 +#define R9 BPF_REG_9 +#define R10 BPF_REG_10 + +/* Flags that can be passed to test cases */ +#define FLAG_NO_DATA BIT(0) +#define FLAG_EXPECTED_FAIL BIT(1) + +enum { + CLASSIC = BIT(6), /* Old BPF instructions only. */ + INTERNAL = BIT(7), /* Extended instruction set. 
*/ +}; + +#define TEST_TYPE_MASK (CLASSIC | INTERNAL) + +struct bpf_test { + const char *descr; + union { + struct sock_filter insns[MAX_INSNS]; + struct bpf_insn insns_int[MAX_INSNS]; + } u; + __u8 aux; + __u8 data[MAX_DATA]; + struct { + int data_size; + __u32 result; + } test[MAX_SUBTESTS]; +}; + +static struct bpf_test tests[] = { + { + "TAX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_ALU | BPF_NEG, 0), /* A == -3 */ + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), /* X == len - 3 */ + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 1), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 10, 20, 30, 40, 50 }, + { { 2, 10 }, { 3, 20 }, { 4, 30 } }, + }, + { + "TXA", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) /* A == len * 2 */ + }, + CLASSIC, + { 10, 20, 30, 40, 50 }, + { { 1, 2 }, { 3, 6 }, { 4, 8 } }, + }, + { + "ADD_SUB_MUL_K", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MUL | BPF_K, 3), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xfffffffd } } + }, + { + "DIV_KX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 8), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 
0, 0x40000001 } } + }, + { + "AND_OR_LSH_K", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xff), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 27), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xf), + BPF_STMT(BPF_ALU | BPF_OR | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x800000ff }, { 1, 0x800000ff } }, + }, + { + "LD_IMM_0", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0), /* ld #0 */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + }, + CLASSIC, + { }, + { { 1, 1 } }, + }, + { + "LD_IND", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, MAX_K), + BPF_STMT(BPF_RET | BPF_K, 1) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 }, { 60, 0 } }, + }, + { + "LD_ABS", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 1000), + BPF_STMT(BPF_RET | BPF_K, 1) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 }, { 60, 0 } }, + }, + { + "LD_ABS_LL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF + 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 1, 2, 3 }, + { { 1, 0 }, { 2, 3 } }, + }, + { + "LD_IND_LL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, SKF_LL_OFF - 1), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 1, 2, 3, 0xff }, + { { 1, 1 }, { 3, 3 }, { 4, 0xff } }, + }, + { + "LD_ABS_NET", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 }, + { { 15, 0 }, { 16, 3 } }, + }, + { + "LD_IND_NET", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, SKF_NET_OFF - 15), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 }, + { { 14, 0 }, { 15, 1 }, { 17, 3 } }, + }, + { + "LD_PKTTYPE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 3 }, { 10, 3 } }, + }, + { + "LD_MARK", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_MARK), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_MARK}, { 10, SKB_MARK} }, + }, + { + "LD_RXHASH", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_RXHASH), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_HASH}, { 10, SKB_HASH} }, + }, + { + "LD_QUEUE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_QUEUE), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_QUEUE_MAP }, { 10, SKB_QUEUE_MAP } }, + }, + { + "LD_PROTOCOL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 1), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PROTOCOL), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 
30, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 10, 20, 30 }, + { { 10, ETH_P_IP }, { 100, ETH_P_IP } }, + }, + { + "LD_VLAN_TAG", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_VLAN_TAG), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { + { 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }, + { 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT } + }, + }, + { + "LD_VLAN_TAG_PRESENT", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { + { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, + { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + }, + }, + { + "LD_IFINDEX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_IFINDEX), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_DEV_IFINDEX }, { 10, SKB_DEV_IFINDEX } }, + }, + { + "LD_HATYPE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_HATYPE), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_DEV_TYPE }, { 10, SKB_DEV_TYPE } }, + }, + { + "LD_CPU", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 } }, + }, + { + "LD_NLATTR", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 2), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, +#ifdef __BIG_ENDIAN + { 0xff, 0xff, 0, 4, 0, 2, 0, 4, 0, 3 }, +#else + { 0xff, 0xff, 4, 0, 2, 0, 4, 0, 3, 0 }, +#endif + { { 4, 0 }, { 20, 6 } }, + }, + { + "LD_NLATTR_NEST", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + 
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, +#ifdef __BIG_ENDIAN + { 0xff, 0xff, 0, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3 }, +#else + { 0xff, 0xff, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3, 0 }, +#endif + { { 4, 0 }, { 20, 10 } }, + }, + { + "LD_PAYLOAD_OFF", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + /* 00:00:00:00:00:00 > 00:00:00:00:00:00, ethtype IPv4 (0x0800), + * length 98: 127.0.0.1 > 127.0.0.1: ICMP echo request, + * id 9737, seq 1, length 64 + */ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + 0x45, 0x00, 0x00, 0x54, 0xac, 0x8b, 0x40, 0x00, 0x40, + 0x01, 0x90, 0x1b, 0x7f, 0x00, 0x00, 0x01 }, + { { 30, 0 }, { 100, 42 } }, + }, + { + "LD_ANC_XOR", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 10), + BPF_STMT(BPF_LDX | BPF_IMM, 300), + BPF_STMT(BPF_LD | 
BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_ALU_XOR_X), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 4, 10 ^ 300 }, { 20, 10 ^ 300 } }, + }, + { + "SPILL_FILL", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_ALU | BPF_RSH, 1), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_ST, 1), /* M1 = 1 ^ len */ + BPF_STMT(BPF_ALU | BPF_XOR | BPF_K, 0x80000000), + BPF_STMT(BPF_ST, 2), /* M2 = 1 ^ len ^ 0x80000000 */ + BPF_STMT(BPF_STX, 15), /* M3 = len */ + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_LD | BPF_MEM, 2), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 0x80000001 }, { 2, 0x80000002 }, { 60, 0x80000000 ^ 60 } } + }, + { + "JEQ", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 3, 3, 3, 3, 3 }, + { { 1, 0 }, { 3, 1 }, { 4, MAX_K } }, + }, + { + "JGT", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 4, 4, 4, 3, 3 }, + { { 2, 0 }, { 3, 1 }, { 4, MAX_K } }, + }, + { + "JGE", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, MAX_K), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 1, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 10), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 2, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 20), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 3, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 4, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 40), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 1, 2, 3, 4, 5 }, + { { 1, 20 }, { 3, 40 }, { 5, MAX_K } }, + }, + { + "JSET", + 
.u.insns = { + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_JUMP(BPF_JMP | BPF_JA, 1, 1, 1), + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, 4), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_IND, 0), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 1, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 10), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x80000000, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 0, 0xAA, 0x55, 1 }, + { { 4, 10 }, { 5, 20 }, { 6, MAX_K } }, + }, + { + "tcpdump port 22", + .u.insns = { + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 0, 8), /* IPv6 */ + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 17), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 54), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 14, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 56), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 12, 13), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 12), /* IPv4 */ + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 8), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 6, 0), + BPF_STMT(BPF_LDX | 
BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 0xffff), + BPF_STMT(BPF_RET | BPF_K, 0), + }, + CLASSIC, + /* 3c:07:54:43:e5:76 > 10:bf:48:d6:43:d6, ethertype IPv4(0x0800) + * length 114: 10.1.1.149.49700 > 10.1.2.10.22: Flags [P.], + * seq 1305692979:1305693027, ack 3650467037, win 65535, + * options [nop,nop,TS val 2502645400 ecr 3971138], length 48 + */ + { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + 0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76, + 0x08, 0x00, + 0x45, 0x10, 0x00, 0x64, 0x75, 0xb5, + 0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */ + 0x0a, 0x01, 0x01, 0x95, /* ip src */ + 0x0a, 0x01, 0x02, 0x0a, /* ip dst */ + 0xc2, 0x24, + 0x00, 0x16 /* dst port */ }, + { { 10, 0 }, { 30, 0 }, { 100, 65535 } }, + }, + { + "tcpdump complex", + .u.insns = { + /* tcpdump -nei eth0 'tcp port 22 and (((ip[2:2] - + * ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0) and + * (len > 115 or len < 30000000000)' -d + */ + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 30, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x800, 0, 29), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 0, 27), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 25, 0), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 20), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 16), + BPF_STMT(BPF_ST, 1), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 14), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf), + BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0x5), /* libpcap emits K on TAX */ + BPF_STMT(BPF_LD | BPF_MEM, 1), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + 
BPF_STMT(BPF_ST, 5), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 26), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_RSH | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0x9), /* libpcap emits K on TAX */ + BPF_STMT(BPF_LD | BPF_MEM, 5), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 4, 0), + BPF_STMT(BPF_LD | BPF_LEN, 0), + BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, 0x73, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 0xfc23ac00, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0xffff), + BPF_STMT(BPF_RET | BPF_K, 0), + }, + CLASSIC, + { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + 0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76, + 0x08, 0x00, + 0x45, 0x10, 0x00, 0x64, 0x75, 0xb5, + 0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */ + 0x0a, 0x01, 0x01, 0x95, /* ip src */ + 0x0a, 0x01, 0x02, 0x0a, /* ip dst */ + 0xc2, 0x24, + 0x00, 0x16 /* dst port */ }, + { { 10, 0 }, { 30, 0 }, { 100, 65535 } }, + }, + { + "RET_A", + .u.insns = { + /* check that unitialized X and A contain zeros */ + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { {1, 0}, {2, 0} }, + }, + { + "INT: ADD trivial", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_ALU64_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_SUB, R1, R2), + BPF_ALU64_IMM(BPF_ADD, R1, -1), + BPF_ALU64_IMM(BPF_MUL, R1, 3), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffd } } + }, + { + "INT: MUL_X", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU64_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_MUL, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, 0xfffffffd, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: MUL_X2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_MUL, R1, R2), + 
BPF_ALU64_IMM(BPF_RSH, R1, 8), + BPF_JMP_IMM(BPF_JEQ, R1, 0x2ffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: MUL32_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU32_IMM(BPF_MOV, R2, 3), + BPF_ALU32_REG(BPF_MUL, R1, R2), + BPF_ALU64_IMM(BPF_RSH, R1, 8), + BPF_JMP_IMM(BPF_JEQ, R1, 0xffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* Have to test all register combinations, since + * JITing of different registers will produce + * different asm code. + */ + "INT: ADD 64-bit", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_ADD, R0, 20), + BPF_ALU64_IMM(BPF_ADD, R1, 20), + BPF_ALU64_IMM(BPF_ADD, R2, 20), + BPF_ALU64_IMM(BPF_ADD, R3, 20), + BPF_ALU64_IMM(BPF_ADD, R4, 20), + BPF_ALU64_IMM(BPF_ADD, R5, 20), + BPF_ALU64_IMM(BPF_ADD, R6, 20), + BPF_ALU64_IMM(BPF_ADD, R7, 20), + BPF_ALU64_IMM(BPF_ADD, R8, 20), + BPF_ALU64_IMM(BPF_ADD, R9, 20), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_ALU64_IMM(BPF_SUB, R1, 10), + BPF_ALU64_IMM(BPF_SUB, R2, 10), + BPF_ALU64_IMM(BPF_SUB, R3, 10), + BPF_ALU64_IMM(BPF_SUB, R4, 10), + BPF_ALU64_IMM(BPF_SUB, R5, 10), + BPF_ALU64_IMM(BPF_SUB, R6, 10), + BPF_ALU64_IMM(BPF_SUB, R7, 10), + BPF_ALU64_IMM(BPF_SUB, R8, 10), + BPF_ALU64_IMM(BPF_SUB, R9, 10), + BPF_ALU64_REG(BPF_ADD, R0, R0), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_ALU64_REG(BPF_ADD, R0, R2), + BPF_ALU64_REG(BPF_ADD, R0, R3), + BPF_ALU64_REG(BPF_ADD, R0, R4), + BPF_ALU64_REG(BPF_ADD, R0, R5), + BPF_ALU64_REG(BPF_ADD, R0, R6), + BPF_ALU64_REG(BPF_ADD, 
R0, R7), + BPF_ALU64_REG(BPF_ADD, R0, R8), + BPF_ALU64_REG(BPF_ADD, R0, R9), /* R0 == 155 */ + BPF_JMP_IMM(BPF_JEQ, R0, 155, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R1, R0), + BPF_ALU64_REG(BPF_ADD, R1, R1), + BPF_ALU64_REG(BPF_ADD, R1, R2), + BPF_ALU64_REG(BPF_ADD, R1, R3), + BPF_ALU64_REG(BPF_ADD, R1, R4), + BPF_ALU64_REG(BPF_ADD, R1, R5), + BPF_ALU64_REG(BPF_ADD, R1, R6), + BPF_ALU64_REG(BPF_ADD, R1, R7), + BPF_ALU64_REG(BPF_ADD, R1, R8), + BPF_ALU64_REG(BPF_ADD, R1, R9), /* R1 == 456 */ + BPF_JMP_IMM(BPF_JEQ, R1, 456, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R2, R0), + BPF_ALU64_REG(BPF_ADD, R2, R1), + BPF_ALU64_REG(BPF_ADD, R2, R2), + BPF_ALU64_REG(BPF_ADD, R2, R3), + BPF_ALU64_REG(BPF_ADD, R2, R4), + BPF_ALU64_REG(BPF_ADD, R2, R5), + BPF_ALU64_REG(BPF_ADD, R2, R6), + BPF_ALU64_REG(BPF_ADD, R2, R7), + BPF_ALU64_REG(BPF_ADD, R2, R8), + BPF_ALU64_REG(BPF_ADD, R2, R9), /* R2 == 1358 */ + BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R3, R0), + BPF_ALU64_REG(BPF_ADD, R3, R1), + BPF_ALU64_REG(BPF_ADD, R3, R2), + BPF_ALU64_REG(BPF_ADD, R3, R3), + BPF_ALU64_REG(BPF_ADD, R3, R4), + BPF_ALU64_REG(BPF_ADD, R3, R5), + BPF_ALU64_REG(BPF_ADD, R3, R6), + BPF_ALU64_REG(BPF_ADD, R3, R7), + BPF_ALU64_REG(BPF_ADD, R3, R8), + BPF_ALU64_REG(BPF_ADD, R3, R9), /* R3 == 4063 */ + BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R4, R0), + BPF_ALU64_REG(BPF_ADD, R4, R1), + BPF_ALU64_REG(BPF_ADD, R4, R2), + BPF_ALU64_REG(BPF_ADD, R4, R3), + BPF_ALU64_REG(BPF_ADD, R4, R4), + BPF_ALU64_REG(BPF_ADD, R4, R5), + BPF_ALU64_REG(BPF_ADD, R4, R6), + BPF_ALU64_REG(BPF_ADD, R4, R7), + BPF_ALU64_REG(BPF_ADD, R4, R8), + BPF_ALU64_REG(BPF_ADD, R4, R9), /* R4 == 12177 */ + BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R5, R0), + BPF_ALU64_REG(BPF_ADD, R5, R1), + BPF_ALU64_REG(BPF_ADD, R5, R2), + BPF_ALU64_REG(BPF_ADD, R5, R3), + BPF_ALU64_REG(BPF_ADD, R5, R4), + BPF_ALU64_REG(BPF_ADD, R5, 
R5), + BPF_ALU64_REG(BPF_ADD, R5, R6), + BPF_ALU64_REG(BPF_ADD, R5, R7), + BPF_ALU64_REG(BPF_ADD, R5, R8), + BPF_ALU64_REG(BPF_ADD, R5, R9), /* R5 == 36518 */ + BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R6, R0), + BPF_ALU64_REG(BPF_ADD, R6, R1), + BPF_ALU64_REG(BPF_ADD, R6, R2), + BPF_ALU64_REG(BPF_ADD, R6, R3), + BPF_ALU64_REG(BPF_ADD, R6, R4), + BPF_ALU64_REG(BPF_ADD, R6, R5), + BPF_ALU64_REG(BPF_ADD, R6, R6), + BPF_ALU64_REG(BPF_ADD, R6, R7), + BPF_ALU64_REG(BPF_ADD, R6, R8), + BPF_ALU64_REG(BPF_ADD, R6, R9), /* R6 == 109540 */ + BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R7, R0), + BPF_ALU64_REG(BPF_ADD, R7, R1), + BPF_ALU64_REG(BPF_ADD, R7, R2), + BPF_ALU64_REG(BPF_ADD, R7, R3), + BPF_ALU64_REG(BPF_ADD, R7, R4), + BPF_ALU64_REG(BPF_ADD, R7, R5), + BPF_ALU64_REG(BPF_ADD, R7, R6), + BPF_ALU64_REG(BPF_ADD, R7, R7), + BPF_ALU64_REG(BPF_ADD, R7, R8), + BPF_ALU64_REG(BPF_ADD, R7, R9), /* R7 == 328605 */ + BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R8, R0), + BPF_ALU64_REG(BPF_ADD, R8, R1), + BPF_ALU64_REG(BPF_ADD, R8, R2), + BPF_ALU64_REG(BPF_ADD, R8, R3), + BPF_ALU64_REG(BPF_ADD, R8, R4), + BPF_ALU64_REG(BPF_ADD, R8, R5), + BPF_ALU64_REG(BPF_ADD, R8, R6), + BPF_ALU64_REG(BPF_ADD, R8, R7), + BPF_ALU64_REG(BPF_ADD, R8, R8), + BPF_ALU64_REG(BPF_ADD, R8, R9), /* R8 == 985799 */ + BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R9, R0), + BPF_ALU64_REG(BPF_ADD, R9, R1), + BPF_ALU64_REG(BPF_ADD, R9, R2), + BPF_ALU64_REG(BPF_ADD, R9, R3), + BPF_ALU64_REG(BPF_ADD, R9, R4), + BPF_ALU64_REG(BPF_ADD, R9, R5), + BPF_ALU64_REG(BPF_ADD, R9, R6), + BPF_ALU64_REG(BPF_ADD, R9, R7), + BPF_ALU64_REG(BPF_ADD, R9, R8), + BPF_ALU64_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */ + BPF_ALU64_REG(BPF_MOV, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2957380 } } + }, + { + "INT: ADD 32-bit", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, 
R0, 20), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_ADD, R1, 10), + BPF_ALU64_IMM(BPF_ADD, R2, 10), + BPF_ALU64_IMM(BPF_ADD, R3, 10), + BPF_ALU64_IMM(BPF_ADD, R4, 10), + BPF_ALU64_IMM(BPF_ADD, R5, 10), + BPF_ALU64_IMM(BPF_ADD, R6, 10), + BPF_ALU64_IMM(BPF_ADD, R7, 10), + BPF_ALU64_IMM(BPF_ADD, R8, 10), + BPF_ALU64_IMM(BPF_ADD, R9, 10), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_ALU32_REG(BPF_ADD, R0, R2), + BPF_ALU32_REG(BPF_ADD, R0, R3), + BPF_ALU32_REG(BPF_ADD, R0, R4), + BPF_ALU32_REG(BPF_ADD, R0, R5), + BPF_ALU32_REG(BPF_ADD, R0, R6), + BPF_ALU32_REG(BPF_ADD, R0, R7), + BPF_ALU32_REG(BPF_ADD, R0, R8), + BPF_ALU32_REG(BPF_ADD, R0, R9), /* R0 == 155 */ + BPF_JMP_IMM(BPF_JEQ, R0, 155, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R1, R0), + BPF_ALU32_REG(BPF_ADD, R1, R1), + BPF_ALU32_REG(BPF_ADD, R1, R2), + BPF_ALU32_REG(BPF_ADD, R1, R3), + BPF_ALU32_REG(BPF_ADD, R1, R4), + BPF_ALU32_REG(BPF_ADD, R1, R5), + BPF_ALU32_REG(BPF_ADD, R1, R6), + BPF_ALU32_REG(BPF_ADD, R1, R7), + BPF_ALU32_REG(BPF_ADD, R1, R8), + BPF_ALU32_REG(BPF_ADD, R1, R9), /* R1 == 456 */ + BPF_JMP_IMM(BPF_JEQ, R1, 456, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R2, R0), + BPF_ALU32_REG(BPF_ADD, R2, R1), + BPF_ALU32_REG(BPF_ADD, R2, R2), + BPF_ALU32_REG(BPF_ADD, R2, R3), + BPF_ALU32_REG(BPF_ADD, R2, R4), + BPF_ALU32_REG(BPF_ADD, R2, R5), + BPF_ALU32_REG(BPF_ADD, R2, R6), + BPF_ALU32_REG(BPF_ADD, R2, R7), + BPF_ALU32_REG(BPF_ADD, R2, R8), + BPF_ALU32_REG(BPF_ADD, R2, R9), /* R2 == 1358 */ + BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R3, R0), + BPF_ALU32_REG(BPF_ADD, R3, R1), + BPF_ALU32_REG(BPF_ADD, R3, R2), + BPF_ALU32_REG(BPF_ADD, R3, R3), + BPF_ALU32_REG(BPF_ADD, R3, R4), + 
BPF_ALU32_REG(BPF_ADD, R3, R5), + BPF_ALU32_REG(BPF_ADD, R3, R6), + BPF_ALU32_REG(BPF_ADD, R3, R7), + BPF_ALU32_REG(BPF_ADD, R3, R8), + BPF_ALU32_REG(BPF_ADD, R3, R9), /* R3 == 4063 */ + BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R4, R0), + BPF_ALU32_REG(BPF_ADD, R4, R1), + BPF_ALU32_REG(BPF_ADD, R4, R2), + BPF_ALU32_REG(BPF_ADD, R4, R3), + BPF_ALU32_REG(BPF_ADD, R4, R4), + BPF_ALU32_REG(BPF_ADD, R4, R5), + BPF_ALU32_REG(BPF_ADD, R4, R6), + BPF_ALU32_REG(BPF_ADD, R4, R7), + BPF_ALU32_REG(BPF_ADD, R4, R8), + BPF_ALU32_REG(BPF_ADD, R4, R9), /* R4 == 12177 */ + BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R5, R0), + BPF_ALU32_REG(BPF_ADD, R5, R1), + BPF_ALU32_REG(BPF_ADD, R5, R2), + BPF_ALU32_REG(BPF_ADD, R5, R3), + BPF_ALU32_REG(BPF_ADD, R5, R4), + BPF_ALU32_REG(BPF_ADD, R5, R5), + BPF_ALU32_REG(BPF_ADD, R5, R6), + BPF_ALU32_REG(BPF_ADD, R5, R7), + BPF_ALU32_REG(BPF_ADD, R5, R8), + BPF_ALU32_REG(BPF_ADD, R5, R9), /* R5 == 36518 */ + BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R6, R0), + BPF_ALU32_REG(BPF_ADD, R6, R1), + BPF_ALU32_REG(BPF_ADD, R6, R2), + BPF_ALU32_REG(BPF_ADD, R6, R3), + BPF_ALU32_REG(BPF_ADD, R6, R4), + BPF_ALU32_REG(BPF_ADD, R6, R5), + BPF_ALU32_REG(BPF_ADD, R6, R6), + BPF_ALU32_REG(BPF_ADD, R6, R7), + BPF_ALU32_REG(BPF_ADD, R6, R8), + BPF_ALU32_REG(BPF_ADD, R6, R9), /* R6 == 109540 */ + BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R7, R0), + BPF_ALU32_REG(BPF_ADD, R7, R1), + BPF_ALU32_REG(BPF_ADD, R7, R2), + BPF_ALU32_REG(BPF_ADD, R7, R3), + BPF_ALU32_REG(BPF_ADD, R7, R4), + BPF_ALU32_REG(BPF_ADD, R7, R5), + BPF_ALU32_REG(BPF_ADD, R7, R6), + BPF_ALU32_REG(BPF_ADD, R7, R7), + BPF_ALU32_REG(BPF_ADD, R7, R8), + BPF_ALU32_REG(BPF_ADD, R7, R9), /* R7 == 328605 */ + BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R8, R0), + BPF_ALU32_REG(BPF_ADD, R8, R1), + BPF_ALU32_REG(BPF_ADD, 
R8, R2), + BPF_ALU32_REG(BPF_ADD, R8, R3), + BPF_ALU32_REG(BPF_ADD, R8, R4), + BPF_ALU32_REG(BPF_ADD, R8, R5), + BPF_ALU32_REG(BPF_ADD, R8, R6), + BPF_ALU32_REG(BPF_ADD, R8, R7), + BPF_ALU32_REG(BPF_ADD, R8, R8), + BPF_ALU32_REG(BPF_ADD, R8, R9), /* R8 == 985799 */ + BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R9, R0), + BPF_ALU32_REG(BPF_ADD, R9, R1), + BPF_ALU32_REG(BPF_ADD, R9, R2), + BPF_ALU32_REG(BPF_ADD, R9, R3), + BPF_ALU32_REG(BPF_ADD, R9, R4), + BPF_ALU32_REG(BPF_ADD, R9, R5), + BPF_ALU32_REG(BPF_ADD, R9, R6), + BPF_ALU32_REG(BPF_ADD, R9, R7), + BPF_ALU32_REG(BPF_ADD, R9, R8), + BPF_ALU32_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */ + BPF_ALU32_REG(BPF_MOV, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2957380 } } + }, + { /* Mainly checking JIT here. */ + "INT: SUB", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_REG(BPF_SUB, R0, R0), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_ALU64_REG(BPF_SUB, R0, R3), + BPF_ALU64_REG(BPF_SUB, R0, R4), + BPF_ALU64_REG(BPF_SUB, R0, R5), + BPF_ALU64_REG(BPF_SUB, R0, R6), + BPF_ALU64_REG(BPF_SUB, R0, R7), + BPF_ALU64_REG(BPF_SUB, R0, R8), + BPF_ALU64_REG(BPF_SUB, R0, R9), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_JMP_IMM(BPF_JEQ, R0, -55, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R1, R0), + BPF_ALU64_REG(BPF_SUB, R1, R2), + BPF_ALU64_REG(BPF_SUB, R1, R3), + BPF_ALU64_REG(BPF_SUB, R1, R4), + BPF_ALU64_REG(BPF_SUB, R1, R5), + BPF_ALU64_REG(BPF_SUB, R1, R6), + BPF_ALU64_REG(BPF_SUB, R1, R7), + BPF_ALU64_REG(BPF_SUB, R1, R8), + BPF_ALU64_REG(BPF_SUB, R1, R9), + BPF_ALU64_IMM(BPF_SUB, R1, 10), + BPF_ALU64_REG(BPF_SUB, R2, R0), + 
BPF_ALU64_REG(BPF_SUB, R2, R1), + BPF_ALU64_REG(BPF_SUB, R2, R3), + BPF_ALU64_REG(BPF_SUB, R2, R4), + BPF_ALU64_REG(BPF_SUB, R2, R5), + BPF_ALU64_REG(BPF_SUB, R2, R6), + BPF_ALU64_REG(BPF_SUB, R2, R7), + BPF_ALU64_REG(BPF_SUB, R2, R8), + BPF_ALU64_REG(BPF_SUB, R2, R9), + BPF_ALU64_IMM(BPF_SUB, R2, 10), + BPF_ALU64_REG(BPF_SUB, R3, R0), + BPF_ALU64_REG(BPF_SUB, R3, R1), + BPF_ALU64_REG(BPF_SUB, R3, R2), + BPF_ALU64_REG(BPF_SUB, R3, R4), + BPF_ALU64_REG(BPF_SUB, R3, R5), + BPF_ALU64_REG(BPF_SUB, R3, R6), + BPF_ALU64_REG(BPF_SUB, R3, R7), + BPF_ALU64_REG(BPF_SUB, R3, R8), + BPF_ALU64_REG(BPF_SUB, R3, R9), + BPF_ALU64_IMM(BPF_SUB, R3, 10), + BPF_ALU64_REG(BPF_SUB, R4, R0), + BPF_ALU64_REG(BPF_SUB, R4, R1), + BPF_ALU64_REG(BPF_SUB, R4, R2), + BPF_ALU64_REG(BPF_SUB, R4, R3), + BPF_ALU64_REG(BPF_SUB, R4, R5), + BPF_ALU64_REG(BPF_SUB, R4, R6), + BPF_ALU64_REG(BPF_SUB, R4, R7), + BPF_ALU64_REG(BPF_SUB, R4, R8), + BPF_ALU64_REG(BPF_SUB, R4, R9), + BPF_ALU64_IMM(BPF_SUB, R4, 10), + BPF_ALU64_REG(BPF_SUB, R5, R0), + BPF_ALU64_REG(BPF_SUB, R5, R1), + BPF_ALU64_REG(BPF_SUB, R5, R2), + BPF_ALU64_REG(BPF_SUB, R5, R3), + BPF_ALU64_REG(BPF_SUB, R5, R4), + BPF_ALU64_REG(BPF_SUB, R5, R6), + BPF_ALU64_REG(BPF_SUB, R5, R7), + BPF_ALU64_REG(BPF_SUB, R5, R8), + BPF_ALU64_REG(BPF_SUB, R5, R9), + BPF_ALU64_IMM(BPF_SUB, R5, 10), + BPF_ALU64_REG(BPF_SUB, R6, R0), + BPF_ALU64_REG(BPF_SUB, R6, R1), + BPF_ALU64_REG(BPF_SUB, R6, R2), + BPF_ALU64_REG(BPF_SUB, R6, R3), + BPF_ALU64_REG(BPF_SUB, R6, R4), + BPF_ALU64_REG(BPF_SUB, R6, R5), + BPF_ALU64_REG(BPF_SUB, R6, R7), + BPF_ALU64_REG(BPF_SUB, R6, R8), + BPF_ALU64_REG(BPF_SUB, R6, R9), + BPF_ALU64_IMM(BPF_SUB, R6, 10), + BPF_ALU64_REG(BPF_SUB, R7, R0), + BPF_ALU64_REG(BPF_SUB, R7, R1), + BPF_ALU64_REG(BPF_SUB, R7, R2), + BPF_ALU64_REG(BPF_SUB, R7, R3), + BPF_ALU64_REG(BPF_SUB, R7, R4), + BPF_ALU64_REG(BPF_SUB, R7, R5), + BPF_ALU64_REG(BPF_SUB, R7, R6), + BPF_ALU64_REG(BPF_SUB, R7, R8), + BPF_ALU64_REG(BPF_SUB, R7, R9), + BPF_ALU64_IMM(BPF_SUB, R7, 
10), + BPF_ALU64_REG(BPF_SUB, R8, R0), + BPF_ALU64_REG(BPF_SUB, R8, R1), + BPF_ALU64_REG(BPF_SUB, R8, R2), + BPF_ALU64_REG(BPF_SUB, R8, R3), + BPF_ALU64_REG(BPF_SUB, R8, R4), + BPF_ALU64_REG(BPF_SUB, R8, R5), + BPF_ALU64_REG(BPF_SUB, R8, R6), + BPF_ALU64_REG(BPF_SUB, R8, R7), + BPF_ALU64_REG(BPF_SUB, R8, R9), + BPF_ALU64_IMM(BPF_SUB, R8, 10), + BPF_ALU64_REG(BPF_SUB, R9, R0), + BPF_ALU64_REG(BPF_SUB, R9, R1), + BPF_ALU64_REG(BPF_SUB, R9, R2), + BPF_ALU64_REG(BPF_SUB, R9, R3), + BPF_ALU64_REG(BPF_SUB, R9, R4), + BPF_ALU64_REG(BPF_SUB, R9, R5), + BPF_ALU64_REG(BPF_SUB, R9, R6), + BPF_ALU64_REG(BPF_SUB, R9, R7), + BPF_ALU64_REG(BPF_SUB, R9, R8), + BPF_ALU64_IMM(BPF_SUB, R9, 10), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_ALU64_REG(BPF_SUB, R0, R3), + BPF_ALU64_REG(BPF_SUB, R0, R4), + BPF_ALU64_REG(BPF_SUB, R0, R5), + BPF_ALU64_REG(BPF_SUB, R0, R6), + BPF_ALU64_REG(BPF_SUB, R0, R7), + BPF_ALU64_REG(BPF_SUB, R0, R8), + BPF_ALU64_REG(BPF_SUB, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 11 } } + }, + { /* Mainly checking JIT here. 
*/ + "INT: XOR", + .u.insns_int = { + BPF_ALU64_REG(BPF_SUB, R0, R0), + BPF_ALU64_REG(BPF_XOR, R1, R1), + BPF_JMP_REG(BPF_JEQ, R0, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 10), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU64_REG(BPF_SUB, R1, R1), + BPF_ALU64_REG(BPF_XOR, R2, R2), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R2, R2), + BPF_ALU64_REG(BPF_XOR, R3, R3), + BPF_ALU64_IMM(BPF_MOV, R0, 10), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_JMP_REG(BPF_JEQ, R2, R3, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R3, R3), + BPF_ALU64_REG(BPF_XOR, R4, R4), + BPF_ALU64_IMM(BPF_MOV, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R5, -1), + BPF_JMP_REG(BPF_JEQ, R3, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R4, R4), + BPF_ALU64_REG(BPF_XOR, R5, R5), + BPF_ALU64_IMM(BPF_MOV, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R7, -1), + BPF_JMP_REG(BPF_JEQ, R5, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R5, 1), + BPF_ALU64_REG(BPF_SUB, R5, R5), + BPF_ALU64_REG(BPF_XOR, R6, R6), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R8, -1), + BPF_JMP_REG(BPF_JEQ, R5, R6, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R6, R6), + BPF_ALU64_REG(BPF_XOR, R7, R7), + BPF_JMP_REG(BPF_JEQ, R7, R6, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R7, R7), + BPF_ALU64_REG(BPF_XOR, R8, R8), + BPF_JMP_REG(BPF_JEQ, R7, R8, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R8, R8), + BPF_ALU64_REG(BPF_XOR, R9, R9), + BPF_JMP_REG(BPF_JEQ, R9, R8, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R9, R9), + BPF_ALU64_REG(BPF_XOR, R0, R0), + BPF_JMP_REG(BPF_JEQ, R9, R0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R1, R1), + BPF_ALU64_REG(BPF_XOR, R0, R0), + BPF_JMP_REG(BPF_JEQ, R9, R0, 2), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { /* Mainly checking JIT here. 
*/ + "INT: MUL", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 11), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_REG(BPF_MUL, R0, R0), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_ALU64_REG(BPF_MUL, R0, R2), + BPF_ALU64_REG(BPF_MUL, R0, R3), + BPF_ALU64_REG(BPF_MUL, R0, R4), + BPF_ALU64_REG(BPF_MUL, R0, R5), + BPF_ALU64_REG(BPF_MUL, R0, R6), + BPF_ALU64_REG(BPF_MUL, R0, R7), + BPF_ALU64_REG(BPF_MUL, R0, R8), + BPF_ALU64_REG(BPF_MUL, R0, R9), + BPF_ALU64_IMM(BPF_MUL, R0, 10), + BPF_JMP_IMM(BPF_JEQ, R0, 439084800, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MUL, R1, R0), + BPF_ALU64_REG(BPF_MUL, R1, R2), + BPF_ALU64_REG(BPF_MUL, R1, R3), + BPF_ALU64_REG(BPF_MUL, R1, R4), + BPF_ALU64_REG(BPF_MUL, R1, R5), + BPF_ALU64_REG(BPF_MUL, R1, R6), + BPF_ALU64_REG(BPF_MUL, R1, R7), + BPF_ALU64_REG(BPF_MUL, R1, R8), + BPF_ALU64_REG(BPF_MUL, R1, R9), + BPF_ALU64_IMM(BPF_MUL, R1, 10), + BPF_ALU64_REG(BPF_MOV, R2, R1), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5a924, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_LSH, R1, 32), + BPF_ALU64_IMM(BPF_ARSH, R1, 32), + BPF_JMP_IMM(BPF_JEQ, R1, 0xebb90000, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MUL, R2, R0), + BPF_ALU64_REG(BPF_MUL, R2, R1), + BPF_ALU64_REG(BPF_MUL, R2, R3), + BPF_ALU64_REG(BPF_MUL, R2, R4), + BPF_ALU64_REG(BPF_MUL, R2, R5), + BPF_ALU64_REG(BPF_MUL, R2, R6), + BPF_ALU64_REG(BPF_MUL, R2, R7), + BPF_ALU64_REG(BPF_MUL, R2, R8), + BPF_ALU64_REG(BPF_MUL, R2, R9), + BPF_ALU64_IMM(BPF_MUL, R2, 10), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_REG(BPF_MOV, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x35d97ef2 } } + }, + { + "INT: ALU MIX", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 11), + BPF_ALU64_IMM(BPF_ADD, R0, -1), + 
BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_XOR, R2, 3), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, 10, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOD, R0, 3), + BPF_JMP_IMM(BPF_JEQ, R0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "INT: shifts by register", + .u.insns_int = { + BPF_MOV64_IMM(R0, -1234), + BPF_MOV64_IMM(R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, 0x7ffffd97, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R2, 1), + BPF_ALU64_REG(BPF_LSH, R0, R2), + BPF_MOV32_IMM(R4, -1234), + BPF_JMP_REG(BPF_JEQ, R0, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_AND, R4, 63), + BPF_ALU64_REG(BPF_LSH, R0, R4), /* R0 <= 46 */ + BPF_MOV64_IMM(R3, 47), + BPF_ALU64_REG(BPF_ARSH, R0, R3), + BPF_JMP_IMM(BPF_JEQ, R0, -617, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R2, 1), + BPF_ALU64_REG(BPF_LSH, R4, R2), /* R4 = 46 << 1 */ + BPF_JMP_IMM(BPF_JEQ, R4, 92, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R4, 4), + BPF_ALU64_REG(BPF_LSH, R4, R4), /* R4 = 4 << 4 */ + BPF_JMP_IMM(BPF_JEQ, R4, 64, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R4, 5), + BPF_ALU32_REG(BPF_LSH, R4, R4), /* R4 = 5 << 5 */ + BPF_JMP_IMM(BPF_JEQ, R4, 160, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(R0, -1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "INT: DIV + ABS", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R6, R1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU32_REG(BPF_DIV, R0, R2), + BPF_ALU64_REG(BPF_MOV, R8, R0), + BPF_LD_ABS(BPF_B, 4), + BPF_ALU64_REG(BPF_ADD, R8, R0), + BPF_LD_IND(BPF_B, R8, -70), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 10, 20, 30, 40, 50 }, + { { 4, 0 }, { 5, 10 } } + }, + { + "INT: DIV by zero", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R6, R1), + BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU32_REG(BPF_DIV, R0, R7), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 10, 20, 30, 40, 50 }, + { { 3, 0 }, { 4, 0 } } + 
}, + { + "check: missing ret", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: div_k_0", + .u.insns = { + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: unknown insn", + .u.insns = { + /* seccomp insn, rejected in socket filter */ + BPF_STMT(BPF_LDX | BPF_W | BPF_ABS, 0), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: out of range spill/fill", + .u.insns = { + BPF_STMT(BPF_STX, 16), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "JUMPS + HOLES", + .u.insns = { + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 15), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 3, 4), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | 
BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 2, 3), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 2, 3), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC, + { 0x00, 0x1b, 0x21, 0x3c, 0x9d, 0xf8, + 0x90, 0xe2, 0xba, 0x0a, 0x56, 0xb4, + 0x08, 0x00, + 0x45, 0x00, 0x00, 0x28, 0x00, 0x00, + 0x20, 0x00, 0x40, 0x11, 0x00, 0x00, /* IP header */ + 0xc0, 0xa8, 0x33, 0x01, + 0xc0, 0xa8, 0x33, 0x02, + 0xbb, 0xb6, + 0xa9, 0xfa, + 0x00, 0x14, 0x00, 0x00, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc }, + { { 88, 0x001b } } + }, + { + "check: RET X", + .u.insns = { + BPF_STMT(BPF_RET | BPF_X, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { + "check: LDX + RET X", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 42), + 
BPF_STMT(BPF_RET | BPF_X, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { /* Mainly checking JIT here. */ + "M[]: alt STX + LDX", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 100), + BPF_STMT(BPF_STX, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 1), + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 2), + BPF_STMT(BPF_LDX | BPF_MEM, 2), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 3), + BPF_STMT(BPF_LDX | BPF_MEM, 3), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 4), + BPF_STMT(BPF_LDX | BPF_MEM, 4), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 5), + BPF_STMT(BPF_LDX | BPF_MEM, 5), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 6), + BPF_STMT(BPF_LDX | BPF_MEM, 6), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 7), + BPF_STMT(BPF_LDX | BPF_MEM, 7), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 8), + BPF_STMT(BPF_LDX | BPF_MEM, 8), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 9), + BPF_STMT(BPF_LDX | BPF_MEM, 9), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 10), + BPF_STMT(BPF_LDX | BPF_MEM, 10), + BPF_STMT(BPF_MISC | 
BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 11), + BPF_STMT(BPF_LDX | BPF_MEM, 11), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 12), + BPF_STMT(BPF_LDX | BPF_MEM, 12), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 13), + BPF_STMT(BPF_LDX | BPF_MEM, 13), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 14), + BPF_STMT(BPF_LDX | BPF_MEM, 14), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 15), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 116 } }, + }, + { /* Mainly checking JIT here. 
*/ + "M[]: full STX + full LDX", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0xbadfeedb), + BPF_STMT(BPF_STX, 0), + BPF_STMT(BPF_LDX | BPF_IMM, 0xecabedae), + BPF_STMT(BPF_STX, 1), + BPF_STMT(BPF_LDX | BPF_IMM, 0xafccfeaf), + BPF_STMT(BPF_STX, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbffdcedc), + BPF_STMT(BPF_STX, 3), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfbbbdccb), + BPF_STMT(BPF_STX, 4), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfbabcbda), + BPF_STMT(BPF_STX, 5), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaedecbdb), + BPF_STMT(BPF_STX, 6), + BPF_STMT(BPF_LDX | BPF_IMM, 0xadebbade), + BPF_STMT(BPF_STX, 7), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfcfcfaec), + BPF_STMT(BPF_STX, 8), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbcdddbdc), + BPF_STMT(BPF_STX, 9), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfeefdfac), + BPF_STMT(BPF_STX, 10), + BPF_STMT(BPF_LDX | BPF_IMM, 0xcddcdeea), + BPF_STMT(BPF_STX, 11), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaccfaebb), + BPF_STMT(BPF_STX, 12), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbdcccdcf), + BPF_STMT(BPF_STX, 13), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaaedecde), + BPF_STMT(BPF_STX, 14), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfaeacdad), + BPF_STMT(BPF_STX, 15), + BPF_STMT(BPF_LDX | BPF_MEM, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 2), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 3), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 4), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 5), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 6), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 7), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 8), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 9), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 10), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 11), + 
BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 12), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 13), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 14), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x2a5a5e5 } }, + }, + { + "check: SKF_AD_MAX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_MAX), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { /* Passes checker but fails during runtime. */ + "LD [SKF_AD_OFF-1]", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF - 1), + BPF_STMT(BPF_RET | BPF_K, 1), + }, + CLASSIC, + { }, + { { 1, 0 } }, + }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234LL), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, +};
+
+/* Dummy device that every generated test skb points at; its ifindex
+ * and type are (re)written by populate_skb() on each call.
+ */
+static struct net_device dev;
+
+/* Build a test skb holding @size bytes copied from @buf.
+ *
+ * The skb is always allocated with MAX_DATA bytes of room and is then
+ * stamped with the fixed SKB_* test pattern so that filters reading skb
+ * metadata see known values.
+ *
+ * Returns the new skb, or NULL if @size is not below MAX_DATA or the
+ * allocation fails. The skb is released by release_test_data().
+ */
+static struct sk_buff *populate_skb(char *buf, int size)
+{
+	struct sk_buff *skb;
+
+	if (size >= MAX_DATA)
+		return NULL;
+
+	skb = alloc_skb(MAX_DATA, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	memcpy(__skb_put(skb, size), buf, size);
+
+	/* Initialize a fake skb with test pattern. */
+	skb_reset_mac_header(skb);
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = SKB_TYPE;
+	skb->mark = SKB_MARK;
+	skb->hash = SKB_HASH;
+	skb->queue_mapping = SKB_QUEUE_MAP;
+	skb->vlan_tci = SKB_VLAN_TCI;
+	skb->dev = &dev;
+	skb->dev->ifindex = SKB_DEV_IFINDEX;
+	skb->dev->type = SKB_DEV_TYPE;
+	/* Short packets get their network header at the end of the data. */
+	skb_set_network_header(skb, min(size, ETH_HLEN));
+
+	return skb;
+}
+
+/* Produce the input for subtest @sub of @test.
+ *
+ * Returns NULL for tests flagged FLAG_NO_DATA. For all other tests a
+ * NULL return means populate_skb() failed.
+ */
+static void *generate_test_data(struct bpf_test *test, int sub)
+{
+	if (test->aux & FLAG_NO_DATA)
+		return NULL;
+
+	/* Test case expects an skb, so populate one. Various
+	 * subtests generate skbs of different sizes based on
+	 * the same data.
+	 */
+	return populate_skb(test->data, test->test[sub].data_size);
+}
+
+/* Free what generate_test_data() returned; nothing to do for
+ * FLAG_NO_DATA tests, which never had an skb.
+ */
+static void release_test_data(const struct bpf_test *test, void *data)
+{
+	if (test->aux & FLAG_NO_DATA)
+		return;
+
+	kfree_skb(data);
+}
+
+/* Derive a filter's length from its zero padding: scan backwards from
+ * slot MAX_INSNS - 1 to the last entry whose code or k is non-zero and
+ * return its index plus one. The result is never less than 1, because
+ * slot 0 is not examined.
+ */
+static int probe_filter_length(struct sock_filter *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].k != 0)
+			break;
+
+	return len + 1;
+}
+
+/* Turn tests[@which] into a runnable program.
+ *
+ * CLASSIC tests are passed through bpf_prog_create(); INTERNAL tests are
+ * copied into a freshly allocated bpf_prog and handed to
+ * bpf_prog_select_runtime().
+ *
+ * Returns the program with *@err == 0 on success. Returns NULL with
+ * *@err == 0 when a FLAG_EXPECTED_FAIL test was rejected as intended
+ * (the test is complete, "PASS" has been printed). Returns NULL with
+ * *@err < 0 on any real failure.
+ */
+static struct bpf_prog *generate_filter(int which, int *err)
+{
+	struct bpf_prog *fp;
+	struct sock_fprog_kern fprog;
+	/* The length is probed through u.insns for both test types.
+	 * NOTE(review): for INTERNAL tests this presumably relies on
+	 * u.insns and u.insns_int overlaying each other with equally
+	 * sized elements - confirm against struct bpf_test.
+	 */
+	unsigned int flen = probe_filter_length(tests[which].u.insns);
+	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
+
+	switch (test_type) {
+	case CLASSIC:
+		fprog.filter = tests[which].u.insns;
+		fprog.len = flen;
+
+		*err = bpf_prog_create(&fp, &fprog);
+		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
+			if (*err == -EINVAL) {
+				pr_cont("PASS\n");
+				/* Verifier rejected filter as expected. */
+				*err = 0;
+				return NULL;
+			} else {
+				pr_cont("UNEXPECTED_PASS\n");
+				/* Verifier didn't reject the test that's
+				 * bad enough, just return!
+				 */
+				/* If the filter was really created, drop it
+				 * here: the caller only sees NULL and cannot
+				 * free it.
+				 */
+				if (*err == 0)
+					bpf_prog_destroy(fp);
+				*err = -EINVAL;
+				return NULL;
+			}
+		}
+		/* We don't expect to fail. */
+		if (*err) {
+			pr_cont("FAIL to attach err=%d len=%d\n",
+				*err, fprog.len);
+			return NULL;
+		}
+		break;
+
+	case INTERNAL:
+		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
+		if (fp == NULL) {
+			pr_cont("UNEXPECTED_FAIL no memory left\n");
+			*err = -ENOMEM;
+			return NULL;
+		}
+
+		fp->len = flen;
+		memcpy(fp->insnsi, tests[which].u.insns_int,
+		       fp->len * sizeof(struct bpf_insn));
+
+		bpf_prog_select_runtime(fp);
+		break;
+
+	default:
+		/* Never hand back an uninitialized program pointer. */
+		pr_cont("UNEXPECTED_FAIL unknown test type %d\n", test_type);
+		*err = -EINVAL;
+		return NULL;
+	}
+
+	*err = 0;
+	return fp;
+}
+
+/* Release a program from generate_filter() using the destructor that
+ * matches how tests[@which] created it.
+ */
+static void release_filter(struct bpf_prog *fp, int which)
+{
+	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
+
+	switch (test_type) {
+	case CLASSIC:
+		bpf_prog_destroy(fp);
+		break;
+	case INTERNAL:
+		bpf_prog_free(fp);
+		break;
+	}
+}
+
+/* Run @fp on @data @runs times.
+ *
+ * Returns the result of the last run. *@duration receives the average
+ * time per run in nanoseconds; the timestamps themselves are taken in
+ * microseconds, so that is the real resolution.
+ */
+static int __run_one(const struct bpf_prog *fp, const void *data,
+		     int runs, u64 *duration)
+{
+	u64 start, finish;
+	int ret = 0, i;
+
+	start = ktime_to_us(ktime_get());
+
+	for (i = 0; i < runs; i++)
+		ret = BPF_PROG_RUN(fp, data);
+
+	finish = ktime_to_us(ktime_get());
+
+	*duration = (finish - start) * 1000ULL;
+	do_div(*duration, runs);
+
+	return ret;
+}
+
+/* Run every subtest of @test against @fp.
+ *
+ * Prints the average duration of each matching subtest, or the
+ * mismatching values otherwise. Returns the number of failed subtests.
+ */
+static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
+{
+	int err_cnt = 0, i, runs = MAX_TESTRUNS;
+
+	for (i = 0; i < MAX_SUBTESTS; i++) {
+		void *data;
+		u64 duration;
+		u32 ret;
+
+		/* An all-zero entry terminates the subtest list. */
+		if (test->test[i].data_size == 0 &&
+		    test->test[i].result == 0)
+			break;
+
+		data = generate_test_data(test, i);
+		/* Do not run a filter that expects an skb on a NULL one. */
+		if (!data && !(test->aux & FLAG_NO_DATA)) {
+			pr_cont("data generation failed ");
+			err_cnt++;
+			break;
+		}
+		ret = __run_one(fp, data, runs, &duration);
+		release_test_data(test, data);
+
+		if (ret == test->test[i].result) {
+			pr_cont("%llu ", duration);
+		} else {
+			pr_cont("ret %d != %d ", ret,
+				test->test[i].result);
+			err_cnt++;
+		}
+	}
+
+	return err_cnt;
+}
+
+/* Walk the whole tests[] table.
+ *
+ * Returns 0 if every test passed and -EINVAL if at least one failed.
+ * If a filter cannot be set up at all, the error from generate_filter()
+ * is returned immediately and no summary is printed.
+ */
+static __init int test_bpf(void)
+{
+	int i, err_cnt = 0, pass_cnt = 0;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct bpf_prog *fp;
+		int err;
+
+		pr_info("#%d %s ", i, tests[i].descr);
+
+		fp = generate_filter(i, &err);
+		if (fp == NULL) {
+			/* NULL with err == 0: expected rejection, a pass. */
+			if (err == 0) {
+				pass_cnt++;
+				continue;
+			}
+
+			return err;
+		}
+		err = run_one(fp, &tests[i]);
+		release_filter(fp, i);
+
+		if (err) {
+			pr_cont("FAIL (%d times)\n", err);
+			err_cnt++;
+		} else {
+			pr_cont("PASS\n");
+			pass_cnt++;
+		}
+	}
+
+	pr_info("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt);
+	return err_cnt ? -EINVAL : 0;
+}
+
+/* Module init: the test suite runs at load time and its result becomes
+ * the init return value.
+ */
+static int __init test_bpf_init(void)
+{
+	return test_bpf();
+}
+
+/* Nothing to tear down: all resources are released inside test_bpf(). */
+static void __exit test_bpf_exit(void)
+{
+}
+
+module_init(test_bpf_init);
+module_exit(test_bpf_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
new file mode 100644
index 000000000000..86374c1c49a4
--- /dev/null
+++ b/lib/test_firmware.c
@@ -0,0 +1,117 @@
+/*
+ * This module provides an interface to trigger and test firmware loading.
+ *
+ * It is designed to be used for basic evaluation of the firmware loading
+ * subsystem (for example when validating firmware verification). It lacks
+ * any extra dependencies, and will not normally be loaded by the system
+ * unless explicitly requested by name.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/firmware.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/* test_fw_mutex is held around the accesses to test_firmware in the
+ * read and store handlers below.
+ */
+static DEFINE_MUTEX(test_fw_mutex);
+static const struct firmware *test_firmware;
+
+/* read() on the misc device: copy out the currently loaded firmware
+ * image. Returns 0 (end of file) while no image is loaded.
+ */
+static ssize_t test_fw_misc_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *offset)
+{
+	ssize_t rc = 0;
+
+	mutex_lock(&test_fw_mutex);
+	if (test_firmware)
+		rc = simple_read_from_buffer(buf, size, offset,
+					     test_firmware->data,
+					     test_firmware->size);
+	mutex_unlock(&test_fw_mutex);
+	return rc;
+}
+
+static const struct file_operations test_fw_fops = {
+	.owner = THIS_MODULE,
+	.read = test_fw_misc_read,
+};
+
+static struct miscdevice test_fw_misc_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "test_firmware",
+	.fops = &test_fw_fops,
+};
+
+/* sysfs store handler for "trigger_request": drop any previously loaded
+ * image and request the firmware whose name was written.
+ *
+ * The name is used exactly as written (nothing is stripped from @buf).
+ * A failed load is only logged; the write still returns @count.
+ * Returns -ENOMEM if the name buffer cannot be allocated.
+ */
+static ssize_t trigger_request_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int rc;
+	char *name;
+
+	/* One zeroed extra byte keeps the copied name NUL-terminated. */
+	name = kzalloc(count + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+	memcpy(name, buf, count);
+
+	pr_info("loading '%s'\n", name);
+
+	mutex_lock(&test_fw_mutex);
+	release_firmware(test_firmware);
+	test_firmware = NULL;
+	rc = request_firmware(&test_firmware, name, dev);
+	if (rc)
+		pr_info("load of '%s' failed: %d\n", name, rc);
+	pr_info("loaded: %zu\n", test_firmware ? test_firmware->size : 0);
+	mutex_unlock(&test_fw_mutex);
+
+	kfree(name);
+
+	return count;
+}
+/* Write-only attribute; provides dev_attr_trigger_request used below. */
+static DEVICE_ATTR_WO(trigger_request);
+
+/* Register the misc device and attach the trigger_request attribute to
+ * it. The misc device is deregistered again if the attribute cannot be
+ * created.
+ */
+static int __init test_firmware_init(void)
+{
+	int rc;
+
+	rc = misc_register(&test_fw_misc_device);
+	if (rc) {
+		pr_err("could not register misc device: %d\n", rc);
+		return rc;
+	}
+	rc = device_create_file(test_fw_misc_device.this_device,
+				&dev_attr_trigger_request);
+	if (rc) {
+		pr_err("could not create sysfs interface: %d\n", rc);
+		goto dereg;
+	}
+
+	pr_warn("interface ready\n");
+
+	return 0;
+dereg:
+	misc_deregister(&test_fw_misc_device);
+	return rc;
+}
+
+module_init(test_firmware_init);
+
+/* Release the loaded image (if any), then remove the sysfs attribute and
+ * the misc device.
+ */
+static void __exit test_firmware_exit(void)
+{
+	release_firmware(test_firmware);
+	device_remove_file(test_fw_misc_device.this_device,
+			   &dev_attr_trigger_request);
+	misc_deregister(&test_fw_misc_device);
+	pr_warn("removed interface\n");
+}
+
+module_exit(test_firmware_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_module.c b/lib/test_module.c
new file mode 100644
index 000000000000..319b66f1ff61
--- /dev/null
+++ b/lib/test_module.c
@@ -0,0 +1,33 @@
+/*
+ * This module emits "Hello, world" on printk when loaded.
+ *
+ * It is designed to be used for basic evaluation of the module loading
+ * subsystem (for example when validating module signing/verification). It
+ * lacks any extra dependencies, and will not normally be loaded by the
+ * system unless explicitly requested by name.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+/*
+ * Module load hook (registered via module_init() below): logs
+ * "Hello, world" at warning level and always reports success.
+ */
+static int __init test_module_init(void)
+{
+	pr_warn("Hello, world\n");
+
+	return 0;
+}
+
+module_init(test_module_init);
+/*
+ * Module unload hook (registered via module_exit() below): logs
+ * "Goodbye" at warning level. There is no state to tear down.
+ */
+static void __exit test_module_exit(void)
+{
+	pr_warn("Goodbye\n");
+}
+
+module_exit(test_module_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
new file mode 100644
index 000000000000..0ecef3e4690e
--- /dev/null
+++ b/lib/test_user_copy.c
@@ -0,0 +1,110 @@
+/*
+ * Kernel module for testing copy_to/from_user infrastructure.
+ *
+ * Copyright 2013 Google Inc. All Rights Reserved
+ *
+ * Authors:
+ * Kees Cook <keescook@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#define test(condition, msg) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("%s\n", msg); \ + cond; \ +}) + +static int __init test_user_copy_init(void) +{ + int ret = 0; + char *kmem; + char __user *usermem; + char *bad_usermem; + unsigned long user_addr; + unsigned long value = 0x5A; + + kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + if (!kmem) + return -ENOMEM; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= (unsigned long)(TASK_SIZE)) { + pr_warn("Failed to allocate user memory\n"); + kfree(kmem); + return -ENOMEM; + } + + usermem = (char __user *)user_addr; + bad_usermem = (char *)user_addr; + + /* Legitimate usage: none of these should fail. */ + ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + ret |= test(get_user(value, (unsigned long __user *)usermem), + "legitimate get_user failed"); + ret |= test(put_user(value, (unsigned long __user *)usermem), + "legitimate put_user failed"); + + /* Invalid usage: none of these should succeed. 
*/ + ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), + "illegal all-kernel copy_from_user passed"); + ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), + "illegal reversed copy_from_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), + "illegal all-kernel copy_to_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), + "illegal reversed copy_to_user passed"); + ret |= test(!get_user(value, (unsigned long __user *)kmem), + "illegal get_user passed"); + ret |= test(!put_user(value, (unsigned long __user *)kmem), + "illegal put_user passed"); + + vm_munmap(user_addr, PAGE_SIZE * 2); + kfree(kmem); + + if (ret == 0) { + pr_info("tests passed.\n"); + return 0; + } + + return -EINVAL; +} + +module_init(test_user_copy_init); + +static void __exit test_user_copy_exit(void) +{ + pr_info("unloaded.\n"); +} + +module_exit(test_user_copy_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/textsearch.c b/lib/textsearch.c index e0cc0146ae62..0c7e9ab2d88f 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -159,6 +159,7 @@ errout: spin_unlock(&ts_mod_lock); return err; } +EXPORT_SYMBOL(textsearch_register); /** * textsearch_unregister - unregister a textsearch module @@ -190,6 +191,7 @@ out: spin_unlock(&ts_mod_lock); return err; } +EXPORT_SYMBOL(textsearch_unregister); struct ts_linear_state { @@ -236,6 +238,7 @@ unsigned int textsearch_find_continuous(struct ts_config *conf, return textsearch_find(conf, state); } +EXPORT_SYMBOL(textsearch_find_continuous); /** * textsearch_prepare - Prepare a search @@ -298,6 +301,7 @@ errout: return ERR_PTR(err); } +EXPORT_SYMBOL(textsearch_prepare); /** * textsearch_destroy - destroy a search configuration @@ -316,9 +320,4 @@ void textsearch_destroy(struct ts_config *conf) kfree(conf); } - -EXPORT_SYMBOL(textsearch_register); 
-EXPORT_SYMBOL(textsearch_unregister); -EXPORT_SYMBOL(textsearch_prepare); -EXPORT_SYMBOL(textsearch_find_continuous); EXPORT_SYMBOL(textsearch_destroy); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 10909c571494..ba3cd0a35640 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -364,7 +364,6 @@ enum format_type { FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, FORMAT_TYPE_INT, - FORMAT_TYPE_NRCHARS, FORMAT_TYPE_SIZE_T, FORMAT_TYPE_PTRDIFF }; @@ -719,10 +718,15 @@ char *resource_string(char *buf, char *end, struct resource *res, specp = &mem_spec; decode = 0; } - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); + if (decode && res->flags & IORESOURCE_UNSET) { + p = string(p, pend, "size ", str_spec); + p = number(p, pend, resource_size(res), *specp); + } else { + p = number(p, pend, res->start, *specp); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, *specp); + } } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1155,6 +1159,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr, return number(buf, end, *(const netdev_features_t *)addr, spec); } +static noinline_for_stack +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) +{ + unsigned long long num; + + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + switch (fmt[1]) { + case 'd': + num = *(const dma_addr_t *)addr; + spec.field_width = sizeof(dma_addr_t) * 2 + 2; + break; + case 'p': + default: + num = *(const phys_addr_t *)addr; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + break; + } + + return number(buf, end, num, spec); +} + int kptr_restrict __read_mostly; /* @@ -1218,7 +1246,8 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. 
- * - 'a' For a phys_addr_t type and its derivative types (passed by reference) + * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives + * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * @@ -1353,11 +1382,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } break; case 'a': - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; - spec.base = 16; - return number(buf, end, - (unsigned long long) *((phys_addr_t *)ptr), spec); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'D': @@ -1512,10 +1537,6 @@ qualifier: return fmt - start; /* skip alnum */ - case 'n': - spec->type = FORMAT_TYPE_NRCHARS; - return ++fmt - start; - case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; return ++fmt - start; @@ -1538,6 +1559,15 @@ qualifier: case 'u': break; + case 'n': + /* + * Since %n poses a greater security risk than utility, treat + * it as an invalid format specifier. Warn about its use so + * that new instances don't get added. + */ + WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt); + /* Fall-through */ + default: spec->type = FORMAT_TYPE_INVALID; return fmt - start; @@ -1711,20 +1741,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) ++str; break; - case FORMAT_TYPE_NRCHARS: { - /* - * Since %n poses a greater security risk than - * utility, ignore %n and skip its argument. - */ - void *skip_arg; - - WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", - old_fmt); - - skip_arg = va_arg(args, void *); - break; - } - default: switch (spec.type) { case FORMAT_TYPE_LONG_LONG: @@ -1921,7 +1937,7 @@ EXPORT_SYMBOL(sprintf); * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument - * is skiped. + * is skipped. 
* * The return value is the number of words(32bits) which would be generated for * the given input. @@ -1999,19 +2015,6 @@ do { \ fmt++; break; - case FORMAT_TYPE_NRCHARS: { - /* skip %n 's argument */ - u8 qualifier = spec.qualifier; - void *skip_arg; - if (qualifier == 'l') - skip_arg = va_arg(args, long *); - else if (_tolower(qualifier) == 'z') - skip_arg = va_arg(args, size_t *); - else - skip_arg = va_arg(args, int *); - break; - } - default: switch (spec.type) { @@ -2170,10 +2173,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) ++str; break; - case FORMAT_TYPE_NRCHARS: - /* skip */ - break; - default: { unsigned long long num; @@ -2348,7 +2347,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) break; base = 10; - is_sign = 0; + is_sign = false; switch (*fmt++) { case 'c': @@ -2387,7 +2386,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) case 'i': base = 0; case 'd': - is_sign = 1; + is_sign = true; case 'u': break; case '%': diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 08837db52d94..12d2d777f36b 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -9,33 +9,33 @@ config XZ_DEC if XZ_DEC config XZ_DEC_X86 - bool "x86 BCJ filter decoder" - default y if X86 + bool "x86 BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_POWERPC - bool "PowerPC BCJ filter decoder" - default y if PPC + bool "PowerPC BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" - default y if IA64 + bool "IA-64 BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_ARM - bool "ARM BCJ filter decoder" - default y if ARM + bool "ARM BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB - bool "ARM-Thumb BCJ filter decoder" - default y if (ARM && ARM_THUMB) + bool "ARM-Thumb BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_SPARC - bool "SPARC BCJ filter decoder" - default y 
if SPARC + bool "SPARC BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ endif diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index a6cdc969ea42..08c3c8049998 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -1043,6 +1043,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.sequence = SEQ_LZMA_PREPARE; + /* Fall through */ + case SEQ_LZMA_PREPARE: if (s->lzma2.compressed < RC_INIT_BYTES) return XZ_DATA_ERROR; @@ -1053,6 +1055,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.compressed -= RC_INIT_BYTES; s->lzma2.sequence = SEQ_LZMA_RUN; + /* Fall through */ + case SEQ_LZMA_RUN: /* * Set dictionary limit to indicate how much we want diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index d63381e8e333..d20ef458f137 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -250,52 +250,6 @@ int zlib_deflateInit2( } /* ========================================================================= */ -#if 0 -int zlib_deflateSetDictionary( - z_streamp strm, - const Byte *dictionary, - uInt dictLength -) -{ - deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; - - if (strm == NULL || strm->state == NULL || dictionary == NULL) - return Z_STREAM_ERROR; - - s = (deflate_state *) strm->state; - if (s->status != INIT_STATE) return Z_STREAM_ERROR; - - strm->adler = zlib_adler32(strm->adler, dictionary, dictLength); - - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); -#ifndef USE_DICT_HEAD - dictionary += dictLength - length; /* use the tail of the dictionary */ -#endif - } - memcpy((char *)s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. 
- */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); - } - if (hash_head) hash_head = 0; /* to make compiler happy */ - return Z_OK; -} -#endif /* 0 */ - -/* ========================================================================= */ int zlib_deflateReset( z_streamp strm ) @@ -326,45 +280,6 @@ int zlib_deflateReset( return Z_OK; } -/* ========================================================================= */ -#if 0 -int zlib_deflateParams( - z_streamp strm, - int level, - int strategy -) -{ - deflate_state *s; - compress_func func; - int err = Z_OK; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - s = (deflate_state *) strm->state; - - if (level == Z_DEFAULT_COMPRESSION) { - level = 6; - } - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if (func != configuration_table[level].func && strm->total_in != 0) { - /* Flush the last buffer: */ - err = zlib_deflate(strm, Z_PARTIAL_FLUSH); - } - if (s->level != level) { - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return err; -} -#endif /* 0 */ - /* ========================================================================= * Put a short in the pending buffer. The 16-bit value is put in MSB order. * IN assertion: the stream state is correct and there is enough room in @@ -568,64 +483,6 @@ int zlib_deflateEnd( return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } -/* ========================================================================= - * Copy the source state to the destination state. 
- */ -#if 0 -int zlib_deflateCopy ( - z_streamp dest, - z_streamp source -) -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ush *overlay; - deflate_workspace *mem; - - - if (source == NULL || dest == NULL || source->state == NULL) { - return Z_STREAM_ERROR; - } - - ss = (deflate_state *) source->state; - - *dest = *source; - - mem = (deflate_workspace *) dest->workspace; - - ds = &(mem->deflate_memory); - - dest->state = (struct internal_state *) ds; - *ds = *ss; - ds->strm = dest; - - ds->window = (Byte *) mem->window_memory; - ds->prev = (Pos *) mem->prev_memory; - ds->head = (Pos *) mem->head_memory; - overlay = (ush *) mem->overlay_memory; - ds->pending_buf = (uch *) overlay; - - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif -} -#endif /* 0 */ - /* =========================================================================== * Read a new buffer from the current input stream, update the adler32 * and total number of bytes read. 
All deflate() input goes through diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c index f5ce87b0800e..58a733b10387 100644 --- a/lib/zlib_inflate/inflate.c +++ b/lib/zlib_inflate/inflate.c @@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflatePrime(z_streamp strm, int bits, int value) -{ - struct inflate_state *state; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += value << state->bits; - state->bits += bits; - return Z_OK; -} -#endif - int zlib_inflateInit2(z_streamp strm, int windowBits) { struct inflate_state *state; @@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary, - uInt dictLength) -{ - struct inflate_state *state; - unsigned long id; - - /* check state */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (state->wrap != 0 && state->mode != DICT) - return Z_STREAM_ERROR; - - /* check for correct dictionary id */ - if (state->mode == DICT) { - id = zlib_adler32(0L, NULL, 0); - id = zlib_adler32(id, dictionary, dictLength); - if (id != state->check) - return Z_DATA_ERROR; - } - - /* copy dictionary to window */ - zlib_updatewindow(strm, strm->avail_out); - - if (dictLength > state->wsize) { - memcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - memcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } - state->havedict = 1; - return Z_OK; -} -#endif - -#if 0 -/* - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found - or when out of input. 
When called, *have is the number of pattern bytes - found in order so far, in 0..3. On return *have is updated to the new - state. If on return *have equals four, then the pattern was found and the - return value is how many bytes were read including the last byte of the - pattern. If *have is less than four, then the pattern has not been found - yet and the return value is len. In the latter case, zlib_syncsearch() can be - called again with more data and the *have state. *have is initialized to - zero for the first call. - */ -static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf, - unsigned len) -{ - unsigned got; - unsigned next; - - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) - got++; - else if (buf[next]) - got = 0; - else - got = 4 - got; - next++; - } - *have = got; - return next; -} -#endif - -#if 0 -int zlib_inflateSync(z_streamp strm) -{ - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct inflate_state *state; - - /* check parameters */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - zlib_syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if 
(state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; out = strm->total_out; - zlib_inflateReset(strm); - strm->total_in = in; strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} -#endif - /* * This subroutine adds the data at next_in/avail_in to the output history * without performing any output. The output buffer must be "caught up"; |
