85 files changed, 5647 insertions, 1467 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98bc4a3f..54cf309a92a5 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -51,6 +51,9 @@ config PERCPU_RWSEM
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
+config ARCH_HAS_FAST_MULTIPLIER
+	bool
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
@@ -182,6 +185,15 @@ config AUDIT_GENERIC
 	depends on AUDIT && !AUDIT_ARCH
 	default y
 
+config AUDIT_ARCH_COMPAT_GENERIC
+	bool
+	default n
+
+config AUDIT_COMPAT_GENERIC
+	bool
+	depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT
+	default y
+
 config RANDOM32_SELFTEST
 	bool "PRNG perform self test on init"
 	default n
@@ -322,6 +334,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config INTERVAL_TREE
+	boolean
+	help
+	  Simple, embeddable, interval-tree. Can find the start of an
+	  overlapping range in log(n) time and then iterate over all
+	  overlapping nodes. The algorithm is implemented as an
+	  augmented rbtree.
+
+	  See:
+
+		Documentation/rbtree.txt
+
+	  for more information.
+
 config ASSOCIATIVE_ARRAY
 	bool
 	help
@@ -342,9 +368,9 @@ config HAS_IOMEM
 	select GENERIC_IO
 	default y
 
-config HAS_IOPORT
+config HAS_IOPORT_MAP
 	boolean
-	depends on HAS_IOMEM && !NO_IOPORT
+	depends on HAS_IOMEM && !NO_IOPORT_MAP
 	default y
 
 config HAS_DMA
@@ -373,6 +399,39 @@ config CPU_RMAP
 config DQL
 	bool
 
+config GLOB
+	bool
+#	This actually supports modular compilation, but the module overhead
+#	is ridiculous for the amount of code involved.	Until an out-of-tree
+#	driver asks for it, we'll just link it directly it into the kernel
+#	when required.  Since we're ignoring out-of-tree users,	there's also
+#	no need bother prompting for a manual decision:
+#	prompt "glob_match() function"
+	help
+	  This option provides a glob_match function for performing
+	  simple text pattern matching.  It originated in the ATA code
+	  to blacklist particular drive models, but other device drivers
+	  may need similar functionality.
+
+	  All drivers in the Linux kernel tree that require this function
+	  should automatically select this option.  Say N unless you
+	  are compiling an out-of tree driver which tells you that it
+	  depends on this.
+
+config GLOB_SELFTEST
+	bool "glob self-test on init"
+	default n
+	depends on GLOB
+	help
+	  This option enables a simple self-test of the glob_match
+	  function on startup.	It is primarily useful for people
+	  working on the code to ensure they haven't introduced any
+	  regressions.
+
+	  It only adds a little bit of code and slows kernel boot (or
+	  module load) by a small amount, so you're welcome to play with
+	  it, but you probably don't need it.
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
@@ -428,7 +487,8 @@ config MPILIB
 
 config SIGNATURE
 	tristate
-	depends on KEYS && CRYPTO
+	depends on KEYS
+	select CRYPTO
 	select CRYPTO_SHA1
 	select MPILIB
 	help
@@ -451,4 +511,11 @@ config UCS2_STRING
 
 source "lib/fonts/Kconfig"
 
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+	def_bool n
+
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index db25707aa41b..3ac43f34437b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
 	  The behavior is also controlled by the kernel command line
 	  parameter printk.time=1. See Documentation/kernel-parameters.txt
 
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
 	default "4"
@@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options"
 
 config DEBUG_INFO
 	bool "Compile the kernel with debug info"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && !COMPILE_TEST
 	help
           If you say Y here the resulting kernel image will include
 	  debugging info resulting in a larger kernel image.
@@ -143,6 +143,30 @@ config DEBUG_INFO_REDUCED
 	  DEBUG_INFO build and compile times are reduced too.
 	  Only works with newer gcc versions.
 
+config DEBUG_INFO_SPLIT
+	bool "Produce split debuginfo in .dwo files"
+	depends on DEBUG_INFO
+	help
+	  Generate debug info into separate .dwo files. This significantly
+	  reduces the build directory size for builds with DEBUG_INFO,
+	  because it stores the information only once on disk in .dwo
+	  files instead of multiple times in object files and executables.
+	  In addition the debug information is also compressed.
+
+	  Requires recent gcc (4.7+) and recent gdb/binutils.
+	  Any tool that packages or reads debug information would need
+	  to know about the .dwo files and include them.
+	  Incompatible with older versions of ccache.
+
+config DEBUG_INFO_DWARF4
+	bool "Generate dwarf4 debuginfo"
+	depends on DEBUG_INFO
+	help
+	  Generate dwarf4 debug info. This requires recent versions
+	  of gcc and gdb. It makes the debug information larger.
+	  But it significantly improves the success of resolving
+	  variables in gdb on optimized code.
+
 config ENABLE_WARN_DEPRECATED
 	bool "Enable __deprecated logic"
 	default y
@@ -501,12 +525,21 @@ config DEBUG_VM
 
 	  If unsure, say N.
 
+config DEBUG_VM_VMACACHE
+	bool "Debug VMA caching"
+	depends on DEBUG_VM
+	help
+	  Enable this to turn on VMA caching debug information. Doing so
+	  can cause significant overhead, so only enable it in non-production
+	  environments.
+
+	  If unsure, say N.
+
 config DEBUG_VM_RB
 	bool "Debug VM red-black trees"
 	depends on DEBUG_VM
 	help
-	  Enable this to turn on more extended checks in the virtual-memory
-	  system that may impact performance.
+	  Enable VM red-black tree debugging information and extra validations.
 
 	  If unsure, say N.
 
@@ -576,8 +609,8 @@ config DEBUG_HIGHMEM
 	bool "Highmem debugging"
 	depends on DEBUG_KERNEL && HIGHMEM
 	help
-	  This options enables addition error checking for high memory systems.
-	  Disable for production systems.
+	  This option enables additional error checking for high memory
+	  systems.  Disable for production systems.
 
 config HAVE_DEBUG_STACKOVERFLOW
 	bool
@@ -761,6 +794,15 @@ config PANIC_ON_OOPS_VALUE
 	default 0 if !PANIC_ON_OOPS
 	default 1 if PANIC_ON_OOPS
 
+config PANIC_TIMEOUT
+	int "panic timeout"
+	default 0
+	help
+	  Set the timeout value (in seconds) until a reboot occurs when the
+	  the kernel panics. If n = 0, then we wait forever. A timeout
+	  value n > 0 will wait n seconds before rebooting, while a timeout
+	  value n < 0 will reboot immediately.
+
 config SCHED_DEBUG
 	bool "Collect scheduler debugging info"
 	depends on DEBUG_KERNEL && PROC_FS
@@ -815,14 +857,9 @@ config DEBUG_RT_MUTEXES
 	 This allows rt mutex semantics violations and rt mutex related
 	 deadlocks (lockups) to be detected and reported automatically.
 
-config DEBUG_PI_LIST
-	bool
-	default y
-	depends on DEBUG_RT_MUTEXES
-
 config RT_MUTEX_TESTER
 	bool "Built-in scriptable tester for rt-mutexes"
-	depends on DEBUG_KERNEL && RT_MUTEXES
+	depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN
 	help
 	  This option enables a rt-mutex tester.
 
@@ -855,6 +892,10 @@ config DEBUG_WW_MUTEX_SLOWPATH
 	 the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
 	 will test all possible w/w mutex interface abuse with the
 	 exception of simply not acquiring all the required locks.
+	 Note that this feature can introduce significant overhead, so
+	 it really should not be enabled in a production or distro kernel,
+	 even a debug kernel.  If you are a driver writer, enable it.  If
+	 you are a distro, do not.
 
 config DEBUG_LOCK_ALLOC
 	bool "Lock debugging: detect incorrect freeing of live locks"
@@ -917,7 +958,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -971,6 +1012,21 @@ config DEBUG_LOCKING_API_SELFTESTS
 	  The following locking APIs are covered: spinlocks, rwlocks,
 	  mutexes and rwsems.
 
+config LOCK_TORTURE_TEST
+	tristate "torture tests for locking"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	default n
+	help
+	  This option provides a kernel module that runs torture tests
+	  on kernel locking primitives.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want kernel locking-primitive torture tests
+	  to be built into the kernel.
+	  Say M if you want these torture tests to build as a module.
+	  Say N if you are unsure.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
@@ -980,8 +1036,13 @@ config TRACE_IRQFLAGS
 	  either tracing or lock debugging.
 
 config STACKTRACE
-	bool
+	bool "Stack backtrace support"
 	depends on STACKTRACE_SUPPORT
+	help
+	  This option causes the kernel to create a /proc/pid/stack for
+	  every process, showing its current stack trace.
+	  It is also used by various kernel debugging features that require
+	  stack trace generation.
 
 config DEBUG_KOBJECT
 	bool "kobject debugging"
@@ -1021,22 +1082,22 @@ config DEBUG_BUGVERBOSE
 	  of the BUG call as well as the EIP and oops trace.  This aids
 	  debugging but costs about 70-100K of memory.
 
-config DEBUG_WRITECOUNT
-	bool "Debug filesystem writers count"
+config DEBUG_LIST
+	bool "Debug linked list manipulation"
 	depends on DEBUG_KERNEL
 	help
-	  Enable this to catch wrong use of the writers count in struct
-	  vfsmount.  This will increase the size of each file struct by
-	  32 bits.
+	  Enable this to turn on extended checks in the linked-list
+	  walking routines.
 
 	  If unsure, say N.
 
-config DEBUG_LIST
-	bool "Debug linked list manipulation"
+config DEBUG_PI_LIST
+	bool "Debug priority linked list manipulation"
 	depends on DEBUG_KERNEL
 	help
-	  Enable this to turn on extended checks in the linked-list
-	  walking routines.
+	  Enable this to turn on extended checks in the priority-ordered
+	  linked-list (plist) walking routines.  This checks the entire
+	  list multiple times during each manipulation.
 
 	  If unsure, say N.
 
@@ -1103,20 +1164,6 @@ config PROVE_RCU_REPEATEDLY
 
 	 Say N if you are unsure.
 
-config PROVE_RCU_DELAY
-	bool "RCU debugging: preemptible RCU race provocation"
-	depends on DEBUG_KERNEL && PREEMPT_RCU
-	default n
-	help
-	 There is a class of races that involve an unlikely preemption
-	 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
-	 been set to INT_MIN.  This feature inserts a delay at that
-	 point to increase the probability of these races.
-
-	 Say Y to increase probability of preemption of __rcu_read_unlock().
-
-	 Say N if you are unsure.
-
 config SPARSE_RCU_POINTER
 	bool "RCU debugging: sparse-based checks for pointer usage"
 	default n
@@ -1132,9 +1179,14 @@ config SPARSE_RCU_POINTER
 
 	 Say N if you are unsure.
 
+config TORTURE_TEST
+	tristate
+	default n
+
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL
+	select TORTURE_TEST
 	default n
 	help
 	  This option provides a kernel module that runs torture tests
@@ -1375,7 +1427,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
@@ -1478,6 +1530,7 @@ config RBTREE_TEST
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
 	depends on m && DEBUG_KERNEL
+	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
 
@@ -1516,6 +1569,14 @@ config TEST_STRING_HELPERS
 config TEST_KSTRTOX
 	tristate "Test kstrto*() family of functions at runtime"
 
+config TEST_RHASHTABLE
+	bool "Perform selftest on resizable hash table"
+	default n
+	help
+	  Enable this option to test the rhashtable functions at boot.
+
+	  If unsure, say N.
+
 endmenu # runtime tests
 
 config PROVIDE_OHCI1394_DMA_INIT
@@ -1547,17 +1608,6 @@ config PROVIDE_OHCI1394_DMA_INIT
 
 	  See Documentation/debugging-via-ohci1394.txt for more information.
 
-config FIREWIRE_OHCI_REMOTE_DMA
-	bool "Remote debugging over FireWire with firewire-ohci"
-	depends on FIREWIRE_OHCI
-	help
-	  This option lets you use the FireWire bus for remote debugging
-	  with help of the firewire-ohci driver. It enables unfiltered
-	  remote DMA in firewire-ohci.
-	  See Documentation/debugging-via-ohci1394.txt for more information.
-
-	  If unsure, say N.
-
 config BUILD_DOCSRC
 	bool "Build targets in Documentation/ tree"
 	depends on HEADERS_CHECK
@@ -1575,8 +1625,79 @@ config DMA_API_DEBUG
 	  With this option you will be able to detect common bugs in device
 	  drivers like double-freeing of DMA mappings or freeing mappings that
 	  were never allocated.
-	  This option causes a performance degredation.  Use only if you want
-	  to debug device drivers. If unsure, say N.
+
+	  This also attempts to catch cases where a page owned by DMA is
+	  accessed by the cpu in a way that could cause data corruption.  For
+	  example, this enables cow_user_page() to check that the source page is
+	  not undergoing DMA.
+
+	  This option causes a performance degradation.  Use only if you want to
+	  debug device drivers and dma interactions.
+
+	  If unsure, say N.
+
+config TEST_MODULE
+	tristate "Test module loading with 'hello world' module"
+	default n
+	depends on m
+	help
+	  This builds the "test_module" module that emits "Hello, world"
+	  on printk when loaded. It is designed to be used for basic
+	  evaluation of the module loading subsystem (for example when
+	  validating module verification). It lacks any extra dependencies,
+	  and will not normally be loaded by the system unless explicitly
+	  requested by name.
+
+	  If unsure, say N.
+
+config TEST_USER_COPY
+	tristate "Test user/kernel boundary protections"
+	default n
+	depends on m
+	help
+	  This builds the "test_user_copy" module that runs sanity checks
+	  on the copy_to/from_user infrastructure, making sure basic
+	  user/kernel boundary testing is working. If it fails to load,
+	  a regression has been detected in the user/kernel memory boundary
+	  protections.
+
+	  If unsure, say N.
+
+config TEST_BPF
+	tristate "Test BPF filter functionality"
+	default n
+	depends on m && NET
+	help
+	  This builds the "test_bpf" module that runs various test vectors
+	  against the BPF interpreter or BPF JIT compiler depending on the
+	  current setting. This is in particular useful for BPF JIT compiler
+	  development, but also to run regression tests against changes in
+	  the interpreter code. It also enables test stubs for eBPF maps and
+	  verifier used by user space verifier testsuite.
+
+	  If unsure, say N.
+
+config TEST_FIRMWARE
+	tristate "Test firmware loading via userspace interface"
+	default n
+	depends on FW_LOADER
+	help
+	  This builds the "test_firmware" module that creates a userspace
+	  interface for testing firmware loading. This can be used to
+	  control the triggering of firmware loading without needing an
+	  actual firmware-using device. The contents can be rechecked by
+	  userspace.
+
+	  If unsure, say N.
+
+config TEST_UDELAY
+	tristate "udelay test driver"
+	default n
+	help
+	  This builds the "udelay_test" module that helps to make sure
+	  that udelay() is working properly.
+
+	  If unsure, say N.
 
 source "samples/Kconfig"
 
diff --git a/lib/Makefile b/lib/Makefile
index a459c31e8c6b..d6b4bc496408 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,11 +26,15 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o
+	 percpu-refcount.o percpu_ida.o hash.o rhashtable.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
+obj-$(CONFIG_TEST_MODULE) += test_module.o
+obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
+obj-$(CONFIG_TEST_BPF) += test_bpf.o
+obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -43,10 +47,12 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
+GCOV_PROFILE_hweight.o := n
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
@@ -93,6 +99,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
+obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
@@ -130,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_GLOB) += glob.o
+
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
@@ -144,7 +153,8 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
 
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 
-libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o
+libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
+	       fdt_empty_tree.o
 $(foreach file, $(libfdt_files), \
 	$(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt))
 lib-$(CONFIG_LIBFDT) += $(libfdt_files)
@@ -152,8 +162,6 @@ lib-$(CONFIG_LIBFDT) += $(libfdt_files)
 obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
 obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
-interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
-
 obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
 
 obj-$(CONFIG_ASN1) += asn1_decoder.o
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 11b9b01fda6b..1a000bb050f9 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -140,7 +140,7 @@ error:
  * @decoder: The decoder definition (produced by asn1_compiler)
  * @context: The caller's context (to be passed to the action functions)
  * @data: The encoded data
- * @datasize: The size of the encoded data
+ * @datalen: The size of the encoded data
  *
  * Decode BER/DER/CER encoded ASN.1 data according to a bytecode pattern
  * produced by asn1_compiler.  Action functions are called on marked tags to
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 1b6a44f1ec3e..2404d03e251a 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -157,7 +157,7 @@ enum assoc_array_walk_status {
 	assoc_array_walk_tree_empty,
 	assoc_array_walk_found_terminal_node,
 	assoc_array_walk_found_wrong_shortcut,
-} status;
+};
 
 struct assoc_array_walk_result {
 	struct {
@@ -1723,11 +1723,13 @@ ascend_old_tree:
 		shortcut = assoc_array_ptr_to_shortcut(ptr);
 		slot = shortcut->parent_slot;
 		cursor = shortcut->back_pointer;
+		if (!cursor)
+			goto gc_complete;
 	} else {
 		slot = node->parent_slot;
 		cursor = ptr;
 	}
-	BUG_ON(!ptr);
+	BUG_ON(!cursor);
 	node = assoc_array_ptr_to_node(cursor);
 	slot++;
 	goto continue_node;
@@ -1735,7 +1737,7 @@ ascend_old_tree:
 gc_complete:
 	edit->set[0].to = new_root;
 	assoc_array_apply_edit(edit);
-	edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+	array->nr_leaves_on_tree = nr_leaves_on_tree;
 	return 0;
 
 enomem:
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 00bca223d1e1..0211d30d8c39 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -8,6 +8,9 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
@@ -146,18 +149,18 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 #ifdef CONFIG_X86
-	printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
+	pr_info("passed for %s platform %s CX8 and %s SSE\n",
 #ifdef CONFIG_X86_64
-	       "x86-64",
+		"x86-64",
 #elif defined(CONFIG_X86_CMPXCHG64)
-	       "i586+",
+		"i586+",
 #else
-	       "i386+",
+		"i386+",
 #endif
 	       boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
 	       boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
 #else
-	printk(KERN_INFO "atomic64 test passed\n");
+	pr_info("passed\n");
 #endif
 
 	return 0;
diff --git a/lib/audit.c b/lib/audit.c
index 76bbed4a20e5..1d726a22565b 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -30,11 +30,17 @@ static unsigned signal_class[] = {
 
 int audit_classify_arch(int arch)
 {
-	return 0;
+	if (audit_is_compat(arch))
+		return 1;
+	else
+		return 0;
 }
 
 int audit_classify_syscall(int abi, unsigned syscall)
 {
+	if (audit_is_compat(abi))
+		return audit_classify_compat_syscall(abi, syscall);
+
 	switch(syscall) {
 #ifdef __NR_open
 	case __NR_open:
@@ -57,6 +63,13 @@ int audit_classify_syscall(int abi, unsigned syscall)
 
 static int __init audit_classes_init(void)
 {
+#ifdef CONFIG_AUDIT_COMPAT_GENERIC
+	audit_register_class(AUDIT_CLASS_WRITE_32, compat_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, compat_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, compat_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, compat_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, compat_signal_class);
+#endif
 	audit_register_class(AUDIT_CLASS_WRITE, write_class);
 	audit_register_class(AUDIT_CLASS_READ, read_class);
 	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/lib/average.c b/lib/average.c
index 99a67e662b3c..114d1beae0c7 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init);
  */
 struct ewma *ewma_add(struct ewma *avg, unsigned long val)
 {
-	avg->internal = avg->internal  ?
-		(((avg->internal << avg->weight) - avg->internal) +
+	unsigned long internal = ACCESS_ONCE(avg->internal);
+
+	ACCESS_ONCE(avg->internal) = internal ?
+		(((internal << avg->weight) - internal) +
 			(val << avg->factor)) >> avg->weight :
 		(val << avg->factor);
 	return avg;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..cd250a2e14cb 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap[k])
 			return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_empty);
 
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (~bitmap[k])
 			return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
 EXPORT_SYMBOL(__bitmap_full);
 
 int __bitmap_equal(const unsigned long *bitmap1,
-		const unsigned long *bitmap2, int bits)
+		const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] != bitmap2[k])
 			return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		dst[k] = ~src[k];
 
 	if (bits % BITS_PER_LONG)
-		dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+		dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
 EXPORT_SYMBOL(__bitmap_shift_left);
 
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int lim = bits/BITS_PER_LONG;
 	unsigned long result = 0;
 
-	for (k = 0; k < nr; k++)
+	for (k = 0; k < lim; k++)
 		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 
 	for (k = 0; k < nr; k++)
 		dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_or);
 
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);
 
 	for (k = 0; k < nr; k++)
 		dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_xor);
 
 int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+				const unsigned long *bitmap2, unsigned int bits)
 {
-	int k;
-	int nr = BITS_TO_LONGS(bits);
+	unsigned int k;
+	unsigned int lim = bits/BITS_PER_LONG;
 	unsigned long result = 0;
 
-	for (k = 0; k < nr; k++)
+	for (k = 0; k < lim; k++)
 		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	if (bits % BITS_PER_LONG)
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
 int __bitmap_intersects(const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+			const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] & bitmap2[k])
 			return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_intersects);
 
 int __bitmap_subset(const unsigned long *bitmap1,
-				const unsigned long *bitmap2, int bits)
+		    const unsigned long *bitmap2, unsigned int bits)
 {
-	int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] & ~bitmap2[k])
 			return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_subset);
 
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 {
-	int k, w = 0, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = bits/BITS_PER_LONG;
+	int w = 0;
 
 	for (k = 0; k < lim; k++)
 		w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned int size = start + len;
 	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_set >= 0) {
+	while (len - bits_to_set >= 0) {
 		*p |= mask_to_set;
-		nr -= bits_to_set;
+		len -= bits_to_set;
 		bits_to_set = BITS_PER_LONG;
 		mask_to_set = ~0UL;
 		p++;
 	}
-	if (nr) {
+	if (len) {
 		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
 		*p |= mask_to_set;
 	}
 }
 EXPORT_SYMBOL(bitmap_set);
 
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned int size = start + len;
 	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_clear >= 0) {
+	while (len - bits_to_clear >= 0) {
 		*p &= ~mask_to_clear;
-		nr -= bits_to_clear;
+		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_LONG;
 		mask_to_clear = ~0UL;
 		p++;
 	}
-	if (nr) {
+	if (len) {
 		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
 		*p &= ~mask_to_clear;
 	}
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
 
 int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 {
-	char *nl  = strchr(bp, '\n');
-	int len;
-
-	if (nl)
-		len = nl - bp;
-	else
-		len = strlen(bp);
+	char *nl  = strchrnul(bp, '\n');
+	int len = nl - bp;
 
 	return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
 }
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
  *
  * If for example, just bits 4 through 7 are set in @buf, then @pos
  * values 4 through 7 will get mapped to 0 through 3, respectively,
- * and other @pos values will get mapped to 0.  When @pos value 7
+ * and other @pos values will get mapped to -1.  When @pos value 7
  * gets mapped to (returns) @ord value 3 in this example, that means
  * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
  *
@@ -882,7 +884,7 @@ EXPORT_SYMBOL(bitmap_bitremap);
  * read it, you're overqualified for your current job.)
  *
  * In other words, @orig is mapped onto (surjectively) @dst,
- * using the the map { <n, m> | the n-th bit of @relmap is the
+ * using the map { <n, m> | the n-th bit of @relmap is the
  * m-th set bit of @relmap }.
  *
  * Any set bits in @orig above bit number W, where W is the
@@ -930,7 +932,7 @@ EXPORT_SYMBOL(bitmap_bitremap);
  *
  *  Further lets say we use the following code, invoking
  *  bitmap_fold() then bitmap_onto, as suggested above to
- *  avoid the possitility of an empty @dst result:
+ *  avoid the possibility of an empty @dst result:
  *
  *	unsigned long *tmp;	// a temporary bitmap's bits
  *
@@ -1046,7 +1048,7 @@ enum {
 	REG_OP_RELEASE,		/* clear all bits in region */
 };
 
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
 {
 	int nbits_reg;		/* number of bits in region */
 	int index;		/* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
  * Return the bit offset in bitmap of the allocated region,
  * or -errno on failure.
  */
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
 {
-	int pos, end;		/* scans bitmap by regions of size order */
+	unsigned int pos, end;		/* scans bitmap by regions of size order */
 
-	for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+	for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
 		if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
 			continue;
 		__reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
  *
  * No return value.
  */
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
 {
 	__reg_op(bitmap, pos, order, REG_OP_RELEASE);
 }
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
  * Return 0 on success, or %-EBUSY if specified region wasn't
  * free (not all bits were zero).
  */
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
 {
 	if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
 		return -EBUSY;
-	__reg_op(bitmap, pos, order, REG_OP_ALLOC);
-	return 0;
+	return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
 }
 EXPORT_SYMBOL(bitmap_allocate_region);
 
diff --git a/lib/btree.c b/lib/btree.c
index f9a484676cb6..4264871ea1a0 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init);
 
 void btree_destroy(struct btree_head *head)
 {
+	mempool_free(head->node, head->mempool);
 	mempool_destroy(head->mempool);
 	head->mempool = NULL;
 }
diff --git a/lib/bug.c b/lib/bug.c
index 168603477f02..d1d7c7878900 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -37,6 +37,9 @@
 
     Jeremy Fitzhardinge <jeremy@goop.org> 2006
  */
+
+#define pr_fmt(fmt) fmt
+
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -153,15 +156,13 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 
 	if (warning) {
 		/* this is a WARN_ON rather than BUG/BUG_ON */
-		printk(KERN_WARNING "------------[ cut here ]------------\n");
+		pr_warn("------------[ cut here ]------------\n");
 
 		if (file)
-			printk(KERN_WARNING "WARNING: at %s:%u\n",
-			       file, line);
+			pr_warn("WARNING: at %s:%u\n", file, line);
 		else
-			printk(KERN_WARNING "WARNING: at %p "
-			       "[verbose debug info unavailable]\n",
-			       (void *)bugaddr);
+			pr_warn("WARNING: at %p [verbose debug info unavailable]\n",
+				(void *)bugaddr);
 
 		print_modules();
 		show_regs(regs);
@@ -174,12 +175,10 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 	printk(KERN_DEFAULT "------------[ cut here ]------------\n");
 
 	if (file)
-		printk(KERN_CRIT "kernel BUG at %s:%u!\n",
-		       file, line);
+		pr_crit("kernel BUG at %s:%u!\n", file, line);
 	else
-		printk(KERN_CRIT "Kernel BUG at %p "
-		       "[verbose debug info unavailable]\n",
-		       (void *)bugaddr);
+		pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
+			(void *)bugaddr);
 
 	return BUG_TRAP_TYPE_BUG;
 }
diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c
index a8f8379eb49f..2e11e48446ab 100644
--- a/lib/clz_ctz.c
+++ b/lib/clz_ctz.c
@@ -6,6 +6,9 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ * The functions in this file aren't called directly, but are required by
+ * GCC builtins such as __builtin_ctz, and therefore they can't be removed
+ * despite appearing unreferenced in kernel source.
  *
  * __c[lt]z[sd]i2 can be overridden by linking arch-specific versions.
  */
@@ -13,18 +16,22 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 
+int __weak __ctzsi2(int val);
 int __weak __ctzsi2(int val)
 {
 	return __ffs(val);
 }
 EXPORT_SYMBOL(__ctzsi2);
 
+int __weak __clzsi2(int val);
 int __weak __clzsi2(int val)
 {
 	return 32 - fls(val);
 }
 EXPORT_SYMBOL(__clzsi2);
 
+int __weak __clzdi2(long val);
+int __weak __ctzdi2(long val);
 #if BITS_PER_LONG == 32
 
 int __weak __clzdi2(long val)
diff --git a/lib/cmdline.c b/lib/cmdline.c
index eb6791188cf5..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -49,13 +49,13 @@ static int get_range(char **str, int *pint)
  *	3 - hyphen found to denote a range
  */
 
-int get_option (char **str, int *pint)
+int get_option(char **str, int *pint)
 {
 	char *cur = *str;
 
 	if (!cur || !(*cur))
 		return 0;
-	*pint = simple_strtol (cur, str, 0);
+	*pint = simple_strtol(cur, str, 0);
 	if (cur == *str)
 		return 0;
 	if (**str == ',') {
@@ -67,6 +67,7 @@ int get_option (char **str, int *pint)
 
 	return 1;
 }
+EXPORT_SYMBOL(get_option);
 
 /**
  *	get_options - Parse a string into a list of integers
@@ -84,13 +85,13 @@ int get_option (char **str, int *pint)
  *	the parse to end (typically a null terminator, if @str is
  *	completely parseable).
  */
- 
+
 char *get_options(const char *str, int nints, int *ints)
 {
 	int res, i = 1;
 
 	while (i < nints) {
-		res = get_option ((char **)&str, ints + i);
+		res = get_option((char **)&str, ints + i);
 		if (res == 0)
 			break;
 		if (res == 3) {
@@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints)
 	ints[0] = i - 1;
 	return (char *)str;
 }
+EXPORT_SYMBOL(get_options);
 
 /**
  *	memparse - parse a string with mem suffixes into a number
@@ -119,11 +121,7 @@ char *get_options(const char *str, int nints, int *ints)
  *	@retptr: (output) Optional pointer to next char after parse completes
  *
  *	Parses a string into a number.  The number stored at @ptr is
- *	potentially suffixed with %K (for kilobytes, or 1024 bytes),
- *	%M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- *	1073741824).  If the number is suffixed with K, M, or G, then
- *	the return value is the number multiplied by one kilobyte, one
- *	megabyte, or one gigabyte, respectively.
+ *	potentially suffixed with K, M, G, T, P, E.
  */
 
 unsigned long long memparse(const char *ptr, char **retptr)
@@ -133,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
 	unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
 
 	switch (*endptr) {
+	case 'E':
+	case 'e':
+		ret <<= 10;
+	case 'P':
+	case 'p':
+		ret <<= 10;
+	case 'T':
+	case 't':
+		ret <<= 10;
 	case 'G':
 	case 'g':
 		ret <<= 10;
@@ -152,8 +159,4 @@ unsigned long long memparse(const char *ptr, char **retptr)
 
 	return ret;
 }
-
-
 EXPORT_SYMBOL(memparse);
-EXPORT_SYMBOL(get_option);
-EXPORT_SYMBOL(get_options);
diff --git a/lib/compat_audit.c b/lib/compat_audit.c
new file mode 100644
index 000000000000..873f75b640ab
--- /dev/null
+++ b/lib/compat_audit.c
@@ -0,0 +1,50 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <asm/unistd32.h>
+
+unsigned compat_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned compat_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned compat_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned compat_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned compat_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_compat_syscall(int abi, unsigned syscall)
+{
+	switch (syscall) {
+#ifdef __NR_open
+	case __NR_open:
+		return 2;
+#endif
+#ifdef __NR_openat
+	case __NR_openat:
+		return 3;
+#endif
+#ifdef __NR_socketcall
+	case __NR_socketcall:
+		return 4;
+#endif
+	case __NR_execve:
+		return 5;
+	default:
+		return 1;
+	}
+}
diff --git a/lib/cpumask.c b/lib/cpumask.c
index d327b87c99b7..b6513a9f2892 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var);
  */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
-	*mask = alloc_bootmem(cpumask_size());
+	*mask = memblock_virt_alloc(cpumask_size(), 0);
 }
 
 /**
@@ -161,6 +161,69 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	free_bootmem(__pa(mask), cpumask_size());
+	memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ *
+ * @i: index number
+ * @numa_node: local numa_node
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (numa_node == -1 || !cpumask_of_node(numa_node)) {
+		/* Use all online cpu's for non numa aware system */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);
diff --git a/lib/crc32.c b/lib/crc32.c
index 70f00ca5ef1e..9a907d489d95 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -33,13 +33,13 @@
 #include "crc32defs.h"
 
 #if CRC_LE_BITS > 8
-# define tole(x) ((__force u32) __constant_cpu_to_le32(x))
+# define tole(x) ((__force u32) cpu_to_le32(x))
 #else
 # define tole(x) (x)
 #endif
 
 #if CRC_BE_BITS > 8
-# define tobe(x) ((__force u32) __constant_cpu_to_be32(x))
+# define tobe(x) ((__force u32) cpu_to_be32(x))
 #else
 # define tobe(x) (x)
 #endif
@@ -50,34 +50,10 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
-#define GF2_DIM		32
-
-static u32 gf2_matrix_times(u32 *mat, u32 vec)
-{
-	u32 sum = 0;
-
-	while (vec) {
-		if (vec & 1)
-			sum ^= *mat;
-		vec >>= 1;
-		mat++;
-	}
-
-	return sum;
-}
-
-static void gf2_matrix_square(u32 *square, u32 *mat)
-{
-	int i;
-
-	for (i = 0; i < GF2_DIM; i++)
-		square[i] = gf2_matrix_times(mat, mat[i]);
-}
-
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
 /* implements slicing-by-4 or slicing-by-8 algorithm */
-static inline u32
+static inline u32 __pure
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
@@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
 
-/* For conditions of distribution and use, see copyright notice in zlib.h */
-static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
-				 u32 polynomial)
-{
-	u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
-	u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator  */
-	u32 row;
-	int i;
-
-	if (len2 <= 0)
-		return crc1;
-
-	/* Put operator for one zero bit in odd */
-	odd[0] = polynomial;
-	row = 1;
-	for (i = 1; i < GF2_DIM; i++) {
-		odd[i] = row;
-		row <<= 1;
-	}
-
-	gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
-	gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
-
-	/* Apply len2 zeros to crc1 (first square will put the operator for one
-	 * zero byte, eight zero bits, in even).
-	 */
-	do {
-		/* Apply zeros operator for this bit of len2 */
-		gf2_matrix_square(even, odd);
-		if (len2 & 1)
-			crc1 = gf2_matrix_times(even, crc1);
-		len2 >>= 1;
-		/* If no more bits set, then done */
-		if (len2 == 0)
-			break;
-		/* Another iteration of the loop with odd and even swapped */
-		gf2_matrix_square(odd, even);
-		if (len2 & 1)
-			crc1 = gf2_matrix_times(odd, crc1);
-		len2 >>= 1;
-	} while (len2 != 0);
-
-	crc1 ^= crc2;
-	return crc1;
-}
 
 /**
  * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
@@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
-u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(__crc32c_le);
+
+/*
+ * This multiplies the polynomials x and y modulo the given modulus.
+ * This follows the "little-endian" CRC convention that the lsbit
+ * represents the highest power of x, and the msbit represents x^0.
+ */
+static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
 {
-	return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
+	u32 product = x & 1 ? y : 0;
+	int i;
+
+	for (i = 0; i < 31; i++) {
+		product = (product >> 1) ^ (product & 1 ? modulus : 0);
+		x >>= 1;
+		product ^= x & 1 ? y : 0;
+	}
+
+	return product;
 }
 
-u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+/**
+ * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
+ * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
+ * @len: The number of bytes. @crc is multiplied by x^(8*@len)
+ * @polynomial: The modulus used to reduce the result to 32 bits.
+ *
+ * It's possible to parallelize CRC computations by computing a CRC
+ * over separate ranges of a buffer, then summing them.
+ * This shifts the given CRC by 8*len bits (i.e. produces the same effect
+ * as appending len bytes of zero to the data), in time proportional
+ * to log(len).
+ */
+static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
+						   u32 polynomial)
 {
-	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
+	u32 power = polynomial;	/* CRC of x^32 */
+	int i;
+
+	/* Shift up to 32 bits in the simple linear way */
+	for (i = 0; i < 8 * (int)(len & 3); i++)
+		crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
+
+	len >>= 2;
+	if (!len)
+		return crc;
+
+	for (;;) {
+		/* "power" is x^(2^i), modulo the polynomial */
+		if (len & 1)
+			crc = gf2_multiply(crc, power, polynomial);
+
+		len >>= 1;
+		if (!len)
+			break;
+
+		/* Square power, advancing to x^(2^(i+1)) */
+		power = gf2_multiply(power, power, polynomial);
+	}
+
+	return crc;
 }
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(crc32_le_combine);
-EXPORT_SYMBOL(__crc32c_le);
-EXPORT_SYMBOL(__crc32c_le_combine);
+
+u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
+{
+	return crc32_generic_shift(crc, len, CRCPOLY_LE);
+}
+
+u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
+{
+	return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
+}
+EXPORT_SYMBOL(crc32_le_shift);
+EXPORT_SYMBOL(__crc32c_le_shift);
 
 /**
  * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -351,7 +344,7 @@ EXPORT_SYMBOL(crc32_be);
 #ifdef CONFIG_CRC32_SELFTEST
 
 /* 4096 random bytes */
-static u8 __attribute__((__aligned__(8))) test_buf[] =
+static u8 const __aligned(8) test_buf[] __initconst =
 {
 	0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
 	0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
@@ -875,7 +868,7 @@ static struct crc_test {
 	u32 crc_le;	/* expected crc32_le result */
 	u32 crc_be;	/* expected crc32_be result */
 	u32 crc32c_le;	/* expected crc32c_le result */
-} test[] =
+} const test[] __initconst =
 {
 	{0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
 	{0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
diff --git a/lib/crc7.c b/lib/crc7.c
index f1c3a144cec1..bf6255e23919 100644
--- a/lib/crc7.c
+++ b/lib/crc7.c
@@ -10,42 +10,47 @@
 #include <linux/crc7.h>
 
 
-/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */
-const u8 crc7_syndrome_table[256] = {
-	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
-	0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
-	0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26,
-	0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
-	0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
-	0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
-	0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14,
-	0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
-	0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
-	0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
-	0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42,
-	0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
-	0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
-	0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
-	0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70,
-	0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
-	0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e,
-	0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
-	0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67,
-	0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
-	0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
-	0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
-	0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55,
-	0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
-	0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a,
-	0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
-	0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03,
-	0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
-	0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28,
-	0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
-	0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31,
-	0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79
+/*
+ * Table for CRC-7 (polynomial x^7 + x^3 + 1).
+ * This is a big-endian CRC (msbit is highest power of x),
+ * aligned so the msbit of the byte is the x^6 coefficient
+ * and the lsbit is not used.
+ */
+const u8 crc7_be_syndrome_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
+	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
+	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
+	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
+	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
+	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
+	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
+	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
+	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
+	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
+	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
+	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
+	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
-EXPORT_SYMBOL(crc7_syndrome_table);
+EXPORT_SYMBOL(crc7_be_syndrome_table);
 
 /**
  * crc7 - update the CRC7 for the data buffer
@@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table);
  * Context: any
  *
  * Returns the updated CRC7 value.
+ * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that
+ * makes the computation easier, and all callers want it in that form.
+ *
  */
-u8 crc7(u8 crc, const u8 *buffer, size_t len)
+u8 crc7_be(u8 crc, const u8 *buffer, size_t len)
 {
 	while (len--)
-		crc = crc7_byte(crc, *buffer++);
+		crc = crc7_be_byte(crc, *buffer++);
 	return crc;
 }
-EXPORT_SYMBOL(crc7);
+EXPORT_SYMBOL(crc7_be);
 
 MODULE_DESCRIPTION("CRC7 calculations");
 MODULE_LICENSE("GPL");
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index e0731c3db706..547f7f923dbc 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -7,6 +7,9 @@
  *
  * For licencing details see kernel-base/COPYING
  */
+
+#define pr_fmt(fmt) "ODEBUG: " fmt
+
 #include <linux/debugobjects.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
@@ -218,7 +221,7 @@ static void debug_objects_oom(void)
 	unsigned long flags;
 	int i;
 
-	printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n");
+	pr_warn("Out of memory. ODEBUG disabled\n");
 
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
 		raw_spin_lock_irqsave(&db->lock, flags);
@@ -292,11 +295,9 @@ static void debug_object_is_on_stack(void *addr, int onstack)
 
 	limit++;
 	if (is_on_stack)
-		printk(KERN_WARNING
-		       "ODEBUG: object is on stack, but not annotated\n");
+		pr_warn("object is on stack, but not annotated\n");
 	else
-		printk(KERN_WARNING
-		       "ODEBUG: object is not on stack, but annotated\n");
+		pr_warn("object is not on stack, but annotated\n");
 	WARN_ON(1);
 }
 
@@ -985,7 +986,7 @@ static void __init debug_objects_selftest(void)
 	if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
 		goto out;
 #endif
-	printk(KERN_INFO "ODEBUG: selftest passed\n");
+	pr_info("selftest passed\n");
 
 out:
 	debug_objects_fixups = oldfixups;
@@ -1060,8 +1061,8 @@ static int __init debug_objects_replace_static_objects(void)
 	}
 	local_irq_enable();
 
-	printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt,
-	       obj_pool_used);
+	pr_debug("%d of %d active objects replaced\n",
+		 cnt, obj_pool_used);
 	return 0;
 free:
 	hlist_for_each_entry_safe(obj, tmp, &objects, node) {
@@ -1090,7 +1091,7 @@ void __init debug_objects_mem_init(void)
 		debug_objects_enabled = 0;
 		if (obj_cache)
 			kmem_cache_destroy(obj_cache);
-		printk(KERN_WARNING "ODEBUG: out of memory.\n");
+		pr_warn("out of memory.\n");
 	} else
 		debug_objects_selftest();
 }
diff --git a/lib/decompress.c b/lib/decompress.c
index 4d1cd0397aab..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/init.h>
+#include <linux/printk.h>
 
 #ifndef CONFIG_DECOMPRESS_GZIP
 # define gunzip NULL
@@ -53,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
 	{ {0, 0}, NULL, NULL }
 };
 
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
 				const char **name)
 {
 	const struct compress_format *cf;
@@ -61,6 +62,8 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
 	if (len < 2)
 		return NULL;	/* Need at least this much... */
 
+	pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]);
+
 	for (cf = compressed_formats; cf->name; cf++) {
 		if (!memcmp(inbuf, cf->magic, 2))
 			break;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
 	/* State for interrupting output loop */
 	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
 	/* I/O tracking data (file handles, buffers, positions, etc.) */
-	int (*fill)(void*, unsigned int);
-	int inbufCount, inbufPos /*, outbufPos*/;
+	long (*fill)(void*, unsigned long);
+	long inbufCount, inbufPos /*, outbufPos*/;
 	unsigned char *inbuf /*,*outbuf*/;
 	unsigned int inbufBitCount, inbufBits;
 	/* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
 	goto decode_next_byte;
 }
 
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
 {
 	return -1;
 }
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
 /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
    ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
-			     int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+			     long (*fill)(void*, unsigned long))
 {
 	struct bunzip_data *bd;
 	unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 
 /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
    not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
-			int(*fill)(void*, unsigned int),
-			int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+			long (*fill)(void*, unsigned long),
+			long (*flush)(void*, unsigned long),
 			unsigned char *outbuf,
-			int *pos,
+			long *pos,
 			void(*error)(char *x))
 {
 	struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
-			int(*fill)(void*, unsigned int),
-			int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+			long (*fill)(void*, unsigned long),
+			long (*flush)(void*, unsigned long),
 			unsigned char *outbuf,
-			int *pos,
+			long *pos,
 			void(*error)(char *x))
 {
 	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index d619b28c456f..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,6 +19,7 @@
 #include "zlib_inflate/inflate.h"
 
 #include "zlib_inflate/infutil.h"
+#include <linux/decompress/inflate.h>
 
 #endif /* STATIC */
 
@@ -26,17 +27,17 @@
 
 #define GZIP_IOBUF_SIZE (16*1024)
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
 	return -1;
 }
 
 /* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
-		       int(*fill)(void*, unsigned int),
-		       int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+		       long (*fill)(void*, unsigned long),
+		       long (*flush)(void*, unsigned long),
 		       unsigned char *out_buf,
-		       int *pos,
+		       long *pos,
 		       void(*error)(char *x)) {
 	u8 *zbuf;
 	struct z_stream_s *strm;
@@ -141,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 
 		/* Write any data generated */
 		if (flush && strm->next_out > out_buf) {
-			int l = strm->next_out - out_buf;
+			long l = strm->next_out - out_buf;
 			if (l != flush(out_buf, l)) {
 				rc = -1;
 				error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 3e67cfad16ad..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
 #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
 #define ARCHIVE_MAGICNUMBER 0x184C2102
 
-STATIC inline int INIT unlz4(u8 *input, int in_len,
-				int (*fill) (void *, unsigned int),
-				int (*flush) (void *, unsigned int),
-				u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+				long (*fill)(void *, unsigned long),
+				long (*flush)(void *, unsigned long),
+				u8 *output, long *posp,
 				void (*error) (char *x))
 {
 	int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 	u8 *inp;
 	u8 *inp_start;
 	u8 *outp;
-	int size = in_len;
+	long size = in_len;
 #ifdef PREBOOT
 	size_t out_len = get_unaligned_le32(input + in_len);
 #endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 	if (posp)
 		*posp = 0;
 
-	if (fill)
-		fill(inp, 4);
+	if (fill) {
+		size = fill(inp, 4);
+		if (size < 4) {
+			error("data corrupted");
+			goto exit_2;
+		}
+	}
 
 	chunksize = get_unaligned_le32(inp);
 	if (chunksize == ARCHIVE_MAGICNUMBER) {
-		inp += 4;
-		size -= 4;
+		if (!fill) {
+			inp += 4;
+			size -= 4;
+		}
 	} else {
 		error("invalid header");
 		goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 
 	for (;;) {
 
-		if (fill)
-			fill(inp, 4);
+		if (fill) {
+			size = fill(inp, 4);
+			if (size == 0)
+				break;
+			if (size < 4) {
+				error("data corrupted");
+				goto exit_2;
+			}
+		}
 
 		chunksize = get_unaligned_le32(inp);
 		if (chunksize == ARCHIVE_MAGICNUMBER) {
-			inp += 4;
-			size -= 4;
+			if (!fill) {
+				inp += 4;
+				size -= 4;
+			}
 			if (posp)
 				*posp += 4;
 			continue;
 		}
-		inp += 4;
-		size -= 4;
+
 
 		if (posp)
 			*posp += 4;
 
-		if (fill) {
+		if (!fill) {
+			inp += 4;
+			size -= 4;
+		} else {
 			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
-			fill(inp, chunksize);
+			size = fill(inp, chunksize);
+			if (size < chunksize) {
+				error("data corrupted");
+				goto exit_2;
+			}
 		}
 #ifdef PREBOOT
 		if (out_len >= uncomp_chunksize) {
@@ -141,6 +163,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 			goto exit_2;
 		}
 
+		ret = -1;
 		if (flush && flush(outp, dest_len) != dest_len)
 			goto exit_2;
 		if (output)
@@ -148,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
 		if (posp)
 			*posp += chunksize;
 
-		size -= chunksize;
+		if (!fill) {
+			size -= chunksize;
 
-		if (size == 0)
-			break;
-		else if (size < 0) {
-			error("data corrupted");
-			goto exit_2;
+			if (size == 0)
+				break;
+			else if (size < 0) {
+				error("data corrupted");
+				goto exit_2;
+			}
+			inp += chunksize;
 		}
-
-		inp += chunksize;
-		if (fill)
-			inp = inp_start;
 	}
 
 	ret = 0;
@@ -174,11 +196,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
 #define LZMA_IOBUF_SIZE	0x10000
 
 struct rc {
-	int (*fill)(void*, unsigned int);
+	long (*fill)(void*, unsigned long);
 	uint8_t *ptr;
 	uint8_t *buffer;
 	uint8_t *buffer_end;
-	int buffer_size;
+	long buffer_size;
 	uint32_t code;
 	uint32_t range;
 	uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
 {
 	return -1;
 }
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
 
 /* Called once */
 static inline void INIT rc_init(struct rc *rc,
-				       int (*fill)(void*, unsigned int),
-				       char *buffer, int buffer_size)
+				       long (*fill)(void*, unsigned long),
+				       char *buffer, long buffer_size)
 {
 	if (fill)
 		rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
 	size_t buffer_pos;
 	int bufsize;
 	size_t global_pos;
-	int(*flush)(void*, unsigned int);
+	long (*flush)(void*, unsigned long);
 	struct lzma_header *header;
 };
 
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
 
 
 
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
@@ -667,11 +667,11 @@ exit_0:
 }
 
 #ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
-			      int(*fill)(void*, unsigned int),
-			      int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+			      long (*fill)(void*, unsigned long),
+			      long (*flush)(void*, unsigned long),
 			      unsigned char *output,
-			      int *posp,
+			      long *posp,
 			      void(*error)(char *x)
 	)
 {
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
 #define HEADER_SIZE_MIN       (9 + 7     + 4 + 8     + 1       + 4)
 #define HEADER_SIZE_MAX       (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
 
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
 {
 	int l;
 	u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
 	return 1;
 }
 
-STATIC inline int INIT unlzo(u8 *input, int in_len,
-				int (*fill) (void *, unsigned int),
-				int (*flush) (void *, unsigned int),
-				u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+				long (*fill)(void *, unsigned long),
+				long (*flush)(void *, unsigned long),
+				u8 *output, long *posp,
 				void (*error) (char *x))
 {
 	u8 r = 0;
-	int skip = 0;
+	long skip = 0;
 	u32 src_len, dst_len;
 	size_t tmp;
 	u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
  * both input and output buffers are available as a single chunk, i.e. when
  * fill() and flush() won't be used.
  */
-STATIC int INIT unxz(unsigned char *in, int in_size,
-		     int (*fill)(void *dest, unsigned int size),
-		     int (*flush)(void *src, unsigned int size),
-		     unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+		     long (*fill)(void *dest, unsigned long size),
+		     long (*flush)(void *src, unsigned long size),
+		     unsigned char *out, long *in_used,
 		     void (*error)(char *x))
 {
 	struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
 				 * returned by xz_dec_run(), but probably
 				 * it's not too bad.
 				 */
-				if (flush(b.out, b.out_pos) != (int)b.out_pos)
+				if (flush(b.out, b.out_pos) != (long)b.out_pos)
 					ret = XZ_BUF_ERROR;
 
 				b.out_pos = 0;
diff --git a/lib/devres.c b/lib/devres.c
index 823533138fa0..f4a195a6efe4 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(devm_ioremap_nocache);
 void devm_iounmap(struct device *dev, void __iomem *addr)
 {
 	WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match,
-			       (void *)addr));
+			       (__force void *)addr));
 	iounmap(addr);
 }
 EXPORT_SYMBOL(devm_iounmap);
@@ -114,7 +114,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 
 	if (!res || resource_type(res) != IORESOURCE_MEM) {
 		dev_err(dev, "invalid resource\n");
-		return ERR_PTR(-EINVAL);
+		return IOMEM_ERR_PTR(-EINVAL);
 	}
 
 	size = resource_size(res);
@@ -122,7 +122,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 
 	if (!devm_request_mem_region(dev, res->start, size, name)) {
 		dev_err(dev, "can't request region for resource %pR\n", res);
-		return ERR_PTR(-EBUSY);
+		return IOMEM_ERR_PTR(-EBUSY);
 	}
 
 	if (res->flags & IORESOURCE_CACHEABLE)
@@ -133,42 +133,14 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 	if (!dest_ptr) {
 		dev_err(dev, "ioremap failed for resource %pR\n", res);
 		devm_release_mem_region(dev, res->start, size);
-		dest_ptr = ERR_PTR(-ENOMEM);
+		dest_ptr = IOMEM_ERR_PTR(-ENOMEM);
 	}
 
 	return dest_ptr;
 }
 EXPORT_SYMBOL(devm_ioremap_resource);
 
-/**
- * devm_request_and_ioremap() - Check, request region, and ioremap resource
- * @dev: Generic device to handle the resource for
- * @res: resource to be handled
- *
- * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
- * everything is undone on driver detach. Checks arguments, so you can feed
- * it the result from e.g. platform_get_resource() directly. Returns the
- * remapped pointer or NULL on error. Usage example:
- *
- *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- *	base = devm_request_and_ioremap(&pdev->dev, res);
- *	if (!base)
- *		return -EADDRNOTAVAIL;
- */
-void __iomem *devm_request_and_ioremap(struct device *device,
-				       struct resource *res)
-{
-	void __iomem *dest_ptr;
-
-	dest_ptr = devm_ioremap_resource(device, res);
-	if (IS_ERR(dest_ptr))
-		return NULL;
-
-	return dest_ptr;
-}
-EXPORT_SYMBOL(devm_request_and_ioremap);
-
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
 /*
  * Generic iomap devres
  */
@@ -192,7 +164,7 @@ static int devm_ioport_map_match(struct device *dev, void *res,
  * Managed ioport_map().  Map is automatically unmapped on driver
  * detach.
  */
-void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
+void __iomem *devm_ioport_map(struct device *dev, unsigned long port,
 			       unsigned int nr)
 {
 	void __iomem **ptr, *addr;
@@ -224,10 +196,10 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr)
 {
 	ioport_unmap(addr);
 	WARN_ON(devres_destroy(dev, devm_ioport_map_release,
-			       devm_ioport_map_match, (void *)addr));
+			       devm_ioport_map_match, (__force void *)addr));
 }
 EXPORT_SYMBOL(devm_ioport_unmap);
-#endif /* CONFIG_HAS_IOPORT */
+#endif /* CONFIG_HAS_IOPORT_MAP */
 
 #ifdef CONFIG_PCI
 /*
@@ -263,7 +235,7 @@ static void pcim_iomap_release(struct device *gendev, void *res)
  * be safely called without context and guaranteed to succed once
  * allocated.
  */
-void __iomem * const * pcim_iomap_table(struct pci_dev *pdev)
+void __iomem * const *pcim_iomap_table(struct pci_dev *pdev)
 {
 	struct pcim_iomap_devres *dr, *new_dr;
 
@@ -288,7 +260,7 @@ EXPORT_SYMBOL(pcim_iomap_table);
  * Managed pci_iomap().  Map is automatically unmapped on driver
  * detach.
  */
-void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
 {
 	void __iomem **tbl;
 
diff --git a/lib/digsig.c b/lib/digsig.c
index 8793aeda30ca..ae05ea393fc8 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -175,10 +175,11 @@ err1:
  * digsig_verify() - digital signature verification with public key
  * @keyring:	keyring to search key in
  * @sig:	digital signature
- * @sigen:	length of the signature
+ * @siglen:	length of the signature
  * @data:	data
  * @datalen:	length of the data
- * @return:	0 on success, -EINVAL otherwise
+ *
+ * Returns 0 on success, -EINVAL otherwise
  *
  * Verifies data integrity against digital signature.
  * Currently only RSA is supported.
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d87a17a819d0..add80cc02dbe 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
 	struct list_head list;
 	struct device    *dev;
 	int              type;
-	phys_addr_t      paddr;
+	unsigned long	 pfn;
+	size_t		 offset;
 	u64              dev_addr;
 	u64              size;
 	int              direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
 	list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
-					 (unsigned long long)entry->paddr,
+					 phys_addr(entry), entry->pfn,
 					 entry->dev_addr, entry->size,
 					 dir2name[entry->direction],
 					 maperr2str[entry->map_err_type]);
@@ -404,6 +424,176 @@ void debug_dma_dump_mappings(struct device *dev)
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
+ * For each mapping (initial cacheline in the case of
+ * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
+ * scatterlist, or the cacheline specified in dma_map_single) insert
+ * into this tree using the cacheline as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
+ * the entry already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings.  For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the
+ * cacheline idle, but we should also be flagging overlaps as an API
+ * violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree.  In the case of dma_map_page and
+ * dma_alloc_coherent there is only one dma_debug_entry and one
+ * dma_active_cacheline entry to track per event.  dma_map_sg(), on the
+ * other hand, consumes a single dma_debug_entry, but inserts 'nents'
+ * entries into the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if any cachelines in the given page are in the active set.
+ */
+static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
+#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
+
+static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
+{
+	return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
+		(entry->offset >> L1_CACHE_SHIFT);
+}
+
+static int active_cacheline_read_overlap(phys_addr_t cln)
+{
+	int overlap = 0, i;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
+			overlap |= 1 << i;
+	return overlap;
+}
+
+static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
+{
+	int i;
+
+	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
+		return overlap;
+
+	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+		if (overlap & 1 << i)
+			radix_tree_tag_set(&dma_active_cacheline, cln, i);
+		else
+			radix_tree_tag_clear(&dma_active_cacheline, cln, i);
+
+	return overlap;
+}
+
+static void active_cacheline_inc_overlap(phys_addr_t cln)
+{
+	int overlap = active_cacheline_read_overlap(cln);
+
+	overlap = active_cacheline_set_overlap(cln, ++overlap);
+
+	/* If we overflowed the overlap counter then we're potentially
+	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
+	 * balanced then this overflow may cause false negatives in
+	 * debug_dma_assert_idle() as the cacheline may be marked idle
+	 * prematurely.
+	 */
+	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
+		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
+}
+
+static int active_cacheline_dec_overlap(phys_addr_t cln)
+{
+	int overlap = active_cacheline_read_overlap(cln);
+
+	return active_cacheline_set_overlap(cln, --overlap);
+}
+
+static int active_cacheline_insert(struct dma_debug_entry *entry)
+{
+	phys_addr_t cln = to_cacheline_number(entry);
+	unsigned long flags;
+	int rc;
+
+	/* If the device is not writing memory then we don't have any
+	 * concerns about the cpu consuming stale data.  This mitigates
+	 * legitimate usages of overlapping mappings.
+	 */
+	if (entry->direction == DMA_TO_DEVICE)
+		return 0;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
+	if (rc == -EEXIST)
+		active_cacheline_inc_overlap(cln);
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	return rc;
+}
+
+static void active_cacheline_remove(struct dma_debug_entry *entry)
+{
+	phys_addr_t cln = to_cacheline_number(entry);
+	unsigned long flags;
+
+	/* ...mirror the insert case */
+	if (entry->direction == DMA_TO_DEVICE)
+		return;
+
+	spin_lock_irqsave(&radix_lock, flags);
+	/* since we are counting overlaps the final put of the
+	 * cacheline will occur when the overlap count is 0.
+	 * active_cacheline_dec_overlap() returns -1 in that case
+	 */
+	if (active_cacheline_dec_overlap(cln) < 0)
+		radix_tree_delete(&dma_active_cacheline, cln);
+	spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_cacheline tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+	static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
+	struct dma_debug_entry *entry = NULL;
+	void **results = (void **) &ents;
+	unsigned int nents, i;
+	unsigned long flags;
+	phys_addr_t cln;
+
+	if (!page)
+		return;
+
+	cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
+	spin_lock_irqsave(&radix_lock, flags);
+	nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
+				       CACHELINES_PER_PAGE);
+	for (i = 0; i < nents; i++) {
+		phys_addr_t ent_cln = to_cacheline_number(ents[i]);
+
+		if (ent_cln == cln) {
+			entry = ents[i];
+			break;
+		} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
+			break;
+	}
+	spin_unlock_irqrestore(&radix_lock, flags);
+
+	if (!entry)
+		return;
+
+	cln = to_cacheline_number(entry);
+	err_printk(entry->dev, entry,
+		   "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
+		   &cln);
+}
+
+/*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
  */
@@ -411,10 +601,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
 	struct hash_bucket *bucket;
 	unsigned long flags;
+	int rc;
 
 	bucket = get_hash_bucket(entry, &flags);
 	hash_bucket_add(bucket, entry);
 	put_hash_bucket(bucket, &flags);
+
+	rc = active_cacheline_insert(entry);
+	if (rc == -ENOMEM) {
+		pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
+		global_disable = true;
+	}
+
+	/* TODO: report -EEXIST errors here as overlapping mappings are
+	 * not supported by the DMA API
+	 */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +670,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
 	unsigned long flags;
 
+	active_cacheline_remove(entry);
+
 	/*
 	 * add to beginning of the list - this way the entries are
 	 * more likely cache hot when they are reallocated.
@@ -895,15 +1098,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   ref->dev_addr, ref->size,
 			   type2name[entry->type], type2name[ref->type]);
 	} else if ((entry->type == dma_debug_coherent) &&
-		   (ref->paddr != entry->paddr)) {
+		   (phys_addr(ref) != phys_addr(entry))) {
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
 			   "[cpu alloc address=0x%016llx] "
 			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (unsigned long long)entry->paddr,
-			   (unsigned long long)ref->paddr);
+			   phys_addr(entry),
+			   phys_addr(ref));
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -946,7 +1149,7 @@ static void check_unmap(struct dma_debug_entry *ref)
 static void check_for_stack(struct device *dev, void *addr)
 {
 	if (object_is_on_stack(addr))
-		err_printk(dev, NULL, "DMA-API: device driver maps memory from"
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from "
 				"stack [addr=%p]\n", addr);
 }
 
@@ -1052,7 +1255,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
 	entry->dev       = dev;
 	entry->type      = dma_debug_page;
-	entry->paddr     = page_to_phys(page) + offset;
+	entry->pfn	 = page_to_pfn(page);
+	entry->offset	 = offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
@@ -1148,7 +1352,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 		entry->type           = dma_debug_sg;
 		entry->dev            = dev;
-		entry->paddr          = sg_phys(s);
+		entry->pfn	      = page_to_pfn(sg_page(s));
+		entry->offset	      = s->offset,
 		entry->size           = sg_dma_len(s);
 		entry->dev_addr       = sg_dma_address(s);
 		entry->direction      = direction;
@@ -1198,7 +1403,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = dir,
@@ -1233,7 +1439,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
-	entry->paddr     = virt_to_phys(virt);
+	entry->pfn	 = page_to_pfn(virt_to_page(virt));
+	entry->offset	 = (size_t) virt & PAGE_MASK;
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1455,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	struct dma_debug_entry ref = {
 		.type           = dma_debug_coherent,
 		.dev            = dev,
-		.paddr          = virt_to_phys(virt),
+		.pfn		= page_to_pfn(virt_to_page(virt)),
+		.offset		= (size_t) virt & PAGE_MASK,
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1564,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
@@ -1388,7 +1597,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		struct dma_debug_entry ref = {
 			.type           = dma_debug_sg,
 			.dev            = dev,
-			.paddr          = sg_phys(s),
+			.pfn		= page_to_pfn(sg_page(s)),
+			.offset		= s->offset,
 			.dev_addr       = sg_dma_address(s),
 			.size           = sg_dma_len(s),
 			.direction      = direction,
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index f23b63f0a1c3..6745c6230db3 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -23,7 +23,7 @@ static void __dump_stack(void)
 #ifdef CONFIG_SMP
 static atomic_t dump_lock = ATOMIC_INIT(-1);
 
-asmlinkage void dump_stack(void)
+asmlinkage __visible void dump_stack(void)
 {
 	int was_locked;
 	int old;
@@ -55,7 +55,7 @@ retry:
 	preempt_enable();
 }
 #else
-asmlinkage void dump_stack(void)
+asmlinkage __visible void dump_stack(void)
 {
 	__dump_stack();
 }
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index c37aeacd7651..31fe79e31ab8 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -8,6 +8,7 @@
  * By Greg Banks <gnb@melbourne.sgi.com>
  * Copyright (c) 2008 Silicon Graphics Inc.  All Rights Reserved.
  * Copyright (C) 2011 Bart Van Assche.  All Rights Reserved.
+ * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com>
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
@@ -24,6 +25,7 @@
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
+#include <linux/parser.h>
 #include <linux/string_helpers.h>
 #include <linux/uaccess.h>
 #include <linux/dynamic_debug.h>
@@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query,
 	list_for_each_entry(dt, &ddebug_tables, link) {
 
 		/* match against the module name */
-		if (query->module && strcmp(query->module, dt->mod_name))
+		if (query->module &&
+		    !match_wildcard(query->module, dt->mod_name))
 			continue;
 
 		for (i = 0; i < dt->num_ddebugs; i++) {
@@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query,
 
 			/* match against the source filename */
 			if (query->filename &&
-			    strcmp(query->filename, dp->filename) &&
-			    strcmp(query->filename, kbasename(dp->filename)) &&
-			    strcmp(query->filename, trim_prefix(dp->filename)))
+			    !match_wildcard(query->filename, dp->filename) &&
+			    !match_wildcard(query->filename,
+					   kbasename(dp->filename)) &&
+			    !match_wildcard(query->filename,
+					   trim_prefix(dp->filename)))
 				continue;
 
 			/* match against the function */
 			if (query->function &&
-			    strcmp(query->function, dp->function))
+			    !match_wildcard(query->function, dp->function))
 				continue;
 
 			/* match against the format */
@@ -263,14 +268,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
  */
 static inline int parse_lineno(const char *str, unsigned int *val)
 {
-	char *end = NULL;
 	BUG_ON(str == NULL);
 	if (*str == '\0') {
 		*val = 0;
 		return 0;
 	}
-	*val = simple_strtoul(str, &end, 10);
-	if (end == NULL || end == str || *end != '\0') {
+	if (kstrtouint(str, 10, val) < 0) {
 		pr_err("bad line-number: %s\n", str);
 		return -EINVAL;
 	}
@@ -343,14 +346,14 @@ static int ddebug_parse_query(char *words[], int nwords,
 			}
 			if (last)
 				*last++ = '\0';
-			if (parse_lineno(first, &query->first_lineno) < 0) {
-				pr_err("line-number is <0\n");
+			if (parse_lineno(first, &query->first_lineno) < 0)
 				return -EINVAL;
-			}
 			if (last) {
 				/* range <first>-<last> */
-				if (parse_lineno(last, &query->last_lineno)
-				    < query->first_lineno) {
+				if (parse_lineno(last, &query->last_lineno) < 0)
+					return -EINVAL;
+
+				if (query->last_lineno < query->first_lineno) {
 					pr_err("last-line:%d < 1st-line:%d\n",
 						query->last_lineno,
 						query->first_lineno);
@@ -534,10 +537,9 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 	return buf;
 }
 
-int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
+void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 {
 	va_list args;
-	int res;
 	struct va_format vaf;
 	char buf[PREFIX_SIZE];
 
@@ -549,21 +551,17 @@ int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	res = printk(KERN_DEBUG "%s%pV",
-		     dynamic_emit_prefix(descriptor, buf), &vaf);
+	printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_pr_debug);
 
-int __dynamic_dev_dbg(struct _ddebug *descriptor,
+void __dynamic_dev_dbg(struct _ddebug *descriptor,
 		      const struct device *dev, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int res;
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -574,30 +572,27 @@ int __dynamic_dev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (!dev) {
-		res = printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
+		printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
 	} else {
 		char buf[PREFIX_SIZE];
 
-		res = dev_printk_emit(7, dev, "%s%s %s: %pV",
-				      dynamic_emit_prefix(descriptor, buf),
-				      dev_driver_string(dev), dev_name(dev),
-				      &vaf);
+		dev_printk_emit(7, dev, "%s%s %s: %pV",
+				dynamic_emit_prefix(descriptor, buf),
+				dev_driver_string(dev), dev_name(dev),
+				&vaf);
 	}
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_dev_dbg);
 
 #ifdef CONFIG_NET
 
-int __dynamic_netdev_dbg(struct _ddebug *descriptor,
-			 const struct net_device *dev, const char *fmt, ...)
+void __dynamic_netdev_dbg(struct _ddebug *descriptor,
+			  const struct net_device *dev, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	int res;
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -610,21 +605,21 @@ int __dynamic_netdev_dbg(struct _ddebug *descriptor,
 	if (dev && dev->dev.parent) {
 		char buf[PREFIX_SIZE];
 
-		res = dev_printk_emit(7, dev->dev.parent,
-				      "%s%s %s %s: %pV",
-				      dynamic_emit_prefix(descriptor, buf),
-				      dev_driver_string(dev->dev.parent),
-				      dev_name(dev->dev.parent),
-				      netdev_name(dev), &vaf);
+		dev_printk_emit(7, dev->dev.parent,
+				"%s%s %s %s%s: %pV",
+				dynamic_emit_prefix(descriptor, buf),
+				dev_driver_string(dev->dev.parent),
+				dev_name(dev->dev.parent),
+				netdev_name(dev), netdev_reg_state(dev),
+				&vaf);
 	} else if (dev) {
-		res = printk(KERN_DEBUG "%s: %pV", netdev_name(dev), &vaf);
+		printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev),
+		       netdev_reg_state(dev), &vaf);
 	} else {
-		res = printk(KERN_DEBUG "(NULL net_device): %pV", &vaf);
+		printk(KERN_DEBUG "(NULL net_device): %pV", &vaf);
 	}
 
 	va_end(args);
-
-	return res;
 }
 EXPORT_SYMBOL(__dynamic_netdev_dbg);
 
diff --git a/lib/fdt_empty_tree.c b/lib/fdt_empty_tree.c
new file mode 100644
index 000000000000..5d30c58150ad
--- /dev/null
+++ b/lib/fdt_empty_tree.c
@@ -0,0 +1,2 @@
+#include <linux/libfdt_env.h>
+#include "../scripts/dtc/libfdt/fdt_empty_tree.c"
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 6948a6692fc4..2eed22fa507c 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 {
 	struct flex_array *ret;
 	int elems_per_part = 0;
-	int reciprocal_elems = 0;
 	int max_size = 0;
+	struct reciprocal_value reciprocal_elems = { 0 };
 
 	if (element_size) {
 		elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
@@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc);
 static int fa_element_to_part_nr(struct flex_array *fa,
 					unsigned int element_nr)
 {
+	/*
+	 * if element_size == 0 we don't get here, so we never touch
+	 * the zeroed fa->reciprocal_elems, which would yield invalid
+	 * results
+	 */
 	return reciprocal_divide(element_nr, fa->reciprocal_elems);
 }
 
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index ebf3bac460b0..8f25652f40d4 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -34,13 +34,13 @@
  */
 #include <linux/flex_proportions.h>
 
-int fprop_global_init(struct fprop_global *p)
+int fprop_global_init(struct fprop_global *p, gfp_t gfp)
 {
 	int err;
 
 	p->period = 0;
 	/* Use 1 to avoid dealing with periods with 0 events... */
-	err = percpu_counter_init(&p->events, 1);
+	err = percpu_counter_init(&p->events, 1, gfp);
 	if (err)
 		return err;
 	seqcount_init(&p->sequence);
@@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p,
  */
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int fprop_local_init_percpu(struct fprop_local_percpu *pl)
+int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp)
 {
 	int err;
 
-	err = percpu_counter_init(&pl->events, 0);
+	err = percpu_counter_init(&pl->events, 0, gfp);
 	if (err)
 		return err;
 	pl->period = 0;
diff --git a/lib/genalloc.c b/lib/genalloc.c
index dda31168844f..cce4dd68c40d 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(gen_pool_alloc);
  * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
  * @pool: pool to allocate from
  * @size: number of bytes to allocate from the pool
- * @dma: dma-view physical address
+ * @dma: dma-view physical address return value.  Use NULL if unneeded.
  *
  * Allocate the requested number of bytes from the specified pool.
  * Uses the pool allocation function (with first-fit algorithm by default).
@@ -334,7 +334,8 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
 	if (!vaddr)
 		return NULL;
 
-	*dma = gen_pool_virt_to_phys(pool, vaddr);
+	if (dma)
+		*dma = gen_pool_virt_to_phys(pool, vaddr);
 
 	return (void *)vaddr;
 }
@@ -402,6 +403,35 @@ void gen_pool_for_each_chunk(struct gen_pool *pool,
 EXPORT_SYMBOL(gen_pool_for_each_chunk);
 
 /**
+ * addr_in_gen_pool - checks if an address falls within the range of a pool
+ * @pool:	the generic memory pool
+ * @start:	start address
+ * @size:	size of the region
+ *
+ * Check if the range of addresses falls within the specified pool. Returns
+ * true if the entire range is contained in the pool and false otherwise.
+ */
+bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start,
+			size_t size)
+{
+	bool found = false;
+	unsigned long end = start + size;
+	struct gen_pool_chunk *chunk;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) {
+		if (start >= chunk->start_addr && start <= chunk->end_addr) {
+			if (end <= chunk->end_addr) {
+				found = true;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+	return found;
+}
+
+/**
  * gen_pool_avail - get available free space of the pool
  * @pool: pool to get available free space
  *
@@ -480,6 +510,26 @@ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
 EXPORT_SYMBOL(gen_pool_first_fit);
 
 /**
+ * gen_pool_first_fit_order_align - find the first available region
+ * of memory matching the size requirement. The region will be aligned
+ * to the order of the size specified.
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @data: additional data - unused
+ */
+unsigned long gen_pool_first_fit_order_align(unsigned long *map,
+		unsigned long size, unsigned long start,
+		unsigned int nr, void *data)
+{
+	unsigned long align_mask = roundup_pow_of_two(nr) - 1;
+
+	return bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+}
+EXPORT_SYMBOL(gen_pool_first_fit_order_align);
+
+/**
  * gen_pool_best_fit - find the best fitting region of memory
  * macthing the size requirement (no alignment constraint)
  * @map: The address to base the search on
@@ -587,6 +637,7 @@ struct gen_pool *of_get_named_gen_pool(struct device_node *np,
 	if (!np_pool)
 		return NULL;
 	pdev = of_find_device_by_node(np_pool);
+	of_node_put(np_pool);
 	if (!pdev)
 		return NULL;
 	return dev_get_gen_pool(&pdev->dev);
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well.  In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match.  The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns.  Thus, it
+ * does not preprocess the patterns.  It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+	/*
+	 * Backtrack to previous * on mismatch and retry starting one
+	 * character later in the string.  Because * matches all characters
+	 * (no exception for /), it can be easily proved that there's
+	 * never a need to backtrack multiple levels.
+	 */
+	char const *back_pat = NULL, *back_str = back_str;
+
+	/*
+	 * Loop over each token (character or class) in pat, matching
+	 * it against the remaining unmatched tail of str.  Return false
+	 * on mismatch, or true after matching the trailing nul bytes.
+	 */
+	for (;;) {
+		unsigned char c = *str++;
+		unsigned char d = *pat++;
+
+		switch (d) {
+		case '?':	/* Wildcard: anything but nul */
+			if (c == '\0')
+				return false;
+			break;
+		case '*':	/* Any-length wildcard */
+			if (*pat == '\0')	/* Optimize trailing * case */
+				return true;
+			back_pat = pat;
+			back_str = --str;	/* Allow zero-length match */
+			break;
+		case '[': {	/* Character class */
+			bool match = false, inverted = (*pat == '!');
+			char const *class = pat + inverted;
+			unsigned char a = *class++;
+
+			/*
+			 * Iterate over each span in the character class.
+			 * A span is either a single character a, or a
+			 * range a-b.  The first span may begin with ']'.
+			 */
+			do {
+				unsigned char b = a;
+
+				if (a == '\0')	/* Malformed */
+					goto literal;
+
+				if (class[0] == '-' && class[1] != ']') {
+					b = class[1];
+
+					if (b == '\0')
+						goto literal;
+
+					class += 2;
+					/* Any special action if a > b? */
+				}
+				match |= (a <= c && c <= b);
+			} while ((a = *class++) != ']');
+
+			if (match == inverted)
+				goto backtrack;
+			pat = class;
+			}
+			break;
+		case '\\':
+			d = *pat++;
+			/*FALLTHROUGH*/
+		default:	/* Literal character */
+literal:
+			if (c == d) {
+				if (d == '\0')
+					return true;
+				break;
+			}
+backtrack:
+			if (c == '\0' || !back_pat)
+				return false;	/* No point continuing */
+			/* Try again from last *, one character later in str. */
+			pat = back_pat;
+			str = ++back_str;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+	char const *pat, *str;
+	bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+	bool match = glob_match(pat, str);
+	bool success = match == expected;
+
+	/* Can't get string literals into a particular section, so... */
+	static char const msg_error[] __initconst =
+		KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+	static char const msg_ok[] __initconst =
+		KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+	static char const mismatch[] __initconst = "mismatch";
+	char const *message;
+
+	if (!success)
+		message = msg_error;
+	else if (verbose)
+		message = msg_ok;
+	else
+		return success;
+
+	printk(message, pat, str, mismatch + 3*match);
+	return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section.  The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+	/* Some basic tests */
+	"1" "a\0" "a\0"
+	"0" "a\0" "b\0"
+	"0" "a\0" "aa\0"
+	"0" "a\0" "\0"
+	"1" "\0" "\0"
+	"0" "\0" "a\0"
+	/* Simple character class tests */
+	"1" "[a]\0" "a\0"
+	"0" "[a]\0" "b\0"
+	"0" "[!a]\0" "a\0"
+	"1" "[!a]\0" "b\0"
+	"1" "[ab]\0" "a\0"
+	"1" "[ab]\0" "b\0"
+	"0" "[ab]\0" "c\0"
+	"1" "[!ab]\0" "c\0"
+	"1" "[a-c]\0" "b\0"
+	"0" "[a-c]\0" "d\0"
+	/* Corner cases in character class parsing */
+	"1" "[a-c-e-g]\0" "-\0"
+	"0" "[a-c-e-g]\0" "d\0"
+	"1" "[a-c-e-g]\0" "f\0"
+	"1" "[]a-ceg-ik[]\0" "a\0"
+	"1" "[]a-ceg-ik[]\0" "]\0"
+	"1" "[]a-ceg-ik[]\0" "[\0"
+	"1" "[]a-ceg-ik[]\0" "h\0"
+	"0" "[]a-ceg-ik[]\0" "f\0"
+	"0" "[!]a-ceg-ik[]\0" "h\0"
+	"0" "[!]a-ceg-ik[]\0" "]\0"
+	"1" "[!]a-ceg-ik[]\0" "f\0"
+	/* Simple wild cards */
+	"1" "?\0" "a\0"
+	"0" "?\0" "aa\0"
+	"0" "??\0" "a\0"
+	"1" "?x?\0" "axb\0"
+	"0" "?x?\0" "abx\0"
+	"0" "?x?\0" "xab\0"
+	/* Asterisk wild cards (backtracking) */
+	"0" "*??\0" "a\0"
+	"1" "*??\0" "ab\0"
+	"1" "*??\0" "abc\0"
+	"1" "*??\0" "abcd\0"
+	"0" "??*\0" "a\0"
+	"1" "??*\0" "ab\0"
+	"1" "??*\0" "abc\0"
+	"1" "??*\0" "abcd\0"
+	"0" "?*?\0" "a\0"
+	"1" "?*?\0" "ab\0"
+	"1" "?*?\0" "abc\0"
+	"1" "?*?\0" "abcd\0"
+	"1" "*b\0" "b\0"
+	"1" "*b\0" "ab\0"
+	"0" "*b\0" "ba\0"
+	"1" "*b\0" "bb\0"
+	"1" "*b\0" "abb\0"
+	"1" "*b\0" "bab\0"
+	"1" "*bc\0" "abbc\0"
+	"1" "*bc\0" "bc\0"
+	"1" "*bc\0" "bbc\0"
+	"1" "*bc\0" "bcbc\0"
+	/* Multiple asterisks (complex backtracking) */
+	"1" "*ac*\0" "abacadaeafag\0"
+	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
+	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+	"1" "*abcd*\0" "abcabcabcabcdefg\0"
+	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+	"0" "*abcd*\0" "abcabcabcabcefg\0"
+	"0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+	unsigned successes = 0;
+	unsigned n = 0;
+	char const *p = glob_tests;
+	static char const message[] __initconst =
+		KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+	/*
+	 * Tests are jammed together in a string.  The first byte is '1'
+	 * or '0' to indicate the expected outcome, or '\0' to indicate the
+	 * end of the tests.  Then come two null-terminated strings: the
+	 * pattern and the string to match it against.
+	 */
+	while (*p) {
+		bool expected = *p++ & 1;
+		char const *pat = p;
+
+		p += strlen(p) + 1;
+		successes += test(pat, p, expected);
+		p += strlen(p) + 1;
+		n++;
+	}
+
+	n -= successes;
+	printk(message, successes, n);
+
+	/* What's the errno for "kernel bug detected"?  Guess... */
+	return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
diff --git a/lib/hash.c b/lib/hash.c
new file mode 100644
index 000000000000..fea973f4bd57
--- /dev/null
+++ b/lib/hash.c
@@ -0,0 +1,39 @@
+/* General purpose hashing library
+ *
+ * That's a start of a kernel hashing library, which can be extended
+ * with further algorithms in future. arch_fast_hash{2,}() will
+ * eventually resolve to an architecture optimized implementation.
+ *
+ * Copyright 2013 Francesco Fusco <ffusco@redhat.com>
+ * Copyright 2013 Daniel Borkmann <dborkman@redhat.com>
+ * Copyright 2013 Thomas Graf <tgraf@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <linux/jhash.h>
+#include <linux/hash.h>
+#include <linux/cache.h>
+
+static struct fast_hash_ops arch_hash_ops __read_mostly = {
+	.hash  = jhash,
+	.hash2 = jhash2,
+};
+
+u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+	return arch_hash_ops.hash(data, len, seed);
+}
+EXPORT_SYMBOL_GPL(arch_fast_hash);
+
+u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
+{
+	return arch_hash_ops.hash2(data, len, seed);
+}
+EXPORT_SYMBOL_GPL(arch_fast_hash2);
+
+static int __init hashlib_init(void)
+{
+	setup_arch_fast_hash(&arch_hash_ops);
+	return 0;
+}
+early_initcall(hashlib_init);
diff --git a/lib/hweight.c b/lib/hweight.c
index b7d81ba143d1..9a5c1f221558 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -11,7 +11,7 @@
 
 unsigned int __sw_hweight32(unsigned int w)
 {
-#ifdef ARCH_HAS_FAST_MULTIPLIER
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x55555555;
 	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
 	w =  (w + (w >> 4)) & 0x0f0f0f0f;
@@ -49,7 +49,7 @@ unsigned long __sw_hweight64(__u64 w)
 	return __sw_hweight32((unsigned int)(w >> 32)) +
 	       __sw_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
-#ifdef ARCH_HAS_FAST_MULTIPLIER
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x5555555555555555ul;
 	w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
 	w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
diff --git a/lib/idr.c b/lib/idr.c
index bfe4db4e165f..e654aebd5f80 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -18,12 +18,6 @@
  * pointer or what ever, we treat it as a (void *).  You can pass this
  * id to a user for him to pass back at a later time.  You then pass
  * that id to this code and it returns your pointer.
-
- * You can release ids at any time. When all ids are released, most of
- * the memory is returned (we keep MAX_IDR_FREE) in a local pool so we
- * don't need to go to the memory "store" during an id allocate, just
- * so you don't need to be too concerned about locking and conflicts
- * with the slab allocator.
  */
 
 #ifndef TEST                        // to test in user space...
@@ -151,7 +145,7 @@ static void idr_layer_rcu_free(struct rcu_head *head)
 
 static inline void free_layer(struct idr *idr, struct idr_layer *p)
 {
-	if (idr->hint && idr->hint == p)
+	if (idr->hint == p)
 		RCU_INIT_POINTER(idr->hint, NULL);
 	call_rcu(&p->rcu_head, idr_layer_rcu_free);
 }
@@ -196,7 +190,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
 	}
 }
 
-int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
+static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 {
 	while (idp->id_free_cnt < MAX_IDR_FREE) {
 		struct idr_layer *new;
@@ -207,7 +201,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 	}
 	return 1;
 }
-EXPORT_SYMBOL(__idr_pre_get);
 
 /**
  * sub_alloc - try to allocate an id without growing the tree depth
@@ -250,7 +243,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
 			/* if already at the top layer, we need to grow */
-			if (id >= 1 << (idp->layers * IDR_BITS)) {
+			if (id > idr_max(idp->layers)) {
 				*starting_id = id;
 				return -EAGAIN;
 			}
@@ -374,20 +367,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id,
 	idr_mark_full(pa, id);
 }
 
-int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
-{
-	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-	int rv;
-
-	rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp);
-	if (rv < 0)
-		return rv == -ENOMEM ? -EAGAIN : rv;
-
-	idr_fill_slot(idp, ptr, rv, pa);
-	*id = rv;
-	return 0;
-}
-EXPORT_SYMBOL(__idr_get_new_above);
 
 /**
  * idr_preload - preload for idr_alloc()
@@ -548,7 +527,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 	n = id & IDR_MASK;
 	if (likely(p != NULL && test_bit(n, p->bitmap))) {
 		__clear_bit(n, p->bitmap);
-		rcu_assign_pointer(p->ary[n], NULL);
+		RCU_INIT_POINTER(p->ary[n], NULL);
 		to_free = NULL;
 		while(*paa && ! --((**paa)->count)){
 			if (to_free)
@@ -577,6 +556,11 @@ void idr_remove(struct idr *idp, int id)
 	if (id < 0)
 		return;
 
+	if (id > idr_max(idp->layers)) {
+		idr_remove_warning(id);
+		return;
+	}
+
 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
 	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
 	    idp->top->ary[0]) {
@@ -594,20 +578,10 @@ void idr_remove(struct idr *idp, int id)
 		bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
 		free_layer(idp, to_free);
 	}
-	while (idp->id_free_cnt >= MAX_IDR_FREE) {
-		p = get_from_free_list(idp);
-		/*
-		 * Note: we don't call the rcu callback here, since the only
-		 * layers that fall into the freelist are those that have been
-		 * preallocated.
-		 */
-		kmem_cache_free(idr_layer_cache, p);
-	}
-	return;
 }
 EXPORT_SYMBOL(idr_remove);
 
-void __idr_remove_all(struct idr *idp)
+static void __idr_remove_all(struct idr *idp)
 {
 	int n, id, max;
 	int bt_mask;
@@ -616,31 +590,31 @@ void __idr_remove_all(struct idr *idp)
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = idp->top;
-	rcu_assign_pointer(idp->top, NULL);
+	*paa = idp->top;
+	RCU_INIT_POINTER(idp->top, NULL);
 	max = idr_max(idp->layers);
 
 	id = 0;
 	while (id >= 0 && id <= max) {
+		p = *paa;
 		while (n > IDR_BITS && p) {
 			n -= IDR_BITS;
-			*paa++ = p;
 			p = p->ary[(id >> n) & IDR_MASK];
+			*++paa = p;
 		}
 
 		bt_mask = id;
 		id += 1 << n;
 		/* Get the highest bit that the above add changed from 0->1. */
 		while (n < fls(id ^ bt_mask)) {
-			if (p)
-				free_layer(idp, p);
+			if (*paa)
+				free_layer(idp, *paa);
 			n += IDR_BITS;
-			p = *--paa;
+			--paa;
 		}
 	}
 	idp->layers = 0;
 }
-EXPORT_SYMBOL(__idr_remove_all);
 
 /**
  * idr_destroy - release all cached layers within an idr tree
@@ -652,7 +626,7 @@ EXPORT_SYMBOL(__idr_remove_all);
  * idr_destroy().
  *
  * A typical clean-up sequence for objects stored in an idr tree will use
- * idr_for_each() to free all objects, if necessay, then idr_destroy() to
+ * idr_for_each() to free all objects, if necessary, then idr_destroy() to
  * free up the id mappings and cached idr_layers.
  */
 void idr_destroy(struct idr *idp)
@@ -719,15 +693,16 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = rcu_dereference_raw(idp->top);
+	*paa = rcu_dereference_raw(idp->top);
 	max = idr_max(idp->layers);
 
 	id = 0;
 	while (id >= 0 && id <= max) {
+		p = *paa;
 		while (n > 0 && p) {
 			n -= IDR_BITS;
-			*paa++ = p;
 			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+			*++paa = p;
 		}
 
 		if (p) {
@@ -739,7 +714,7 @@ int idr_for_each(struct idr *idp,
 		id += 1 << n;
 		while (n < fls(id)) {
 			n += IDR_BITS;
-			p = *--paa;
+			--paa;
 		}
 	}
 
@@ -767,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	int n, max;
 
 	/* find first ent */
-	p = rcu_dereference_raw(idp->top);
+	p = *paa = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 	n = (p->layer + 1) * IDR_BITS;
 	max = idr_max(p->layer + 1);
 
 	while (id >= 0 && id <= max) {
+		p = *paa;
 		while (n > 0 && p) {
 			n -= IDR_BITS;
-			*paa++ = p;
 			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+			*++paa = p;
 		}
 
 		if (p) {
@@ -795,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 		id = round_up(id + 1, 1 << n);
 		while (n < fls(id)) {
 			n += IDR_BITS;
-			p = *--paa;
+			--paa;
 		}
 	}
 	return NULL;
@@ -825,14 +801,12 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 
 	p = idp->top;
 	if (!p)
-		return ERR_PTR(-EINVAL);
-
-	n = (p->layer+1) * IDR_BITS;
+		return ERR_PTR(-ENOENT);
 
-	if (id >= (1 << n))
-		return ERR_PTR(-EINVAL);
+	if (id > idr_max(p->layer + 1))
+		return ERR_PTR(-ENOENT);
 
-	n -= IDR_BITS;
+	n = p->layer * IDR_BITS;
 	while ((n > 0) && p) {
 		p = p->ary[(id >> n) & IDR_MASK];
 		n -= IDR_BITS;
@@ -869,6 +843,16 @@ void idr_init(struct idr *idp)
 }
 EXPORT_SYMBOL(idr_init);
 
+static int idr_has_entry(int id, void *p, void *data)
+{
+	return 1;
+}
+
+bool idr_is_empty(struct idr *idp)
+{
+	return !idr_for_each(idp, idr_has_entry, NULL);
+}
+EXPORT_SYMBOL(idr_is_empty);
 
 /**
  * DOC: IDA description
@@ -1033,6 +1017,9 @@ void ida_remove(struct ida *ida, int id)
 	int n;
 	struct ida_bitmap *bitmap;
 
+	if (idr_id > idr_max(ida->idr.layers))
+		goto err;
+
 	/* clear full bits while looking up the leaf idr_layer */
 	while ((shift > 0) && p) {
 		n = (idr_id >> shift) & IDR_MASK;
@@ -1048,7 +1035,7 @@ void ida_remove(struct ida *ida, int id)
 	__clear_bit(n, p->bitmap);
 
 	bitmap = (void *)p->ary[n];
-	if (!test_bit(offset, bitmap->bitmap))
+	if (!bitmap || !test_bit(offset, bitmap->bitmap))
 		goto err;
 
 	/* update bitmap and remove it if empty */
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index e6eb406f2d65..f367f9ad544c 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,6 +1,7 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/module.h>
 
 #define START(node) ((node)->start)
 #define LAST(node)  ((node)->last)
@@ -8,3 +9,8 @@
 INTERVAL_TREE_DEFINE(struct interval_tree_node, rb,
 		     unsigned long, __subtree_last,
 		     START, LAST,, interval_tree)
+
+EXPORT_SYMBOL_GPL(interval_tree_insert);
+EXPORT_SYMBOL_GPL(interval_tree_remove);
+EXPORT_SYMBOL_GPL(interval_tree_iter_first);
+EXPORT_SYMBOL_GPL(interval_tree_iter_next);
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test.c
index 245900b98c8e..245900b98c8e 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test.c
diff --git a/lib/iomap.c b/lib/iomap.c
index 2c08f36862eb..fc3dcb4b238e 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep);
 EXPORT_SYMBOL(iowrite16_rep);
 EXPORT_SYMBOL(iowrite32_rep);
 
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
 /* Create a virtual mapping cookie for an IO port range */
 void __iomem *ioport_map(unsigned long port, unsigned int nr)
 {
@@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr)
 }
 EXPORT_SYMBOL(ioport_map);
 EXPORT_SYMBOL(ioport_unmap);
-#endif /* CONFIG_HAS_IOPORT */
+#endif /* CONFIG_HAS_IOPORT_MAP */
 
 #ifdef CONFIG_PCI
 /* Hide the details if this is a MMIO or PIO address space and just do what
diff --git a/lib/iovec.c b/lib/iovec.c
index 454baa88bf27..df3abd1eaa4a 100644
--- a/lib/iovec.c
+++ b/lib/iovec.c
@@ -51,3 +51,62 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 	return 0;
 }
 EXPORT_SYMBOL(memcpy_toiovec);
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+		      int offset, int len)
+{
+	int copy;
+	for (; len > 0; ++iov) {
+		/* Skip over the finished iovecs */
+		if (unlikely(offset >= iov->iov_len)) {
+			offset -= iov->iov_len;
+			continue;
+		}
+		copy = min_t(unsigned int, iov->iov_len - offset, len);
+		if (copy_to_user(iov->iov_base + offset, kdata, copy))
+			return -EFAULT;
+		offset = 0;
+		kdata += copy;
+		len -= copy;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovecend);
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ */
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len)
+{
+	/* No data? Done! */
+	if (len == 0)
+		return 0;
+
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovecend);
diff --git a/lib/kfifo.c b/lib/kfifo.c
index d79b9d222065..90ba1eb1df06 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r);
 unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
 	struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
 {
-	if (!nents)
-		BUG();
+	BUG_ON(!nents);
 
 	len = __kfifo_max_r(len, recsize);
 
@@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
 unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
 	struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
 {
-	if (!nents)
-		BUG();
+	BUG_ON(!nents);
 
 	len = __kfifo_max_r(len, recsize);
 
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
 EXPORT_SYMBOL_GPL(klist_add_tail);
 
 /**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
  * @n: node we're adding.
  * @pos: node to put @n after
  */
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
 {
 	struct klist *k = knode_klist(pos);
 
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
 	list_add(&n->n_node, &pos->n_node);
 	spin_unlock(&k->k_lock);
 }
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
 
 /**
  * klist_add_before - Init a klist_node and add it before an existing node
diff --git a/lib/kobject.c b/lib/kobject.c
index 5b4b8886435e..58751bb80a7c 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -13,11 +13,11 @@
  */
 
 #include <linux/kobject.h>
-#include <linux/kobj_completion.h>
 #include <linux/string.h>
 #include <linux/export.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
+#include <linux/random.h>
 
 /**
  * kobject_namespace - return @kobj's namespace tag
@@ -65,13 +65,17 @@ static int populate_dir(struct kobject *kobj)
 
 static int create_dir(struct kobject *kobj)
 {
+	const struct kobj_ns_type_operations *ops;
 	int error;
 
 	error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
-	if (!error) {
-		error = populate_dir(kobj);
-		if (error)
-			sysfs_remove_dir(kobj);
+	if (error)
+		return error;
+
+	error = populate_dir(kobj);
+	if (error) {
+		sysfs_remove_dir(kobj);
+		return error;
 	}
 
 	/*
@@ -80,7 +84,20 @@ static int create_dir(struct kobject *kobj)
 	 */
 	sysfs_get(kobj->sd);
 
-	return error;
+	/*
+	 * If @kobj has ns_ops, its children need to be filtered based on
+	 * their namespace tags.  Enable namespace support on @kobj->sd.
+	 */
+	ops = kobj_child_ns_ops(kobj);
+	if (ops) {
+		BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE);
+		BUG_ON(ops->type >= KOBJ_NS_TYPES);
+		BUG_ON(!kobj_ns_type_registered(ops->type));
+
+		sysfs_enable_ns(kobj->sd);
+	}
+
+	return 0;
 }
 
 static int get_kobj_path_length(struct kobject *kobj)
@@ -247,8 +264,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
 		return 0;
 
 	kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
-	if (!kobj->name)
+	if (!kobj->name) {
+		kobj->name = old_name;
 		return -ENOMEM;
+	}
 
 	/* ewww... some of these buggers have '/' in the name ... */
 	while ((s = strchr(kobj->name, '/')))
@@ -346,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent,
  *
  * If @parent is set, then the parent of the @kobj will be set to it.
  * If @parent is NULL, then the parent of the @kobj will be set to the
- * kobject associted with the kset assigned to this kobject.  If no kset
+ * kobject associated with the kset assigned to this kobject.  If no kset
  * is assigned to the kobject, then the kobject will be located in the
  * root of the sysfs tree.
  *
@@ -536,7 +555,7 @@ out:
  */
 void kobject_del(struct kobject *kobj)
 {
-	struct sysfs_dirent *sd;
+	struct kernfs_node *sd;
 
 	if (!kobj)
 		return;
@@ -625,10 +644,12 @@ static void kobject_release(struct kref *kref)
 {
 	struct kobject *kobj = container_of(kref, struct kobject, kref);
 #ifdef CONFIG_DEBUG_KOBJECT_RELEASE
-	pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n",
-		 kobject_name(kobj), kobj, __func__, kobj->parent);
+	unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
+	pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
+		 kobject_name(kobj), kobj, __func__, kobj->parent, delay);
 	INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
-	schedule_delayed_work(&kobj->release, HZ);
+
+	schedule_delayed_work(&kobj->release, delay);
 #else
 	kobject_cleanup(kobj);
 #endif
@@ -758,55 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= kobj_attr_show,
 	.store	= kobj_attr_store,
 };
-
-/**
- * kobj_completion_init - initialize a kobj_completion object.
- * @kc: kobj_completion
- * @ktype: type of kobject to initialize
- *
- * kobj_completion structures can be embedded within structures with different
- * lifetime rules.  During the release of the enclosing object, we can
- * wait on the release of the kobject so that we don't free it while it's
- * still busy.
- */
-void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
-{
-	init_completion(&kc->kc_unregister);
-	kobject_init(&kc->kc_kobj, ktype);
-}
-EXPORT_SYMBOL_GPL(kobj_completion_init);
-
-/**
- * kobj_completion_release - release a kobj_completion object
- * @kobj: kobject embedded in kobj_completion
- *
- * Used with kobject_release to notify waiters that the kobject has been
- * released.
- */
-void kobj_completion_release(struct kobject *kobj)
-{
-	struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
-	complete(&kc->kc_unregister);
-}
-EXPORT_SYMBOL_GPL(kobj_completion_release);
-
-/**
- * kobj_completion_del_and_wait - release the kobject and wait for it
- * @kc: kobj_completion object to release
- *
- * Delete the kobject from sysfs and drop the reference count.  Then wait
- * until any other outstanding references are also dropped.  This routine
- * is only necessary once other references may have been taken on the
- * kobject.  Typically this happens when the kobject has been published
- * to sysfs via kobject_add.
- */
-void kobj_completion_del_and_wait(struct kobj_completion *kc)
-{
-	kobject_del(&kc->kc_kobj);
-	kobject_put(&kc->kc_kobj);
-	wait_for_completion(&kc->kc_unregister);
-}
-EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
+EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
 
 /**
  * kset_register - initialize and add a kset.
@@ -835,6 +808,7 @@ void kset_unregister(struct kset *k)
 {
 	if (!k)
 		return;
+	kobject_del(&k->kobj);
 	kobject_put(&k->kobj);
 }
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 52e5abbc41db..9ebf9e20de53 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -29,7 +29,9 @@
 
 
 u64 uevent_seqnum;
+#ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
+#endif
 #ifdef CONFIG_NET
 struct uevent_sock {
 	struct list_head list;
@@ -88,11 +90,17 @@ out:
 #ifdef CONFIG_NET
 static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 {
-	struct kobject *kobj = data;
+	struct kobject *kobj = data, *ksobj;
 	const struct kobj_ns_type_operations *ops;
 
 	ops = kobj_ns_ops(kobj);
-	if (ops) {
+	if (!ops && kobj->kset) {
+		ksobj = &kobj->kset->kobj;
+		if (ksobj->parent != NULL)
+			ops = kobj_ns_ops(ksobj->parent);
+	}
+
+	if (ops && ops->netlink_ns && kobj->ktype->namespace) {
 		const void *sock_ns, *ns;
 		ns = kobj->ktype->namespace(kobj);
 		sock_ns = ops->netlink_ns(dsk);
@@ -103,6 +111,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 }
 #endif
 
+#ifdef CONFIG_UEVENT_HELPER
 static int kobj_usermode_filter(struct kobject *kobj)
 {
 	const struct kobj_ns_type_operations *ops;
@@ -118,6 +127,31 @@ static int kobj_usermode_filter(struct kobject *kobj)
 	return 0;
 }
 
+static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem)
+{
+	int len;
+
+	len = strlcpy(&env->buf[env->buflen], subsystem,
+		      sizeof(env->buf) - env->buflen);
+	if (len >= (sizeof(env->buf) - env->buflen)) {
+		WARN(1, KERN_ERR "init_uevent_argv: buffer size too small\n");
+		return -ENOMEM;
+	}
+
+	env->argv[0] = uevent_helper;
+	env->argv[1] = &env->buf[env->buflen];
+	env->argv[2] = NULL;
+
+	env->buflen += len + 1;
+	return 0;
+}
+
+static void cleanup_uevent_env(struct subprocess_info *info)
+{
+	kfree(info->data);
+}
+#endif
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -293,13 +327,11 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 #endif
 	mutex_unlock(&uevent_sock_mutex);
 
+#ifdef CONFIG_UEVENT_HELPER
 	/* call uevent_helper, usually only enabled during early boot */
 	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
-		char *argv [3];
+		struct subprocess_info *info;
 
-		argv [0] = uevent_helper;
-		argv [1] = (char *)subsystem;
-		argv [2] = NULL;
 		retval = add_uevent_var(env, "HOME=/");
 		if (retval)
 			goto exit;
@@ -307,10 +339,20 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 					"PATH=/sbin:/bin:/usr/sbin:/usr/bin");
 		if (retval)
 			goto exit;
+		retval = init_uevent_argv(env, subsystem);
+		if (retval)
+			goto exit;
 
-		retval = call_usermodehelper(argv[0], argv,
-					     env->envp, UMH_WAIT_EXEC);
+		retval = -ENOMEM;
+		info = call_usermodehelper_setup(env->argv[0], env->argv,
+						 env->envp, GFP_KERNEL,
+						 NULL, cleanup_uevent_env, env);
+		if (info) {
+			retval = call_usermodehelper_exec(info, UMH_NO_WAIT);
+			env = NULL;	/* freed by cleanup_uevent_env */
+		}
 	}
+#endif
 
 exit:
 	kfree(devpath);
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index f78ae0c0c4e2..ec8da78df9be 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 	rv = _parse_integer(s, base, &_res);
 	if (rv & KSTRTOX_OVERFLOW)
 		return -ERANGE;
-	rv &= ~KSTRTOX_OVERFLOW;
 	if (rv == 0)
 		return -EINVAL;
 	s += rv;
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 244f5480c898..b3131f5cf8a2 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -62,10 +62,7 @@ EXPORT_SYMBOL(crc32c);
 static int __init libcrc32c_mod_init(void)
 {
 	tfm = crypto_alloc_shash("crc32c", 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(tfm);
 }
 
 static void __exit libcrc32c_mod_fini(void)
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
 				struct list_head *a, struct list_head *b)
 {
 	struct list_head *tail = head;
+	u8 count = 0;
 
 	while (a && b) {
 		/* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
 		 * element comparison is needed, so the client's cmp()
 		 * routine can invoke cond_resched() periodically.
 		 */
-		(*cmp)(priv, tail->next, tail->next);
+		if (unlikely(!(++count)))
+			(*cmp)(priv, tail->next, tail->next);
 
 		tail->next->prev = tail;
 		tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
 		}
 		if (lev > max_lev) {
 			if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
-				printk_once(KERN_DEBUG "list passed to"
-					" list_sort() too long for"
-					" efficiency\n");
+				printk_once(KERN_DEBUG "list too long for efficiency\n");
 				lev--;
 			}
 			max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
 static int __init check(struct debug_el *ela, struct debug_el *elb)
 {
 	if (ela->serial >= TEST_LIST_LEN) {
-		printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-				ela->serial);
+		pr_err("error: incorrect serial %d\n", ela->serial);
 		return -EINVAL;
 	}
 	if (elb->serial >= TEST_LIST_LEN) {
-		printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-				elb->serial);
+		pr_err("error: incorrect serial %d\n", elb->serial);
 		return -EINVAL;
 	}
 	if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
-		printk(KERN_ERR "list_sort_test: error: phantom element\n");
+		pr_err("error: phantom element\n");
 		return -EINVAL;
 	}
 	if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
-		printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-				ela->poison1, ela->poison2);
+		pr_err("error: bad poison: %#x/%#x\n",
+			ela->poison1, ela->poison2);
 		return -EINVAL;
 	}
 	if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
-		printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-				elb->poison1, elb->poison2);
+		pr_err("error: bad poison: %#x/%#x\n",
+			elb->poison1, elb->poison2);
 		return -EINVAL;
 	}
 	return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
 
 static int __init list_sort_test(void)
 {
-	int i, count = 1, err = -EINVAL;
+	int i, count = 1, err = -ENOMEM;
 	struct debug_el *el;
-	struct list_head *cur, *tmp;
+	struct list_head *cur;
 	LIST_HEAD(head);
 
-	printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+	pr_debug("start testing list_sort()\n");
 
-	elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+	elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
 	if (!elts) {
-		printk(KERN_ERR "list_sort_test: error: cannot allocate "
-				"memory\n");
-		goto exit;
+		pr_err("error: cannot allocate memory\n");
+		return err;
 	}
 
 	for (i = 0; i < TEST_LIST_LEN; i++) {
 		el = kmalloc(sizeof(*el), GFP_KERNEL);
 		if (!el) {
-			printk(KERN_ERR "list_sort_test: error: cannot "
-					"allocate memory\n");
+			pr_err("error: cannot allocate memory\n");
 			goto exit;
 		}
 		 /* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
 
 	list_sort(NULL, &head, cmp);
 
+	err = -EINVAL;
 	for (cur = head.next; cur->next != &head; cur = cur->next) {
 		struct debug_el *el1;
 		int cmp_result;
 
 		if (cur->next->prev != cur) {
-			printk(KERN_ERR "list_sort_test: error: list is "
-					"corrupted\n");
+			pr_err("error: list is corrupted\n");
 			goto exit;
 		}
 
 		cmp_result = cmp(NULL, cur, cur->next);
 		if (cmp_result > 0) {
-			printk(KERN_ERR "list_sort_test: error: list is not "
-					"sorted\n");
+			pr_err("error: list is not sorted\n");
 			goto exit;
 		}
 
 		el = container_of(cur, struct debug_el, list);
 		el1 = container_of(cur->next, struct debug_el, list);
 		if (cmp_result == 0 && el->serial >= el1->serial) {
-			printk(KERN_ERR "list_sort_test: error: order of "
-					"equivalent elements not preserved\n");
+			pr_err("error: order of equivalent elements not "
+				"preserved\n");
 			goto exit;
 		}
 
 		if (check(el, el1)) {
-			printk(KERN_ERR "list_sort_test: error: element check "
-					"failed\n");
+			pr_err("error: element check failed\n");
 			goto exit;
 		}
 		count++;
 	}
+	if (head.prev != cur) {
+		pr_err("error: list is corrupted\n");
+		goto exit;
+	}
+
 
 	if (count != TEST_LIST_LEN) {
-		printk(KERN_ERR "list_sort_test: error: bad list length %d",
-				count);
+		pr_err("error: bad list length %d", count);
 		goto exit;
 	}
 
 	err = 0;
 exit:
+	for (i = 0; i < TEST_LIST_LEN; i++)
+		kfree(elts[i]);
 	kfree(elts);
-	list_for_each_safe(cur, tmp, &head) {
-		list_del(cur);
-		kfree(container_of(cur, struct debug_el, list));
-	}
 	return err;
 }
 module_init(list_sort_test);
diff --git a/lib/lockref.c b/lib/lockref.c
index f07a40d33871..d2233de9a86e 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,6 +1,5 @@
 #include <linux/export.h>
 #include <linux/lockref.h>
-#include <linux/mutex.h>
 
 #if USE_CMPXCHG_LOCKREF
 
@@ -29,7 +28,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
-		arch_mutex_cpu_relax();						\
+		cpu_relax_lowlatency();						\
 	}									\
 } while (0)
 
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 4a83ecd03650..852c81e3ba9a 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -169,7 +169,7 @@ out_fail:
 	return NULL;
 }
 
-void lc_free_by_index(struct lru_cache *lc, unsigned i)
+static void lc_free_by_index(struct lru_cache *lc, unsigned i)
 {
 	void *p = lc->lc_element[i];
 	WARN_ON(!p);
@@ -643,9 +643,10 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index)
  * lc_dump - Dump a complete LRU cache to seq in textual form.
  * @lc: the lru cache to operate on
  * @seq: the &struct seq_file pointer to seq_printf into
- * @utext: user supplied "heading" or other info
+ * @utext: user supplied additional "heading" or other info
  * @detail: function pointer the user may provide to dump further details
- * of the object the lc_element is embedded in.
+ * of the object the lc_element is embedded in. May be NULL.
+ * Note: a leading space ' ' and trailing newline '\n' is implied.
  */
 void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
 	     void (*detail) (struct seq_file *, struct lc_element *))
@@ -654,16 +655,18 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext
 	struct lc_element *e;
 	int i;
 
-	seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+	seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext);
 	for (i = 0; i < nr_elements; i++) {
 		e = lc_element_by_index(lc, i);
-		if (e->lc_number == LC_FREE) {
-			seq_printf(seq, "\t%2d: FREE\n", i);
-		} else {
-			seq_printf(seq, "\t%2d: %4u %4u    ", i,
-				   e->lc_number, e->refcnt);
+		if (e->lc_number != e->lc_new_number)
+			seq_printf(seq, "\t%5d: %6d %8d %6d ",
+				i, e->lc_number, e->lc_new_number, e->refcnt);
+		else
+			seq_printf(seq, "\t%5d: %6d %-8s %6d ",
+				i, e->lc_number, "-\"-", e->refcnt);
+		if (detail)
 			detail(seq, e);
-		}
+		seq_putc(seq, '\n');
 	}
 }
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index df6839e3ce08..7a85967060a5 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -72,6 +72,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
 			len = *ip++;
 			for (; len == 255; length += 255)
 				len = *ip++;
+			if (unlikely(length > (size_t)(length + len)))
+				goto _output_error;
 			length += len;
 		}
 
@@ -106,6 +108,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
 		if (length == ML_MASK) {
 			for (; *ip == 255; length += 255)
 				ip++;
+			if (unlikely(length > (size_t)(length + *ip)))
+				goto _output_error;
 			length += *ip++;
 		}
 
@@ -155,7 +159,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
 
 	/* write overflow error detected */
 _output_error:
-	return (int) (-(((char *)ip) - source));
+	return -1;
 }
 
 static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
@@ -188,6 +192,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
 			int s = 255;
 			while ((ip < iend) && (s == 255)) {
 				s = *ip++;
+				if (unlikely(length > (size_t)(length + s)))
+					goto _output_error;
 				length += s;
 			}
 		}
@@ -228,6 +234,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
 		if (length == ML_MASK) {
 			while (ip < iend) {
 				int s = *ip++;
+				if (unlikely(length > (size_t)(length + s)))
+					goto _output_error;
 				length += s;
 				if (s == 255)
 					continue;
@@ -280,7 +288,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
 
 	/* write overflow error detected */
 _output_error:
-	return (int) (-(((char *) ip) - source));
+	return -1;
 }
 
 int lz4_decompress(const unsigned char *src, size_t *src_len,
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 569985d522d5..a1c387f6afba 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -25,6 +25,16 @@
 #define NEED_OP(x)      if (!HAVE_OP(x)) goto output_overrun
 #define TEST_LB(m_pos)  if ((m_pos) < out) goto lookbehind_overrun
 
+/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
+ * count without overflowing an integer. The multiply will overflow when
+ * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
+ * depending on the base count. Since the base count is taken from a u8
+ * and a few bits, it is safe to assume that it will always be lower than
+ * or equal to 2*255, thus we can always prevent any overflow by accepting
+ * two less 255 steps. See Documentation/lzo.txt for more information.
+ */
+#define MAX_255_COUNT      ((((size_t)~0) / 255) - 2)
+
 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 			  unsigned char *out, size_t *out_len)
 {
@@ -55,12 +65,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 		if (t < 16) {
 			if (likely(state == 0)) {
 				if (unlikely(t == 0)) {
+					size_t offset;
+					const unsigned char *ip_last = ip;
+
 					while (unlikely(*ip == 0)) {
-						t += 255;
 						ip++;
 						NEED_IP(1);
 					}
-					t += 15 + *ip++;
+					offset = ip - ip_last;
+					if (unlikely(offset > MAX_255_COUNT))
+						return LZO_E_ERROR;
+
+					offset = (offset << 8) - offset;
+					t += offset + 15 + *ip++;
 				}
 				t += 3;
 copy_literal_run:
@@ -116,12 +133,19 @@ copy_literal_run:
 		} else if (t >= 32) {
 			t = (t & 31) + (3 - 1);
 			if (unlikely(t == 2)) {
+				size_t offset;
+				const unsigned char *ip_last = ip;
+
 				while (unlikely(*ip == 0)) {
-					t += 255;
 					ip++;
 					NEED_IP(1);
 				}
-				t += 31 + *ip++;
+				offset = ip - ip_last;
+				if (unlikely(offset > MAX_255_COUNT))
+					return LZO_E_ERROR;
+
+				offset = (offset << 8) - offset;
+				t += offset + 31 + *ip++;
 				NEED_IP(2);
 			}
 			m_pos = op - 1;
@@ -134,12 +158,19 @@ copy_literal_run:
 			m_pos -= (t & 8) << 11;
 			t = (t & 7) + (3 - 1);
 			if (unlikely(t == 2)) {
+				size_t offset;
+				const unsigned char *ip_last = ip;
+
 				while (unlikely(*ip == 0)) {
-					t += 255;
 					ip++;
 					NEED_IP(1);
 				}
-				t += 7 + *ip++;
+				offset = ip - ip_last;
+				if (unlikely(offset > MAX_255_COUNT))
+					return LZO_E_ERROR;
+
+				offset = (offset << 8) - offset;
+				t += offset + 7 + *ip++;
 				NEED_IP(2);
 			}
 			next = get_unaligned_le16(ip);
diff --git a/lib/net_utils.c b/lib/net_utils.c
index 2e3c52c8d050..148fc6e99ef6 100644
--- a/lib/net_utils.c
+++ b/lib/net_utils.c
@@ -3,24 +3,24 @@
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 
-int mac_pton(const char *s, u8 *mac)
+bool mac_pton(const char *s, u8 *mac)
 {
 	int i;
 
 	/* XX:XX:XX:XX:XX:XX */
 	if (strlen(s) < 3 * ETH_ALEN - 1)
-		return 0;
+		return false;
 
 	/* Don't dirty result unless string is valid MAC. */
 	for (i = 0; i < ETH_ALEN; i++) {
 		if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1]))
-			return 0;
+			return false;
 		if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
-			return 0;
+			return false;
 	}
 	for (i = 0; i < ETH_ALEN; i++) {
 		mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
 	}
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL(mac_pton);
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 18eca7809b08..9c3e85ff0a6c 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -136,6 +136,7 @@ int nla_validate(const struct nlattr *head, int len, int maxtype,
 errout:
 	return err;
 }
+EXPORT_SYMBOL(nla_validate);
 
 /**
  * nla_policy_len - Determin the max. length of a policy
@@ -162,6 +163,7 @@ nla_policy_len(const struct nla_policy *p, int n)
 
 	return len;
 }
+EXPORT_SYMBOL(nla_policy_len);
 
 /**
  * nla_parse - Parse a stream of attributes into a tb buffer
@@ -201,13 +203,14 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
 	}
 
 	if (unlikely(rem > 0))
-		printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
-		       "attributes.\n", rem);
+		pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
+				    rem, current->comm);
 
 	err = 0;
 errout:
 	return err;
 }
+EXPORT_SYMBOL(nla_parse);
 
 /**
  * nla_find - Find a specific attribute in a stream of attributes
@@ -228,6 +231,7 @@ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
 
 	return NULL;
 }
+EXPORT_SYMBOL(nla_find);
 
 /**
  * nla_strlcpy - Copy string attribute payload into a sized buffer
@@ -258,6 +262,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
 
 	return srclen;
 }
+EXPORT_SYMBOL(nla_strlcpy);
 
 /**
  * nla_memcpy - Copy a netlink attribute into another memory area
@@ -278,6 +283,7 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count)
 
 	return minlen;
 }
+EXPORT_SYMBOL(nla_memcpy);
 
 /**
  * nla_memcmp - Compare an attribute with sized memory area
@@ -295,6 +301,7 @@ int nla_memcmp(const struct nlattr *nla, const void *data,
 
 	return d;
 }
+EXPORT_SYMBOL(nla_memcmp);
 
 /**
  * nla_strcmp - Compare a string attribute against a string
@@ -303,14 +310,21 @@ int nla_memcmp(const struct nlattr *nla, const void *data,
  */
 int nla_strcmp(const struct nlattr *nla, const char *str)
 {
-	int len = strlen(str) + 1;
-	int d = nla_len(nla) - len;
+	int len = strlen(str);
+	char *buf = nla_data(nla);
+	int attrlen = nla_len(nla);
+	int d;
 
+	if (attrlen > 0 && buf[attrlen - 1] == '\0')
+		attrlen--;
+
+	d = attrlen - len;
 	if (d == 0)
 		d = memcmp(nla_data(nla), str, len);
 
 	return d;
 }
+EXPORT_SYMBOL(nla_strcmp);
 
 #ifdef CONFIG_NET
 /**
@@ -496,12 +510,3 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data)
 }
 EXPORT_SYMBOL(nla_append);
 #endif
-
-EXPORT_SYMBOL(nla_validate);
-EXPORT_SYMBOL(nla_policy_len);
-EXPORT_SYMBOL(nla_parse);
-EXPORT_SYMBOL(nla_find);
-EXPORT_SYMBOL(nla_strlcpy);
-EXPORT_SYMBOL(nla_memcpy);
-EXPORT_SYMBOL(nla_memcmp);
-EXPORT_SYMBOL(nla_strcmp);
diff --git a/lib/parser.c b/lib/parser.c
index 807b2aaa33fa..b6d11631231b 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[])
 
 	return p->token;
 }
+EXPORT_SYMBOL(match_token);
 
 /**
  * match_number: scan a number in the given base from a substring_t
@@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result)
 {
 	return match_number(s, result, 0);
 }
+EXPORT_SYMBOL(match_int);
 
 /**
  * match_octal: - scan an octal representation of an integer from a substring_t
@@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result)
 {
 	return match_number(s, result, 8);
 }
+EXPORT_SYMBOL(match_octal);
 
 /**
  * match_hex: - scan a hex representation of an integer from a substring_t
@@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result)
 {
 	return match_number(s, result, 16);
 }
+EXPORT_SYMBOL(match_hex);
+
+/**
+ * match_wildcard: - parse if a string matches given wildcard pattern
+ * @pattern: wildcard pattern
+ * @str: the string to be parsed
+ *
+ * Description: Parse the string @str to check if matches wildcard
+ * pattern @pattern. The pattern may contain two type wildcardes:
+ *   '*' - matches zero or more characters
+ *   '?' - matches one character
+ * If it's matched, return true, else return false.
+ */
+bool match_wildcard(const char *pattern, const char *str)
+{
+	const char *s = str;
+	const char *p = pattern;
+	bool star = false;
+
+	while (*s) {
+		switch (*p) {
+		case '?':
+			s++;
+			p++;
+			break;
+		case '*':
+			star = true;
+			str = s;
+			if (!*++p)
+				return true;
+			pattern = p;
+			break;
+		default:
+			if (*s == *p) {
+				s++;
+				p++;
+			} else {
+				if (!star)
+					return false;
+				str++;
+				s = str;
+				p = pattern;
+			}
+			break;
+		}
+	}
+
+	if (*p == '*')
+		++p;
+	return !*p;
+}
+EXPORT_SYMBOL(match_wildcard);
 
 /**
  * match_strlcpy: - Copy the characters from a substring_t to a sized buffer
@@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(match_strlcpy);
 
 /**
  * match_strdup: - allocate a new string with the contents of a substring_t
@@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s)
 		match_strlcpy(p, s, sz);
 	return p;
 }
-
-EXPORT_SYMBOL(match_token);
-EXPORT_SYMBOL(match_int);
-EXPORT_SYMBOL(match_octal);
-EXPORT_SYMBOL(match_hex);
-EXPORT_SYMBOL(match_strlcpy);
 EXPORT_SYMBOL(match_strdup);
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 1a53d497a8c5..6111bcb28376 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -11,8 +13,8 @@
  * percpu counters will all sum to the correct value
  *
  * (More precisely: because moduler arithmatic is commutative the sum of all the
- * pcpu_count vars will be equal to what it would have been if all the gets and
- * puts were done to a single integer, even if some of the percpu integers
+ * percpu_count vars will be equal to what it would have been if all the gets
+ * and puts were done to a single integer, even if some of the percpu integers
  * overflow or underflow).
  *
  * The real trick to implementing percpu refcounts is shutdown. We can't detect
@@ -25,85 +27,110 @@
  * works.
  *
  * Converting to non percpu mode is done with some RCUish stuff in
- * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t
- * can't hit 0 before we've added up all the percpu refs.
+ * percpu_ref_kill. Additionally, we need a bias value so that the
+ * atomic_long_t can't hit 0 before we've added up all the percpu refs.
  */
 
-#define PCPU_COUNT_BIAS		(1U << 31)
+#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
+
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
+static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
+{
+	return (unsigned long __percpu *)
+		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
+}
 
 /**
  * percpu_ref_init - initialize a percpu refcount
  * @ref: percpu_ref to initialize
  * @release: function which will be called when refcount hits 0
+ * @flags: PERCPU_REF_INIT_* flags
+ * @gfp: allocation mask to use
  *
- * Initializes the refcount in single atomic counter mode with a refcount of 1;
- * analagous to atomic_set(ref, 1).
+ * Initializes @ref.  If @flags is zero, @ref starts in percpu mode with a
+ * refcount of 1; analagous to atomic_long_set(ref, 1).  See the
+ * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
  *
  * Note that @release must not sleep - it may potentially be called from RCU
  * callback context by percpu_ref_kill().
  */
-int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
+int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
+		    unsigned int flags, gfp_t gfp)
 {
-	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
+			     __alignof__(unsigned long));
+	unsigned long start_count = 0;
 
-	ref->pcpu_count = alloc_percpu(unsigned);
-	if (!ref->pcpu_count)
+	ref->percpu_count_ptr = (unsigned long)
+		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
+	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
+	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
+
+	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+	else
+		start_count += PERCPU_COUNT_BIAS;
+
+	if (flags & PERCPU_REF_INIT_DEAD)
+		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	else
+		start_count++;
+
+	atomic_long_set(&ref->count, start_count);
+
 	ref->release = release;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(percpu_ref_init);
 
 /**
- * percpu_ref_cancel_init - cancel percpu_ref_init()
- * @ref: percpu_ref to cancel init for
- *
- * Once a percpu_ref is initialized, its destruction is initiated by
- * percpu_ref_kill() and completes asynchronously, which can be painful to
- * do when destroying a half-constructed object in init failure path.
+ * percpu_ref_exit - undo percpu_ref_init()
+ * @ref: percpu_ref to exit
  *
- * This function destroys @ref without invoking @ref->release and the
- * memory area containing it can be freed immediately on return.  To
- * prevent accidental misuse, it's required that @ref has finished
- * percpu_ref_init(), whether successful or not, but never used.
- *
- * The weird name and usage restriction are to prevent people from using
- * this function by mistake for normal shutdown instead of
- * percpu_ref_kill().
+ * This function exits @ref.  The caller is responsible for ensuring that
+ * @ref is no longer in active use.  The usual places to invoke this
+ * function from are the @ref->release() callback or in init failure path
+ * where percpu_ref_init() succeeded but other parts of the initialization
+ * of the embedding object failed.
  */
-void percpu_ref_cancel_init(struct percpu_ref *ref)
+void percpu_ref_exit(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count = ref->pcpu_count;
-	int cpu;
-
-	WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 
-	if (pcpu_count) {
-		for_each_possible_cpu(cpu)
-			WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu));
-		free_percpu(ref->pcpu_count);
+	if (percpu_count) {
+		free_percpu(percpu_count);
+		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
 	}
 }
-EXPORT_SYMBOL_GPL(percpu_ref_cancel_init);
+EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
 {
 	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
-	unsigned __percpu *pcpu_count = ref->pcpu_count;
-	unsigned count = 0;
-	int cpu;
 
-	/* Mask out PCPU_REF_DEAD */
-	pcpu_count = (unsigned __percpu *)
-		(((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK);
+	ref->confirm_switch(ref);
+	ref->confirm_switch = NULL;
+	wake_up_all(&percpu_ref_switch_waitq);
 
-	for_each_possible_cpu(cpu)
-		count += *per_cpu_ptr(pcpu_count, cpu);
+	/* drop ref from percpu_ref_switch_to_atomic() */
+	percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
+{
+	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	unsigned long count = 0;
+	int cpu;
 
-	free_percpu(pcpu_count);
+	for_each_possible_cpu(cpu)
+		count += *per_cpu_ptr(percpu_count, cpu);
 
-	pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count);
+	pr_debug("global %ld percpu %ld",
+		 atomic_long_read(&ref->count), (long)count);
 
 	/*
 	 * It's crucial that we sum the percpu counters _before_ adding the sum
@@ -117,18 +144,137 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	 * reaching 0 before we add the percpu counts. But doing it at the same
 	 * time is equivalent and saves us atomic operations:
 	 */
+	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
+
+	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
+		  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
+		  ref->release, atomic_long_read(&ref->count));
+
+	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
+	percpu_ref_call_confirm_rcu(rcu);
+}
+
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
 
-	atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count);
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+					  percpu_ref_func_t *confirm_switch)
+{
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+		/* switching from percpu to atomic */
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+		/*
+		 * Non-NULL ->confirm_switch is used to indicate that
+		 * switching is in progress.  Use noop one if unspecified.
+		 */
+		WARN_ON_ONCE(ref->confirm_switch);
+		ref->confirm_switch =
+			confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+	} else if (confirm_switch) {
+		/*
+		 * Somebody already set ATOMIC.  Switching may still be in
+		 * progress.  @confirm_switch must be invoked after the
+		 * switching is complete and a full sched RCU grace period
+		 * has passed.  Wait synchronously for the previous
+		 * switching and schedule @confirm_switch invocation.
+		 */
+		wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+		ref->confirm_switch = confirm_switch;
 
-	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
-	if (ref->confirm_kill)
-		ref->confirm_kill(ref);
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+	}
+}
+
+/**
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.  Note that @ref will stay in atomic mode across kill/reinit
+ * cycles until percpu_ref_switch_to_percpu() is called.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode.  In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+				 percpu_ref_func_t *confirm_switch)
+{
+	ref->force_atomic = true;
+	__percpu_ref_switch_to_atomic(ref, confirm_switch);
+}
+
+static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	int cpu;
+
+	BUG_ON(!percpu_count);
+
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
+		return;
+
+	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+
+	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
 
 	/*
-	 * Now we're in single atomic_t mode with a consistent refcount, so it's
-	 * safe to drop our initial ref:
+	 * Restore per-cpu operation.  smp_store_release() is paired with
+	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
+	 * that the zeroing is visible to all percpu accesses which can see
+	 * the following __PERCPU_REF_ATOMIC clearing.
 	 */
-	percpu_ref_put(ref);
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(percpu_count, cpu) = 0;
+
+	smp_store_release(&ref->percpu_count_ptr,
+			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
+}
+
+/**
+ * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
+ * @ref: percpu_ref to switch to percpu mode
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * To re-use an expired ref, use percpu_ref_reinit().
+ *
+ * Switch @ref to percpu mode.  This function may be invoked concurrently
+ * with all the get/put operations and can safely be mixed with kill and
+ * reinit operations.  This function reverses the sticky atomic state set
+ * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
+ * dying or dead, the actual switching takes place on the following
+ * percpu_ref_reinit().
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @ref is in the process of switching to atomic mode
+ * by percpu_ref_switch_atomic().
+ */
+void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
+{
+	ref->force_atomic = false;
+
+	/* a dying or dead ref can't be switched to percpu mode w/o reinit */
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+		__percpu_ref_switch_to_percpu(ref);
 }
 
 /**
@@ -138,24 +284,48 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
  *
  * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
  * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget() will fail.  See percpu_ref_tryget()
- * for more details.
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is in the
+ * process of switching to atomic mode by percpu_ref_switch_atomic().
  *
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
-	WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD,
-		  "percpu_ref_kill() called more than once!\n");
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+		  "%s called more than once on %pf!", __func__, ref->release);
 
-	ref->pcpu_count = (unsigned __percpu *)
-		(((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD);
-	ref->confirm_kill = confirm_kill;
-
-	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	__percpu_ref_switch_to_atomic(ref, confirm_kill);
+	percpu_ref_put(ref);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
+
+/**
+ * percpu_ref_reinit - re-initialize a percpu refcount
+ * @ref: perpcu_ref to re-initialize
+ *
+ * Re-initialize @ref so that it's in the same state as when it finished
+ * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
+ * initialized successfully and reached 0 but not exited.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_reinit(struct percpu_ref *ref)
+{
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+
+	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
+	percpu_ref_get(ref);
+	if (!ref->force_atomic)
+		__percpu_ref_switch_to_percpu(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 7473ee3b4ee7..48144cdae819 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -82,10 +82,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 		unsigned long flags;
 		raw_spin_lock_irqsave(&fbc->lock, flags);
 		fbc->count += count;
+		__this_cpu_sub(*fbc->counters, count - amount);
 		raw_spin_unlock_irqrestore(&fbc->lock, flags);
-		__this_cpu_write(*fbc->counters, 0);
 	} else {
-		__this_cpu_write(*fbc->counters, count);
+		this_cpu_add(*fbc->counters, amount);
 	}
 	preempt_enable();
 }
@@ -112,13 +112,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key)
 {
+	unsigned long flags __maybe_unused;
+
 	raw_spin_lock_init(&fbc->lock);
 	lockdep_set_class(&fbc->lock, key);
 	fbc->count = amount;
-	fbc->counters = alloc_percpu(s32);
+	fbc->counters = alloc_percpu_gfp(s32, gfp);
 	if (!fbc->counters)
 		return -ENOMEM;
 
@@ -126,9 +128,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 
 #ifdef CONFIG_HOTPLUG_CPU
 	INIT_LIST_HEAD(&fbc->list);
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irqsave(&percpu_counters_lock, flags);
 	list_add(&fbc->list, &percpu_counters);
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	return 0;
 }
@@ -136,15 +138,17 @@ EXPORT_SYMBOL(__percpu_counter_init);
 
 void percpu_counter_destroy(struct percpu_counter *fbc)
 {
+	unsigned long flags __maybe_unused;
+
 	if (!fbc->counters)
 		return;
 
 	debug_percpu_counter_deactivate(fbc);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irqsave(&percpu_counters_lock, flags);
 	list_del(&fbc->list);
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	free_percpu(fbc->counters);
 	fbc->counters = NULL;
@@ -169,11 +173,11 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	struct percpu_counter *fbc;
 
 	compute_batch_value();
-	if (action != CPU_DEAD)
+	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
 		return NOTIFY_OK;
 
 	cpu = (unsigned long)hcpu;
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irq(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
 		unsigned long flags;
@@ -184,7 +188,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 		*pcount = 0;
 		raw_spin_unlock_irqrestore(&fbc->lock, flags);
 	}
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irq(&percpu_counters_lock);
 #endif
 	return NOTIFY_OK;
 }
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 9d054bf91d0f..93d145e5539c 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr,
 /*
  * Try to steal tags from a remote cpu's percpu freelist.
  *
- * We first check how many percpu freelists have tags - we don't steal tags
- * unless enough percpu freelists have tags on them that it's possible more than
- * half the total tags could be stuck on remote percpu freelists.
+ * We first check how many percpu freelists have tags
  *
  * Then we iterate through the cpus until we find some tags - we don't attempt
  * to find the "best" cpu to steal from, to keep cacheline bouncing to a
@@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool,
 	struct percpu_ida_cpu *remote;
 
 	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
-	     cpus_have_tags--) {
+	     cpus_have_tags; cpus_have_tags--) {
 		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
 
 		if (cpu >= nr_cpu_ids) {
@@ -132,22 +129,22 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
 /**
  * percpu_ida_alloc - allocate a tag
  * @pool: pool to allocate from
- * @gfp: gfp flags
+ * @state: task state for prepare_to_wait
  *
  * Returns a tag - an integer in the range [0..nr_tags) (passed to
  * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
  *
  * Safe to be called from interrupt context (assuming it isn't passed
- * __GFP_WAIT, of course).
+ * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
  *
  * @gfp indicates whether or not to wait until a free id is available (it's not
  * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep
  * however long it takes until another thread frees an id (same semantics as a
  * mempool).
  *
- * Will not fail if passed __GFP_WAIT.
+ * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE.
  */
-int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
+int percpu_ida_alloc(struct percpu_ida *pool, int state)
 {
 	DEFINE_WAIT(wait);
 	struct percpu_ida_cpu *tags;
@@ -174,7 +171,8 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
 		 *
 		 * global lock held and irqs disabled, don't need percpu lock
 		 */
-		prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (state != TASK_RUNNING)
+			prepare_to_wait(&pool->wait, &wait, state);
 
 		if (!tags->nr_free)
 			alloc_global_tags(pool, tags);
@@ -191,16 +189,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
 		spin_unlock(&pool->lock);
 		local_irq_restore(flags);
 
-		if (tag >= 0 || !(gfp & __GFP_WAIT))
+		if (tag >= 0 || state == TASK_RUNNING)
 			break;
 
+		if (signal_pending_state(state, current)) {
+			tag = -ERESTARTSYS;
+			break;
+		}
+
 		schedule();
 
 		local_irq_save(flags);
 		tags = this_cpu_ptr(pool->tag_cpu);
 	}
+	if (state != TASK_RUNNING)
+		finish_wait(&pool->wait, &wait);
 
-	finish_wait(&pool->wait, &wait);
 	return tag;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_alloc);
diff --git a/lib/plist.c b/lib/plist.c
index 1ebc95f7a46f..d408e774b746 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head)
 	plist_check_head(head);
 }
 
+/**
+ * plist_requeue - Requeue @node at end of same-prio entries.
+ *
+ * This is essentially an optimized plist_del() followed by
+ * plist_add().  It moves an entry already in the plist to
+ * after any other same-priority entries.
+ *
+ * @node:	&struct plist_node pointer - entry to be moved
+ * @head:	&struct plist_head pointer - list head
+ */
+void plist_requeue(struct plist_node *node, struct plist_head *head)
+{
+	struct plist_node *iter;
+	struct list_head *node_next = &head->node_list;
+
+	plist_check_head(head);
+	BUG_ON(plist_head_empty(head));
+	BUG_ON(plist_node_empty(node));
+
+	if (node == plist_last(head))
+		return;
+
+	iter = plist_next(node);
+
+	if (node->prio != iter->prio)
+		return;
+
+	plist_del(node, head);
+
+	plist_for_each_continue(iter, head) {
+		if (node->prio != iter->prio) {
+			node_next = &iter->node_list;
+			break;
+		}
+	}
+	list_add_tail(&node->node_list, node_next);
+
+	plist_check_head(head);
+}
+
 #ifdef CONFIG_DEBUG_PI_LIST
 #include <linux/sched.h>
 #include <linux/module.h>
@@ -170,12 +210,20 @@ static void __init plist_test_check(int nr_expect)
 	BUG_ON(prio_pos->prio_list.next != &first->prio_list);
 }
 
+static void __init plist_test_requeue(struct plist_node *node)
+{
+	plist_requeue(node, &test_head);
+
+	if (node != plist_last(&test_head))
+		BUG_ON(node->prio == plist_next(node)->prio);
+}
+
 static int  __init plist_test(void)
 {
 	int nr_expect = 0, i, loop;
 	unsigned int r = local_clock();
 
-	pr_debug("start plist test\n");
+	printk(KERN_DEBUG "start plist test\n");
 	plist_head_init(&test_head);
 	for (i = 0; i < ARRAY_SIZE(test_node); i++)
 		plist_node_init(test_node + i, 0);
@@ -193,6 +241,10 @@ static int  __init plist_test(void)
 			nr_expect--;
 		}
 		plist_test_check(nr_expect);
+		if (!plist_node_empty(test_node + i)) {
+			plist_test_requeue(test_node + i);
+			plist_test_check(nr_expect);
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
@@ -203,7 +255,7 @@ static int  __init plist_test(void)
 		plist_test_check(nr_expect);
 	}
 
-	pr_debug("end plist test\n");
+	printk(KERN_DEBUG "end plist test\n");
 	return 0;
 }
 
diff --git a/lib/proportions.c b/lib/proportions.c
index 05df84801b56..6f724298f67a 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -73,7 +73,7 @@
 #include <linux/proportions.h>
 #include <linux/rcupdate.h>
 
-int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
 {
 	int err;
 
@@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 	pd->index = 0;
 	pd->pg[0].shift = shift;
 	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0);
+	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
 	if (err)
 		goto out;
 
-	err = percpu_counter_init(&pd->pg[1].events, 0);
+	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
 	if (err)
 		percpu_counter_destroy(&pd->pg[0].events);
 
@@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int prop_local_init_percpu(struct prop_local_percpu *pl)
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
 {
 	raw_spin_lock_init(&pl->lock);
 	pl->shift = 0;
 	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0);
+	return percpu_counter_init(&pl->events, 0, gfp);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7811ed3b4e70..3291a8e37490 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -27,6 +27,7 @@
 #include <linux/radix-tree.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/kmemleak.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/string.h>
@@ -35,33 +36,6 @@
 #include <linux/hardirq.h>		/* in_interrupt() */
 
 
-#ifdef __KERNEL__
-#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
-#else
-#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
-#endif
-
-#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
-#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
-
-#define RADIX_TREE_TAG_LONGS	\
-	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-struct radix_tree_node {
-	unsigned int	height;		/* Height from the bottom */
-	unsigned int	count;
-	union {
-		struct radix_tree_node *parent;	/* Used when ascending tree */
-		struct rcu_head	rcu_head;	/* Used when freeing node */
-	};
-	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
-	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
-};
-
-#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
-#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
-					  RADIX_TREE_MAP_SHIFT))
-
 /*
  * The height_to_maxindex array needs to be one deeper than the maximum
  * path as height 0 holds only 1 entry.
@@ -221,12 +195,17 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 		 * succeed in getting a node here (and never reach
 		 * kmem_cache_alloc)
 		 */
-		rtp = &__get_cpu_var(radix_tree_preloads);
+		rtp = this_cpu_ptr(&radix_tree_preloads);
 		if (rtp->nr) {
 			ret = rtp->nodes[rtp->nr - 1];
 			rtp->nodes[rtp->nr - 1] = NULL;
 			rtp->nr--;
 		}
+		/*
+		 * Update the allocation stack trace as this is more useful
+		 * for debugging.
+		 */
+		kmemleak_update_trace(ret);
 	}
 	if (ret == NULL)
 		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
@@ -277,14 +256,14 @@ static int __radix_tree_preload(gfp_t gfp_mask)
 	int ret = -ENOMEM;
 
 	preempt_disable();
-	rtp = &__get_cpu_var(radix_tree_preloads);
+	rtp = this_cpu_ptr(&radix_tree_preloads);
 	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
 		preempt_enable();
 		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 		if (node == NULL)
 			goto out;
 		preempt_disable();
-		rtp = &__get_cpu_var(radix_tree_preloads);
+		rtp = this_cpu_ptr(&radix_tree_preloads);
 		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
 			rtp->nodes[rtp->nr++] = node;
 		else
@@ -369,7 +348,8 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 
 		/* Increase the height.  */
 		newheight = root->height+1;
-		node->height = newheight;
+		BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK);
+		node->path = newheight;
 		node->count = 1;
 		node->parent = NULL;
 		slot = root->rnode;
@@ -387,23 +367,28 @@ out:
 }
 
 /**
- *	radix_tree_insert    -    insert into a radix tree
+ *	__radix_tree_create	-	create a slot in a radix tree
  *	@root:		radix tree root
  *	@index:		index key
- *	@item:		item to insert
+ *	@nodep:		returns node
+ *	@slotp:		returns slot
  *
- *	Insert an item into the radix tree at position @index.
+ *	Create, if necessary, and return the node and slot for an item
+ *	at position @index in the radix tree @root.
+ *
+ *	Until there is more than one item in the tree, no nodes are
+ *	allocated and @root->rnode is used as a direct slot instead of
+ *	pointing to a node, in which case *@nodep will be NULL.
+ *
+ *	Returns -ENOMEM, or 0 for success.
  */
-int radix_tree_insert(struct radix_tree_root *root,
-			unsigned long index, void *item)
+int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
+			struct radix_tree_node **nodep, void ***slotp)
 {
 	struct radix_tree_node *node = NULL, *slot;
-	unsigned int height, shift;
-	int offset;
+	unsigned int height, shift, offset;
 	int error;
 
-	BUG_ON(radix_tree_is_indirect_ptr(item));
-
 	/* Make sure the tree is high enough.  */
 	if (index > radix_tree_maxindex(root->height)) {
 		error = radix_tree_extend(root, index);
@@ -422,11 +407,12 @@ int radix_tree_insert(struct radix_tree_root *root,
 			/* Have to add a child node.  */
 			if (!(slot = radix_tree_node_alloc(root)))
 				return -ENOMEM;
-			slot->height = height;
+			slot->path = height;
 			slot->parent = node;
 			if (node) {
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
+				slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT;
 			} else
 				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
 		}
@@ -439,16 +425,42 @@ int radix_tree_insert(struct radix_tree_root *root,
 		height--;
 	}
 
-	if (slot != NULL)
+	if (nodep)
+		*nodep = node;
+	if (slotp)
+		*slotp = node ? node->slots + offset : (void **)&root->rnode;
+	return 0;
+}
+
+/**
+ *	radix_tree_insert    -    insert into a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@item:		item to insert
+ *
+ *	Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+			unsigned long index, void *item)
+{
+	struct radix_tree_node *node;
+	void **slot;
+	int error;
+
+	BUG_ON(radix_tree_is_indirect_ptr(item));
+
+	error = __radix_tree_create(root, index, &node, &slot);
+	if (error)
+		return error;
+	if (*slot != NULL)
 		return -EEXIST;
+	rcu_assign_pointer(*slot, item);
 
 	if (node) {
 		node->count++;
-		rcu_assign_pointer(node->slots[offset], item);
-		BUG_ON(tag_get(node, 0, offset));
-		BUG_ON(tag_get(node, 1, offset));
+		BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK));
+		BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK));
 	} else {
-		rcu_assign_pointer(root->rnode, item);
 		BUG_ON(root_tag_get(root, 0));
 		BUG_ON(root_tag_get(root, 1));
 	}
@@ -457,15 +469,26 @@ int radix_tree_insert(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_insert);
 
-/*
- * is_slot == 1 : search for the slot.
- * is_slot == 0 : search for the node.
+/**
+ *	__radix_tree_lookup	-	lookup an item in a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@nodep:		returns node
+ *	@slotp:		returns slot
+ *
+ *	Lookup and return the item at position @index in the radix
+ *	tree @root.
+ *
+ *	Until there is more than one item in the tree, no nodes are
+ *	allocated and @root->rnode is used as a direct slot instead of
+ *	pointing to a node, in which case *@nodep will be NULL.
  */
-static void *radix_tree_lookup_element(struct radix_tree_root *root,
-				unsigned long index, int is_slot)
+void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
+			  struct radix_tree_node **nodep, void ***slotp)
 {
+	struct radix_tree_node *node, *parent;
 	unsigned int height, shift;
-	struct radix_tree_node *node, **slot;
+	void **slot;
 
 	node = rcu_dereference_raw(root->rnode);
 	if (node == NULL)
@@ -474,19 +497,24 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 	if (!radix_tree_is_indirect_ptr(node)) {
 		if (index > 0)
 			return NULL;
-		return is_slot ? (void *)&root->rnode : node;
+
+		if (nodep)
+			*nodep = NULL;
+		if (slotp)
+			*slotp = (void **)&root->rnode;
+		return node;
 	}
 	node = indirect_to_ptr(node);
 
-	height = node->height;
+	height = node->path & RADIX_TREE_HEIGHT_MASK;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
 	do {
-		slot = (struct radix_tree_node **)
-			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
+		parent = node;
+		slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK);
 		node = rcu_dereference_raw(*slot);
 		if (node == NULL)
 			return NULL;
@@ -495,7 +523,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 		height--;
 	} while (height > 0);
 
-	return is_slot ? (void *)slot : indirect_to_ptr(node);
+	if (nodep)
+		*nodep = parent;
+	if (slotp)
+		*slotp = slot;
+	return node;
 }
 
 /**
@@ -513,7 +545,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
  */
 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 {
-	return (void **)radix_tree_lookup_element(root, index, 1);
+	void **slot;
+
+	if (!__radix_tree_lookup(root, index, NULL, &slot))
+		return NULL;
+	return slot;
 }
 EXPORT_SYMBOL(radix_tree_lookup_slot);
 
@@ -531,7 +567,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot);
  */
 void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 {
-	return radix_tree_lookup_element(root, index, 0);
+	return __radix_tree_lookup(root, index, NULL, NULL);
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
@@ -676,7 +712,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 		return (index == 0);
 	node = indirect_to_ptr(node);
 
-	height = node->height;
+	height = node->path & RADIX_TREE_HEIGHT_MASK;
 	if (index > radix_tree_maxindex(height))
 		return 0;
 
@@ -713,7 +749,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 {
 	unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
 	struct radix_tree_node *rnode, *node;
-	unsigned long index, offset;
+	unsigned long index, offset, height;
 
 	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
 		return NULL;
@@ -744,7 +780,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 		return NULL;
 
 restart:
-	shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
+	height = rnode->path & RADIX_TREE_HEIGHT_MASK;
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	offset = index >> shift;
 
 	/* Index outside of the tree */
@@ -946,81 +983,6 @@ next:
 }
 EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
 
-
-/**
- *	radix_tree_next_hole    -    find the next hole (not-present entry)
- *	@root:		tree root
- *	@index:		index key
- *	@max_scan:	maximum range to search
- *
- *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
- *	indexed hole.
- *
- *	Returns: the index of the hole if found, otherwise returns an index
- *	outside of the set specified (in which case 'return - index >= max_scan'
- *	will be true). In rare cases of index wrap-around, 0 will be returned.
- *
- *	radix_tree_next_hole may be called under rcu_read_lock. However, like
- *	radix_tree_gang_lookup, this will not atomically search a snapshot of
- *	the tree at a single point in time. For example, if a hole is created
- *	at index 5, then subsequently a hole is created at index 10,
- *	radix_tree_next_hole covering both indexes may return 10 if called
- *	under rcu_read_lock.
- */
-unsigned long radix_tree_next_hole(struct radix_tree_root *root,
-				unsigned long index, unsigned long max_scan)
-{
-	unsigned long i;
-
-	for (i = 0; i < max_scan; i++) {
-		if (!radix_tree_lookup(root, index))
-			break;
-		index++;
-		if (index == 0)
-			break;
-	}
-
-	return index;
-}
-EXPORT_SYMBOL(radix_tree_next_hole);
-
-/**
- *	radix_tree_prev_hole    -    find the prev hole (not-present entry)
- *	@root:		tree root
- *	@index:		index key
- *	@max_scan:	maximum range to search
- *
- *	Search backwards in the range [max(index-max_scan+1, 0), index]
- *	for the first hole.
- *
- *	Returns: the index of the hole if found, otherwise returns an index
- *	outside of the set specified (in which case 'index - return >= max_scan'
- *	will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
- *
- *	radix_tree_next_hole may be called under rcu_read_lock. However, like
- *	radix_tree_gang_lookup, this will not atomically search a snapshot of
- *	the tree at a single point in time. For example, if a hole is created
- *	at index 10, then subsequently a hole is created at index 5,
- *	radix_tree_prev_hole covering both indexes may return 5 if called under
- *	rcu_read_lock.
- */
-unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
-				   unsigned long index, unsigned long max_scan)
-{
-	unsigned long i;
-
-	for (i = 0; i < max_scan; i++) {
-		if (!radix_tree_lookup(root, index))
-			break;
-		index--;
-		if (index == ULONG_MAX)
-			break;
-	}
-
-	return index;
-}
-EXPORT_SYMBOL(radix_tree_prev_hole);
-
 /**
  *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
  *	@root:		radix tree root
@@ -1189,7 +1151,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
 	unsigned int shift, height;
 	unsigned long i;
 
-	height = slot->height;
+	height = slot->path & RADIX_TREE_HEIGHT_MASK;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
 	for ( ; height > 1; height--) {
@@ -1252,9 +1214,12 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
 		}
 
 		node = indirect_to_ptr(node);
-		max_index = radix_tree_maxindex(node->height);
-		if (cur_index > max_index)
+		max_index = radix_tree_maxindex(node->path &
+						RADIX_TREE_HEIGHT_MASK);
+		if (cur_index > max_index) {
+			rcu_read_unlock();
 			break;
+		}
 
 		cur_index = __locate(node, item, cur_index, &found_index);
 		rcu_read_unlock();
@@ -1335,48 +1300,89 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 }
 
 /**
- *	radix_tree_delete    -    delete an item from a radix tree
+ *	__radix_tree_delete_node    -    try to free node after clearing a slot
+ *	@root:		radix tree root
+ *	@node:		node containing @index
+ *
+ *	After clearing the slot at @index in @node from radix tree
+ *	rooted at @root, call this function to attempt freeing the
+ *	node and shrinking the tree.
+ *
+ *	Returns %true if @node was freed, %false otherwise.
+ */
+bool __radix_tree_delete_node(struct radix_tree_root *root,
+			      struct radix_tree_node *node)
+{
+	bool deleted = false;
+
+	do {
+		struct radix_tree_node *parent;
+
+		if (node->count) {
+			if (node == indirect_to_ptr(root->rnode)) {
+				radix_tree_shrink(root);
+				if (root->height == 0)
+					deleted = true;
+			}
+			return deleted;
+		}
+
+		parent = node->parent;
+		if (parent) {
+			unsigned int offset;
+
+			offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
+			parent->slots[offset] = NULL;
+			parent->count--;
+		} else {
+			root_tag_clear_all(root);
+			root->height = 0;
+			root->rnode = NULL;
+		}
+
+		radix_tree_node_free(node);
+		deleted = true;
+
+		node = parent;
+	} while (node);
+
+	return deleted;
+}
+
+/**
+ *	radix_tree_delete_item    -    delete an item from a radix tree
  *	@root:		radix tree root
  *	@index:		index key
+ *	@item:		expected item
  *
- *	Remove the item at @index from the radix tree rooted at @root.
+ *	Remove @item at @index from the radix tree rooted at @root.
  *
- *	Returns the address of the deleted item, or NULL if it was not present.
+ *	Returns the address of the deleted item, or NULL if it was not present
+ *	or the entry at the given @index was not @item.
  */
-void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+void *radix_tree_delete_item(struct radix_tree_root *root,
+			     unsigned long index, void *item)
 {
-	struct radix_tree_node *node = NULL;
-	struct radix_tree_node *slot = NULL;
-	struct radix_tree_node *to_free;
-	unsigned int height, shift;
+	struct radix_tree_node *node;
+	unsigned int offset;
+	void **slot;
+	void *entry;
 	int tag;
-	int uninitialized_var(offset);
 
-	height = root->height;
-	if (index > radix_tree_maxindex(height))
-		goto out;
+	entry = __radix_tree_lookup(root, index, &node, &slot);
+	if (!entry)
+		return NULL;
 
-	slot = root->rnode;
-	if (height == 0) {
+	if (item && entry != item)
+		return NULL;
+
+	if (!node) {
 		root_tag_clear_all(root);
 		root->rnode = NULL;
-		goto out;
+		return entry;
 	}
-	slot = indirect_to_ptr(slot);
-	shift = height * RADIX_TREE_MAP_SHIFT;
 
-	do {
-		if (slot == NULL)
-			goto out;
-
-		shift -= RADIX_TREE_MAP_SHIFT;
-		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		node = slot;
-		slot = slot->slots[offset];
-	} while (shift);
-
-	if (slot == NULL)
-		goto out;
+	offset = index & RADIX_TREE_MAP_MASK;
 
 	/*
 	 * Clear all tags associated with the item to be deleted.
@@ -1387,40 +1393,27 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 			radix_tree_tag_clear(root, index, tag);
 	}
 
-	to_free = NULL;
-	/* Now free the nodes we do not need anymore */
-	while (node) {
-		node->slots[offset] = NULL;
-		node->count--;
-		/*
-		 * Queue the node for deferred freeing after the
-		 * last reference to it disappears (set NULL, above).
-		 */
-		if (to_free)
-			radix_tree_node_free(to_free);
-
-		if (node->count) {
-			if (node == indirect_to_ptr(root->rnode))
-				radix_tree_shrink(root);
-			goto out;
-		}
-
-		/* Node with zero slots in use so free it */
-		to_free = node;
+	node->slots[offset] = NULL;
+	node->count--;
 
-		index >>= RADIX_TREE_MAP_SHIFT;
-		offset = index & RADIX_TREE_MAP_MASK;
-		node = node->parent;
-	}
+	__radix_tree_delete_node(root, node);
 
-	root_tag_clear_all(root);
-	root->height = 0;
-	root->rnode = NULL;
-	if (to_free)
-		radix_tree_node_free(to_free);
+	return entry;
+}
+EXPORT_SYMBOL(radix_tree_delete_item);
 
-out:
-	return slot;
+/**
+ *	radix_tree_delete    -    delete an item from a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *
+ *	Remove the item at @index from the radix tree rooted at @root.
+ *
+ *	Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+	return radix_tree_delete_item(root, index, NULL);
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
@@ -1436,9 +1429,12 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
 EXPORT_SYMBOL(radix_tree_tagged);
 
 static void
-radix_tree_node_ctor(void *node)
+radix_tree_node_ctor(void *arg)
 {
-	memset(node, 0, sizeof(struct radix_tree_node));
+	struct radix_tree_node *node = arg;
+
+	memset(node, 0, sizeof(*node));
+	INIT_LIST_HEAD(&node->private_list);
 }
 
 static __init unsigned long __maxindex(unsigned int height)
diff --git a/lib/random32.c b/lib/random32.c
index 1e5b2df44291..0bee183fa18f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -1,37 +1,35 @@
 /*
-  This is a maximally equidistributed combined Tausworthe generator
-  based on code from GNU Scientific Library 1.5 (30 Jun 2004)
-
-  lfsr113 version:
-
-   x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
-
-   s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n <<  6) ^ s1_n) >> 13))
-   s2_{n+1} = (((s2_n & 4294967288) <<  2) ^ (((s2_n <<  2) ^ s2_n) >> 27))
-   s3_{n+1} = (((s3_n & 4294967280) <<  7) ^ (((s3_n << 13) ^ s3_n) >> 21))
-   s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n <<  3) ^ s4_n) >> 12))
-
-   The period of this generator is about 2^113 (see erratum paper).
-
-   From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
-   Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
-   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
-   ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
-
-   There is an erratum in the paper "Tables of Maximally
-   Equidistributed Combined LFSR Generators", Mathematics of
-   Computation, 68, 225 (1999), 261--269:
-   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
-
-        ... the k_j most significant bits of z_j must be non-
-        zero, for each j. (Note: this restriction also applies to the
-        computer code given in [4], but was mistakenly not mentioned in
-        that paper.)
-
-   This affects the seeding procedure by imposing the requirement
-   s1 > 1, s2 > 7, s3 > 15, s4 > 127.
-
-*/
+ * This is a maximally equidistributed combined Tausworthe generator
+ * based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+ *
+ * lfsr113 version:
+ *
+ * x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
+ *
+ * s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n <<  6) ^ s1_n) >> 13))
+ * s2_{n+1} = (((s2_n & 4294967288) <<  2) ^ (((s2_n <<  2) ^ s2_n) >> 27))
+ * s3_{n+1} = (((s3_n & 4294967280) <<  7) ^ (((s3_n << 13) ^ s3_n) >> 21))
+ * s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n <<  3) ^ s4_n) >> 12))
+ *
+ * The period of this generator is about 2^113 (see erratum paper).
+ *
+ * From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+ * Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
+ * http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+ * ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
+ *
+ * There is an erratum in the paper "Tables of Maximally Equidistributed
+ * Combined LFSR Generators", Mathematics of Computation, 68, 225 (1999),
+ * 261--269: http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+ *
+ *      ... the k_j most significant bits of z_j must be non-zero,
+ *      for each j. (Note: this restriction also applies to the
+ *      computer code given in [4], but was mistakenly not mentioned
+ *      in that paper.)
+ *
+ * This affects the seeding procedure by imposing the requirement
+ * s1 > 1, s2 > 7, s3 > 15, s4 > 127.
+ */
 
 #include <linux/types.h>
 #include <linux/percpu.h>
@@ -39,9 +37,14 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <asm/unaligned.h>
 
 #ifdef CONFIG_RANDOM32_SELFTEST
 static void __init prandom_state_selftest(void);
+#else
+static inline void prandom_state_selftest(void)
+{
+}
 #endif
 
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
@@ -55,8 +58,7 @@ static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
  */
 u32 prandom_u32_state(struct rnd_state *state)
 {
-#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
-
+#define TAUSWORTHE(s, a, b, c, d) ((s & c) << d) ^ (((s << a) ^ s) >> b)
 	state->s1 = TAUSWORTHE(state->s1,  6U, 13U, 4294967294U, 18U);
 	state->s2 = TAUSWORTHE(state->s2,  2U, 27U, 4294967288U,  2U);
 	state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U,  7U);
@@ -75,15 +77,17 @@ EXPORT_SYMBOL(prandom_u32_state);
  */
 u32 prandom_u32(void)
 {
-	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = prandom_u32_state(state);
+	u32 res;
+
+	res = prandom_u32_state(state);
 	put_cpu_var(state);
-	return r;
+
+	return res;
 }
 EXPORT_SYMBOL(prandom_u32);
 
-/*
+/**
  *	prandom_bytes_state - get the requested number of pseudo-random bytes
  *
  *	@state: pointer to state structure holding seeded state.
@@ -93,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32);
  *	This is used for pseudo-randomness with no outside seeding.
  *	For more random results, use prandom_bytes().
  */
-void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
 {
-	unsigned char *p = buf;
-	int i;
+	u8 *ptr = buf;
 
-	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
-		u32 random = prandom_u32_state(state);
-		int j;
-
-		for (j = 0; j < sizeof(u32); j++) {
-			p[i + j] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	while (bytes >= sizeof(u32)) {
+		put_unaligned(prandom_u32_state(state), (u32 *) ptr);
+		ptr += sizeof(u32);
+		bytes -= sizeof(u32);
 	}
-	if (i < bytes) {
-		u32 random = prandom_u32_state(state);
 
-		for (; i < bytes; i++) {
-			p[i] = random;
-			random >>= BITS_PER_BYTE;
-		}
+	if (bytes > 0) {
+		u32 rem = prandom_u32_state(state);
+		do {
+			*ptr++ = (u8) rem;
+			bytes--;
+			rem >>= BITS_PER_BYTE;
+		} while (bytes > 0);
 	}
 }
 EXPORT_SYMBOL(prandom_bytes_state);
@@ -123,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state);
  *	@buf: where to copy the pseudo-random bytes to
  *	@bytes: the requested number of bytes
  */
-void prandom_bytes(void *buf, int bytes)
+void prandom_bytes(void *buf, size_t bytes)
 {
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
 
@@ -134,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes);
 
 static void prandom_warmup(struct rnd_state *state)
 {
-	/* Calling RNG ten times to satify recurrence condition */
+	/* Calling RNG ten times to satisfy recurrence condition */
 	prandom_u32_state(state);
 	prandom_u32_state(state);
 	prandom_u32_state(state);
@@ -147,21 +147,25 @@ static void prandom_warmup(struct rnd_state *state)
 	prandom_u32_state(state);
 }
 
-static void prandom_seed_very_weak(struct rnd_state *state, u32 seed)
+static u32 __extract_hwseed(void)
 {
-	/* Note: This sort of seeding is ONLY used in test cases and
-	 * during boot at the time from core_initcall until late_initcall
-	 * as we don't have a stronger entropy source available yet.
-	 * After late_initcall, we reseed entire state, we have to (!),
-	 * otherwise an attacker just needs to search 32 bit space to
-	 * probe for our internal 128 bit state if he knows a couple
-	 * of prandom32 outputs!
-	 */
-#define LCG(x)	((x) * 69069U)	/* super-duper LCG */
-	state->s1 = __seed(LCG(seed),        2U);
-	state->s2 = __seed(LCG(state->s1),   8U);
-	state->s3 = __seed(LCG(state->s2),  16U);
-	state->s4 = __seed(LCG(state->s3), 128U);
+	unsigned int val = 0;
+
+	(void)(arch_get_random_seed_int(&val) ||
+	       arch_get_random_int(&val));
+
+	return val;
+}
+
+static void prandom_seed_early(struct rnd_state *state, u32 seed,
+			       bool mix_with_hwseed)
+{
+#define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
+#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
+	state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
+	state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
+	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
+	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
 }
 
 /**
@@ -194,21 +198,22 @@ static int __init prandom_init(void)
 {
 	int i;
 
-#ifdef CONFIG_RANDOM32_SELFTEST
 	prandom_state_selftest();
-#endif
 
 	for_each_possible_cpu(i) {
 		struct rnd_state *state = &per_cpu(net_rand_state,i);
+		u32 weak_seed = (i + jiffies) ^ random_get_entropy();
 
-		prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy());
+		prandom_seed_early(state, weak_seed, true);
 		prandom_warmup(state);
 	}
+
 	return 0;
 }
 core_initcall(prandom_init);
 
 static void __prandom_timer(unsigned long dontcare);
+
 static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
 
 static void __prandom_timer(unsigned long dontcare)
@@ -220,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare)
 	prandom_seed(entropy);
 
 	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-	expires = 40 + (prandom_u32() % 40);
+	expires = 40 + prandom_u32_max(40);
 	seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
 
 	add_timer(&seed_timer);
@@ -244,10 +249,22 @@ static void __prandom_reseed(bool late)
 	static bool latch = false;
 	static DEFINE_SPINLOCK(lock);
 
+	/* Asking for random bytes might result in bytes getting
+	 * moved into the nonblocking pool and thus marking it
+	 * as initialized. In this case we would double back into
+	 * this function and attempt to do a late reseed.
+	 * Ignore the pointless attempt to reseed again if we're
+	 * already waiting for bytes when the nonblocking pool
+	 * got initialized.
+	 */
+
 	/* only allow initial seeding (late == false) once */
-	spin_lock_irqsave(&lock, flags);
+	if (!spin_trylock_irqsave(&lock, flags))
+		return;
+
 	if (latch && !late)
 		goto out;
+
 	latch = true;
 
 	for_each_possible_cpu(i) {
@@ -406,7 +423,7 @@ static void __init prandom_state_selftest(void)
 	for (i = 0; i < ARRAY_SIZE(test1); i++) {
 		struct rnd_state state;
 
-		prandom_seed_very_weak(&state, test1[i].seed);
+		prandom_seed_early(&state, test1[i].seed, false);
 		prandom_warmup(&state);
 
 		if (test1[i].result != prandom_u32_state(&state))
@@ -421,7 +438,7 @@ static void __init prandom_state_selftest(void)
 	for (i = 0; i < ARRAY_SIZE(test2); i++) {
 		struct rnd_state state;
 
-		prandom_seed_very_weak(&state, test2[i].seed);
+		prandom_seed_early(&state, test2[i].seed, false);
 		prandom_warmup(&state);
 
 		for (j = 0; j < test2[i].iteration - 1; j++)
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 65f4effd117f..c16c81a3d430 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -101,7 +101,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
 				 *      / \          / \
 				 *     p   u  -->   P   U
 				 *    /            /
-				 *   n            N
+				 *   n            n
 				 *
 				 * However, since g's parent might be red, and
 				 * 4) does not allow this, we need to recurse
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 31dd4ccd3baa..8b3c9dc88262 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -8,8 +8,8 @@
 #define CHECK_LOOPS 100
 
 struct test_node {
-	struct rb_node rb;
 	u32 key;
+	struct rb_node rb;
 
 	/* following fields used for testing augmented rbtree functionality */
 	u32 val;
@@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb)
 	return count;
 }
 
+static void check_postorder_foreach(int nr_nodes)
+{
+	struct test_node *cur, *n;
+	int count = 0;
+	rbtree_postorder_for_each_entry_safe(cur, n, &root, rb)
+		count++;
+
+	WARN_ON_ONCE(count != nr_nodes);
+}
+
 static void check_postorder(int nr_nodes)
 {
 	struct rb_node *rb;
@@ -148,6 +158,7 @@ static void check(int nr_nodes)
 	WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1);
 
 	check_postorder(nr_nodes);
+	check_postorder_foreach(nr_nodes);
 }
 
 static void check_augmented(int nr_nodes)
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 75510e94f7d0..464152410c51 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,11 +1,27 @@
+#include <linux/kernel.h>
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
 #include <linux/export.h>
 
-u32 reciprocal_value(u32 k)
+/*
+ * For a description of the algorithm please have a look at
+ * include/linux/reciprocal_div.h
+ */
+
+struct reciprocal_value reciprocal_value(u32 d)
 {
-	u64 val = (1LL << 32) + (k - 1);
-	do_div(val, k);
-	return (u32)val;
+	struct reciprocal_value R;
+	u64 m;
+	int l;
+
+	l = fls(d - 1);
+	m = ((1ULL << 32) * ((1ULL << l) - d));
+	do_div(m, d);
+	++m;
+	R.m = (u32)m;
+	R.sh1 = min(l, 1);
+	R.sh2 = max(l - 1, 0);
+
+	return R;
 }
 EXPORT_SYMBOL(reciprocal_value);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
new file mode 100644
index 000000000000..081be3ba9ea8
--- /dev/null
+++ b/lib/rhashtable.c
@@ -0,0 +1,794 @@
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * Based on the following paper:
+ * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
+ *
+ * Code partially derived from nft_hash
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/hash.h>
+#include <linux/random.h>
+#include <linux/rhashtable.h>
+
+#define HASH_DEFAULT_SIZE	64UL
+#define HASH_MIN_SIZE		4UL
+
+#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
+
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rht_mutex_is_held(const struct rhashtable *ht)
+{
+	return ht->p.mutex_is_held();
+}
+EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
+#endif
+
+static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
+{
+	return (void *) he - ht->p.head_offset;
+}
+
+static u32 __hashfn(const struct rhashtable *ht, const void *key,
+		      u32 len, u32 hsize)
+{
+	u32 h;
+
+	h = ht->p.hashfn(key, len, ht->p.hash_rnd);
+
+	return h & (hsize - 1);
+}
+
+/**
+ * rhashtable_hashfn - compute hash for key of given length
+ * @ht:		hash table to compute for
+ * @key:	pointer to key
+ * @len:	length of key
+ *
+ * Computes the hash value using the hash function provided in the 'hashfn'
+ * of struct rhashtable_params. The returned value is guaranteed to be
+ * smaller than the number of buckets in the hash table.
+ */
+u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len)
+{
+	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	return __hashfn(ht, key, len, tbl->size);
+}
+EXPORT_SYMBOL_GPL(rhashtable_hashfn);
+
+static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize)
+{
+	if (unlikely(!ht->p.key_len)) {
+		u32 h;
+
+		h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
+
+		return h & (hsize - 1);
+	}
+
+	return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize);
+}
+
+/**
+ * rhashtable_obj_hashfn - compute hash for hashed object
+ * @ht:		hash table to compute for
+ * @ptr:	pointer to hashed object
+ *
+ * Computes the hash value using the hash function `hashfn` respectively
+ * 'obj_hashfn' depending on whether the hash table is set up to work with
+ * a fixed length key. The returned value is guaranteed to be smaller than
+ * the number of buckets in the hash table.
+ */
+u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr)
+{
+	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	return obj_hashfn(ht, ptr, tbl->size);
+}
+EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn);
+
+static u32 head_hashfn(const struct rhashtable *ht,
+		       const struct rhash_head *he, u32 hsize)
+{
+	return obj_hashfn(ht, rht_obj(ht, he), hsize);
+}
+
+static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags)
+{
+	struct bucket_table *tbl;
+	size_t size;
+
+	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
+	tbl = kzalloc(size, flags);
+	if (tbl == NULL)
+		tbl = vzalloc(size);
+
+	if (tbl == NULL)
+		return NULL;
+
+	tbl->size = nbuckets;
+
+	return tbl;
+}
+
+static void bucket_table_free(const struct bucket_table *tbl)
+{
+	kvfree(tbl);
+}
+
+/**
+ * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
+ * @ht:		hash table
+ * @new_size:	new table size
+ */
+bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+{
+	/* Expand table when exceeding 75% load */
+	return ht->nelems > (new_size / 4 * 3);
+}
+EXPORT_SYMBOL_GPL(rht_grow_above_75);
+
+/**
+ * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
+ * @ht:		hash table
+ * @new_size:	new table size
+ */
+bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+{
+	/* Shrink table beneath 30% load */
+	return ht->nelems < (new_size * 3 / 10);
+}
+EXPORT_SYMBOL_GPL(rht_shrink_below_30);
+
+static void hashtable_chain_unzip(const struct rhashtable *ht,
+				  const struct bucket_table *new_tbl,
+				  struct bucket_table *old_tbl, size_t n)
+{
+	struct rhash_head *he, *p, *next;
+	unsigned int h;
+
+	/* Old bucket empty, no work needed. */
+	p = rht_dereference(old_tbl->buckets[n], ht);
+	if (!p)
+		return;
+
+	/* Advance the old bucket pointer one or more times until it
+	 * reaches a node that doesn't hash to the same bucket as the
+	 * previous node p. Call the previous node p;
+	 */
+	h = head_hashfn(ht, p, new_tbl->size);
+	rht_for_each(he, p->next, ht) {
+		if (head_hashfn(ht, he, new_tbl->size) != h)
+			break;
+		p = he;
+	}
+	RCU_INIT_POINTER(old_tbl->buckets[n], p->next);
+
+	/* Find the subsequent node which does hash to the same
+	 * bucket as node P, or NULL if no such node exists.
+	 */
+	next = NULL;
+	if (he) {
+		rht_for_each(he, he->next, ht) {
+			if (head_hashfn(ht, he, new_tbl->size) == h) {
+				next = he;
+				break;
+			}
+		}
+	}
+
+	/* Set p's next pointer to that subsequent node pointer,
+	 * bypassing the nodes which do not hash to p's bucket
+	 */
+	RCU_INIT_POINTER(p->next, next);
+}
+
+/**
+ * rhashtable_expand - Expand hash table while allowing concurrent lookups
+ * @ht:		the hash table to expand
+ * @flags:	allocation flags
+ *
+ * A secondary bucket array is allocated and the hash entries are migrated
+ * while keeping them on both lists until the end of the RCU grace period.
+ *
+ * This function may only be called in a context where it is safe to call
+ * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ *
+ * The caller must ensure that no concurrent table mutations take place.
+ * It is however valid to have concurrent lookups if they are RCU protected.
+ */
+int rhashtable_expand(struct rhashtable *ht, gfp_t flags)
+{
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct rhash_head *he;
+	unsigned int i, h;
+	bool complete;
+
+	ASSERT_RHT_MUTEX(ht);
+
+	if (ht->p.max_shift && ht->shift >= ht->p.max_shift)
+		return 0;
+
+	new_tbl = bucket_table_alloc(old_tbl->size * 2, flags);
+	if (new_tbl == NULL)
+		return -ENOMEM;
+
+	ht->shift++;
+
+	/* For each new bucket, search the corresponding old bucket
+	 * for the ﬁrst entry that hashes to the new bucket, and
+	 * link the new bucket to that entry. Since all the entries
+	 * which will end up in the new bucket appear in the same
+	 * old bucket, this constructs an entirely valid new hash
+	 * table, but with multiple buckets "zipped" together into a
+	 * single imprecise chain.
+	 */
+	for (i = 0; i < new_tbl->size; i++) {
+		h = i & (old_tbl->size - 1);
+		rht_for_each(he, old_tbl->buckets[h], ht) {
+			if (head_hashfn(ht, he, new_tbl->size) == i) {
+				RCU_INIT_POINTER(new_tbl->buckets[i], he);
+				break;
+			}
+		}
+	}
+
+	/* Publish the new table pointer. Lookups may now traverse
+	 * the new table, but they will not beneﬁt from any
+	 * additional efﬁciency until later steps unzip the buckets.
+	 */
+	rcu_assign_pointer(ht->tbl, new_tbl);
+
+	/* Unzip interleaved hash chains */
+	do {
+		/* Wait for readers. All new readers will see the new
+		 * table, and thus no references to the old table will
+		 * remain.
+		 */
+		synchronize_rcu();
+
+		/* For each bucket in the old table (each of which
+		 * contains items from multiple buckets of the new
+		 * table): ...
+		 */
+		complete = true;
+		for (i = 0; i < old_tbl->size; i++) {
+			hashtable_chain_unzip(ht, new_tbl, old_tbl, i);
+			if (old_tbl->buckets[i] != NULL)
+				complete = false;
+		}
+	} while (!complete);
+
+	bucket_table_free(old_tbl);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_expand);
+
+/**
+ * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
+ * @ht:		the hash table to shrink
+ * @flags:	allocation flags
+ *
+ * This function may only be called in a context where it is safe to call
+ * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ *
+ * The caller must ensure that no concurrent table mutations take place.
+ * It is however valid to have concurrent lookups if they are RCU protected.
+ */
+int rhashtable_shrink(struct rhashtable *ht, gfp_t flags)
+{
+	struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht);
+	struct rhash_head __rcu **pprev;
+	unsigned int i;
+
+	ASSERT_RHT_MUTEX(ht);
+
+	if (ht->shift <= ht->p.min_shift)
+		return 0;
+
+	ntbl = bucket_table_alloc(tbl->size / 2, flags);
+	if (ntbl == NULL)
+		return -ENOMEM;
+
+	ht->shift--;
+
+	/* Link each bucket in the new table to the ﬁrst bucket
+	 * in the old table that contains entries which will hash
+	 * to the new bucket.
+	 */
+	for (i = 0; i < ntbl->size; i++) {
+		ntbl->buckets[i] = tbl->buckets[i];
+
+		/* Link each bucket in the new table to the ﬁrst bucket
+		 * in the old table that contains entries which will hash
+		 * to the new bucket.
+		 */
+		for (pprev = &ntbl->buckets[i]; *pprev != NULL;
+		     pprev = &rht_dereference(*pprev, ht)->next)
+			;
+		RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
+	}
+
+	/* Publish the new, valid hash table */
+	rcu_assign_pointer(ht->tbl, ntbl);
+
+	/* Wait for readers. No new readers will have references to the
+	 * old hash table.
+	 */
+	synchronize_rcu();
+
+	bucket_table_free(tbl);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_shrink);
+
+/**
+ * rhashtable_insert - insert object into hash hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @flags:	allocation flags (table expansion)
+ *
+ * Will automatically grow the table via rhashtable_expand() if the the
+ * grow_decision function specified at rhashtable_init() returns true.
+ *
+ * The caller must ensure that no concurrent table mutations occur. It is
+ * however valid to have concurrent lookups if they are RCU protected.
+ */
+void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
+		       gfp_t flags)
+{
+	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
+	u32 hash;
+
+	ASSERT_RHT_MUTEX(ht);
+
+	hash = head_hashfn(ht, obj, tbl->size);
+	RCU_INIT_POINTER(obj->next, tbl->buckets[hash]);
+	rcu_assign_pointer(tbl->buckets[hash], obj);
+	ht->nelems++;
+
+	if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
+		rhashtable_expand(ht, flags);
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert);
+
+/**
+ * rhashtable_remove_pprev - remove object from hash table given previous element
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @pprev:	pointer to previous element
+ * @flags:	allocation flags (table expansion)
+ *
+ * Identical to rhashtable_remove() but caller is alreayd aware of the element
+ * in front of the element to be deleted. This is in particular useful for
+ * deletion when combined with walking or lookup.
+ */
+void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
+			     struct rhash_head __rcu **pprev, gfp_t flags)
+{
+	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
+
+	ASSERT_RHT_MUTEX(ht);
+
+	RCU_INIT_POINTER(*pprev, obj->next);
+	ht->nelems--;
+
+	if (ht->p.shrink_decision &&
+	    ht->p.shrink_decision(ht, tbl->size))
+		rhashtable_shrink(ht, flags);
+}
+EXPORT_SYMBOL_GPL(rhashtable_remove_pprev);
+
+/**
+ * rhashtable_remove - remove object from hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @flags:	allocation flags (table expansion)
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerable slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table via rhashtable_expand() if the the
+ * shrink_decision function specified at rhashtable_init() returns true.
+ *
+ * The caller must ensure that no concurrent table mutations occur. It is
+ * however valid to have concurrent lookups if they are RCU protected.
+ */
+bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj,
+		       gfp_t flags)
+{
+	struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
+	struct rhash_head __rcu **pprev;
+	struct rhash_head *he;
+	u32 h;
+
+	ASSERT_RHT_MUTEX(ht);
+
+	h = head_hashfn(ht, obj, tbl->size);
+
+	pprev = &tbl->buckets[h];
+	rht_for_each(he, tbl->buckets[h], ht) {
+		if (he != obj) {
+			pprev = &he->next;
+			continue;
+		}
+
+		rhashtable_remove_pprev(ht, he, pprev, flags);
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(rhashtable_remove);
+
+/**
+ * rhashtable_lookup - lookup key in hash table
+ * @ht:		hash table
+ * @key:	pointer to key
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * This lookup function may only be used for fixed key hash table (key_len
+ * paramter set). It will BUG() if used inappropriately.
+ *
+ * Lookups may occur in parallel with hash mutations as long as the lookup is
+ * guarded by rcu_read_lock(). The caller must take care of this.
+ */
+void *rhashtable_lookup(const struct rhashtable *ht, const void *key)
+{
+	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+	struct rhash_head *he;
+	u32 h;
+
+	BUG_ON(!ht->p.key_len);
+
+	h = __hashfn(ht, key, ht->p.key_len, tbl->size);
+	rht_for_each_rcu(he, tbl->buckets[h], ht) {
+		if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key,
+			   ht->p.key_len))
+			continue;
+		return (void *) he - ht->p.head_offset;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(rhashtable_lookup);
+
+/**
+ * rhashtable_lookup_compare - search hash table with compare function
+ * @ht:		hash table
+ * @hash:	hash value of desired entry
+ * @compare:	compare function, must return true on match
+ * @arg:	argument passed on to compare function
+ *
+ * Traverses the bucket chain behind the provided hash value and calls the
+ * specified compare function for each entry.
+ *
+ * Lookups may occur in parallel with hash mutations as long as the lookup is
+ * guarded by rcu_read_lock(). The caller must take care of this.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash,
+				bool (*compare)(void *, void *), void *arg)
+{
+	const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+	struct rhash_head *he;
+
+	if (unlikely(hash >= tbl->size))
+		return NULL;
+
+	rht_for_each_rcu(he, tbl->buckets[hash], ht) {
+		if (!compare(rht_obj(ht, he), arg))
+			continue;
+		return (void *) he - ht->p.head_offset;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
+
+static size_t rounded_hashtable_size(struct rhashtable_params *params)
+{
+	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+		   1UL << params->min_shift);
+}
+
+/**
+ * rhashtable_init - initialize a new hash table
+ * @ht:		hash table to be initialized
+ * @params:	configuration parameters
+ *
+ * Initializes a new hash table based on the provided configuration
+ * parameters. A table can be configured either with a variable or
+ * fixed length key:
+ *
+ * Configuration Example 1: Fixed length keys
+ * struct test_obj {
+ *	int			key;
+ *	void *			my_member;
+ *	struct rhash_head	node;
+ * };
+ *
+ * struct rhashtable_params params = {
+ *	.head_offset = offsetof(struct test_obj, node),
+ *	.key_offset = offsetof(struct test_obj, key),
+ *	.key_len = sizeof(int),
+ *	.hashfn = arch_fast_hash,
+ *	.mutex_is_held = &my_mutex_is_held,
+ * };
+ *
+ * Configuration Example 2: Variable length keys
+ * struct test_obj {
+ *	[...]
+ *	struct rhash_head	node;
+ * };
+ *
+ * u32 my_hash_fn(const void *data, u32 seed)
+ * {
+ *	struct test_obj *obj = data;
+ *
+ *	return [... hash ...];
+ * }
+ *
+ * struct rhashtable_params params = {
+ *	.head_offset = offsetof(struct test_obj, node),
+ *	.hashfn = arch_fast_hash,
+ *	.obj_hashfn = my_hash_fn,
+ *	.mutex_is_held = &my_mutex_is_held,
+ * };
+ */
+int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
+{
+	struct bucket_table *tbl;
+	size_t size;
+
+	size = HASH_DEFAULT_SIZE;
+
+	if ((params->key_len && !params->hashfn) ||
+	    (!params->key_len && !params->obj_hashfn))
+		return -EINVAL;
+
+	params->min_shift = max_t(size_t, params->min_shift,
+				  ilog2(HASH_MIN_SIZE));
+
+	if (params->nelem_hint)
+		size = rounded_hashtable_size(params);
+
+	tbl = bucket_table_alloc(size, GFP_KERNEL);
+	if (tbl == NULL)
+		return -ENOMEM;
+
+	memset(ht, 0, sizeof(*ht));
+	ht->shift = ilog2(tbl->size);
+	memcpy(&ht->p, params, sizeof(*params));
+	RCU_INIT_POINTER(ht->tbl, tbl);
+
+	if (!ht->p.hash_rnd)
+		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rhashtable_init);
+
+/**
+ * rhashtable_destroy - destroy hash table
+ * @ht:		the hash table to destroy
+ *
+ * Frees the bucket array. This function is not rcu safe, therefore the caller
+ * has to make sure that no resizing may happen by unpublishing the hashtable
+ * and waiting for the quiescent cycle before releasing the bucket array.
+ */
+void rhashtable_destroy(const struct rhashtable *ht)
+{
+	bucket_table_free(ht->tbl);
+}
+EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+/**************************************************************************
+ * Self Test
+ **************************************************************************/
+
+#ifdef CONFIG_TEST_RHASHTABLE
+
+#define TEST_HT_SIZE	8
+#define TEST_ENTRIES	2048
+#define TEST_PTR	((void *) 0xdeadbeef)
+#define TEST_NEXPANDS	4
+
+static int test_mutex_is_held(void)
+{
+	return 1;
+}
+
+struct test_obj {
+	void			*ptr;
+	int			value;
+	struct rhash_head	node;
+};
+
+static int __init test_rht_lookup(struct rhashtable *ht)
+{
+	unsigned int i;
+
+	for (i = 0; i < TEST_ENTRIES * 2; i++) {
+		struct test_obj *obj;
+		bool expected = !(i % 2);
+		u32 key = i;
+
+		obj = rhashtable_lookup(ht, &key);
+
+		if (expected && !obj) {
+			pr_warn("Test failed: Could not find key %u\n", key);
+			return -ENOENT;
+		} else if (!expected && obj) {
+			pr_warn("Test failed: Unexpected entry found for key %u\n",
+				key);
+			return -EEXIST;
+		} else if (expected && obj) {
+			if (obj->ptr != TEST_PTR || obj->value != i) {
+				pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n",
+					obj->ptr, TEST_PTR, obj->value, i);
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void test_bucket_stats(struct rhashtable *ht,
+				     struct bucket_table *tbl,
+				     bool quiet)
+{
+	unsigned int cnt, i, total = 0;
+	struct test_obj *obj;
+
+	for (i = 0; i < tbl->size; i++) {
+		cnt = 0;
+
+		if (!quiet)
+			pr_info(" [%#4x/%zu]", i, tbl->size);
+
+		rht_for_each_entry_rcu(obj, tbl->buckets[i], node) {
+			cnt++;
+			total++;
+			if (!quiet)
+				pr_cont(" [%p],", obj);
+		}
+
+		if (!quiet)
+			pr_cont("\n  [%#x] first element: %p, chain length: %u\n",
+				i, tbl->buckets[i], cnt);
+	}
+
+	pr_info("  Traversal complete: counted=%u, nelems=%zu, entries=%d\n",
+		total, ht->nelems, TEST_ENTRIES);
+}
+
+static int __init test_rhashtable(struct rhashtable *ht)
+{
+	struct bucket_table *tbl;
+	struct test_obj *obj, *next;
+	int err;
+	unsigned int i;
+
+	/*
+	 * Insertion Test:
+	 * Insert TEST_ENTRIES into table with all keys even numbers
+	 */
+	pr_info("  Adding %d keys\n", TEST_ENTRIES);
+	for (i = 0; i < TEST_ENTRIES; i++) {
+		struct test_obj *obj;
+
+		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+		if (!obj) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		obj->ptr = TEST_PTR;
+		obj->value = i * 2;
+
+		rhashtable_insert(ht, &obj->node, GFP_KERNEL);
+	}
+
+	rcu_read_lock();
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	test_bucket_stats(ht, tbl, true);
+	test_rht_lookup(ht);
+	rcu_read_unlock();
+
+	for (i = 0; i < TEST_NEXPANDS; i++) {
+		pr_info("  Table expansion iteration %u...\n", i);
+		rhashtable_expand(ht, GFP_KERNEL);
+
+		rcu_read_lock();
+		pr_info("  Verifying lookups...\n");
+		test_rht_lookup(ht);
+		rcu_read_unlock();
+	}
+
+	for (i = 0; i < TEST_NEXPANDS; i++) {
+		pr_info("  Table shrinkage iteration %u...\n", i);
+		rhashtable_shrink(ht, GFP_KERNEL);
+
+		rcu_read_lock();
+		pr_info("  Verifying lookups...\n");
+		test_rht_lookup(ht);
+		rcu_read_unlock();
+	}
+
+	pr_info("  Deleting %d keys\n", TEST_ENTRIES);
+	for (i = 0; i < TEST_ENTRIES; i++) {
+		u32 key = i * 2;
+
+		obj = rhashtable_lookup(ht, &key);
+		BUG_ON(!obj);
+
+		rhashtable_remove(ht, &obj->node, GFP_KERNEL);
+		kfree(obj);
+	}
+
+	return 0;
+
+error:
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	for (i = 0; i < tbl->size; i++)
+		rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node)
+			kfree(obj);
+
+	return err;
+}
+
+static int __init test_rht_init(void)
+{
+	struct rhashtable ht;
+	struct rhashtable_params params = {
+		.nelem_hint = TEST_HT_SIZE,
+		.head_offset = offsetof(struct test_obj, node),
+		.key_offset = offsetof(struct test_obj, value),
+		.key_len = sizeof(int),
+		.hashfn = arch_fast_hash,
+		.mutex_is_held = &test_mutex_is_held,
+		.grow_decision = rht_grow_above_75,
+		.shrink_decision = rht_shrink_below_30,
+	};
+	int err;
+
+	pr_info("Running resizable hashtable tests...\n");
+
+	err = rhashtable_init(&ht, &params);
+	if (err < 0) {
+		pr_warn("Test failed: Unable to initialize hashtable: %d\n",
+			err);
+		return err;
+	}
+
+	err = test_rhashtable(&ht);
+
+	rhashtable_destroy(&ht);
+
+	return err;
+}
+
+subsys_initcall(test_rht_init);
+
+#endif /* CONFIG_TEST_RHASHTABLE */
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index d16fa295ae1d..9cdf62f8accd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
  **/
 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 {
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
 	struct scatterlist *ret = &sgl[nents - 1];
 #else
 	struct scatterlist *sg, *ret = NULL;
@@ -165,6 +165,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  * __sg_free_table - Free a previously mapped sg table
  * @table:	The sg table header to use
  * @max_ents:	The maximum number of entries per single scatterlist
+ * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk
  * @free_fn:	Free function
  *
  *  Description:
@@ -174,7 +175,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  *
  **/
 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
-		     sg_free_fn *free_fn)
+		     bool skip_first_chunk, sg_free_fn *free_fn)
 {
 	struct scatterlist *sgl, *next;
 
@@ -202,7 +203,10 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 		}
 
 		table->orig_nents -= sg_size;
-		free_fn(sgl, alloc_size);
+		if (!skip_first_chunk) {
+			free_fn(sgl, alloc_size);
+			skip_first_chunk = false;
+		}
 		sgl = next;
 	}
 
@@ -217,7 +221,7 @@ EXPORT_SYMBOL(__sg_free_table);
  **/
 void sg_free_table(struct sg_table *table)
 {
-	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
 }
 EXPORT_SYMBOL(sg_free_table);
 
@@ -241,8 +245,8 @@ EXPORT_SYMBOL(sg_free_table);
  *
  **/
 int __sg_alloc_table(struct sg_table *table, unsigned int nents,
-		     unsigned int max_ents, gfp_t gfp_mask,
-		     sg_alloc_fn *alloc_fn)
+		     unsigned int max_ents, struct scatterlist *first_chunk,
+		     gfp_t gfp_mask, sg_alloc_fn *alloc_fn)
 {
 	struct scatterlist *sg, *prv;
 	unsigned int left;
@@ -251,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 
 	if (nents == 0)
 		return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
 	if (WARN_ON_ONCE(nents > max_ents))
 		return -EINVAL;
 #endif
@@ -269,7 +273,12 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 
 		left -= sg_size;
 
-		sg = alloc_fn(alloc_size, gfp_mask);
+		if (first_chunk) {
+			sg = first_chunk;
+			first_chunk = NULL;
+		} else {
+			sg = alloc_fn(alloc_size, gfp_mask);
+		}
 		if (unlikely(!sg)) {
 			/*
 			 * Adjust entry count to reflect that the last
@@ -324,9 +333,9 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 	int ret;
 
 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
-			       gfp_mask, sg_kmalloc);
+			       NULL, gfp_mask, sg_kmalloc);
 	if (unlikely(ret))
-		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
 
 	return ret;
 }
@@ -495,7 +504,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
  *   true if @miter contains the valid mapping.  false if end of sg
  *   list is reached.
  */
-static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
+bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
 {
 	sg_miter_stop(miter);
 
@@ -513,6 +522,7 @@ static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
 
 	return true;
 }
+EXPORT_SYMBOL(sg_miter_skip);
 
 /**
  * sg_miter_next - proceed mapping iterator to the next mapping
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 5847a4921b8e..09225796991a 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -17,9 +17,6 @@ void show_mem(unsigned int filter)
 	printk("Mem-Info:\n");
 	show_free_areas(filter);
 
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-
 	for_each_online_pgdat(pgdat) {
 		unsigned long flags;
 		int zoneid;
@@ -46,4 +43,7 @@ void show_mem(unsigned int filter)
 	printk("%lu pages in pagetable cache\n",
 		quicklist_total_size());
 #endif
+#ifdef CONFIG_MEMORY_FAILURE
+	printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
+#endif
 }
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 04abe53f12a1..1afec32de6f2 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,8 @@
 #include <linux/kallsyms.h>
 #include <linux/sched.h>
 
-notrace unsigned int debug_smp_processor_id(void)
+notrace static unsigned int check_preemption_disabled(const char *what1,
+							const char *what2)
 {
 	int this_cpu = raw_smp_processor_id();
 
@@ -38,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void)
 	if (!printk_ratelimit())
 		goto out_enable;
 
-	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] "
-			"code: %s/%d\n",
-			preempt_count() - 1, current->comm, current->pid);
+	printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
+		what1, what2, preempt_count() - 1, current->comm, current->pid);
+
 	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
 	dump_stack();
 
@@ -50,5 +51,14 @@ out:
 	return this_cpu;
 }
 
+notrace unsigned int debug_smp_processor_id(void)
+{
+	return check_preemption_disabled("smp_processor_id", "");
+}
 EXPORT_SYMBOL(debug_smp_processor_id);
 
+notrace void __this_cpu_preempt_check(const char *op)
+{
+	check_preemption_disabled("__this_cpu_", op);
+}
+EXPORT_SYMBOL(__this_cpu_preempt_check);
diff --git a/lib/string.c b/lib/string.c
index e5878de4f101..f3c6ff596414 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL(strcpy);
 
 #ifndef __HAVE_ARCH_STRNCPY
 /**
- * strncpy - Copy a length-limited, %NUL-terminated string
+ * strncpy - Copy a length-limited, C-string
  * @dest: Where to copy the string to
  * @src: Where to copy the string from
  * @count: The maximum number of bytes to copy
@@ -136,7 +136,7 @@ EXPORT_SYMBOL(strncpy);
 
 #ifndef __HAVE_ARCH_STRLCPY
 /**
- * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * strlcpy - Copy a C-string into a sized buffer
  * @dest: Where to copy the string to
  * @src: Where to copy the string from
  * @size: size of destination buffer
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(strcat);
 
 #ifndef __HAVE_ARCH_STRNCAT
 /**
- * strncat - Append a length-limited, %NUL-terminated string to another
+ * strncat - Append a length-limited, C-string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
  * @count: The maximum numbers of bytes to copy
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(strncat);
 
 #ifndef __HAVE_ARCH_STRLCAT
 /**
- * strlcat - Append a length-limited, %NUL-terminated string to another
+ * strlcat - Append a length-limited, C-string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
  * @count: The size of the destination buffer.
@@ -301,6 +301,24 @@ char *strchr(const char *s, int c)
 EXPORT_SYMBOL(strchr);
 #endif
 
+#ifndef __HAVE_ARCH_STRCHRNUL
+/**
+ * strchrnul - Find and return a character in a string, or end of string
+ * @s: The string to be searched
+ * @c: The character to search for
+ *
+ * Returns pointer to first occurrence of 'c' in s. If c is not found, then
+ * return a pointer to the null byte at the end of s.
+ */
+char *strchrnul(const char *s, int c)
+{
+	while (*s && *s != (char)c)
+		s++;
+	return (char *)s;
+}
+EXPORT_SYMBOL(strchrnul);
+#endif
+
 #ifndef __HAVE_ARCH_STRRCHR
 /**
  * strrchr - Find the last occurrence of a character in a string
@@ -648,7 +666,7 @@ EXPORT_SYMBOL(memmove);
  * @count: The size of the area.
  */
 #undef memcmp
-int memcmp(const void *cs, const void *ct, size_t count)
+__visible int memcmp(const void *cs, const void *ct, size_t count)
 {
 	const unsigned char *su1, *su2;
 	int res = 0;
@@ -789,9 +807,9 @@ void *memchr_inv(const void *start, int c, size_t bytes)
 		return check_bytes8(start, value, bytes);
 
 	value64 = value;
-#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
+#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
 	value64 *= 0x0101010101010101;
-#elif defined(ARCH_HAS_FAST_MULTIPLIER)
+#elif defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER)
 	value64 *= 0x01010101;
 	value64 |= value64 << 32;
 #else
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
 int string_get_size(u64 size, const enum string_size_units units,
 		    char *buf, int len)
 {
-	static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
-				   "EB", "ZB", "YB", NULL};
-	static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
-				 "EiB", "ZiB", "YiB", NULL };
-	static const char **units_str[] = {
-		[STRING_UNITS_10] =  units_10,
+	static const char *const units_10[] = {
+		"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+	};
+	static const char *const units_2[] = {
+		"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+		NULL
+	};
+	static const char *const *const units_str[] = {
+		[STRING_UNITS_10] = units_10,
 		[STRING_UNITS_2] = units_2,
 	};
 	static const unsigned int divisor[] = {
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index e4399fa65ad6..4abda074ea45 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -86,6 +86,7 @@ static unsigned int io_tlb_index;
  * We need to save away the original address corresponding to a mapped entry
  * for the sync operations.
  */
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 static phys_addr_t *io_tlb_orig_addr;
 
 /*
@@ -172,8 +173,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
-						PAGE_ALIGN(io_tlb_overflow));
+	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
+						PAGE_ALIGN(io_tlb_overflow),
+						PAGE_SIZE);
 	if (!v_overflow_buffer)
 		return -ENOMEM;
 
@@ -184,11 +186,17 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_list = memblock_virt_alloc(
+				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
+				PAGE_SIZE);
+	io_tlb_orig_addr = memblock_virt_alloc(
+				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
+				PAGE_SIZE);
+	for (i = 0; i < io_tlb_nslabs; i++) {
+		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	}
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 
 	if (verbose)
 		swiotlb_print_info();
@@ -215,13 +223,13 @@ swiotlb_init(int verbose)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
-	vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
+	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
 		return;
 
 	if (io_tlb_start)
-		free_bootmem(io_tlb_start,
-				 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_early(io_tlb_start,
+				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	pr_warn("Cannot allocate SWIOTLB buffer");
 	no_iotlb_memory = true;
 }
@@ -308,10 +316,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!io_tlb_list)
 		goto cleanup3;
 
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	io_tlb_index = 0;
-
 	io_tlb_orig_addr = (phys_addr_t *)
 		__get_free_pages(GFP_KERNEL,
 				 get_order(io_tlb_nslabs *
@@ -319,7 +323,11 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!io_tlb_orig_addr)
 		goto cleanup4;
 
-	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
+	for (i = 0; i < io_tlb_nslabs; i++) {
+		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	}
+	io_tlb_index = 0;
 
 	swiotlb_print_info();
 
@@ -357,19 +365,19 @@ void __init swiotlb_free(void)
 		free_pages((unsigned long)phys_to_virt(io_tlb_start),
 			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
 	} else {
-		free_bootmem_late(io_tlb_overflow_buffer,
-				  PAGE_ALIGN(io_tlb_overflow));
-		free_bootmem_late(__pa(io_tlb_orig_addr),
-				  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_bootmem_late(__pa(io_tlb_list),
-				  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		free_bootmem_late(io_tlb_start,
-				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_late(io_tlb_overflow_buffer,
+				   PAGE_ALIGN(io_tlb_overflow));
+		memblock_free_late(__pa(io_tlb_orig_addr),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(io_tlb_list),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+		memblock_free_late(io_tlb_start,
+				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
 	io_tlb_nslabs = 0;
 }
 
-static int is_swiotlb_buffer(phys_addr_t paddr)
+int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
@@ -505,7 +513,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	dev_warn(hwdev, "swiotlb buffer is full\n");
+	if (printk_ratelimit())
+		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
 	return SWIOTLB_MAP_ERROR;
 found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -550,7 +559,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+	if (orig_addr != INVALID_PHYS_ADDR &&
+	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
 		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
 
 	/*
@@ -567,8 +577,10 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 		 * Step 1: return the slots to the free list, merging the
 		 * slots with superceeding slots
 		 */
-		for (i = index + nslots - 1; i >= index; i--)
+		for (i = index + nslots - 1; i >= index; i--) {
 			io_tlb_list[i] = ++count;
+			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
 		 * if available (non zero)
@@ -587,6 +599,8 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
+	if (orig_addr == INVALID_PHYS_ADDR)
+		return;
 	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
 
 	switch (target) {
diff --git a/lib/syscall.c b/lib/syscall.c
index 58710eefeac8..e30e03932480 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -72,4 +72,3 @@ int task_current_syscall(struct task_struct *target, long *callno,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(task_current_syscall);
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
 #include <linux/module.h>
 
 #define for_each_test(i, test)	\
-	for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+	for (i = 0; i < ARRAY_SIZE(test); i++)
 
 struct test_fail {
 	const char *str;
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
new file mode 100644
index 000000000000..23e070bcf72d
--- /dev/null
+++ b/lib/test_bpf.c
@@ -0,0 +1,1988 @@
+/*
+ * Testsuite for BPF interpreter and BPF JIT compiler
+ *
+ * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/filter.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+
+/* General test specific settings */
+#define MAX_SUBTESTS	3
+#define MAX_TESTRUNS	10000
+#define MAX_DATA	128
+#define MAX_INSNS	512
+#define MAX_K		0xffffFFFF
+
+/* Few constants used to init test 'skb' */
+#define SKB_TYPE	3
+#define SKB_MARK	0x1234aaaa
+#define SKB_HASH	0x1234aaab
+#define SKB_QUEUE_MAP	123
+#define SKB_VLAN_TCI	0xffff
+#define SKB_DEV_IFINDEX	577
+#define SKB_DEV_TYPE	588
+
+/* Redefine REGs to make tests less verbose */
+#define R0		BPF_REG_0
+#define R1		BPF_REG_1
+#define R2		BPF_REG_2
+#define R3		BPF_REG_3
+#define R4		BPF_REG_4
+#define R5		BPF_REG_5
+#define R6		BPF_REG_6
+#define R7		BPF_REG_7
+#define R8		BPF_REG_8
+#define R9		BPF_REG_9
+#define R10		BPF_REG_10
+
+/* Flags that can be passed to test cases */
+#define FLAG_NO_DATA		BIT(0)
+#define FLAG_EXPECTED_FAIL	BIT(1)
+
+enum {
+	CLASSIC  = BIT(6),	/* Old BPF instructions only. */
+	INTERNAL = BIT(7),	/* Extended instruction set.  */
+};
+
+#define TEST_TYPE_MASK		(CLASSIC | INTERNAL)
+
+struct bpf_test {
+	const char *descr;
+	union {
+		struct sock_filter insns[MAX_INSNS];
+		struct bpf_insn insns_int[MAX_INSNS];
+	} u;
+	__u8 aux;
+	__u8 data[MAX_DATA];
+	struct {
+		int data_size;
+		__u32 result;
+	} test[MAX_SUBTESTS];
+};
+
+static struct bpf_test tests[] = {
+	{
+		"TAX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_ALU | BPF_NEG, 0), /* A == -3 */
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0), /* X == len - 3 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 1),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 10, 20, 30, 40, 50 },
+		{ { 2, 10 }, { 3, 20 }, { 4, 30 } },
+	},
+	{
+		"TXA",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0) /* A == len * 2 */
+		},
+		CLASSIC,
+		{ 10, 20, 30, 40, 50 },
+		{ { 1, 2 }, { 3, 6 }, { 4, 8 } },
+	},
+	{
+		"ADD_SUB_MUL_K",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_MUL | BPF_K, 3),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0xfffffffd } }
+	},
+	{
+		"DIV_KX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 8),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff),
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x40000001 } }
+	},
+	{
+		"AND_OR_LSH_K",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xff),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 27),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xf),
+			BPF_STMT(BPF_ALU | BPF_OR | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x800000ff }, { 1, 0x800000ff } },
+	},
+	{
+		"LD_IMM_0",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0), /* ld #0 */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 1 } },
+	},
+	{
+		"LD_IND",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, MAX_K),
+			BPF_STMT(BPF_RET | BPF_K, 1)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 }, { 60, 0 } },
+	},
+	{
+		"LD_ABS",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 1000),
+			BPF_STMT(BPF_RET | BPF_K, 1)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 }, { 60, 0 } },
+	},
+	{
+		"LD_ABS_LL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF + 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 1, 2, 3 },
+		{ { 1, 0 }, { 2, 3 } },
+	},
+	{
+		"LD_IND_LL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, SKF_LL_OFF - 1),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 1, 2, 3, 0xff },
+		{ { 1, 1 }, { 3, 3 }, { 4, 0xff } },
+	},
+	{
+		"LD_ABS_NET",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 },
+		{ { 15, 0 }, { 16, 3 } },
+	},
+	{
+		"LD_IND_NET",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, SKF_NET_OFF - 15),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 },
+		{ { 14, 0 }, { 15, 1 }, { 17, 3 } },
+	},
+	{
+		"LD_PKTTYPE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PKTTYPE),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 3 }, { 10, 3 } },
+	},
+	{
+		"LD_MARK",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_MARK),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_MARK}, { 10, SKB_MARK} },
+	},
+	{
+		"LD_RXHASH",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_RXHASH),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_HASH}, { 10, SKB_HASH} },
+	},
+	{
+		"LD_QUEUE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_QUEUE),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_QUEUE_MAP }, { 10, SKB_QUEUE_MAP } },
+	},
+	{
+		"LD_PROTOCOL",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 1),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PROTOCOL),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 30, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ 10, 20, 30 },
+		{ { 10, ETH_P_IP }, { 100, ETH_P_IP } },
+	},
+	{
+		"LD_VLAN_TAG",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_VLAN_TAG),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{
+			{ 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT },
+			{ 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }
+		},
+	},
+	{
+		"LD_VLAN_TAG_PRESENT",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{
+			{ 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
+			{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
+		},
+	},
+	{
+		"LD_IFINDEX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_IFINDEX),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_DEV_IFINDEX }, { 10, SKB_DEV_IFINDEX } },
+	},
+	{
+		"LD_HATYPE",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_HATYPE),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, SKB_DEV_TYPE }, { 10, SKB_DEV_TYPE } },
+	},
+	{
+		"LD_CPU",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_CPU),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_CPU),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 }, { 10, 0 } },
+	},
+	{
+		"LD_NLATTR",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 2),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+#ifdef __BIG_ENDIAN
+		{ 0xff, 0xff, 0, 4, 0, 2, 0, 4, 0, 3 },
+#else
+		{ 0xff, 0xff, 4, 0, 2, 0, 4, 0, 3, 0 },
+#endif
+		{ { 4, 0 }, { 20, 6 } },
+	},
+	{
+		"LD_NLATTR_NEST",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 3),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_NLATTR_NEST),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+#ifdef __BIG_ENDIAN
+		{ 0xff, 0xff, 0, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3 },
+#else
+		{ 0xff, 0xff, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3, 0 },
+#endif
+		{ { 4, 0 }, { 20, 10 } },
+	},
+	{
+		"LD_PAYLOAD_OFF",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_PAY_OFFSET),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		/* 00:00:00:00:00:00 > 00:00:00:00:00:00, ethtype IPv4 (0x0800),
+		 * length 98: 127.0.0.1 > 127.0.0.1: ICMP echo request,
+		 * id 9737, seq 1, length 64
+		 */
+		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x08, 0x00,
+		  0x45, 0x00, 0x00, 0x54, 0xac, 0x8b, 0x40, 0x00, 0x40,
+		  0x01, 0x90, 0x1b, 0x7f, 0x00, 0x00, 0x01 },
+		{ { 30, 0 }, { 100, 42 } },
+	},
+	{
+		"LD_ANC_XOR",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 10),
+			BPF_STMT(BPF_LDX | BPF_IMM, 300),
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_ALU_XOR_X),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 4, 10 ^ 300 }, { 20, 10 ^ 300 } },
+	},
+	{
+		"SPILL_FILL",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 2),
+			BPF_STMT(BPF_ALU | BPF_RSH, 1),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_ST, 1), /* M1 = 1 ^ len */
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_K, 0x80000000),
+			BPF_STMT(BPF_ST, 2), /* M2 = 1 ^ len ^ 0x80000000 */
+			BPF_STMT(BPF_STX, 15), /* M3 = len */
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_LD | BPF_MEM, 2),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0x80000001 }, { 2, 0x80000002 }, { 60, 0x80000000 ^ 60 } }
+	},
+	{
+		"JEQ",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 3, 3, 3, 3, 3 },
+		{ { 1, 0 }, { 3, 1 }, { 4, MAX_K } },
+	},
+	{
+		"JGT",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+			BPF_JUMP(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 4, 4, 4, 3, 3 },
+		{ { 2, 0 }, { 3, 1 }, { 4, MAX_K } },
+	},
+	{
+		"JGE",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, MAX_K),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 1, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 10),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 2, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 20),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 3, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 4, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 40),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 1, 2, 3, 4, 5 },
+		{ { 1, 20 }, { 3, 40 }, { 5, MAX_K } },
+	},
+	{
+		"JSET",
+		.u.insns = {
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_JUMP(BPF_JMP | BPF_JA, 1, 1, 1),
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0),
+			BPF_STMT(BPF_LDX | BPF_LEN, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, 4),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_W | BPF_IND, 0),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 1, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 10),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x80000000, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 30),
+			BPF_STMT(BPF_RET | BPF_K, MAX_K)
+		},
+		CLASSIC,
+		{ 0, 0xAA, 0x55, 1 },
+		{ { 4, 10 }, { 5, 20 }, { 6, MAX_K } },
+	},
+	{
+		"tcpdump port 22",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 0, 8), /* IPv6 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 17),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 54),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 14, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 56),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 12, 13),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 12), /* IPv4 */
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 8),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 6, 0),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 1),
+			BPF_STMT(BPF_RET | BPF_K, 0xffff),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+		},
+		CLASSIC,
+		/* 3c:07:54:43:e5:76 > 10:bf:48:d6:43:d6, ethertype IPv4(0x0800)
+		 * length 114: 10.1.1.149.49700 > 10.1.2.10.22: Flags [P.],
+		 * seq 1305692979:1305693027, ack 3650467037, win 65535,
+		 * options [nop,nop,TS val 2502645400 ecr 3971138], length 48
+		 */
+		{ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		  0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76,
+		  0x08, 0x00,
+		  0x45, 0x10, 0x00, 0x64, 0x75, 0xb5,
+		  0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */
+		  0x0a, 0x01, 0x01, 0x95, /* ip src */
+		  0x0a, 0x01, 0x02, 0x0a, /* ip dst */
+		  0xc2, 0x24,
+		  0x00, 0x16 /* dst port */ },
+		{ { 10, 0 }, { 30, 0 }, { 100, 65535 } },
+	},
+	{
+		"tcpdump complex",
+		.u.insns = {
+			/* tcpdump -nei eth0 'tcp port 22 and (((ip[2:2] -
+			 * ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0) and
+			 * (len > 115 or len < 30000000000)' -d
+			 */
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 30, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x800, 0, 29),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 0, 27),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20),
+			BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 25, 0),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 20),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 16),
+			BPF_STMT(BPF_ST, 1),
+			BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 14),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf),
+			BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0x5), /* libpcap emits K on TAX */
+			BPF_STMT(BPF_LD | BPF_MEM, 1),
+			BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0),
+			BPF_STMT(BPF_ST, 5),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14),
+			BPF_STMT(BPF_LD | BPF_B | BPF_IND, 26),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0),
+			BPF_STMT(BPF_ALU | BPF_RSH | BPF_K, 2),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0x9), /* libpcap emits K on TAX */
+			BPF_STMT(BPF_LD | BPF_MEM, 5),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 4, 0),
+			BPF_STMT(BPF_LD | BPF_LEN, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, 0x73, 1, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 0xfc23ac00, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0xffff),
+			BPF_STMT(BPF_RET | BPF_K, 0),
+		},
+		CLASSIC,
+		{ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		  0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76,
+		  0x08, 0x00,
+		  0x45, 0x10, 0x00, 0x64, 0x75, 0xb5,
+		  0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */
+		  0x0a, 0x01, 0x01, 0x95, /* ip src */
+		  0x0a, 0x01, 0x02, 0x0a, /* ip dst */
+		  0xc2, 0x24,
+		  0x00, 0x16 /* dst port */ },
+		{ { 10, 0 }, { 30, 0 }, { 100, 65535 } },
+	},
+	{
+		"RET_A",
+		.u.insns = {
+			/* check that unitialized X and A contain zeros */
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0)
+		},
+		CLASSIC,
+		{ },
+		{ {1, 0}, {2, 0} },
+	},
+	{
+		"INT: ADD trivial",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_ADD, R1, 2),
+			BPF_ALU64_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_SUB, R1, R2),
+			BPF_ALU64_IMM(BPF_ADD, R1, -1),
+			BPF_ALU64_IMM(BPF_MUL, R1, 3),
+			BPF_ALU64_REG(BPF_MOV, R0, R1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xfffffffd } }
+	},
+	{
+		"INT: MUL_X",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xfffffffd, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		"INT: MUL_X2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, -1),
+			BPF_ALU32_IMM(BPF_MOV, R1, -1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 3),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_IMM(BPF_RSH, R1, 8),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0x2ffffff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		"INT: MUL32_X",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 3),
+			BPF_ALU32_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_IMM(BPF_RSH, R1, 8),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xffffff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{
+		/* Have to test all register combinations, since
+		 * JITing of different registers will produce
+		 * different asm code.
+		 */
+		"INT: ADD 64-bit",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_IMM(BPF_ADD, R0, 20),
+			BPF_ALU64_IMM(BPF_ADD, R1, 20),
+			BPF_ALU64_IMM(BPF_ADD, R2, 20),
+			BPF_ALU64_IMM(BPF_ADD, R3, 20),
+			BPF_ALU64_IMM(BPF_ADD, R4, 20),
+			BPF_ALU64_IMM(BPF_ADD, R5, 20),
+			BPF_ALU64_IMM(BPF_ADD, R6, 20),
+			BPF_ALU64_IMM(BPF_ADD, R7, 20),
+			BPF_ALU64_IMM(BPF_ADD, R8, 20),
+			BPF_ALU64_IMM(BPF_ADD, R9, 20),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_ALU64_IMM(BPF_SUB, R1, 10),
+			BPF_ALU64_IMM(BPF_SUB, R2, 10),
+			BPF_ALU64_IMM(BPF_SUB, R3, 10),
+			BPF_ALU64_IMM(BPF_SUB, R4, 10),
+			BPF_ALU64_IMM(BPF_SUB, R5, 10),
+			BPF_ALU64_IMM(BPF_SUB, R6, 10),
+			BPF_ALU64_IMM(BPF_SUB, R7, 10),
+			BPF_ALU64_IMM(BPF_SUB, R8, 10),
+			BPF_ALU64_IMM(BPF_SUB, R9, 10),
+			BPF_ALU64_REG(BPF_ADD, R0, R0),
+			BPF_ALU64_REG(BPF_ADD, R0, R1),
+			BPF_ALU64_REG(BPF_ADD, R0, R2),
+			BPF_ALU64_REG(BPF_ADD, R0, R3),
+			BPF_ALU64_REG(BPF_ADD, R0, R4),
+			BPF_ALU64_REG(BPF_ADD, R0, R5),
+			BPF_ALU64_REG(BPF_ADD, R0, R6),
+			BPF_ALU64_REG(BPF_ADD, R0, R7),
+			BPF_ALU64_REG(BPF_ADD, R0, R8),
+			BPF_ALU64_REG(BPF_ADD, R0, R9), /* R0 == 155 */
+			BPF_JMP_IMM(BPF_JEQ, R0, 155, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R1, R0),
+			BPF_ALU64_REG(BPF_ADD, R1, R1),
+			BPF_ALU64_REG(BPF_ADD, R1, R2),
+			BPF_ALU64_REG(BPF_ADD, R1, R3),
+			BPF_ALU64_REG(BPF_ADD, R1, R4),
+			BPF_ALU64_REG(BPF_ADD, R1, R5),
+			BPF_ALU64_REG(BPF_ADD, R1, R6),
+			BPF_ALU64_REG(BPF_ADD, R1, R7),
+			BPF_ALU64_REG(BPF_ADD, R1, R8),
+			BPF_ALU64_REG(BPF_ADD, R1, R9), /* R1 == 456 */
+			BPF_JMP_IMM(BPF_JEQ, R1, 456, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R2, R0),
+			BPF_ALU64_REG(BPF_ADD, R2, R1),
+			BPF_ALU64_REG(BPF_ADD, R2, R2),
+			BPF_ALU64_REG(BPF_ADD, R2, R3),
+			BPF_ALU64_REG(BPF_ADD, R2, R4),
+			BPF_ALU64_REG(BPF_ADD, R2, R5),
+			BPF_ALU64_REG(BPF_ADD, R2, R6),
+			BPF_ALU64_REG(BPF_ADD, R2, R7),
+			BPF_ALU64_REG(BPF_ADD, R2, R8),
+			BPF_ALU64_REG(BPF_ADD, R2, R9), /* R2 == 1358 */
+			BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R3, R0),
+			BPF_ALU64_REG(BPF_ADD, R3, R1),
+			BPF_ALU64_REG(BPF_ADD, R3, R2),
+			BPF_ALU64_REG(BPF_ADD, R3, R3),
+			BPF_ALU64_REG(BPF_ADD, R3, R4),
+			BPF_ALU64_REG(BPF_ADD, R3, R5),
+			BPF_ALU64_REG(BPF_ADD, R3, R6),
+			BPF_ALU64_REG(BPF_ADD, R3, R7),
+			BPF_ALU64_REG(BPF_ADD, R3, R8),
+			BPF_ALU64_REG(BPF_ADD, R3, R9), /* R3 == 4063 */
+			BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R4, R0),
+			BPF_ALU64_REG(BPF_ADD, R4, R1),
+			BPF_ALU64_REG(BPF_ADD, R4, R2),
+			BPF_ALU64_REG(BPF_ADD, R4, R3),
+			BPF_ALU64_REG(BPF_ADD, R4, R4),
+			BPF_ALU64_REG(BPF_ADD, R4, R5),
+			BPF_ALU64_REG(BPF_ADD, R4, R6),
+			BPF_ALU64_REG(BPF_ADD, R4, R7),
+			BPF_ALU64_REG(BPF_ADD, R4, R8),
+			BPF_ALU64_REG(BPF_ADD, R4, R9), /* R4 == 12177 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R5, R0),
+			BPF_ALU64_REG(BPF_ADD, R5, R1),
+			BPF_ALU64_REG(BPF_ADD, R5, R2),
+			BPF_ALU64_REG(BPF_ADD, R5, R3),
+			BPF_ALU64_REG(BPF_ADD, R5, R4),
+			BPF_ALU64_REG(BPF_ADD, R5, R5),
+			BPF_ALU64_REG(BPF_ADD, R5, R6),
+			BPF_ALU64_REG(BPF_ADD, R5, R7),
+			BPF_ALU64_REG(BPF_ADD, R5, R8),
+			BPF_ALU64_REG(BPF_ADD, R5, R9), /* R5 == 36518 */
+			BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R6, R0),
+			BPF_ALU64_REG(BPF_ADD, R6, R1),
+			BPF_ALU64_REG(BPF_ADD, R6, R2),
+			BPF_ALU64_REG(BPF_ADD, R6, R3),
+			BPF_ALU64_REG(BPF_ADD, R6, R4),
+			BPF_ALU64_REG(BPF_ADD, R6, R5),
+			BPF_ALU64_REG(BPF_ADD, R6, R6),
+			BPF_ALU64_REG(BPF_ADD, R6, R7),
+			BPF_ALU64_REG(BPF_ADD, R6, R8),
+			BPF_ALU64_REG(BPF_ADD, R6, R9), /* R6 == 109540 */
+			BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R7, R0),
+			BPF_ALU64_REG(BPF_ADD, R7, R1),
+			BPF_ALU64_REG(BPF_ADD, R7, R2),
+			BPF_ALU64_REG(BPF_ADD, R7, R3),
+			BPF_ALU64_REG(BPF_ADD, R7, R4),
+			BPF_ALU64_REG(BPF_ADD, R7, R5),
+			BPF_ALU64_REG(BPF_ADD, R7, R6),
+			BPF_ALU64_REG(BPF_ADD, R7, R7),
+			BPF_ALU64_REG(BPF_ADD, R7, R8),
+			BPF_ALU64_REG(BPF_ADD, R7, R9), /* R7 == 328605 */
+			BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R8, R0),
+			BPF_ALU64_REG(BPF_ADD, R8, R1),
+			BPF_ALU64_REG(BPF_ADD, R8, R2),
+			BPF_ALU64_REG(BPF_ADD, R8, R3),
+			BPF_ALU64_REG(BPF_ADD, R8, R4),
+			BPF_ALU64_REG(BPF_ADD, R8, R5),
+			BPF_ALU64_REG(BPF_ADD, R8, R6),
+			BPF_ALU64_REG(BPF_ADD, R8, R7),
+			BPF_ALU64_REG(BPF_ADD, R8, R8),
+			BPF_ALU64_REG(BPF_ADD, R8, R9), /* R8 == 985799 */
+			BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, R9, R0),
+			BPF_ALU64_REG(BPF_ADD, R9, R1),
+			BPF_ALU64_REG(BPF_ADD, R9, R2),
+			BPF_ALU64_REG(BPF_ADD, R9, R3),
+			BPF_ALU64_REG(BPF_ADD, R9, R4),
+			BPF_ALU64_REG(BPF_ADD, R9, R5),
+			BPF_ALU64_REG(BPF_ADD, R9, R6),
+			BPF_ALU64_REG(BPF_ADD, R9, R7),
+			BPF_ALU64_REG(BPF_ADD, R9, R8),
+			BPF_ALU64_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */
+			BPF_ALU64_REG(BPF_MOV, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2957380 } }
+	},
+	{
+		"INT: ADD 32-bit",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R0, 20),
+			BPF_ALU32_IMM(BPF_MOV, R1, 1),
+			BPF_ALU32_IMM(BPF_MOV, R2, 2),
+			BPF_ALU32_IMM(BPF_MOV, R3, 3),
+			BPF_ALU32_IMM(BPF_MOV, R4, 4),
+			BPF_ALU32_IMM(BPF_MOV, R5, 5),
+			BPF_ALU32_IMM(BPF_MOV, R6, 6),
+			BPF_ALU32_IMM(BPF_MOV, R7, 7),
+			BPF_ALU32_IMM(BPF_MOV, R8, 8),
+			BPF_ALU32_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_IMM(BPF_ADD, R1, 10),
+			BPF_ALU64_IMM(BPF_ADD, R2, 10),
+			BPF_ALU64_IMM(BPF_ADD, R3, 10),
+			BPF_ALU64_IMM(BPF_ADD, R4, 10),
+			BPF_ALU64_IMM(BPF_ADD, R5, 10),
+			BPF_ALU64_IMM(BPF_ADD, R6, 10),
+			BPF_ALU64_IMM(BPF_ADD, R7, 10),
+			BPF_ALU64_IMM(BPF_ADD, R8, 10),
+			BPF_ALU64_IMM(BPF_ADD, R9, 10),
+			BPF_ALU32_REG(BPF_ADD, R0, R1),
+			BPF_ALU32_REG(BPF_ADD, R0, R2),
+			BPF_ALU32_REG(BPF_ADD, R0, R3),
+			BPF_ALU32_REG(BPF_ADD, R0, R4),
+			BPF_ALU32_REG(BPF_ADD, R0, R5),
+			BPF_ALU32_REG(BPF_ADD, R0, R6),
+			BPF_ALU32_REG(BPF_ADD, R0, R7),
+			BPF_ALU32_REG(BPF_ADD, R0, R8),
+			BPF_ALU32_REG(BPF_ADD, R0, R9), /* R0 == 155 */
+			BPF_JMP_IMM(BPF_JEQ, R0, 155, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R1, R0),
+			BPF_ALU32_REG(BPF_ADD, R1, R1),
+			BPF_ALU32_REG(BPF_ADD, R1, R2),
+			BPF_ALU32_REG(BPF_ADD, R1, R3),
+			BPF_ALU32_REG(BPF_ADD, R1, R4),
+			BPF_ALU32_REG(BPF_ADD, R1, R5),
+			BPF_ALU32_REG(BPF_ADD, R1, R6),
+			BPF_ALU32_REG(BPF_ADD, R1, R7),
+			BPF_ALU32_REG(BPF_ADD, R1, R8),
+			BPF_ALU32_REG(BPF_ADD, R1, R9), /* R1 == 456 */
+			BPF_JMP_IMM(BPF_JEQ, R1, 456, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R2, R0),
+			BPF_ALU32_REG(BPF_ADD, R2, R1),
+			BPF_ALU32_REG(BPF_ADD, R2, R2),
+			BPF_ALU32_REG(BPF_ADD, R2, R3),
+			BPF_ALU32_REG(BPF_ADD, R2, R4),
+			BPF_ALU32_REG(BPF_ADD, R2, R5),
+			BPF_ALU32_REG(BPF_ADD, R2, R6),
+			BPF_ALU32_REG(BPF_ADD, R2, R7),
+			BPF_ALU32_REG(BPF_ADD, R2, R8),
+			BPF_ALU32_REG(BPF_ADD, R2, R9), /* R2 == 1358 */
+			BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R3, R0),
+			BPF_ALU32_REG(BPF_ADD, R3, R1),
+			BPF_ALU32_REG(BPF_ADD, R3, R2),
+			BPF_ALU32_REG(BPF_ADD, R3, R3),
+			BPF_ALU32_REG(BPF_ADD, R3, R4),
+			BPF_ALU32_REG(BPF_ADD, R3, R5),
+			BPF_ALU32_REG(BPF_ADD, R3, R6),
+			BPF_ALU32_REG(BPF_ADD, R3, R7),
+			BPF_ALU32_REG(BPF_ADD, R3, R8),
+			BPF_ALU32_REG(BPF_ADD, R3, R9), /* R3 == 4063 */
+			BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R4, R0),
+			BPF_ALU32_REG(BPF_ADD, R4, R1),
+			BPF_ALU32_REG(BPF_ADD, R4, R2),
+			BPF_ALU32_REG(BPF_ADD, R4, R3),
+			BPF_ALU32_REG(BPF_ADD, R4, R4),
+			BPF_ALU32_REG(BPF_ADD, R4, R5),
+			BPF_ALU32_REG(BPF_ADD, R4, R6),
+			BPF_ALU32_REG(BPF_ADD, R4, R7),
+			BPF_ALU32_REG(BPF_ADD, R4, R8),
+			BPF_ALU32_REG(BPF_ADD, R4, R9), /* R4 == 12177 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R5, R0),
+			BPF_ALU32_REG(BPF_ADD, R5, R1),
+			BPF_ALU32_REG(BPF_ADD, R5, R2),
+			BPF_ALU32_REG(BPF_ADD, R5, R3),
+			BPF_ALU32_REG(BPF_ADD, R5, R4),
+			BPF_ALU32_REG(BPF_ADD, R5, R5),
+			BPF_ALU32_REG(BPF_ADD, R5, R6),
+			BPF_ALU32_REG(BPF_ADD, R5, R7),
+			BPF_ALU32_REG(BPF_ADD, R5, R8),
+			BPF_ALU32_REG(BPF_ADD, R5, R9), /* R5 == 36518 */
+			BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R6, R0),
+			BPF_ALU32_REG(BPF_ADD, R6, R1),
+			BPF_ALU32_REG(BPF_ADD, R6, R2),
+			BPF_ALU32_REG(BPF_ADD, R6, R3),
+			BPF_ALU32_REG(BPF_ADD, R6, R4),
+			BPF_ALU32_REG(BPF_ADD, R6, R5),
+			BPF_ALU32_REG(BPF_ADD, R6, R6),
+			BPF_ALU32_REG(BPF_ADD, R6, R7),
+			BPF_ALU32_REG(BPF_ADD, R6, R8),
+			BPF_ALU32_REG(BPF_ADD, R6, R9), /* R6 == 109540 */
+			BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R7, R0),
+			BPF_ALU32_REG(BPF_ADD, R7, R1),
+			BPF_ALU32_REG(BPF_ADD, R7, R2),
+			BPF_ALU32_REG(BPF_ADD, R7, R3),
+			BPF_ALU32_REG(BPF_ADD, R7, R4),
+			BPF_ALU32_REG(BPF_ADD, R7, R5),
+			BPF_ALU32_REG(BPF_ADD, R7, R6),
+			BPF_ALU32_REG(BPF_ADD, R7, R7),
+			BPF_ALU32_REG(BPF_ADD, R7, R8),
+			BPF_ALU32_REG(BPF_ADD, R7, R9), /* R7 == 328605 */
+			BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R8, R0),
+			BPF_ALU32_REG(BPF_ADD, R8, R1),
+			BPF_ALU32_REG(BPF_ADD, R8, R2),
+			BPF_ALU32_REG(BPF_ADD, R8, R3),
+			BPF_ALU32_REG(BPF_ADD, R8, R4),
+			BPF_ALU32_REG(BPF_ADD, R8, R5),
+			BPF_ALU32_REG(BPF_ADD, R8, R6),
+			BPF_ALU32_REG(BPF_ADD, R8, R7),
+			BPF_ALU32_REG(BPF_ADD, R8, R8),
+			BPF_ALU32_REG(BPF_ADD, R8, R9), /* R8 == 985799 */
+			BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU32_REG(BPF_ADD, R9, R0),
+			BPF_ALU32_REG(BPF_ADD, R9, R1),
+			BPF_ALU32_REG(BPF_ADD, R9, R2),
+			BPF_ALU32_REG(BPF_ADD, R9, R3),
+			BPF_ALU32_REG(BPF_ADD, R9, R4),
+			BPF_ALU32_REG(BPF_ADD, R9, R5),
+			BPF_ALU32_REG(BPF_ADD, R9, R6),
+			BPF_ALU32_REG(BPF_ADD, R9, R7),
+			BPF_ALU32_REG(BPF_ADD, R9, R8),
+			BPF_ALU32_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */
+			BPF_ALU32_REG(BPF_MOV, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2957380 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: SUB",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_REG(BPF_SUB, R0, R0),
+			BPF_ALU64_REG(BPF_SUB, R0, R1),
+			BPF_ALU64_REG(BPF_SUB, R0, R2),
+			BPF_ALU64_REG(BPF_SUB, R0, R3),
+			BPF_ALU64_REG(BPF_SUB, R0, R4),
+			BPF_ALU64_REG(BPF_SUB, R0, R5),
+			BPF_ALU64_REG(BPF_SUB, R0, R6),
+			BPF_ALU64_REG(BPF_SUB, R0, R7),
+			BPF_ALU64_REG(BPF_SUB, R0, R8),
+			BPF_ALU64_REG(BPF_SUB, R0, R9),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_JMP_IMM(BPF_JEQ, R0, -55, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R1, R0),
+			BPF_ALU64_REG(BPF_SUB, R1, R2),
+			BPF_ALU64_REG(BPF_SUB, R1, R3),
+			BPF_ALU64_REG(BPF_SUB, R1, R4),
+			BPF_ALU64_REG(BPF_SUB, R1, R5),
+			BPF_ALU64_REG(BPF_SUB, R1, R6),
+			BPF_ALU64_REG(BPF_SUB, R1, R7),
+			BPF_ALU64_REG(BPF_SUB, R1, R8),
+			BPF_ALU64_REG(BPF_SUB, R1, R9),
+			BPF_ALU64_IMM(BPF_SUB, R1, 10),
+			BPF_ALU64_REG(BPF_SUB, R2, R0),
+			BPF_ALU64_REG(BPF_SUB, R2, R1),
+			BPF_ALU64_REG(BPF_SUB, R2, R3),
+			BPF_ALU64_REG(BPF_SUB, R2, R4),
+			BPF_ALU64_REG(BPF_SUB, R2, R5),
+			BPF_ALU64_REG(BPF_SUB, R2, R6),
+			BPF_ALU64_REG(BPF_SUB, R2, R7),
+			BPF_ALU64_REG(BPF_SUB, R2, R8),
+			BPF_ALU64_REG(BPF_SUB, R2, R9),
+			BPF_ALU64_IMM(BPF_SUB, R2, 10),
+			BPF_ALU64_REG(BPF_SUB, R3, R0),
+			BPF_ALU64_REG(BPF_SUB, R3, R1),
+			BPF_ALU64_REG(BPF_SUB, R3, R2),
+			BPF_ALU64_REG(BPF_SUB, R3, R4),
+			BPF_ALU64_REG(BPF_SUB, R3, R5),
+			BPF_ALU64_REG(BPF_SUB, R3, R6),
+			BPF_ALU64_REG(BPF_SUB, R3, R7),
+			BPF_ALU64_REG(BPF_SUB, R3, R8),
+			BPF_ALU64_REG(BPF_SUB, R3, R9),
+			BPF_ALU64_IMM(BPF_SUB, R3, 10),
+			BPF_ALU64_REG(BPF_SUB, R4, R0),
+			BPF_ALU64_REG(BPF_SUB, R4, R1),
+			BPF_ALU64_REG(BPF_SUB, R4, R2),
+			BPF_ALU64_REG(BPF_SUB, R4, R3),
+			BPF_ALU64_REG(BPF_SUB, R4, R5),
+			BPF_ALU64_REG(BPF_SUB, R4, R6),
+			BPF_ALU64_REG(BPF_SUB, R4, R7),
+			BPF_ALU64_REG(BPF_SUB, R4, R8),
+			BPF_ALU64_REG(BPF_SUB, R4, R9),
+			BPF_ALU64_IMM(BPF_SUB, R4, 10),
+			BPF_ALU64_REG(BPF_SUB, R5, R0),
+			BPF_ALU64_REG(BPF_SUB, R5, R1),
+			BPF_ALU64_REG(BPF_SUB, R5, R2),
+			BPF_ALU64_REG(BPF_SUB, R5, R3),
+			BPF_ALU64_REG(BPF_SUB, R5, R4),
+			BPF_ALU64_REG(BPF_SUB, R5, R6),
+			BPF_ALU64_REG(BPF_SUB, R5, R7),
+			BPF_ALU64_REG(BPF_SUB, R5, R8),
+			BPF_ALU64_REG(BPF_SUB, R5, R9),
+			BPF_ALU64_IMM(BPF_SUB, R5, 10),
+			BPF_ALU64_REG(BPF_SUB, R6, R0),
+			BPF_ALU64_REG(BPF_SUB, R6, R1),
+			BPF_ALU64_REG(BPF_SUB, R6, R2),
+			BPF_ALU64_REG(BPF_SUB, R6, R3),
+			BPF_ALU64_REG(BPF_SUB, R6, R4),
+			BPF_ALU64_REG(BPF_SUB, R6, R5),
+			BPF_ALU64_REG(BPF_SUB, R6, R7),
+			BPF_ALU64_REG(BPF_SUB, R6, R8),
+			BPF_ALU64_REG(BPF_SUB, R6, R9),
+			BPF_ALU64_IMM(BPF_SUB, R6, 10),
+			BPF_ALU64_REG(BPF_SUB, R7, R0),
+			BPF_ALU64_REG(BPF_SUB, R7, R1),
+			BPF_ALU64_REG(BPF_SUB, R7, R2),
+			BPF_ALU64_REG(BPF_SUB, R7, R3),
+			BPF_ALU64_REG(BPF_SUB, R7, R4),
+			BPF_ALU64_REG(BPF_SUB, R7, R5),
+			BPF_ALU64_REG(BPF_SUB, R7, R6),
+			BPF_ALU64_REG(BPF_SUB, R7, R8),
+			BPF_ALU64_REG(BPF_SUB, R7, R9),
+			BPF_ALU64_IMM(BPF_SUB, R7, 10),
+			BPF_ALU64_REG(BPF_SUB, R8, R0),
+			BPF_ALU64_REG(BPF_SUB, R8, R1),
+			BPF_ALU64_REG(BPF_SUB, R8, R2),
+			BPF_ALU64_REG(BPF_SUB, R8, R3),
+			BPF_ALU64_REG(BPF_SUB, R8, R4),
+			BPF_ALU64_REG(BPF_SUB, R8, R5),
+			BPF_ALU64_REG(BPF_SUB, R8, R6),
+			BPF_ALU64_REG(BPF_SUB, R8, R7),
+			BPF_ALU64_REG(BPF_SUB, R8, R9),
+			BPF_ALU64_IMM(BPF_SUB, R8, 10),
+			BPF_ALU64_REG(BPF_SUB, R9, R0),
+			BPF_ALU64_REG(BPF_SUB, R9, R1),
+			BPF_ALU64_REG(BPF_SUB, R9, R2),
+			BPF_ALU64_REG(BPF_SUB, R9, R3),
+			BPF_ALU64_REG(BPF_SUB, R9, R4),
+			BPF_ALU64_REG(BPF_SUB, R9, R5),
+			BPF_ALU64_REG(BPF_SUB, R9, R6),
+			BPF_ALU64_REG(BPF_SUB, R9, R7),
+			BPF_ALU64_REG(BPF_SUB, R9, R8),
+			BPF_ALU64_IMM(BPF_SUB, R9, 10),
+			BPF_ALU64_IMM(BPF_SUB, R0, 10),
+			BPF_ALU64_IMM(BPF_NEG, R0, 0),
+			BPF_ALU64_REG(BPF_SUB, R0, R1),
+			BPF_ALU64_REG(BPF_SUB, R0, R2),
+			BPF_ALU64_REG(BPF_SUB, R0, R3),
+			BPF_ALU64_REG(BPF_SUB, R0, R4),
+			BPF_ALU64_REG(BPF_SUB, R0, R5),
+			BPF_ALU64_REG(BPF_SUB, R0, R6),
+			BPF_ALU64_REG(BPF_SUB, R0, R7),
+			BPF_ALU64_REG(BPF_SUB, R0, R8),
+			BPF_ALU64_REG(BPF_SUB, R0, R9),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 11 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: XOR",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_SUB, R0, R0),
+			BPF_ALU64_REG(BPF_XOR, R1, R1),
+			BPF_JMP_REG(BPF_JEQ, R0, R1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 10),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_ALU64_REG(BPF_SUB, R1, R1),
+			BPF_ALU64_REG(BPF_XOR, R2, R2),
+			BPF_JMP_REG(BPF_JEQ, R1, R2, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R2, R2),
+			BPF_ALU64_REG(BPF_XOR, R3, R3),
+			BPF_ALU64_IMM(BPF_MOV, R0, 10),
+			BPF_ALU64_IMM(BPF_MOV, R1, -1),
+			BPF_JMP_REG(BPF_JEQ, R2, R3, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R3, R3),
+			BPF_ALU64_REG(BPF_XOR, R4, R4),
+			BPF_ALU64_IMM(BPF_MOV, R2, 1),
+			BPF_ALU64_IMM(BPF_MOV, R5, -1),
+			BPF_JMP_REG(BPF_JEQ, R3, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R4, R4),
+			BPF_ALU64_REG(BPF_XOR, R5, R5),
+			BPF_ALU64_IMM(BPF_MOV, R3, 1),
+			BPF_ALU64_IMM(BPF_MOV, R7, -1),
+			BPF_JMP_REG(BPF_JEQ, R5, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R5, 1),
+			BPF_ALU64_REG(BPF_SUB, R5, R5),
+			BPF_ALU64_REG(BPF_XOR, R6, R6),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R8, -1),
+			BPF_JMP_REG(BPF_JEQ, R5, R6, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R6, R6),
+			BPF_ALU64_REG(BPF_XOR, R7, R7),
+			BPF_JMP_REG(BPF_JEQ, R7, R6, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R7, R7),
+			BPF_ALU64_REG(BPF_XOR, R8, R8),
+			BPF_JMP_REG(BPF_JEQ, R7, R8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R8, R8),
+			BPF_ALU64_REG(BPF_XOR, R9, R9),
+			BPF_JMP_REG(BPF_JEQ, R9, R8, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R9, R9),
+			BPF_ALU64_REG(BPF_XOR, R0, R0),
+			BPF_JMP_REG(BPF_JEQ, R9, R0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, R1, R1),
+			BPF_ALU64_REG(BPF_XOR, R0, R0),
+			BPF_JMP_REG(BPF_JEQ, R9, R0, 2),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+	{	/* Mainly checking JIT here. */
+		"INT: MUL",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 11),
+			BPF_ALU64_IMM(BPF_MOV, R1, 1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_MOV, R3, 3),
+			BPF_ALU64_IMM(BPF_MOV, R4, 4),
+			BPF_ALU64_IMM(BPF_MOV, R5, 5),
+			BPF_ALU64_IMM(BPF_MOV, R6, 6),
+			BPF_ALU64_IMM(BPF_MOV, R7, 7),
+			BPF_ALU64_IMM(BPF_MOV, R8, 8),
+			BPF_ALU64_IMM(BPF_MOV, R9, 9),
+			BPF_ALU64_REG(BPF_MUL, R0, R0),
+			BPF_ALU64_REG(BPF_MUL, R0, R1),
+			BPF_ALU64_REG(BPF_MUL, R0, R2),
+			BPF_ALU64_REG(BPF_MUL, R0, R3),
+			BPF_ALU64_REG(BPF_MUL, R0, R4),
+			BPF_ALU64_REG(BPF_MUL, R0, R5),
+			BPF_ALU64_REG(BPF_MUL, R0, R6),
+			BPF_ALU64_REG(BPF_MUL, R0, R7),
+			BPF_ALU64_REG(BPF_MUL, R0, R8),
+			BPF_ALU64_REG(BPF_MUL, R0, R9),
+			BPF_ALU64_IMM(BPF_MUL, R0, 10),
+			BPF_JMP_IMM(BPF_JEQ, R0, 439084800, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_MUL, R1, R0),
+			BPF_ALU64_REG(BPF_MUL, R1, R2),
+			BPF_ALU64_REG(BPF_MUL, R1, R3),
+			BPF_ALU64_REG(BPF_MUL, R1, R4),
+			BPF_ALU64_REG(BPF_MUL, R1, R5),
+			BPF_ALU64_REG(BPF_MUL, R1, R6),
+			BPF_ALU64_REG(BPF_MUL, R1, R7),
+			BPF_ALU64_REG(BPF_MUL, R1, R8),
+			BPF_ALU64_REG(BPF_MUL, R1, R9),
+			BPF_ALU64_IMM(BPF_MUL, R1, 10),
+			BPF_ALU64_REG(BPF_MOV, R2, R1),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_JMP_IMM(BPF_JEQ, R2, 0x5a924, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_LSH, R1, 32),
+			BPF_ALU64_IMM(BPF_ARSH, R1, 32),
+			BPF_JMP_IMM(BPF_JEQ, R1, 0xebb90000, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_MUL, R2, R0),
+			BPF_ALU64_REG(BPF_MUL, R2, R1),
+			BPF_ALU64_REG(BPF_MUL, R2, R3),
+			BPF_ALU64_REG(BPF_MUL, R2, R4),
+			BPF_ALU64_REG(BPF_MUL, R2, R5),
+			BPF_ALU64_REG(BPF_MUL, R2, R6),
+			BPF_ALU64_REG(BPF_MUL, R2, R7),
+			BPF_ALU64_REG(BPF_MUL, R2, R8),
+			BPF_ALU64_REG(BPF_MUL, R2, R9),
+			BPF_ALU64_IMM(BPF_MUL, R2, 10),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_ALU64_REG(BPF_MOV, R0, R2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0x35d97ef2 } }
+	},
+	{
+		"INT: ALU MIX",
+		.u.insns_int = {
+			BPF_ALU64_IMM(BPF_MOV, R0, 11),
+			BPF_ALU64_IMM(BPF_ADD, R0, -1),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU64_IMM(BPF_XOR, R2, 3),
+			BPF_ALU64_REG(BPF_DIV, R0, R2),
+			BPF_JMP_IMM(BPF_JEQ, R0, 10, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOD, R0, 3),
+			BPF_JMP_IMM(BPF_JEQ, R0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, -1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } }
+	},
+	{
+		"INT: shifts by register",
+		.u.insns_int = {
+			BPF_MOV64_IMM(R0, -1234),
+			BPF_MOV64_IMM(R1, 1),
+			BPF_ALU32_REG(BPF_RSH, R0, R1),
+			BPF_JMP_IMM(BPF_JEQ, R0, 0x7ffffd97, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R2, 1),
+			BPF_ALU64_REG(BPF_LSH, R0, R2),
+			BPF_MOV32_IMM(R4, -1234),
+			BPF_JMP_REG(BPF_JEQ, R0, R4, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_AND, R4, 63),
+			BPF_ALU64_REG(BPF_LSH, R0, R4), /* R0 <= 46 */
+			BPF_MOV64_IMM(R3, 47),
+			BPF_ALU64_REG(BPF_ARSH, R0, R3),
+			BPF_JMP_IMM(BPF_JEQ, R0, -617, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R2, 1),
+			BPF_ALU64_REG(BPF_LSH, R4, R2), /* R4 = 46 << 1 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 92, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R4, 4),
+			BPF_ALU64_REG(BPF_LSH, R4, R4), /* R4 = 4 << 4 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 64, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R4, 5),
+			BPF_ALU32_REG(BPF_LSH, R4, R4), /* R4 = 5 << 5 */
+			BPF_JMP_IMM(BPF_JEQ, R4, 160, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(R0, -1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, -1 } }
+	},
+	{
+		"INT: DIV + ABS",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_MOV, R6, R1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, R2, 2),
+			BPF_ALU32_REG(BPF_DIV, R0, R2),
+			BPF_ALU64_REG(BPF_MOV, R8, R0),
+			BPF_LD_ABS(BPF_B, 4),
+			BPF_ALU64_REG(BPF_ADD, R8, R0),
+			BPF_LD_IND(BPF_B, R8, -70),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ 10, 20, 30, 40, 50 },
+		{ { 4, 0 }, { 5, 10 } }
+	},
+	{
+		"INT: DIV by zero",
+		.u.insns_int = {
+			BPF_ALU64_REG(BPF_MOV, R6, R1),
+			BPF_ALU64_IMM(BPF_MOV, R7, 0),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU32_REG(BPF_DIV, R0, R7),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ 10, 20, 30, 40, 50 },
+		{ { 3, 0 }, { 4, 0 } }
+	},
+	{
+		"check: missing ret",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 1),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: div_k_0",
+		.u.insns = {
+			BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: unknown insn",
+		.u.insns = {
+			/* seccomp insn, rejected in socket filter */
+			BPF_STMT(BPF_LDX | BPF_W | BPF_ABS, 0),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"check: out of range spill/fill",
+		.u.insns = {
+			BPF_STMT(BPF_STX, 16),
+			BPF_STMT(BPF_RET | BPF_K, 0)
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ }
+	},
+	{
+		"JUMPS + HOLES",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 15),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 3, 4),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 2, 3),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15),
+			BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 2, 3),
+			BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 1, 2),
+			BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC,
+		{ 0x00, 0x1b, 0x21, 0x3c, 0x9d, 0xf8,
+		  0x90, 0xe2, 0xba, 0x0a, 0x56, 0xb4,
+		  0x08, 0x00,
+		  0x45, 0x00, 0x00, 0x28, 0x00, 0x00,
+		  0x20, 0x00, 0x40, 0x11, 0x00, 0x00, /* IP header */
+		  0xc0, 0xa8, 0x33, 0x01,
+		  0xc0, 0xa8, 0x33, 0x02,
+		  0xbb, 0xb6,
+		  0xa9, 0xfa,
+		  0x00, 0x14, 0x00, 0x00,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+		  0xcc, 0xcc, 0xcc, 0xcc },
+		{ { 88, 0x001b } }
+	},
+	{
+		"check: RET X",
+		.u.insns = {
+			BPF_STMT(BPF_RET | BPF_X, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{
+		"check: LDX + RET X",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 42),
+			BPF_STMT(BPF_RET | BPF_X, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{	/* Mainly checking JIT here. */
+		"M[]: alt STX + LDX",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 100),
+			BPF_STMT(BPF_STX, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 1),
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 2),
+			BPF_STMT(BPF_LDX | BPF_MEM, 2),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 3),
+			BPF_STMT(BPF_LDX | BPF_MEM, 3),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 4),
+			BPF_STMT(BPF_LDX | BPF_MEM, 4),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 5),
+			BPF_STMT(BPF_LDX | BPF_MEM, 5),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 6),
+			BPF_STMT(BPF_LDX | BPF_MEM, 6),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 7),
+			BPF_STMT(BPF_LDX | BPF_MEM, 7),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 8),
+			BPF_STMT(BPF_LDX | BPF_MEM, 8),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 9),
+			BPF_STMT(BPF_LDX | BPF_MEM, 9),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 10),
+			BPF_STMT(BPF_LDX | BPF_MEM, 10),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 11),
+			BPF_STMT(BPF_LDX | BPF_MEM, 11),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 12),
+			BPF_STMT(BPF_LDX | BPF_MEM, 12),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 13),
+			BPF_STMT(BPF_LDX | BPF_MEM, 13),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 14),
+			BPF_STMT(BPF_LDX | BPF_MEM, 14),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_STX, 15),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 116 } },
+	},
+	{	/* Mainly checking JIT here. */
+		"M[]: full STX + full LDX",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbadfeedb),
+			BPF_STMT(BPF_STX, 0),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xecabedae),
+			BPF_STMT(BPF_STX, 1),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xafccfeaf),
+			BPF_STMT(BPF_STX, 2),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbffdcedc),
+			BPF_STMT(BPF_STX, 3),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfbbbdccb),
+			BPF_STMT(BPF_STX, 4),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfbabcbda),
+			BPF_STMT(BPF_STX, 5),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaedecbdb),
+			BPF_STMT(BPF_STX, 6),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xadebbade),
+			BPF_STMT(BPF_STX, 7),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfcfcfaec),
+			BPF_STMT(BPF_STX, 8),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbcdddbdc),
+			BPF_STMT(BPF_STX, 9),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfeefdfac),
+			BPF_STMT(BPF_STX, 10),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xcddcdeea),
+			BPF_STMT(BPF_STX, 11),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaccfaebb),
+			BPF_STMT(BPF_STX, 12),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xbdcccdcf),
+			BPF_STMT(BPF_STX, 13),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xaaedecde),
+			BPF_STMT(BPF_STX, 14),
+			BPF_STMT(BPF_LDX | BPF_IMM, 0xfaeacdad),
+			BPF_STMT(BPF_STX, 15),
+			BPF_STMT(BPF_LDX | BPF_MEM, 0),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 1),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 2),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 3),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 4),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 5),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 6),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 7),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 8),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 9),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 10),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 11),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 12),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 13),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 14),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_LDX | BPF_MEM, 15),
+			BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{ },
+		{ { 0, 0x2a5a5e5 } },
+	},
+	{
+		"check: SKF_AD_MAX",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF + SKF_AD_MAX),
+			BPF_STMT(BPF_RET | BPF_A, 0),
+		},
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+		{ },
+		{ },
+	},
+	{	/* Passes checker but fails during runtime. */
+		"LD [SKF_AD_OFF-1]",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				 SKF_AD_OFF - 1),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+		},
+		CLASSIC,
+		{ },
+		{ { 1, 0 } },
+	},
+	{
+		"load 64-bit immediate",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x567800001234LL),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_ALU64_IMM(BPF_RSH, R2, 32),
+			BPF_ALU64_IMM(BPF_LSH, R3, 32),
+			BPF_ALU64_IMM(BPF_RSH, R3, 32),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_MOV, R0, 1),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } }
+	},
+};
+
+static struct net_device dev;
+
+static struct sk_buff *populate_skb(char *buf, int size)
+{
+	struct sk_buff *skb;
+
+	if (size >= MAX_DATA)
+		return NULL;
+
+	skb = alloc_skb(MAX_DATA, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	memcpy(__skb_put(skb, size), buf, size);
+
+	/* Initialize a fake skb with test pattern. */
+	skb_reset_mac_header(skb);
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = SKB_TYPE;
+	skb->mark = SKB_MARK;
+	skb->hash = SKB_HASH;
+	skb->queue_mapping = SKB_QUEUE_MAP;
+	skb->vlan_tci = SKB_VLAN_TCI;
+	skb->dev = &dev;
+	skb->dev->ifindex = SKB_DEV_IFINDEX;
+	skb->dev->type = SKB_DEV_TYPE;
+	skb_set_network_header(skb, min(size, ETH_HLEN));
+
+	return skb;
+}
+
+static void *generate_test_data(struct bpf_test *test, int sub)
+{
+	if (test->aux & FLAG_NO_DATA)
+		return NULL;
+
+	/* Test case expects an skb, so populate one. Various
+	 * subtests generate skbs of different sizes based on
+	 * the same data.
+	 */
+	return populate_skb(test->data, test->test[sub].data_size);
+}
+
+static void release_test_data(const struct bpf_test *test, void *data)
+{
+	if (test->aux & FLAG_NO_DATA)
+		return;
+
+	kfree_skb(data);
+}
+
+static int probe_filter_length(struct sock_filter *fp)
+{
+	int len = 0;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].k != 0)
+			break;
+
+	return len + 1;
+}
+
+static struct bpf_prog *generate_filter(int which, int *err)
+{
+	struct bpf_prog *fp;
+	struct sock_fprog_kern fprog;
+	unsigned int flen = probe_filter_length(tests[which].u.insns);
+	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
+
+	switch (test_type) {
+	case CLASSIC:
+		fprog.filter = tests[which].u.insns;
+		fprog.len = flen;
+
+		*err = bpf_prog_create(&fp, &fprog);
+		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
+			if (*err == -EINVAL) {
+				pr_cont("PASS\n");
+				/* Verifier rejected filter as expected. */
+				*err = 0;
+				return NULL;
+			} else {
+				pr_cont("UNEXPECTED_PASS\n");
+				/* Verifier didn't reject the test that's
+				 * bad enough, just return!
+				 */
+				*err = -EINVAL;
+				return NULL;
+			}
+		}
+		/* We don't expect to fail. */
+		if (*err) {
+			pr_cont("FAIL to attach err=%d len=%d\n",
+				*err, fprog.len);
+			return NULL;
+		}
+		break;
+
+	case INTERNAL:
+		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
+		if (fp == NULL) {
+			pr_cont("UNEXPECTED_FAIL no memory left\n");
+			*err = -ENOMEM;
+			return NULL;
+		}
+
+		fp->len = flen;
+		memcpy(fp->insnsi, tests[which].u.insns_int,
+		       fp->len * sizeof(struct bpf_insn));
+
+		bpf_prog_select_runtime(fp);
+		break;
+	}
+
+	*err = 0;
+	return fp;
+}
+
+static void release_filter(struct bpf_prog *fp, int which)
+{
+	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
+
+	switch (test_type) {
+	case CLASSIC:
+		bpf_prog_destroy(fp);
+		break;
+	case INTERNAL:
+		bpf_prog_free(fp);
+		break;
+	}
+}
+
+static int __run_one(const struct bpf_prog *fp, const void *data,
+		     int runs, u64 *duration)
+{
+	u64 start, finish;
+	int ret = 0, i;
+
+	start = ktime_to_us(ktime_get());
+
+	for (i = 0; i < runs; i++)
+		ret = BPF_PROG_RUN(fp, data);
+
+	finish = ktime_to_us(ktime_get());
+
+	*duration = (finish - start) * 1000ULL;
+	do_div(*duration, runs);
+
+	return ret;
+}
+
+static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
+{
+	int err_cnt = 0, i, runs = MAX_TESTRUNS;
+
+	for (i = 0; i < MAX_SUBTESTS; i++) {
+		void *data;
+		u64 duration;
+		u32 ret;
+
+		if (test->test[i].data_size == 0 &&
+		    test->test[i].result == 0)
+			break;
+
+		data = generate_test_data(test, i);
+		ret = __run_one(fp, data, runs, &duration);
+		release_test_data(test, data);
+
+		if (ret == test->test[i].result) {
+			pr_cont("%lld ", duration);
+		} else {
+			pr_cont("ret %d != %d ", ret,
+				test->test[i].result);
+			err_cnt++;
+		}
+	}
+
+	return err_cnt;
+}
+
+static __init int test_bpf(void)
+{
+	int i, err_cnt = 0, pass_cnt = 0;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct bpf_prog *fp;
+		int err;
+
+		pr_info("#%d %s ", i, tests[i].descr);
+
+		fp = generate_filter(i, &err);
+		if (fp == NULL) {
+			if (err == 0) {
+				pass_cnt++;
+				continue;
+			}
+
+			return err;
+		}
+		err = run_one(fp, &tests[i]);
+		release_filter(fp, i);
+
+		if (err) {
+			pr_cont("FAIL (%d times)\n", err);
+			err_cnt++;
+		} else {
+			pr_cont("PASS\n");
+			pass_cnt++;
+		}
+	}
+
+	pr_info("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt);
+	return err_cnt ? -EINVAL : 0;
+}
+
+static int __init test_bpf_init(void)
+{
+	return test_bpf();
+}
+
+static void __exit test_bpf_exit(void)
+{
+}
+
+module_init(test_bpf_init);
+module_exit(test_bpf_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
new file mode 100644
index 000000000000..86374c1c49a4
--- /dev/null
+++ b/lib/test_firmware.c
@@ -0,0 +1,117 @@
+/*
+ * This module provides an interface to trigger and test firmware loading.
+ *
+ * It is designed to be used for basic evaluation of the firmware loading
+ * subsystem (for example when validating firmware verification). It lacks
+ * any extra dependencies, and will not normally be loaded by the system
+ * unless explicitly requested by name.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/firmware.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+static DEFINE_MUTEX(test_fw_mutex);
+static const struct firmware *test_firmware;
+
+static ssize_t test_fw_misc_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *offset)
+{
+	ssize_t rc = 0;
+
+	mutex_lock(&test_fw_mutex);
+	if (test_firmware)
+		rc = simple_read_from_buffer(buf, size, offset,
+					     test_firmware->data,
+					     test_firmware->size);
+	mutex_unlock(&test_fw_mutex);
+	return rc;
+}
+
+static const struct file_operations test_fw_fops = {
+	.owner          = THIS_MODULE,
+	.read           = test_fw_misc_read,
+};
+
+static struct miscdevice test_fw_misc_device = {
+	.minor          = MISC_DYNAMIC_MINOR,
+	.name           = "test_firmware",
+	.fops           = &test_fw_fops,
+};
+
+static ssize_t trigger_request_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int rc;
+	char *name;
+
+	name = kzalloc(count + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOSPC;
+	memcpy(name, buf, count);
+
+	pr_info("loading '%s'\n", name);
+
+	mutex_lock(&test_fw_mutex);
+	release_firmware(test_firmware);
+	test_firmware = NULL;
+	rc = request_firmware(&test_firmware, name, dev);
+	if (rc)
+		pr_info("load of '%s' failed: %d\n", name, rc);
+	pr_info("loaded: %zu\n", test_firmware ? test_firmware->size : 0);
+	mutex_unlock(&test_fw_mutex);
+
+	kfree(name);
+
+	return count;
+}
+static DEVICE_ATTR_WO(trigger_request);
+
+static int __init test_firmware_init(void)
+{
+	int rc;
+
+	rc = misc_register(&test_fw_misc_device);
+	if (rc) {
+		pr_err("could not register misc device: %d\n", rc);
+		return rc;
+	}
+	rc = device_create_file(test_fw_misc_device.this_device,
+				&dev_attr_trigger_request);
+	if (rc) {
+		pr_err("could not create sysfs interface: %d\n", rc);
+		goto dereg;
+	}
+
+	pr_warn("interface ready\n");
+
+	return 0;
+dereg:
+	misc_deregister(&test_fw_misc_device);
+	return rc;
+}
+
+module_init(test_firmware_init);
+
+static void __exit test_firmware_exit(void)
+{
+	release_firmware(test_firmware);
+	device_remove_file(test_fw_misc_device.this_device,
+			   &dev_attr_trigger_request);
+	misc_deregister(&test_fw_misc_device);
+	pr_warn("removed interface\n");
+}
+
+module_exit(test_firmware_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_module.c b/lib/test_module.c
new file mode 100644
index 000000000000..319b66f1ff61
--- /dev/null
+++ b/lib/test_module.c
@@ -0,0 +1,33 @@
+/*
+ * This module emits "Hello, world" on printk when loaded.
+ *
+ * It is designed to be used for basic evaluation of the module loading
+ * subsystem (for example when validating module signing/verification). It
+ * lacks any extra dependencies, and will not normally be loaded by the
+ * system unless explicitly requested by name.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+static int __init test_module_init(void)
+{
+	pr_warn("Hello, world\n");
+
+	return 0;
+}
+
+module_init(test_module_init);
+
+static void __exit test_module_exit(void)
+{
+	pr_warn("Goodbye\n");
+}
+
+module_exit(test_module_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
new file mode 100644
index 000000000000..0ecef3e4690e
--- /dev/null
+++ b/lib/test_user_copy.c
@@ -0,0 +1,110 @@
+/*
+ * Kernel module for testing copy_to/from_user infrastructure.
+ *
+ * Copyright 2013 Google Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Kees Cook       <keescook@chromium.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#define test(condition, msg)		\
+({					\
+	int cond = (condition);		\
+	if (cond)			\
+		pr_warn("%s\n", msg);	\
+	cond;				\
+})
+
+static int __init test_user_copy_init(void)
+{
+	int ret = 0;
+	char *kmem;
+	char __user *usermem;
+	char *bad_usermem;
+	unsigned long user_addr;
+	unsigned long value = 0x5A;
+
+	kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
+	if (!kmem)
+		return -ENOMEM;
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= (unsigned long)(TASK_SIZE)) {
+		pr_warn("Failed to allocate user memory\n");
+		kfree(kmem);
+		return -ENOMEM;
+	}
+
+	usermem = (char __user *)user_addr;
+	bad_usermem = (char *)user_addr;
+
+	/* Legitimate usage: none of these should fail. */
+	ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE),
+		    "legitimate copy_from_user failed");
+	ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE),
+		    "legitimate copy_to_user failed");
+	ret |= test(get_user(value, (unsigned long __user *)usermem),
+		    "legitimate get_user failed");
+	ret |= test(put_user(value, (unsigned long __user *)usermem),
+		    "legitimate put_user failed");
+
+	/* Invalid usage: none of these should succeed. */
+	ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE),
+				    PAGE_SIZE),
+		    "illegal all-kernel copy_from_user passed");
+	ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem,
+				    PAGE_SIZE),
+		    "illegal reversed copy_from_user passed");
+	ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE,
+				  PAGE_SIZE),
+		    "illegal all-kernel copy_to_user passed");
+	ret |= test(!copy_to_user((char __user *)kmem, bad_usermem,
+				  PAGE_SIZE),
+		    "illegal reversed copy_to_user passed");
+	ret |= test(!get_user(value, (unsigned long __user *)kmem),
+		    "illegal get_user passed");
+	ret |= test(!put_user(value, (unsigned long __user *)kmem),
+		    "illegal put_user passed");
+
+	vm_munmap(user_addr, PAGE_SIZE * 2);
+	kfree(kmem);
+
+	if (ret == 0) {
+		pr_info("tests passed.\n");
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+module_init(test_user_copy_init);
+
+static void __exit test_user_copy_exit(void)
+{
+	pr_info("unloaded.\n");
+}
+
+module_exit(test_user_copy_exit);
+
+MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/textsearch.c b/lib/textsearch.c
index e0cc0146ae62..0c7e9ab2d88f 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -159,6 +159,7 @@ errout:
 	spin_unlock(&ts_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(textsearch_register);
 
 /**
  * textsearch_unregister - unregister a textsearch module
@@ -190,6 +191,7 @@ out:
 	spin_unlock(&ts_mod_lock);
 	return err;
 }
+EXPORT_SYMBOL(textsearch_unregister);
 
 struct ts_linear_state
 {
@@ -236,6 +238,7 @@ unsigned int textsearch_find_continuous(struct ts_config *conf,
 
 	return textsearch_find(conf, state);
 }
+EXPORT_SYMBOL(textsearch_find_continuous);
 
 /**
  * textsearch_prepare - Prepare a search
@@ -298,6 +301,7 @@ errout:
 		
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(textsearch_prepare);
 
 /**
  * textsearch_destroy - destroy a search configuration
@@ -316,9 +320,4 @@ void textsearch_destroy(struct ts_config *conf)
 
 	kfree(conf);
 }
-
-EXPORT_SYMBOL(textsearch_register);
-EXPORT_SYMBOL(textsearch_unregister);
-EXPORT_SYMBOL(textsearch_prepare);
-EXPORT_SYMBOL(textsearch_find_continuous);
 EXPORT_SYMBOL(textsearch_destroy);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 10909c571494..ba3cd0a35640 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -364,7 +364,6 @@ enum format_type {
 	FORMAT_TYPE_SHORT,
 	FORMAT_TYPE_UINT,
 	FORMAT_TYPE_INT,
-	FORMAT_TYPE_NRCHARS,
 	FORMAT_TYPE_SIZE_T,
 	FORMAT_TYPE_PTRDIFF
 };
@@ -719,10 +718,15 @@ char *resource_string(char *buf, char *end, struct resource *res,
 		specp = &mem_spec;
 		decode = 0;
 	}
-	p = number(p, pend, res->start, *specp);
-	if (res->start != res->end) {
-		*p++ = '-';
-		p = number(p, pend, res->end, *specp);
+	if (decode && res->flags & IORESOURCE_UNSET) {
+		p = string(p, pend, "size ", str_spec);
+		p = number(p, pend, resource_size(res), *specp);
+	} else {
+		p = number(p, pend, res->start, *specp);
+		if (res->start != res->end) {
+			*p++ = '-';
+			p = number(p, pend, res->end, *specp);
+		}
 	}
 	if (decode) {
 		if (res->flags & IORESOURCE_MEM_64)
@@ -1155,6 +1159,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr,
 	return number(buf, end, *(const netdev_features_t *)addr, spec);
 }
 
+static noinline_for_stack
+char *address_val(char *buf, char *end, const void *addr,
+		  struct printf_spec spec, const char *fmt)
+{
+	unsigned long long num;
+
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	spec.base = 16;
+
+	switch (fmt[1]) {
+	case 'd':
+		num = *(const dma_addr_t *)addr;
+		spec.field_width = sizeof(dma_addr_t) * 2 + 2;
+		break;
+	case 'p':
+	default:
+		num = *(const phys_addr_t *)addr;
+		spec.field_width = sizeof(phys_addr_t) * 2 + 2;
+		break;
+	}
+
+	return number(buf, end, num, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1218,7 +1246,8 @@ int kptr_restrict __read_mostly;
  *              N no separator
  *            The maximum supported length is 64 bytes of the input. Consider
  *            to use print_hex_dump() for the larger input.
- * - 'a' For a phys_addr_t type and its derivative types (passed by reference)
+ * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives
+ *           (default assumed to be phys_addr_t, passed by reference)
  * - 'd[234]' For a dentry name (optionally 2-4 last components)
  * - 'D[234]' Same as 'd' but for a struct file
  *
@@ -1353,11 +1382,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		}
 		break;
 	case 'a':
-		spec.flags |= SPECIAL | SMALL | ZEROPAD;
-		spec.field_width = sizeof(phys_addr_t) * 2 + 2;
-		spec.base = 16;
-		return number(buf, end,
-			      (unsigned long long) *((phys_addr_t *)ptr), spec);
+		return address_val(buf, end, ptr, spec, fmt);
 	case 'd':
 		return dentry_name(buf, end, ptr, spec, fmt);
 	case 'D':
@@ -1512,10 +1537,6 @@ qualifier:
 		return fmt - start;
 		/* skip alnum */
 
-	case 'n':
-		spec->type = FORMAT_TYPE_NRCHARS;
-		return ++fmt - start;
-
 	case '%':
 		spec->type = FORMAT_TYPE_PERCENT_CHAR;
 		return ++fmt - start;
@@ -1538,6 +1559,15 @@ qualifier:
 	case 'u':
 		break;
 
+	case 'n':
+		/*
+		 * Since %n poses a greater security risk than utility, treat
+		 * it as an invalid format specifier. Warn about its use so
+		 * that new instances don't get added.
+		 */
+		WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt);
+		/* Fall-through */
+
 	default:
 		spec->type = FORMAT_TYPE_INVALID;
 		return fmt - start;
@@ -1711,20 +1741,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			++str;
 			break;
 
-		case FORMAT_TYPE_NRCHARS: {
-			/*
-			 * Since %n poses a greater security risk than
-			 * utility, ignore %n and skip its argument.
-			 */
-			void *skip_arg;
-
-			WARN_ONCE(1, "Please remove ignored %%n in '%s'\n",
-					old_fmt);
-
-			skip_arg = va_arg(args, void *);
-			break;
-		}
-
 		default:
 			switch (spec.type) {
 			case FORMAT_TYPE_LONG_LONG:
@@ -1921,7 +1937,7 @@ EXPORT_SYMBOL(sprintf);
  * @args: Arguments for the format string
  *
  * The format follows C99 vsnprintf, except %n is ignored, and its argument
- * is skiped.
+ * is skipped.
  *
  * The return value is the number of words(32bits) which would be generated for
  * the given input.
@@ -1999,19 +2015,6 @@ do {									\
 				fmt++;
 			break;
 
-		case FORMAT_TYPE_NRCHARS: {
-			/* skip %n 's argument */
-			u8 qualifier = spec.qualifier;
-			void *skip_arg;
-			if (qualifier == 'l')
-				skip_arg = va_arg(args, long *);
-			else if (_tolower(qualifier) == 'z')
-				skip_arg = va_arg(args, size_t *);
-			else
-				skip_arg = va_arg(args, int *);
-			break;
-		}
-
 		default:
 			switch (spec.type) {
 
@@ -2170,10 +2173,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			++str;
 			break;
 
-		case FORMAT_TYPE_NRCHARS:
-			/* skip */
-			break;
-
 		default: {
 			unsigned long long num;
 
@@ -2348,7 +2347,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 
 		base = 10;
-		is_sign = 0;
+		is_sign = false;
 
 		switch (*fmt++) {
 		case 'c':
@@ -2387,7 +2386,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		case 'i':
 			base = 0;
 		case 'd':
-			is_sign = 1;
+			is_sign = true;
 		case 'u':
 			break;
 		case '%':
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
index 08837db52d94..12d2d777f36b 100644
--- a/lib/xz/Kconfig
+++ b/lib/xz/Kconfig
@@ -9,33 +9,33 @@ config XZ_DEC
 if XZ_DEC
 
 config XZ_DEC_X86
-	bool "x86 BCJ filter decoder"
-	default y if X86
+	bool "x86 BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 config XZ_DEC_POWERPC
-	bool "PowerPC BCJ filter decoder"
-	default y if PPC
+	bool "PowerPC BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 config XZ_DEC_IA64
-	bool "IA-64 BCJ filter decoder"
-	default y if IA64
+	bool "IA-64 BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 config XZ_DEC_ARM
-	bool "ARM BCJ filter decoder"
-	default y if ARM
+	bool "ARM BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 config XZ_DEC_ARMTHUMB
-	bool "ARM-Thumb BCJ filter decoder"
-	default y if (ARM && ARM_THUMB)
+	bool "ARM-Thumb BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 config XZ_DEC_SPARC
-	bool "SPARC BCJ filter decoder"
-	default y if SPARC
+	bool "SPARC BCJ filter decoder" if EXPERT
+	default y
 	select XZ_DEC_BCJ
 
 endif
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index a6cdc969ea42..08c3c8049998 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -1043,6 +1043,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 
 			s->lzma2.sequence = SEQ_LZMA_PREPARE;
 
+		/* Fall through */
+
 		case SEQ_LZMA_PREPARE:
 			if (s->lzma2.compressed < RC_INIT_BYTES)
 				return XZ_DATA_ERROR;
@@ -1053,6 +1055,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 			s->lzma2.compressed -= RC_INIT_BYTES;
 			s->lzma2.sequence = SEQ_LZMA_RUN;
 
+		/* Fall through */
+
 		case SEQ_LZMA_RUN:
 			/*
 			 * Set dictionary limit to indicate how much we want
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -250,52 +250,6 @@ int zlib_deflateInit2(
 }
 
 /* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
-	z_streamp strm,
-	const Byte *dictionary,
-	uInt  dictLength
-)
-{
-    deflate_state *s;
-    uInt length = dictLength;
-    uInt n;
-    IPos hash_head = 0;
-
-    if (strm == NULL || strm->state == NULL || dictionary == NULL)
-	return Z_STREAM_ERROR;
-
-    s = (deflate_state *) strm->state;
-    if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
-    strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
-    if (length < MIN_MATCH) return Z_OK;
-    if (length > MAX_DIST(s)) {
-	length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
-	dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
-    }
-    memcpy((char *)s->window, dictionary, length);
-    s->strstart = length;
-    s->block_start = (long)length;
-
-    /* Insert all strings in the hash table (except for the last two bytes).
-     * s->lookahead stays null, so s->ins_h will be recomputed at the next
-     * call of fill_window.
-     */
-    s->ins_h = s->window[0];
-    UPDATE_HASH(s, s->ins_h, s->window[1]);
-    for (n = 0; n <= length - MIN_MATCH; n++) {
-	INSERT_STRING(s, n, hash_head);
-    }
-    if (hash_head) hash_head = 0;  /* to make compiler happy */
-    return Z_OK;
-}
-#endif  /*  0  */
-
-/* ========================================================================= */
 int zlib_deflateReset(
 	z_streamp strm
 )
@@ -326,45 +280,6 @@ int zlib_deflateReset(
     return Z_OK;
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
-	z_streamp strm,
-	int level,
-	int strategy
-)
-{
-    deflate_state *s;
-    compress_func func;
-    int err = Z_OK;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    s = (deflate_state *) strm->state;
-
-    if (level == Z_DEFAULT_COMPRESSION) {
-	level = 6;
-    }
-    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-	return Z_STREAM_ERROR;
-    }
-    func = configuration_table[s->level].func;
-
-    if (func != configuration_table[level].func && strm->total_in != 0) {
-	/* Flush the last buffer: */
-	err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
-    }
-    if (s->level != level) {
-	s->level = level;
-	s->max_lazy_match   = configuration_table[level].max_lazy;
-	s->good_match       = configuration_table[level].good_length;
-	s->nice_match       = configuration_table[level].nice_length;
-	s->max_chain_length = configuration_table[level].max_chain;
-    }
-    s->strategy = strategy;
-    return err;
-}
-#endif  /*  0  */
-
 /* =========================================================================
  * Put a short in the pending buffer. The 16-bit value is put in MSB order.
  * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
     return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
 }
 
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
-	z_streamp dest,
-	z_streamp source
-)
-{
-#ifdef MAXSEG_64K
-    return Z_STREAM_ERROR;
-#else
-    deflate_state *ds;
-    deflate_state *ss;
-    ush *overlay;
-    deflate_workspace *mem;
-
-
-    if (source == NULL || dest == NULL || source->state == NULL) {
-        return Z_STREAM_ERROR;
-    }
-
-    ss = (deflate_state *) source->state;
-
-    *dest = *source;
-
-    mem = (deflate_workspace *) dest->workspace;
-
-    ds = &(mem->deflate_memory);
-
-    dest->state = (struct internal_state *) ds;
-    *ds = *ss;
-    ds->strm = dest;
-
-    ds->window = (Byte *) mem->window_memory;
-    ds->prev   = (Pos *)  mem->prev_memory;
-    ds->head   = (Pos *)  mem->head_memory;
-    overlay = (ush *) mem->overlay_memory;
-    ds->pending_buf = (uch *) overlay;
-
-    memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-    memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-    memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
-    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
-    ds->l_desc.dyn_tree = ds->dyn_ltree;
-    ds->d_desc.dyn_tree = ds->dyn_dtree;
-    ds->bl_desc.dyn_tree = ds->bl_tree;
-
-    return Z_OK;
-#endif
-}
-#endif  /*  0  */
-
 /* ===========================================================================
  * Read a new buffer from the current input stream, update the adler32
  * and total number of bytes read.  All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
-    struct inflate_state *state;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
-    return Z_OK;
-}
-#endif
-
 int zlib_inflateInit2(z_streamp strm, int windowBits)
 {
     struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
-        uInt dictLength)
-{
-    struct inflate_state *state;
-    unsigned long id;
-
-    /* check state */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (state->wrap != 0 && state->mode != DICT)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary id */
-    if (state->mode == DICT) {
-        id = zlib_adler32(0L, NULL, 0);
-        id = zlib_adler32(id, dictionary, dictLength);
-        if (id != state->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* copy dictionary to window */
-    zlib_updatewindow(strm, strm->avail_out);
-
-    if (dictLength > state->wsize) {
-        memcpy(state->window, dictionary + dictLength - state->wsize,
-                state->wsize);
-        state->whave = state->wsize;
-    }
-    else {
-        memcpy(state->window + state->wsize - dictLength, dictionary,
-                dictLength);
-        state->whave = dictLength;
-    }
-    state->havedict = 1;
-    return Z_OK;
-}
-#endif
-
-#if 0
-/*
-   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
-   or when out of input.  When called, *have is the number of pattern bytes
-   found in order so far, in 0..3.  On return *have is updated to the new
-   state.  If on return *have equals four, then the pattern was found and the
-   return value is how many bytes were read including the last byte of the
-   pattern.  If *have is less than four, then the pattern has not been found
-   yet and the return value is len.  In the latter case, zlib_syncsearch() can be
-   called again with more data and the *have state.  *have is initialized to
-   zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
-        unsigned len)
-{
-    unsigned got;
-    unsigned next;
-
-    got = *have;
-    next = 0;
-    while (next < len && got < 4) {
-        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
-            got++;
-        else if (buf[next])
-            got = 0;
-        else
-            got = 4 - got;
-        next++;
-    }
-    *have = got;
-    return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state *state;
-
-    /* check parameters */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        state->hold <<= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        zlib_syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    zlib_inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-#endif
-
 /*
  * This subroutine adds the data at next_in/avail_in to the output history
  * without performing any output.  The output buffer must be "caught up";