114 files changed, 8149 insertions, 1051 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 63b5782732ed..4a8aba2e5cc0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -19,6 +19,20 @@ config RATIONAL
 config GENERIC_FIND_FIRST_BIT
 	bool
 
+config NO_GENERIC_PCI_IOPORT_MAP
+	bool
+
+config GENERIC_PCI_IOMAP
+	bool
+
+config GENERIC_IOMAP
+	bool
+	select GENERIC_PCI_IOMAP
+
+config GENERIC_IO
+	boolean
+	default n
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
@@ -51,14 +65,71 @@ config CRC_ITU_T
 	  functions require M here.
 
 config CRC32
-	tristate "CRC32 functions"
+	tristate "CRC32/CRC32c functions"
 	default y
 	select BITREVERSE
 	help
 	  This option is provided for the case where no in-kernel-tree
-	  modules require CRC32 functions, but a module built outside the
-	  kernel tree does. Such modules that use library CRC32 functions
-	  require M here.
+	  modules require CRC32/CRC32c functions, but a module built outside
+	  the kernel tree does. Such modules that use library CRC32/CRC32c
+	  functions require M here.
+
+config CRC32_SELFTEST
+	bool "CRC32 perform self test on init"
+	default n
+	depends on CRC32
+	help
+	  This option enables the CRC32 library functions to perform a
+	  self test on initialization. The self test computes crc32_le
+	  and crc32_be over byte strings with random alignment and length
+	  and computes the total elapsed time and number of bytes processed.
+
+choice
+	prompt "CRC32 implementation"
+	depends on CRC32
+	default CRC32_SLICEBY8
+	help
+	  This option allows a kernel builder to override the default choice
+	  of CRC32 algorithm.  Choose the default ("slice by 8") unless you
+	  know that you need one of the others.
+
+config CRC32_SLICEBY8
+	bool "Slice by 8 bytes"
+	help
+	  Calculate checksum 8 bytes at a time with a clever slicing algorithm.
+	  This is the fastest algorithm, but comes with a 8KiB lookup table.
+	  Most modern processors have enough cache to hold this table without
+	  thrashing the cache.
+
+	  This is the default implementation choice.  Choose this one unless
+	  you have a good reason not to.
+
+config CRC32_SLICEBY4
+	bool "Slice by 4 bytes"
+	help
+	  Calculate checksum 4 bytes at a time with a clever slicing algorithm.
+	  This is a bit slower than slice by 8, but has a smaller 4KiB lookup
+	  table.
+
+	  Only choose this option if you know what you are doing.
+
+config CRC32_SARWATE
+	bool "Sarwate's Algorithm (one byte at a time)"
+	help
+	  Calculate checksum a byte at a time using Sarwate's algorithm.  This
+	  is not particularly fast, but has a small 256 byte lookup table.
+
+	  Only choose this option if you know what you are doing.
+
+config CRC32_BIT
+	bool "Classic Algorithm (one bit at a time)"
+	help
+	  Calculate checksum one bit at a time.  This is VERY slow, but has
+	  no lookup table.  This is provided as a debugging option.
+
+	  Only choose this option if you are debugging crc32.
+
+endchoice
 
 config CRC7
 	tristate "CRC7 functions"
@@ -214,6 +285,7 @@ config BTREE
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
+	select GENERIC_IO
 	default y
 
 config HAS_IOPORT
@@ -272,11 +344,38 @@ config AVERAGE
 
 	  If unsure, say N.
 
+config CLZ_TAB
+	bool
+
 config CORDIC
-	tristate "Cordic function"
+	tristate "CORDIC algorithm"
+	help
+	  This option provides an implementation of the CORDIC algorithm;
+	  calculations are in fixed point. Module will be called cordic.
+
+config MPILIB
+	tristate
+	select CLZ_TAB
+	help
+	  Multiprecision maths library from GnuPG.
+	  It is used to implement RSA digital signature verification,
+	  which is used by IMA/EVM digital signature extension.
+
+config MPILIB_EXTRA
+	bool
+	depends on MPILIB
+	help
+	  Additional sources of multiprecision maths library from GnuPG.
+	  This code is unnecessary for RSA digital signature verification,
+	  but can be compiled if needed.
+
+config SIGNATURE
+	tristate
+	depends on KEYS && CRYPTO
+	select CRYPTO_SHA1
+	select MPILIB
 	help
-	  The option provides arithmetic function using cordic algorithm
-	  so its calculations are in fixed point. Modules can select this
-	  when they require this function. Module will be called cordic.
+	  Digital signature verification. Currently only RSA is supported.
+	  Implementation is done using GnuPG MPI library
 
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 82928f5ea049..6777153f18f3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -166,22 +166,25 @@ config LOCKUP_DETECTOR
 	  hard and soft lockups.
 
 	  Softlockups are bugs that cause the kernel to loop in kernel
-	  mode for more than 60 seconds, without giving other tasks a
+	  mode for more than 20 seconds, without giving other tasks a
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
 	  Hardlockups are bugs that cause the CPU to loop in kernel mode
-	  for more than 60 seconds, without letting other interrupts have a
+	  for more than 10 seconds, without letting other interrupts have a
 	  chance to run.  The current stack trace is displayed upon detection
 	  and the system will stay locked up.
 
 	  The overhead should be minimal.  A periodic hrtimer runs to
-	  generate interrupts and kick the watchdog task every 10-12 seconds.
-	  An NMI is generated every 60 seconds or so to check for hardlockups.
+	  generate interrupts and kick the watchdog task every 4 seconds.
+	  An NMI is generated every 10 seconds or so to check for hardlockups.
+
+	  The frequency of hrtimer and NMI events and the soft and hard lockup
+	  thresholds can be controlled through the sysctl watchdog_thresh.
 
 config HARDLOCKUP_DETECTOR
 	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
-		 !ARCH_HAS_NMI_WATCHDOG
+		 !HAVE_NMI_WATCHDOG
 
 config BOOTPARAM_HARDLOCKUP_PANIC
 	bool "Panic (Reboot) On Hard Lockups"
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
 	help
 	  Say Y here to enable the kernel to panic on "hard lockups",
 	  which are bugs that cause the kernel to loop in kernel
-	  mode with interrupts disabled for more than 60 seconds.
+	  mode with interrupts disabled for more than 10 seconds (configurable
+	  using the watchdog_thresh sysctl).
 
 	  Say N if unsure.
 
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 60 seconds, without giving other tasks a
-	  chance to run.
+	  mode for more than 20 seconds (configurable using the watchdog_thresh
+	  sysctl), without giving other tasks a chance to run.
 
 	  The panic can be used in combination with panic_timeout,
 	  to cause the system to reboot automatically after a
@@ -414,7 +418,7 @@ config SLUB_STATS
 
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
+	depends on DEBUG_KERNEL && EXPERIMENTAL && \
 		(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
 
 	select DEBUG_FS
@@ -495,6 +499,7 @@ config RT_MUTEX_TESTER
 config DEBUG_SPINLOCK
 	bool "Spinlock and rw-lock debugging: basic checks"
 	depends on DEBUG_KERNEL
+	select UNINLINE_SPIN_UNLOCK
 	help
 	  Say Y here and build SMP to catch missing spinlock initialization
 	  and certain other kinds of spinlock errors commonly made.  This is
@@ -927,6 +932,30 @@ config RCU_CPU_STALL_VERBOSE
 
 	  Say Y if you want to enable such checks.
 
+config RCU_CPU_STALL_INFO
+	bool "Print additional diagnostics on RCU CPU stall"
+	depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL
+	default n
+	help
+	  For each stalled CPU that is aware of the current RCU grace
+	  period, print out additional per-CPU diagnostic information
+	  regarding scheduling-clock ticks, idle state, and,
+	  for RCU_FAST_NO_HZ kernels, idle-entry state.
+
+	  Say N if you are unsure.
+
+	  Say Y if you want to enable such diagnostics.
+
+config RCU_TRACE
+	bool "Enable tracing for RCU"
+	depends on DEBUG_KERNEL
+	help
+	  This option provides tracing in RCU which presents stats
+	  in debugfs for debugging RCU implementation.
+
+	  Say Y here if you want to enable RCU tracing
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
@@ -1113,14 +1142,6 @@ config LATENCYTOP
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
-config SYSCTL_SYSCALL_CHECK
-	bool "Sysctl checks"
-	depends on SYSCTL
-	---help---
-	  sys_sysctl uses binary paths that have been found challenging
-	  to properly maintain and use. This enables checks that help
-	  you to keep things correct.
-
 source mm/Kconfig.debug
 source kernel/trace/Kconfig
 
diff --git a/lib/Makefile b/lib/Makefile
index c0ffaaff6534..18515f0267c4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -33,6 +33,7 @@ endif
 
 lib-$(CONFIG_HOTPLUG) += kobject_uevent.o
 obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
@@ -117,6 +118,11 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_MPILIB) += mpi/
+obj-$(CONFIG_SIGNATURE) += digsig.o
+
+obj-$(CONFIG_CLZ_TAB) += clz_tab.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 4b1b083f219c..1e9a6cbc3689 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -6,7 +6,7 @@
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 static const char *skip_arg(const char *cp)
 {
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 3975470caf4f..978537809d84 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -13,7 +13,7 @@
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/atomic.h>
 
 /*
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 0c33cde2a1e6..cb99b91c3a1d 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -9,6 +9,7 @@
  * (at your option) any later version.
  */
 #include <linux/init.h>
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/atomic.h>
 
diff --git a/lib/average.c b/lib/average.c
index 5576c2841496..99a67e662b3c 100644
--- a/lib/average.c
+++ b/lib/average.c
@@ -5,8 +5,9 @@
  * Version 2.  See the file COPYING for more details.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/average.h>
+#include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/log2.h>
 
diff --git a/lib/bcd.c b/lib/bcd.c
index d74257fd0fe7..55efaf742346 100644
--- a/lib/bcd.c
+++ b/lib/bcd.c
@@ -1,5 +1,5 @@
 #include <linux/bcd.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 unsigned bcd2bin(unsigned char val)
 {
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 0d4a127dd9b3..b5a8b6ad2454 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -5,11 +5,13 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/thread_info.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <linux/bug.h>
 #include <asm/uaccess.h>
 
 /*
diff --git a/lib/bsearch.c b/lib/bsearch.c
index 5b54758e2afb..e33c179089db 100644
--- a/lib/bsearch.c
+++ b/lib/bsearch.c
@@ -9,7 +9,7 @@
  * published by the Free Software Foundation; version 2.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bsearch.h>
 
 /*
diff --git a/lib/btree.c b/lib/btree.c
index 2a34392bcecc..e5ec1e9c1aa5 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -357,6 +357,7 @@ miss:
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(btree_get_prev);
 
 static int getpos(struct btree_geo *geo, unsigned long *node,
 		unsigned long *key)
diff --git a/lib/bug.c b/lib/bug.c
index 19552096d16b..a28c1415357c 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -169,7 +169,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 		return BUG_TRAP_TYPE_WARN;
 	}
 
-	printk(KERN_EMERG "------------[ cut here ]------------\n");
+	printk(KERN_DEFAULT "------------[ cut here ]------------\n");
 
 	if (file)
 		printk(KERN_CRIT "kernel BUG at %s:%u!\n",
diff --git a/lib/check_signature.c b/lib/check_signature.c
index fd6af199247b..6b49797980c4 100644
--- a/lib/check_signature.c
+++ b/lib/check_signature.c
@@ -1,5 +1,5 @@
 #include <linux/io.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /**
  *	check_signature		-	find BIOS signatures
diff --git a/lib/checksum.c b/lib/checksum.c
index 8df2f91e6d98..12dceb27ff20 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -32,7 +32,7 @@
 /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
  kills, so most of the assembly has to go. */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <net/checksum.h>
 
 #include <asm/byteorder.h>
diff --git a/lib/clz_tab.c b/lib/clz_tab.c
new file mode 100644
index 000000000000..7287b4a991a7
--- /dev/null
+++ b/lib/clz_tab.c
@@ -0,0 +1,18 @@
+const unsigned char __clz_tab[] = {
+	0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
+	    5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+	    6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	    7, 7, 7, 7, 7, 7, 7, 7,
+	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+	    7, 7, 7, 7, 7, 7, 7, 7,
+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+	    8, 8, 8, 8, 8, 8, 8, 8,
+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+	    8, 8, 8, 8, 8, 8, 8, 8,
+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+	    8, 8, 8, 8, 8, 8, 8, 8,
+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+	    8, 8, 8, 8, 8, 8, 8, 8,
+};
diff --git a/lib/cmdline.c b/lib/cmdline.c
index f5f3ad8b62ff..eb6791188cf5 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -12,7 +12,7 @@
  *
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 
diff --git a/lib/cordic.c b/lib/cordic.c
index aa27a88d7e04..6cf477839ebd 100644
--- a/lib/cordic.c
+++ b/lib/cordic.c
@@ -96,6 +96,6 @@ struct cordic_iq cordic_calc_iq(s32 theta)
 }
 EXPORT_SYMBOL(cordic_calc_iq);
 
-MODULE_DESCRIPTION("Cordic functions");
+MODULE_DESCRIPTION("CORDIC algorithm");
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 987acfafeb83..145dec5267c9 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -11,7 +11,7 @@
 #ifdef CONFIG_GENERIC_HARDIRQS
 #include <linux/interrupt.h>
 #endif
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * These functions maintain a mapping from CPUs to some ordered set of
diff --git a/lib/cpumask.c b/lib/cpumask.c
index af3e5817de98..402a54ac35cb 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -2,7 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bootmem.h>
 
 int __first_cpu(const cpumask_t *srcp)
@@ -26,18 +26,6 @@ int __next_cpu_nr(int n, const cpumask_t *srcp)
 EXPORT_SYMBOL(__next_cpu_nr);
 #endif
 
-int __any_online_cpu(const cpumask_t *mask)
-{
-	int cpu;
-
-	for_each_cpu(cpu, mask) {
-		if (cpu_online(cpu))
-			break;
-	}
-	return cpu;
-}
-EXPORT_SYMBOL(__any_online_cpu);
-
 /**
  * cpumask_next_and - get the next cpu in *src1p & *src2p
  * @n: the cpu prior to the place to search (ie. return will be > @n)
diff --git a/lib/crc32.c b/lib/crc32.c
index a6e633a48cea..b0d278fb1d91 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -1,4 +1,8 @@
 /*
+ * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin
+ * cleaned up code to current version of sparse and added the slicing-by-8
+ * algorithm to the closely similar existing slicing-by-4 algorithm.
+ *
  * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
  * Nicer crc32 functions/docs submitted by linux@horizon.com.  Thanks!
  * Code was from the public domain, copyright abandoned.  Code was
@@ -20,51 +24,58 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+/* see: Documentation/crc32.txt for a description of algorithms */
+
 #include <linux/crc32.h>
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/compiler.h>
 #include <linux/types.h>
-#include <linux/init.h>
-#include <linux/atomic.h>
 #include "crc32defs.h"
-#if CRC_LE_BITS == 8
-# define tole(x) __constant_cpu_to_le32(x)
+
+#if CRC_LE_BITS > 8
+# define tole(x) ((__force u32) __constant_cpu_to_le32(x))
 #else
 # define tole(x) (x)
 #endif
 
-#if CRC_BE_BITS == 8
-# define tobe(x) __constant_cpu_to_be32(x)
+#if CRC_BE_BITS > 8
+# define tobe(x) ((__force u32) __constant_cpu_to_be32(x))
 #else
 # define tobe(x) (x)
 #endif
+
 #include "crc32table.h"
 
 MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
-MODULE_DESCRIPTION("Ethernet CRC32 calculations");
+MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
-#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
+#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
+/* implements slicing-by-4 or slicing-by-8 algorithm */
 static inline u32
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
-#  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
-#  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
-		tab[2][(crc >> 8) & 255] ^ \
-		tab[1][(crc >> 16) & 255] ^ \
-		tab[0][(crc >> 24) & 255]
+#  define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
+#  define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \
+		   t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255])
+#  define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \
+		   t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255])
 # else
-#  define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
-#  define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
-		tab[1][(crc >> 8) & 255] ^ \
-		tab[2][(crc >> 16) & 255] ^ \
-		tab[3][(crc >> 24) & 255]
+#  define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \
+		   t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255])
+#  define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \
+		   t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255])
 # endif
 	const u32 *b;
 	size_t    rem_len;
+# ifdef CONFIG_X86
+	size_t i;
+# endif
+	const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
+	const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
+	u32 q;
 
 	/* Align it */
 	if (unlikely((long)buf & 3 && len)) {
@@ -72,27 +83,51 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 			DO_CRC(*buf++);
 		} while ((--len) && ((long)buf)&3);
 	}
+
+# if CRC_LE_BITS == 32
 	rem_len = len & 3;
-	/* load data 32 bits wide, xor data 32 bits wide. */
 	len = len >> 2;
+# else
+	rem_len = len & 7;
+	len = len >> 3;
+# endif
+
 	b = (const u32 *)buf;
+# ifdef CONFIG_X86
+	--b;
+	for (i = 0; i < len; i++) {
+# else
 	for (--b; len; --len) {
-		crc ^= *++b; /* use pre increment for speed */
-		DO_CRC4;
+# endif
+		q = crc ^ *++b; /* use pre increment for speed */
+# if CRC_LE_BITS == 32
+		crc = DO_CRC4;
+# else
+		crc = DO_CRC8;
+		q = *++b;
+		crc ^= DO_CRC4;
+# endif
 	}
 	len = rem_len;
 	/* And the last few bytes */
 	if (len) {
 		u8 *p = (u8 *)(b + 1) - 1;
+# ifdef CONFIG_X86
+		for (i = 0; i < len; i++)
+			DO_CRC(*++p); /* use pre increment for speed */
+# else
 		do {
 			DO_CRC(*++p); /* use pre increment for speed */
 		} while (--len);
+# endif
 	}
 	return crc;
 #undef DO_CRC
 #undef DO_CRC4
+#undef DO_CRC8
 }
 #endif
+
 /**
  * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
  * @crc: seed value for computation.  ~0 for Ethernet, sometimes 0 for
@@ -100,53 +135,66 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
  * @p: pointer to buffer over which CRC is run
  * @len: length of buffer @p
  */
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-
-#if CRC_LE_BITS == 1
-/*
- * In fact, the table-based code will work in this case, but it can be
- * simplified by inlining the table in ?: form.
- */
-
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
+					  size_t len, const u32 (*tab)[256],
+					  u32 polynomial)
 {
+#if CRC_LE_BITS == 1
 	int i;
 	while (len--) {
 		crc ^= *p++;
 		for (i = 0; i < 8; i++)
-			crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+			crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
+	}
+# elif CRC_LE_BITS == 2
+	while (len--) {
+		crc ^= *p++;
+		crc = (crc >> 2) ^ tab[0][crc & 3];
+		crc = (crc >> 2) ^ tab[0][crc & 3];
+		crc = (crc >> 2) ^ tab[0][crc & 3];
+		crc = (crc >> 2) ^ tab[0][crc & 3];
 	}
-	return crc;
-}
-#else				/* Table-based approach */
-
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
-{
-# if CRC_LE_BITS == 8
-	const u32      (*tab)[] = crc32table_le;
-
-	crc = __cpu_to_le32(crc);
-	crc = crc32_body(crc, p, len, tab);
-	return __le32_to_cpu(crc);
 # elif CRC_LE_BITS == 4
 	while (len--) {
 		crc ^= *p++;
-		crc = (crc >> 4) ^ crc32table_le[crc & 15];
-		crc = (crc >> 4) ^ crc32table_le[crc & 15];
+		crc = (crc >> 4) ^ tab[0][crc & 15];
+		crc = (crc >> 4) ^ tab[0][crc & 15];
 	}
-	return crc;
-# elif CRC_LE_BITS == 2
+# elif CRC_LE_BITS == 8
+	/* aka Sarwate algorithm */
 	while (len--) {
 		crc ^= *p++;
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
-		crc = (crc >> 2) ^ crc32table_le[crc & 3];
+		crc = (crc >> 8) ^ tab[0][crc & 255];
 	}
+# else
+	crc = (__force u32) __cpu_to_le32(crc);
+	crc = crc32_body(crc, p, len, tab);
+	crc = __le32_to_cpu((__force __le32)crc);
+#endif
 	return crc;
-# endif
+}
+
+#if CRC_LE_BITS == 1
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
+}
+u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
+}
+#else
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_le_generic(crc, p, len, crc32table_le, CRCPOLY_LE);
+}
+u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(__crc32c_le);
 
 /**
  * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -155,317 +203,913 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
  * @p: pointer to buffer over which CRC is run
  * @len: length of buffer @p
  */
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
-
-#if CRC_BE_BITS == 1
-/*
- * In fact, the table-based code will work in this case, but it can be
- * simplified by inlining the table in ?: form.
- */
-
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
+					  size_t len, const u32 (*tab)[256],
+					  u32 polynomial)
 {
+#if CRC_BE_BITS == 1
 	int i;
 	while (len--) {
 		crc ^= *p++ << 24;
 		for (i = 0; i < 8; i++)
 			crc =
-			    (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE :
+			    (crc << 1) ^ ((crc & 0x80000000) ? polynomial :
 					  0);
 	}
-	return crc;
-}
-
-#else				/* Table-based approach */
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
-{
-# if CRC_BE_BITS == 8
-	const u32      (*tab)[] = crc32table_be;
-
-	crc = __cpu_to_be32(crc);
-	crc = crc32_body(crc, p, len, tab);
-	return __be32_to_cpu(crc);
+# elif CRC_BE_BITS == 2
+	while (len--) {
+		crc ^= *p++ << 24;
+		crc = (crc << 2) ^ tab[0][crc >> 30];
+		crc = (crc << 2) ^ tab[0][crc >> 30];
+		crc = (crc << 2) ^ tab[0][crc >> 30];
+		crc = (crc << 2) ^ tab[0][crc >> 30];
+	}
 # elif CRC_BE_BITS == 4
 	while (len--) {
 		crc ^= *p++ << 24;
-		crc = (crc << 4) ^ crc32table_be[crc >> 28];
-		crc = (crc << 4) ^ crc32table_be[crc >> 28];
+		crc = (crc << 4) ^ tab[0][crc >> 28];
+		crc = (crc << 4) ^ tab[0][crc >> 28];
 	}
-	return crc;
-# elif CRC_BE_BITS == 2
+# elif CRC_BE_BITS == 8
 	while (len--) {
 		crc ^= *p++ << 24;
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
-		crc = (crc << 2) ^ crc32table_be[crc >> 30];
+		crc = (crc << 8) ^ tab[0][crc >> 24];
 	}
-	return crc;
+# else
+	crc = (__force u32) __cpu_to_be32(crc);
+	crc = crc32_body(crc, p, len, tab);
+	crc = __be32_to_cpu((__force __be32)crc);
 # endif
+	return crc;
 }
-#endif
 
-EXPORT_SYMBOL(crc32_le);
+#if CRC_LE_BITS == 1
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE);
+}
+#else
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_be_generic(crc, p, len, crc32table_be, CRCPOLY_BE);
+}
+#endif
 EXPORT_SYMBOL(crc32_be);
 
-/*
- * A brief CRC tutorial.
- *
- * A CRC is a long-division remainder.  You add the CRC to the message,
- * and the whole thing (message+CRC) is a multiple of the given
- * CRC polynomial.  To check the CRC, you can either check that the
- * CRC matches the recomputed value, *or* you can check that the
- * remainder computed on the message+CRC is 0.  This latter approach
- * is used by a lot of hardware implementations, and is why so many
- * protocols put the end-of-frame flag after the CRC.
- *
- * It's actually the same long division you learned in school, except that
- * - We're working in binary, so the digits are only 0 and 1, and
- * - When dividing polynomials, there are no carries.  Rather than add and
- *   subtract, we just xor.  Thus, we tend to get a bit sloppy about
- *   the difference between adding and subtracting.
- *
- * A 32-bit CRC polynomial is actually 33 bits long.  But since it's
- * 33 bits long, bit 32 is always going to be set, so usually the CRC
- * is written in hex with the most significant bit omitted.  (If you're
- * familiar with the IEEE 754 floating-point format, it's the same idea.)
- *
- * Note that a CRC is computed over a string of *bits*, so you have
- * to decide on the endianness of the bits within each byte.  To get
- * the best error-detecting properties, this should correspond to the
- * order they're actually sent.  For example, standard RS-232 serial is
- * little-endian; the most significant bit (sometimes used for parity)
- * is sent last.  And when appending a CRC word to a message, you should
- * do it in the right order, matching the endianness.
- *
- * Just like with ordinary division, the remainder is always smaller than
- * the divisor (the CRC polynomial) you're dividing by.  Each step of the
- * division, you take one more digit (bit) of the dividend and append it
- * to the current remainder.  Then you figure out the appropriate multiple
- * of the divisor to subtract to being the remainder back into range.
- * In binary, it's easy - it has to be either 0 or 1, and to make the
- * XOR cancel, it's just a copy of bit 32 of the remainder.
- *
- * When computing a CRC, we don't care about the quotient, so we can
- * throw the quotient bit away, but subtract the appropriate multiple of
- * the polynomial from the remainder and we're back to where we started,
- * ready to process the next bit.
- *
- * A big-endian CRC written this way would be coded like:
- * for (i = 0; i < input_bits; i++) {
- * 	multiple = remainder & 0x80000000 ? CRCPOLY : 0;
- * 	remainder = (remainder << 1 | next_input_bit()) ^ multiple;
- * }
- * Notice how, to get at bit 32 of the shifted remainder, we look
- * at bit 31 of the remainder *before* shifting it.
- *
- * But also notice how the next_input_bit() bits we're shifting into
- * the remainder don't actually affect any decision-making until
- * 32 bits later.  Thus, the first 32 cycles of this are pretty boring.
- * Also, to add the CRC to a message, we need a 32-bit-long hole for it at
- * the end, so we have to add 32 extra cycles shifting in zeros at the
- * end of every message,
- *
- * So the standard trick is to rearrage merging in the next_input_bit()
- * until the moment it's needed.  Then the first 32 cycles can be precomputed,
- * and merging in the final 32 zero bits to make room for the CRC can be
- * skipped entirely.
- * This changes the code to:
- * for (i = 0; i < input_bits; i++) {
- *      remainder ^= next_input_bit() << 31;
- * 	multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
- * 	remainder = (remainder << 1) ^ multiple;
- * }
- * With this optimization, the little-endian code is simpler:
- * for (i = 0; i < input_bits; i++) {
- *      remainder ^= next_input_bit();
- * 	multiple = (remainder & 1) ? CRCPOLY : 0;
- * 	remainder = (remainder >> 1) ^ multiple;
- * }
- *
- * Note that the other details of endianness have been hidden in CRCPOLY
- * (which must be bit-reversed) and next_input_bit().
- *
- * However, as long as next_input_bit is returning the bits in a sensible
- * order, we can actually do the merging 8 or more bits at a time rather
- * than one bit at a time:
- * for (i = 0; i < input_bytes; i++) {
- * 	remainder ^= next_input_byte() << 24;
- * 	for (j = 0; j < 8; j++) {
- * 		multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
- * 		remainder = (remainder << 1) ^ multiple;
- * 	}
- * }
- * Or in little-endian:
- * for (i = 0; i < input_bytes; i++) {
- * 	remainder ^= next_input_byte();
- * 	for (j = 0; j < 8; j++) {
- * 		multiple = (remainder & 1) ? CRCPOLY : 0;
- * 		remainder = (remainder << 1) ^ multiple;
- * 	}
- * }
- * If the input is a multiple of 32 bits, you can even XOR in a 32-bit
- * word at a time and increase the inner loop count to 32.
- *
- * You can also mix and match the two loop styles, for example doing the
- * bulk of a message byte-at-a-time and adding bit-at-a-time processing
- * for any fractional bytes at the end.
- *
- * The only remaining optimization is to the byte-at-a-time table method.
- * Here, rather than just shifting one bit of the remainder to decide
- * in the correct multiple to subtract, we can shift a byte at a time.
- * This produces a 40-bit (rather than a 33-bit) intermediate remainder,
- * but again the multiple of the polynomial to subtract depends only on
- * the high bits, the high 8 bits in this case.  
- *
- * The multiple we need in that case is the low 32 bits of a 40-bit
- * value whose high 8 bits are given, and which is a multiple of the
- * generator polynomial.  This is simply the CRC-32 of the given
- * one-byte message.
- *
- * Two more details: normally, appending zero bits to a message which
- * is already a multiple of a polynomial produces a larger multiple of that
- * polynomial.  To enable a CRC to detect this condition, it's common to
- * invert the CRC before appending it.  This makes the remainder of the
- * message+crc come out not as zero, but some fixed non-zero value.
- *
- * The same problem applies to zero bits prepended to the message, and
- * a similar solution is used.  Instead of starting with a remainder of
- * 0, an initial remainder of all ones is used.  As long as you start
- * the same way on decoding, it doesn't make a difference.
- */
-
-#ifdef UNITTEST
+#ifdef CONFIG_CRC32_SELFTEST
 
-#include <stdlib.h>
-#include <stdio.h>
+/* 4096 random bytes */
+static u8 __attribute__((__aligned__(8))) test_buf[] =
+{
+	0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
+	0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
+	0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60,
+	0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c,
+	0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4,
+	0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a,
+	0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a,
+	0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4,
+	0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9,
+	0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4,
+	0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca,
+	0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61,
+	0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e,
+	0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a,
+	0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f,
+	0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd,
+	0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c,
+	0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88,
+	0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53,
+	0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f,
+	0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4,
+	0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74,
+	0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60,
+	0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09,
+	0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07,
+	0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1,
+	0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f,
+	0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2,
+	0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0,
+	0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95,
+	0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22,
+	0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93,
+	0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86,
+	0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d,
+	0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40,
+	0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b,
+	0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35,
+	0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40,
+	0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63,
+	0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b,
+	0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8,
+	0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72,
+	0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86,
+	0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff,
+	0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed,
+	0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c,
+	0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed,
+	0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30,
+	0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99,
+	0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4,
+	0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80,
+	0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37,
+	0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04,
+	0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e,
+	0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd,
+	0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c,
+	0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09,
+	0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb,
+	0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b,
+	0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53,
+	0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b,
+	0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f,
+	0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff,
+	0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40,
+	0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6,
+	0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb,
+	0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73,
+	0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f,
+	0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4,
+	0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66,
+	0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1,
+	0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80,
+	0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f,
+	0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5,
+	0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7,
+	0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce,
+	0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff,
+	0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48,
+	0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26,
+	0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72,
+	0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88,
+	0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9,
+	0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc,
+	0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8,
+	0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09,
+	0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8,
+	0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c,
+	0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48,
+	0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d,
+	0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f,
+	0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae,
+	0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97,
+	0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8,
+	0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75,
+	0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc,
+	0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27,
+	0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf,
+	0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7,
+	0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0,
+	0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8,
+	0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c,
+	0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44,
+	0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54,
+	0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38,
+	0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f,
+	0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b,
+	0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7,
+	0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef,
+	0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e,
+	0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c,
+	0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c,
+	0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0,
+	0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37,
+	0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf,
+	0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e,
+	0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4,
+	0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60,
+	0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe,
+	0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61,
+	0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3,
+	0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe,
+	0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40,
+	0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec,
+	0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f,
+	0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7,
+	0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79,
+	0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c,
+	0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f,
+	0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21,
+	0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9,
+	0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30,
+	0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b,
+	0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee,
+	0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6,
+	0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3,
+	0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09,
+	0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd,
+	0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f,
+	0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9,
+	0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc,
+	0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59,
+	0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60,
+	0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5,
+	0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1,
+	0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8,
+	0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9,
+	0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab,
+	0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80,
+	0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01,
+	0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e,
+	0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d,
+	0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35,
+	0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38,
+	0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a,
+	0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac,
+	0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca,
+	0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57,
+	0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed,
+	0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20,
+	0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef,
+	0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c,
+	0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a,
+	0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64,
+	0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4,
+	0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54,
+	0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16,
+	0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26,
+	0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc,
+	0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87,
+	0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60,
+	0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d,
+	0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54,
+	0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13,
+	0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59,
+	0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb,
+	0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f,
+	0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15,
+	0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78,
+	0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93,
+	0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e,
+	0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31,
+	0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1,
+	0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37,
+	0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15,
+	0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78,
+	0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f,
+	0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31,
+	0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f,
+	0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc,
+	0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9,
+	0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3,
+	0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe,
+	0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4,
+	0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24,
+	0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1,
+	0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85,
+	0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8,
+	0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09,
+	0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c,
+	0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46,
+	0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5,
+	0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39,
+	0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2,
+	0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc,
+	0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35,
+	0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde,
+	0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80,
+	0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15,
+	0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63,
+	0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58,
+	0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d,
+	0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf,
+	0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12,
+	0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c,
+	0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b,
+	0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1,
+	0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6,
+	0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73,
+	0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9,
+	0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e,
+	0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22,
+	0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb,
+	0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2,
+	0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c,
+	0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c,
+	0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93,
+	0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f,
+	0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38,
+	0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57,
+	0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03,
+	0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90,
+	0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8,
+	0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4,
+	0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36,
+	0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7,
+	0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47,
+	0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46,
+	0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73,
+	0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72,
+	0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23,
+	0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a,
+	0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58,
+	0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f,
+	0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96,
+	0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9,
+	0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b,
+	0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c,
+	0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef,
+	0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3,
+	0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4,
+	0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f,
+	0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17,
+	0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18,
+	0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8,
+	0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98,
+	0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42,
+	0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97,
+	0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97,
+	0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1,
+	0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77,
+	0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb,
+	0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c,
+	0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb,
+	0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56,
+	0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04,
+	0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48,
+	0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe,
+	0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d,
+	0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97,
+	0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8,
+	0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f,
+	0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e,
+	0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca,
+	0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44,
+	0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f,
+	0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6,
+	0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63,
+	0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19,
+	0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58,
+	0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b,
+	0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28,
+	0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf,
+	0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6,
+	0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3,
+	0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe,
+	0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f,
+	0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf,
+	0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9,
+	0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e,
+	0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7,
+	0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70,
+	0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0,
+	0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d,
+	0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4,
+	0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5,
+	0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85,
+	0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc,
+	0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f,
+	0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56,
+	0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb,
+	0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b,
+	0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5,
+	0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03,
+	0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23,
+	0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03,
+	0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87,
+	0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4,
+	0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43,
+	0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11,
+	0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40,
+	0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59,
+	0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9,
+	0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30,
+	0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd,
+	0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45,
+	0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83,
+	0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b,
+	0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5,
+	0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3,
+	0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84,
+	0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8,
+	0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34,
+	0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b,
+	0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31,
+	0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b,
+	0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40,
+	0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b,
+	0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e,
+	0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38,
+	0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb,
+	0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2,
+	0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c,
+	0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1,
+	0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc,
+	0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec,
+	0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34,
+	0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95,
+	0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92,
+	0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f,
+	0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c,
+	0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b,
+	0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c,
+	0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5,
+	0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb,
+	0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4,
+	0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9,
+	0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4,
+	0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41,
+	0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a,
+	0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8,
+	0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06,
+	0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62,
+	0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47,
+	0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4,
+	0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00,
+	0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67,
+	0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81,
+	0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0,
+	0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10,
+	0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79,
+	0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19,
+	0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8,
+	0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1,
+	0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83,
+	0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86,
+	0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55,
+	0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66,
+	0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0,
+	0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49,
+	0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea,
+	0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24,
+	0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e,
+	0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88,
+	0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87,
+	0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34,
+	0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f,
+	0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a,
+	0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a,
+	0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93,
+	0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37,
+	0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38,
+	0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4,
+	0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48,
+	0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65,
+	0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09,
+	0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e,
+	0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5,
+	0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b,
+	0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4,
+	0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e,
+	0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d,
+	0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0,
+	0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5,
+	0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48,
+	0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e,
+	0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f,
+	0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a,
+	0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d,
+	0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14,
+	0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69,
+	0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53,
+	0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56,
+	0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48,
+	0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4,
+	0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26,
+	0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e,
+	0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40,
+	0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7,
+	0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62,
+	0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe,
+	0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf,
+	0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2,
+	0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d,
+	0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32,
+	0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa,
+	0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45,
+	0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04,
+	0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33,
+	0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad,
+	0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4,
+	0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c,
+	0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b,
+	0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36,
+	0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa,
+	0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9,
+	0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28,
+	0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b,
+	0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03,
+	0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d,
+	0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff,
+	0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39,
+	0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b,
+	0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2,
+	0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34,
+	0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe,
+	0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0,
+	0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27,
+	0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86,
+	0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90,
+	0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03,
+	0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb,
+	0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57,
+	0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9,
+	0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5,
+	0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16,
+	0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5,
+	0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a,
+	0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d,
+	0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0,
+	0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f,
+	0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48,
+	0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1,
+	0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09,
+	0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51,
+	0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b,
+	0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf,
+	0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe,
+	0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad,
+	0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e,
+	0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57,
+	0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f,
+	0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef,
+	0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8,
+	0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69,
+	0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d,
+	0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59,
+	0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9,
+	0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d,
+	0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea,
+	0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56,
+	0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4,
+	0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8,
+	0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78,
+	0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f,
+	0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4,
+	0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91,
+	0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f,
+	0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c,
+	0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57,
+	0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4,
+	0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23,
+	0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17,
+	0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66,
+	0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39,
+	0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36,
+	0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00,
+	0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7,
+	0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60,
+	0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c,
+	0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e,
+	0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7,
+	0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a,
+	0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d,
+	0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37,
+	0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82,
+	0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8,
+	0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e,
+	0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85,
+	0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98,
+	0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22,
+	0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7,
+	0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49,
+	0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33,
+	0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc,
+	0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8,
+	0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f,
+	0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3,
+	0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98,
+	0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c,
+	0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6,
+	0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc,
+	0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d,
+};
 
-#if 0				/*Not used at present */
-static void
-buf_dump(char const *prefix, unsigned char const *buf, size_t len)
+/* 100 test cases */
+static struct crc_test {
+	u32 crc;	/* random starting crc */
+	u32 start;	/* random 6 bit offset in buf */
+	u32 length;	/* random 11 bit length of test */
+	u32 crc_le;	/* expected crc32_le result */
+	u32 crc_be;	/* expected crc32_be result */
+	u32 crc32c_le;	/* expected crc32c_le result */
+} test[] =
 {
-	fputs(prefix, stdout);
-	while (len--)
-		printf(" %02x", *buf++);
-	putchar('\n');
+	{0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1,
+	 0xf6e93d6c},
+	{0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad,
+	 0x0fe92aca},
+	{0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f,
+	 0x52e1ebb8},
+	{0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a,
+	 0x0798af9a},
+	{0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2,
+	 0x18eb3152},
+	{0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793,
+	 0xd00d08c7},
+	{0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed,
+	 0x8ba966bc},
+	{0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35,
+	 0x11d694a2},
+	{0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2,
+	 0x6ab3208d},
+	{0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10,
+	 0xba4603c5},
+	{0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb,
+	 0xe6071c6f},
+	{0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0,
+	 0x179ec30a},
+	{0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb,
+	 0x0903beb8},
+	{0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed,
+	 0x6a7cb4fa},
+	{0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591,
+	 0xdb535801},
+	{0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67,
+	 0x92bed597},
+	{0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd,
+	 0x192a3f1b},
+	{0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a,
+	 0xccbaec1a},
+	{0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b,
+	 0x7eabae4d},
+	{0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f,
+	 0x28c72982},
+	{0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d,
+	 0xc3cd4d18},
+	{0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a,
+	 0xbca8f0e7},
+	{0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97,
+	 0x713f60b3},
+	{0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2,
+	 0xebd08fd5},
+	{0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138,
+	 0x64406c59},
+	{0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032,
+	 0x7421890e},
+	{0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f,
+	 0xe9347603},
+	{0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f,
+	 0x1bef9060},
+	{0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32,
+	 0x34720072},
+	{0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef,
+	 0x48310f59},
+	{0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0,
+	 0x783a4213},
+	{0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59,
+	 0x9e8efd41},
+	{0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4,
+	 0xfc3d34a5},
+	{0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c,
+	 0x17a52ae2},
+	{0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51,
+	 0x886d935a},
+	{0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11,
+	 0xeaaeaeb2},
+	{0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659,
+	 0x8e900a4b},
+	{0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af,
+	 0xd74662b1},
+	{0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99,
+	 0xd26752ba},
+	{0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b,
+	 0x8b1fcd62},
+	{0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521,
+	 0xf54342fe},
+	{0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3,
+	 0x5b95b988},
+	{0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d,
+	 0x2e1176be},
+	{0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f,
+	 0x66120546},
+	{0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b,
+	 0xf256a5cc},
+	{0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0,
+	 0x4af1dd69},
+	{0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195,
+	 0x56f0a04a},
+	{0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d,
+	 0x74f6b6b2},
+	{0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4,
+	 0x085951fd},
+	{0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3,
+	 0xc65387eb},
+	{0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643,
+	 0x1ca9257b},
+	{0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10,
+	 0xfd196d76},
+	{0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d,
+	 0x5ef88339},
+	{0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5,
+	 0x2c3714d9},
+	{0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b,
+	 0x58576548},
+	{0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee,
+	 0xfd7c57de},
+	{0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14,
+	 0xd5fedd59},
+	{0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a,
+	 0x1cc3b17b},
+	{0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b,
+	 0x270eed73},
+	{0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3,
+	 0x91ecbb11},
+	{0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826,
+	 0x05ed8d0c},
+	{0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06,
+	 0x0b09ad5b},
+	{0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35,
+	 0xf8d511fb},
+	{0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801,
+	 0x5ad832cc},
+	{0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2,
+	 0x1214d196},
+	{0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d,
+	 0x5747218a},
+	{0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c,
+	 0xde8f14de},
+	{0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba,
+	 0x3563b7b9},
+	{0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5,
+	 0x071475d0},
+	{0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b,
+	 0x54c79d60},
+	{0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178,
+	 0x4c53eee6},
+	{0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3,
+	 0x10137a3c},
+	{0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605,
+	 0xaa9d6c73},
+	{0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1,
+	 0xb63d23e7},
+	{0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9,
+	 0x7f53e9cf},
+	{0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78,
+	 0x13c1cd83},
+	{0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9,
+	 0x49ff5867},
+	{0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd,
+	 0x8467f211},
+	{0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab,
+	 0x3f9683b2},
+	{0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb,
+	 0x76a3f874},
+	{0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77,
+	 0x863b702f},
+	{0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da,
+	 0xdc6c58ff},
+	{0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39,
+	 0x0622cc95},
+	{0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16,
+	 0xe85605cd},
+	{0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208,
+	 0x31da5f06},
+	{0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e,
+	 0xa1f2e784},
+	{0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5,
+	 0xb07cc616},
+	{0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892,
+	 0xbf943b6c},
+	{0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db,
+	 0x2c01af1c},
+	{0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43,
+	 0x0fe5f56d},
+	{0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac,
+	 0xf8943b2d},
+	{0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7,
+	 0xe4d89272},
+	{0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2,
+	 0x7c2f6bbb},
+	{0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2,
+	 0xabbf388b},
+	{0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640,
+	 0x1dca1f4e},
+	{0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f,
+	 0x5c170e23},
+	{0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99,
+	 0xc0e9d672},
+	{0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7,
+	 0xc18bdc86},
+	{0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499,
+	 0xa874fcdd},
+	{0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a,
+	 0x9dc0bb48},
+};
 
-}
-#endif
+#include <linux/time.h>
 
-static void bytereverse(unsigned char *buf, size_t len)
+static int __init crc32c_test(void)
 {
-	while (len--) {
-		unsigned char x = bitrev8(*buf);
-		*buf++ = x;
+	int i;
+	int errors = 0;
+	int bytes = 0;
+	struct timespec start, stop;
+	u64 nsec;
+	unsigned long flags;
+
+	/* keep static to prevent cache warming code from
+	 * getting eliminated by the compiler */
+	static u32 crc;
+
+	/* pre-warm the cache */
+	for (i = 0; i < 100; i++) {
+		bytes += 2*test[i].length;
+
+		crc ^= __crc32c_le(test[i].crc, test_buf +
+		    test[i].start, test[i].length);
 	}
-}
 
-static void random_garbage(unsigned char *buf, size_t len)
-{
-	while (len--)
-		*buf++ = (unsigned char) random();
-}
+	/* reduce OS noise */
+	local_irq_save(flags);
+	local_irq_disable();
 
-#if 0				/* Not used at present */
-static void store_le(u32 x, unsigned char *buf)
-{
-	buf[0] = (unsigned char) x;
-	buf[1] = (unsigned char) (x >> 8);
-	buf[2] = (unsigned char) (x >> 16);
-	buf[3] = (unsigned char) (x >> 24);
-}
-#endif
+	getnstimeofday(&start);
+	for (i = 0; i < 100; i++) {
+		if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
+		    test[i].start, test[i].length))
+			errors++;
+	}
+	getnstimeofday(&stop);
 
-static void store_be(u32 x, unsigned char *buf)
-{
-	buf[0] = (unsigned char) (x >> 24);
-	buf[1] = (unsigned char) (x >> 16);
-	buf[2] = (unsigned char) (x >> 8);
-	buf[3] = (unsigned char) x;
+	local_irq_restore(flags);
+	local_irq_enable();
+
+	nsec = stop.tv_nsec - start.tv_nsec +
+		1000000000 * (stop.tv_sec - start.tv_sec);
+
+	pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
+
+	if (errors)
+		pr_warn("crc32c: %d self tests failed\n", errors);
+	else {
+		pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n",
+			bytes, nsec);
+	}
+
+	return 0;
 }
 
-/*
- * This checks that CRC(buf + CRC(buf)) = 0, and that
- * CRC commutes with bit-reversal.  This has the side effect
- * of bytewise bit-reversing the input buffer, and returns
- * the CRC of the reversed buffer.
- */
-static u32 test_step(u32 init, unsigned char *buf, size_t len)
+static int __init crc32_test(void)
 {
-	u32 crc1, crc2;
-	size_t i;
+	int i;
+	int errors = 0;
+	int bytes = 0;
+	struct timespec start, stop;
+	u64 nsec;
+	unsigned long flags;
+
+	/* keep static to prevent cache warming code from
+	 * getting eliminated by the compiler */
+	static u32 crc;
+
+	/* pre-warm the cache */
+	for (i = 0; i < 100; i++) {
+		bytes += 2*test[i].length;
 
-	crc1 = crc32_be(init, buf, len);
-	store_be(crc1, buf + len);
-	crc2 = crc32_be(init, buf, len + 4);
-	if (crc2)
-		printf("\nCRC cancellation fail: 0x%08x should be 0\n",
-		       crc2);
-
-	for (i = 0; i <= len + 4; i++) {
-		crc2 = crc32_be(init, buf, i);
-		crc2 = crc32_be(crc2, buf + i, len + 4 - i);
-		if (crc2)
-			printf("\nCRC split fail: 0x%08x\n", crc2);
+		crc ^= crc32_le(test[i].crc, test_buf +
+		    test[i].start, test[i].length);
+
+		crc ^= crc32_be(test[i].crc, test_buf +
+		    test[i].start, test[i].length);
 	}
 
-	/* Now swap it around for the other test */
-
-	bytereverse(buf, len + 4);
-	init = bitrev32(init);
-	crc2 = bitrev32(crc1);
-	if (crc1 != bitrev32(crc2))
-		printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n",
-		       crc1, crc2, bitrev32(crc2));
-	crc1 = crc32_le(init, buf, len);
-	if (crc1 != crc2)
-		printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1,
-		       crc2);
-	crc2 = crc32_le(init, buf, len + 4);
-	if (crc2)
-		printf("\nCRC cancellation fail: 0x%08x should be 0\n",
-		       crc2);
-
-	for (i = 0; i <= len + 4; i++) {
-		crc2 = crc32_le(init, buf, i);
-		crc2 = crc32_le(crc2, buf + i, len + 4 - i);
-		if (crc2)
-			printf("\nCRC split fail: 0x%08x\n", crc2);
+	/* reduce OS noise */
+	local_irq_save(flags);
+	local_irq_disable();
+
+	getnstimeofday(&start);
+	for (i = 0; i < 100; i++) {
+		if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
+		    test[i].start, test[i].length))
+			errors++;
+
+		if (test[i].crc_be != crc32_be(test[i].crc, test_buf +
+		    test[i].start, test[i].length))
+			errors++;
 	}
+	getnstimeofday(&stop);
 
-	return crc1;
-}
+	local_irq_restore(flags);
+	local_irq_enable();
 
-#define SIZE 64
-#define INIT1 0
-#define INIT2 0
+	nsec = stop.tv_nsec - start.tv_nsec +
+		1000000000 * (stop.tv_sec - start.tv_sec);
 
-int main(void)
-{
-	unsigned char buf1[SIZE + 4];
-	unsigned char buf2[SIZE + 4];
-	unsigned char buf3[SIZE + 4];
-	int i, j;
-	u32 crc1, crc2, crc3;
-
-	for (i = 0; i <= SIZE; i++) {
-		printf("\rTesting length %d...", i);
-		fflush(stdout);
-		random_garbage(buf1, i);
-		random_garbage(buf2, i);
-		for (j = 0; j < i; j++)
-			buf3[j] = buf1[j] ^ buf2[j];
-
-		crc1 = test_step(INIT1, buf1, i);
-		crc2 = test_step(INIT2, buf2, i);
-		/* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */
-		crc3 = test_step(INIT1 ^ INIT2, buf3, i);
-		if (crc3 != (crc1 ^ crc2))
-			printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n",
-			       crc3, crc1, crc2);
+	pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
+		 CRC_LE_BITS, CRC_BE_BITS);
+
+	if (errors)
+		pr_warn("crc32: %d self tests failed\n", errors);
+	else {
+		pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n",
+			bytes, nsec);
 	}
-	printf("\nAll test complete.  No failures expected.\n");
+
 	return 0;
 }
 
-#endif				/* UNITTEST */
+static int __init crc32test_init(void)
+{
+	crc32_test();
+	crc32c_test();
+	return 0;
+}
+
+static void __exit crc32_exit(void)
+{
+}
+
+module_init(crc32test_init);
+module_exit(crc32_exit);
+#endif /* CONFIG_CRC32_SELFTEST */
diff --git a/lib/crc32defs.h b/lib/crc32defs.h
index 9b6773d73749..64cba2c3c700 100644
--- a/lib/crc32defs.h
+++ b/lib/crc32defs.h
@@ -6,27 +6,67 @@
 #define CRCPOLY_LE 0xedb88320
 #define CRCPOLY_BE 0x04c11db7
 
-/* How many bits at a time to use.  Requires a table of 4<<CRC_xx_BITS bytes. */
-/* For less performance-sensitive, use 4 */
-#ifndef CRC_LE_BITS 
+/*
+ * This is the CRC32c polynomial, as outlined by Castagnoli.
+ * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+
+ * x^8+x^6+x^0
+ */
+#define CRC32C_POLY_LE 0x82F63B78
+
+/* Try to choose an implementation variant via Kconfig */
+#ifdef CONFIG_CRC32_SLICEBY8
+# define CRC_LE_BITS 64
+# define CRC_BE_BITS 64
+#endif
+#ifdef CONFIG_CRC32_SLICEBY4
+# define CRC_LE_BITS 32
+# define CRC_BE_BITS 32
+#endif
+#ifdef CONFIG_CRC32_SARWATE
 # define CRC_LE_BITS 8
+# define CRC_BE_BITS 8
+#endif
+#ifdef CONFIG_CRC32_BIT
+# define CRC_LE_BITS 1
+# define CRC_BE_BITS 1
+#endif
+
+/*
+ * How many bits at a time to use.  Valid values are 1, 2, 4, 8, 32 and 64.
+ * For less performance-sensitive, use 4 or 8 to save table size.
+ * For larger systems choose same as CPU architecture as default.
+ * This works well on X86_64, SPARC64 systems. This may require some
+ * elaboration after experiments with other architectures.
+ */
+#ifndef CRC_LE_BITS
+#  ifdef CONFIG_64BIT
+#  define CRC_LE_BITS 64
+#  else
+#  define CRC_LE_BITS 32
+#  endif
 #endif
 #ifndef CRC_BE_BITS
-# define CRC_BE_BITS 8
+#  ifdef CONFIG_64BIT
+#  define CRC_BE_BITS 64
+#  else
+#  define CRC_BE_BITS 32
+#  endif
 #endif
 
 /*
  * Little-endian CRC computation.  Used with serial bit streams sent
  * lsbit-first.  Be sure to use cpu_to_le32() to append the computed CRC.
  */
-#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1
-# error CRC_LE_BITS must be a power of 2 between 1 and 8
+#if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \
+	CRC_LE_BITS & CRC_LE_BITS-1
+# error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}"
 #endif
 
 /*
  * Big-endian CRC computation.  Used with serial bit streams sent
  * msbit-first.  Be sure to use cpu_to_be32() to append the computed CRC.
  */
-#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1
-# error CRC_BE_BITS must be a power of 2 between 1 and 8
+#if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \
+	CRC_BE_BITS & CRC_BE_BITS-1
+# error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}"
 #endif
diff --git a/lib/ctype.c b/lib/ctype.c
index 26baa620e95b..c646df91a2f7 100644
--- a/lib/ctype.c
+++ b/lib/ctype.c
@@ -5,7 +5,8 @@
  */
 
 #include <linux/ctype.h>
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
 
 const unsigned char _ctype[] = {
 _C,_C,_C,_C,_C,_C,_C,_C,				/* 0-7 */
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index b1c177307677..f2fa60c59343 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -10,7 +10,7 @@
  */
 #include <linux/rwsem.h>
 #include <linux/mutex.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/debug_locks.h>
 
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 77cb245f8e7b..0ab9ae8057f0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -818,17 +818,9 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state)
 		if (obj->static_init == 1) {
 			debug_object_init(obj, &descr_type_test);
 			debug_object_activate(obj, &descr_type_test);
-			/*
-			 * Real code should return 0 here ! This is
-			 * not a fixup of some bad behaviour. We
-			 * merily call the debug_init function to keep
-			 * track of the object.
-			 */
-			return 1;
-		} else {
-			/* Real code needs to emit a warning here */
+			return 0;
 		}
-		return 0;
+		return 1;
 
 	case ODEBUG_STATE_ACTIVE:
 		debug_object_deactivate(obj, &descr_type_test);
@@ -967,7 +959,7 @@ static void __init debug_objects_selftest(void)
 
 	obj.static_init = 1;
 	debug_object_activate(&obj, &descr_type_test);
-	if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings))
+	if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
 		goto out;
 	debug_object_init(&obj, &descr_type_test);
 	if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index b5257725daad..e26278576b31 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index a7b80c1d6a0d..31c5f7675fbf 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -1,4 +1,3 @@
-/* vi: set sw = 4 ts = 4: */
 /*	Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
 
 	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
@@ -691,7 +690,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 		outbuf = malloc(BZIP2_IOBUF_SIZE);
 
 	if (!outbuf) {
-		error("Could not allocate output bufer");
+		error("Could not allocate output buffer");
 		return RETVAL_OUT_OF_MEMORY;
 	}
 	if (buf)
@@ -699,7 +698,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 	else
 		inbuf = malloc(BZIP2_IOBUF_SIZE);
 	if (!inbuf) {
-		error("Could not allocate input bufer");
+		error("Could not allocate input buffer");
 		i = RETVAL_OUT_OF_MEMORY;
 		goto exit_0;
 	}
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 476c65af9709..32adb73a9038 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -562,7 +562,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
 	else
 		inbuf = malloc(LZMA_IOBUF_SIZE);
 	if (!inbuf) {
-		error("Could not allocate input bufer");
+		error("Could not allocate input buffer");
 		goto exit_0;
 	}
 
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 5a7a2adf4c4c..4531294fa62f 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -279,7 +279,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
 	ret = 0;
 exit_2:
 	if (!input)
-		free(in_buf);
+		free(in_buf_save);
 exit_1:
 	if (!output)
 		free(out_buf);
diff --git a/lib/devres.c b/lib/devres.c
index 4fbc09e6e9e6..80b9c76d436a 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,7 +1,7 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 void devm_ioremap_release(struct device *dev, void *res)
 {
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(pcim_iounmap);
  *
  * Request and iomap regions specified by @mask.
  */
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name)
 {
 	void __iomem * const *iomap;
 	int i, rc;
@@ -357,7 +357,7 @@ EXPORT_SYMBOL(pcim_iomap_regions);
  *
  * Request all PCI BARs and iomap regions specified by @mask.
  */
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name)
 {
 	int request_mask = ((1 << 6) - 1) & ~mask;
@@ -381,7 +381,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
  *
  * Unmap and release regions specified by @mask.
  */
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
 {
 	void __iomem * const *iomap;
 	int i;
diff --git a/lib/digsig.c b/lib/digsig.c
new file mode 100644
index 000000000000..286d558033e2
--- /dev/null
+++ b/lib/digsig.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *                 <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * File: sign.c
+ *	implements signature (RSA) verification
+ *	pkcs decoding is based on LibTomCrypt code
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <linux/mpi.h>
+#include <linux/digsig.h>
+
+static struct crypto_shash *shash;
+
+static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
+			unsigned long  msglen,
+			unsigned long  modulus_bitlen,
+			unsigned char *out,
+			unsigned long *outlen)
+{
+	unsigned long modulus_len, ps_len, i;
+
+	modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0);
+
+	/* test message size */
+	if ((msglen > modulus_len) || (modulus_len < 11))
+		return -EINVAL;
+
+	/* separate encoded message */
+	if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1))
+		return -EINVAL;
+
+	for (i = 2; i < modulus_len - 1; i++)
+		if (msg[i] != 0xFF)
+			break;
+
+	/* separator check */
+	if (msg[i] != 0)
+		/* There was no octet with hexadecimal value 0x00
+		to separate ps from m. */
+		return -EINVAL;
+
+	ps_len = i - 2;
+
+	if (*outlen < (msglen - (2 + ps_len + 1))) {
+		*outlen = msglen - (2 + ps_len + 1);
+		return -EOVERFLOW;
+	}
+
+	*outlen = (msglen - (2 + ps_len + 1));
+	memcpy(out, &msg[2 + ps_len + 1], *outlen);
+
+	return 0;
+}
+
+/*
+ * RSA Signature verification with public key
+ */
+static int digsig_verify_rsa(struct key *key,
+		    const char *sig, int siglen,
+		       const char *h, int hlen)
+{
+	int err = -EINVAL;
+	unsigned long len;
+	unsigned long mlen, mblen;
+	unsigned nret, l;
+	int head, i;
+	unsigned char *out1 = NULL, *out2 = NULL;
+	MPI in = NULL, res = NULL, pkey[2];
+	uint8_t *p, *datap, *endp;
+	struct user_key_payload *ukp;
+	struct pubkey_hdr *pkh;
+
+	down_read(&key->sem);
+	ukp = key->payload.data;
+
+	if (ukp->datalen < sizeof(*pkh))
+		goto err1;
+
+	pkh = (struct pubkey_hdr *)ukp->data;
+
+	if (pkh->version != 1)
+		goto err1;
+
+	if (pkh->algo != PUBKEY_ALGO_RSA)
+		goto err1;
+
+	if (pkh->nmpi != 2)
+		goto err1;
+
+	datap = pkh->mpi;
+	endp = ukp->data + ukp->datalen;
+
+	err = -ENOMEM;
+
+	for (i = 0; i < pkh->nmpi; i++) {
+		unsigned int remaining = endp - datap;
+		pkey[i] = mpi_read_from_buffer(datap, &remaining);
+		if (!pkey[i])
+			goto err;
+		datap += remaining;
+	}
+
+	mblen = mpi_get_nbits(pkey[0]);
+	mlen = (mblen + 7)/8;
+
+	if (mlen == 0)
+		goto err;
+
+	out1 = kzalloc(mlen, GFP_KERNEL);
+	if (!out1)
+		goto err;
+
+	out2 = kzalloc(mlen, GFP_KERNEL);
+	if (!out2)
+		goto err;
+
+	nret = siglen;
+	in = mpi_read_from_buffer(sig, &nret);
+	if (!in)
+		goto err;
+
+	res = mpi_alloc(mpi_get_nlimbs(in) * 2);
+	if (!res)
+		goto err;
+
+	err = mpi_powm(res, in, pkey[1], pkey[0]);
+	if (err)
+		goto err;
+
+	if (mpi_get_nlimbs(res) * BYTES_PER_MPI_LIMB > mlen) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	p = mpi_get_buffer(res, &l, NULL);
+	if (!p) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	len = mlen;
+	head = len - l;
+	memset(out1, 0, head);
+	memcpy(out1 + head, p, l);
+
+	err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len);
+
+	if (!err && len == hlen)
+		err = memcmp(out2, h, hlen);
+
+err:
+	mpi_free(in);
+	mpi_free(res);
+	kfree(out1);
+	kfree(out2);
+	while (--i >= 0)
+		mpi_free(pkey[i]);
+err1:
+	up_read(&key->sem);
+
+	return err;
+}
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring:	keyring to search key in
+ * @sig:	digital signature
+ * @sigen:	length of the signature
+ * @data:	data
+ * @datalen:	length of the data
+ * @return:	0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+						const char *data, int datalen)
+{
+	int err = -ENOMEM;
+	struct signature_hdr *sh = (struct signature_hdr *)sig;
+	struct shash_desc *desc = NULL;
+	unsigned char hash[SHA1_DIGEST_SIZE];
+	struct key *key;
+	char name[20];
+
+	if (siglen < sizeof(*sh) + 2)
+		return -EINVAL;
+
+	if (sh->algo != PUBKEY_ALGO_RSA)
+		return -ENOTSUPP;
+
+	sprintf(name, "%llX", __be64_to_cpup((uint64_t *)sh->keyid));
+
+	if (keyring) {
+		/* search in specific keyring */
+		key_ref_t kref;
+		kref = keyring_search(make_key_ref(keyring, 1UL),
+						&key_type_user, name);
+		if (IS_ERR(kref))
+			key = ERR_PTR(PTR_ERR(kref));
+		else
+			key = key_ref_to_ptr(kref);
+	} else {
+		key = request_key(&key_type_user, name, NULL);
+	}
+	if (IS_ERR(key)) {
+		pr_err("key not found, id: %s\n", name);
+		return PTR_ERR(key);
+	}
+
+	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash),
+		       GFP_KERNEL);
+	if (!desc)
+		goto err;
+
+	desc->tfm = shash;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	crypto_shash_init(desc);
+	crypto_shash_update(desc, data, datalen);
+	crypto_shash_update(desc, sig, sizeof(*sh));
+	crypto_shash_final(desc, hash);
+
+	kfree(desc);
+
+	/* pass signature mpis address */
+	err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh),
+			     hash, sizeof(hash));
+
+err:
+	key_put(key);
+
+	return err ? -EINVAL : 0;
+}
+EXPORT_SYMBOL_GPL(digsig_verify);
+
+static int __init digsig_init(void)
+{
+	shash = crypto_alloc_shash("sha1", 0, 0);
+	if (IS_ERR(shash)) {
+		pr_err("shash allocation failed\n");
+		return  PTR_ERR(shash);
+	}
+
+	return 0;
+
+}
+
+static void __exit digsig_cleanup(void)
+{
+	crypto_free_shash(shash);
+}
+
+module_init(digsig_init);
+module_exit(digsig_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/div64.c b/lib/div64.c
index 5b4919191778..3ea24907d52e 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -16,7 +16,8 @@
  * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/math64.h>
 
 /* Not needed on 64bit architectures */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fea790a2b176..13ef2338be41 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -170,7 +170,7 @@ static bool driver_filter(struct device *dev)
 		return false;
 
 	/* driver filter on but not yet initialized */
-	drv = get_driver(dev->driver);
+	drv = dev->driver;
 	if (!drv)
 		return false;
 
@@ -185,7 +185,6 @@ static bool driver_filter(struct device *dev)
 	}
 
 	read_unlock_irqrestore(&driver_name_lock, flags);
-	put_driver(drv);
 
 	return ret;
 }
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 53bff4c8452b..42f4f55c9458 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -4,7 +4,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 void dump_stack(void)
 {
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index dcdade39e47f..310c753cf83e 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -60,6 +60,7 @@ struct ddebug_iter {
 static DEFINE_MUTEX(ddebug_lock);
 static LIST_HEAD(ddebug_tables);
 static int verbose = 0;
+module_param(verbose, int, 0644);
 
 /* Return the last part of a pathname */
 static inline const char *basename(const char *path)
@@ -68,12 +69,24 @@ static inline const char *basename(const char *path)
 	return tail ? tail+1 : path;
 }
 
+/* Return the path relative to source root */
+static inline const char *trim_prefix(const char *path)
+{
+	int skip = strlen(__FILE__) - strlen("lib/dynamic_debug.c");
+
+	if (strncmp(path, __FILE__, skip))
+		skip = 0; /* prefix mismatch, don't skip */
+
+	return path + skip;
+}
+
 static struct { unsigned flag:8; char opt_char; } opt_array[] = {
 	{ _DPRINTK_FLAGS_PRINT, 'p' },
 	{ _DPRINTK_FLAGS_INCL_MODNAME, 'm' },
 	{ _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' },
 	{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
 	{ _DPRINTK_FLAGS_INCL_TID, 't' },
+	{ _DPRINTK_FLAGS_NONE, '_' },
 };
 
 /* format a string into buf[] which describes the _ddebug's flags */
@@ -83,58 +96,74 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
 	char *p = buf;
 	int i;
 
-	BUG_ON(maxlen < 4);
+	BUG_ON(maxlen < 6);
 	for (i = 0; i < ARRAY_SIZE(opt_array); ++i)
 		if (dp->flags & opt_array[i].flag)
 			*p++ = opt_array[i].opt_char;
 	if (p == buf)
-		*p++ = '-';
+		*p++ = '_';
 	*p = '\0';
 
 	return buf;
 }
 
+#define vpr_info_dq(q, msg)						\
+do {									\
+	if (verbose)							\
+		/* trim last char off format print */			\
+		pr_info("%s: func=\"%s\" file=\"%s\" "			\
+			"module=\"%s\" format=\"%.*s\" "		\
+			"lineno=%u-%u",					\
+			msg,						\
+			q->function ? q->function : "",			\
+			q->filename ? q->filename : "",			\
+			q->module ? q->module : "",			\
+			(int)(q->format ? strlen(q->format) - 1 : 0),	\
+			q->format ? q->format : "",			\
+			q->first_lineno, q->last_lineno);		\
+} while (0)
+
 /*
- * Search the tables for _ddebug's which match the given
- * `query' and apply the `flags' and `mask' to them.  Tells
- * the user which ddebug's were changed, or whether none
- * were matched.
+ * Search the tables for _ddebug's which match the given `query' and
+ * apply the `flags' and `mask' to them.  Returns number of matching
+ * callsites, normally the same as number of changes.  If verbose,
+ * logs the changes.  Takes ddebug_lock.
  */
-static void ddebug_change(const struct ddebug_query *query,
-			   unsigned int flags, unsigned int mask)
+static int ddebug_change(const struct ddebug_query *query,
+			unsigned int flags, unsigned int mask)
 {
 	int i;
 	struct ddebug_table *dt;
 	unsigned int newflags;
 	unsigned int nfound = 0;
-	char flagbuf[8];
+	char flagbuf[10];
 
 	/* search for matching ddebugs */
 	mutex_lock(&ddebug_lock);
 	list_for_each_entry(dt, &ddebug_tables, link) {
 
 		/* match against the module name */
-		if (query->module != NULL &&
-		    strcmp(query->module, dt->mod_name))
+		if (query->module && strcmp(query->module, dt->mod_name))
 			continue;
 
 		for (i = 0 ; i < dt->num_ddebugs ; i++) {
 			struct _ddebug *dp = &dt->ddebugs[i];
 
 			/* match against the source filename */
-			if (query->filename != NULL &&
+			if (query->filename &&
 			    strcmp(query->filename, dp->filename) &&
-			    strcmp(query->filename, basename(dp->filename)))
+			    strcmp(query->filename, basename(dp->filename)) &&
+			    strcmp(query->filename, trim_prefix(dp->filename)))
 				continue;
 
 			/* match against the function */
-			if (query->function != NULL &&
+			if (query->function &&
 			    strcmp(query->function, dp->function))
 				continue;
 
 			/* match against the format */
-			if (query->format != NULL &&
-			    strstr(dp->format, query->format) == NULL)
+			if (query->format &&
+			    !strstr(dp->format, query->format))
 				continue;
 
 			/* match against the line number range */
@@ -151,13 +180,9 @@ static void ddebug_change(const struct ddebug_query *query,
 			if (newflags == dp->flags)
 				continue;
 			dp->flags = newflags;
-			if (newflags)
-				dp->enabled = 1;
-			else
-				dp->enabled = 0;
 			if (verbose)
-				pr_info("changed %s:%d [%s]%s %s\n",
-					dp->filename, dp->lineno,
+				pr_info("changed %s:%d [%s]%s =%s\n",
+					trim_prefix(dp->filename), dp->lineno,
 					dt->mod_name, dp->function,
 					ddebug_describe_flags(dp, flagbuf,
 							sizeof(flagbuf)));
@@ -167,6 +192,8 @@ static void ddebug_change(const struct ddebug_query *query,
 
 	if (!nfound && verbose)
 		pr_info("no matches for query\n");
+
+	return nfound;
 }
 
 /*
@@ -186,8 +213,10 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
 		buf = skip_spaces(buf);
 		if (!*buf)
 			break;	/* oh, it was trailing whitespace */
+		if (*buf == '#')
+			break;	/* token starts comment, skip rest of line */
 
-		/* Run `end' over a word, either whitespace separated or quoted */
+		/* find `end' of word, whitespace separated or quoted */
 		if (*buf == '"' || *buf == '\'') {
 			int quote = *buf++;
 			for (end = buf ; *end && *end != quote ; end++)
@@ -199,8 +228,8 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
 				;
 			BUG_ON(end == buf);
 		}
-		/* Here `buf' is the start of the word, `end' is one past the end */
 
+		/* `buf' is start of word, `end' is one past its end */
 		if (nwords == maxwords)
 			return -EINVAL;	/* ran out of words[] before bytes */
 		if (*end)
@@ -279,6 +308,19 @@ static char *unescape(char *str)
 	return str;
 }
 
+static int check_set(const char **dest, char *src, char *name)
+{
+	int rc = 0;
+
+	if (*dest) {
+		rc = -EINVAL;
+		pr_err("match-spec:%s val:%s overridden by %s",
+			name, *dest, src);
+	}
+	*dest = src;
+	return rc;
+}
+
 /*
  * Parse words[] as a ddebug query specification, which is a series
  * of (keyword, value) pairs chosen from these possibilities:
@@ -290,11 +332,15 @@ static char *unescape(char *str)
  * format <escaped-string-to-find-in-format>
  * line <lineno>
  * line <first-lineno>-<last-lineno> // where either may be empty
+ *
+ * Only 1 of each type is allowed.
+ * Returns 0 on success, <0 on error.
  */
 static int ddebug_parse_query(char *words[], int nwords,
 			       struct ddebug_query *query)
 {
 	unsigned int i;
+	int rc;
 
 	/* check we have an even number of words */
 	if (nwords % 2 != 0)
@@ -303,41 +349,43 @@ static int ddebug_parse_query(char *words[], int nwords,
 
 	for (i = 0 ; i < nwords ; i += 2) {
 		if (!strcmp(words[i], "func"))
-			query->function = words[i+1];
+			rc = check_set(&query->function, words[i+1], "func");
 		else if (!strcmp(words[i], "file"))
-			query->filename = words[i+1];
+			rc = check_set(&query->filename, words[i+1], "file");
 		else if (!strcmp(words[i], "module"))
-			query->module = words[i+1];
+			rc = check_set(&query->module, words[i+1], "module");
 		else if (!strcmp(words[i], "format"))
-			query->format = unescape(words[i+1]);
+			rc = check_set(&query->format, unescape(words[i+1]),
+				"format");
 		else if (!strcmp(words[i], "line")) {
 			char *first = words[i+1];
 			char *last = strchr(first, '-');
+			if (query->first_lineno || query->last_lineno) {
+				pr_err("match-spec:line given 2 times\n");
+				return -EINVAL;
+			}
 			if (last)
 				*last++ = '\0';
 			if (parse_lineno(first, &query->first_lineno) < 0)
 				return -EINVAL;
-			if (last != NULL) {
+			if (last) {
 				/* range <first>-<last> */
-				if (parse_lineno(last, &query->last_lineno) < 0)
+				if (parse_lineno(last, &query->last_lineno)
+				    < query->first_lineno) {
+					pr_err("last-line < 1st-line\n");
 					return -EINVAL;
+				}
 			} else {
 				query->last_lineno = query->first_lineno;
 			}
 		} else {
-			if (verbose)
-				pr_err("unknown keyword \"%s\"\n", words[i]);
+			pr_err("unknown keyword \"%s\"\n", words[i]);
 			return -EINVAL;
 		}
+		if (rc)
+			return rc;
 	}
-
-	if (verbose)
-		pr_info("q->function=\"%s\" q->filename=\"%s\" "
-			"q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n",
-			query->function, query->filename,
-			query->module, query->format, query->first_lineno,
-			query->last_lineno);
-
+	vpr_info_dq(query, "parsed");
 	return 0;
 }
 
@@ -375,8 +423,6 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 		if (i < 0)
 			return -EINVAL;
 	}
-	if (flags == 0)
-		return -EINVAL;
 	if (verbose)
 		pr_info("flags=0x%x\n", flags);
 
@@ -405,7 +451,7 @@ static int ddebug_exec_query(char *query_string)
 	unsigned int flags = 0, mask = 0;
 	struct ddebug_query query;
 #define MAXWORDS 9
-	int nwords;
+	int nwords, nfound;
 	char *words[MAXWORDS];
 
 	nwords = ddebug_tokenize(query_string, words, MAXWORDS);
@@ -417,8 +463,47 @@ static int ddebug_exec_query(char *query_string)
 		return -EINVAL;
 
 	/* actually go and implement the change */
-	ddebug_change(&query, flags, mask);
-	return 0;
+	nfound = ddebug_change(&query, flags, mask);
+	vpr_info_dq((&query), (nfound) ? "applied" : "no-match");
+
+	return nfound;
+}
+
+/* handle multiple queries in query string, continue on error, return
+   last error or number of matching callsites.  Module name is either
+   in param (for boot arg) or perhaps in query string.
+*/
+static int ddebug_exec_queries(char *query)
+{
+	char *split;
+	int i, errs = 0, exitcode = 0, rc, nfound = 0;
+
+	for (i = 0; query; query = split) {
+		split = strpbrk(query, ";\n");
+		if (split)
+			*split++ = '\0';
+
+		query = skip_spaces(query);
+		if (!query || !*query || *query == '#')
+			continue;
+
+		if (verbose)
+			pr_info("query %d: \"%s\"\n", i, query);
+
+		rc = ddebug_exec_query(query);
+		if (rc < 0) {
+			errs++;
+			exitcode = rc;
+		} else
+			nfound += rc;
+		i++;
+	}
+	pr_info("processed %d queries, with %d matches, %d errs\n",
+		 i, nfound, errs);
+
+	if (exitcode)
+		return exitcode;
+	return nfound;
 }
 
 #define PREFIX_SIZE 64
@@ -452,7 +537,8 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 		pos += snprintf(buf + pos, remaining(pos), "%s:",
 					desc->function);
 	if (desc->flags & _DPRINTK_FLAGS_INCL_LINENO)
-		pos += snprintf(buf + pos, remaining(pos), "%d:", desc->lineno);
+		pos += snprintf(buf + pos, remaining(pos), "%d:",
+					desc->lineno);
 	if (pos - pos_after_tid)
 		pos += snprintf(buf + pos, remaining(pos), " ");
 	if (pos >= PREFIX_SIZE)
@@ -527,14 +613,16 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg);
 
 #endif
 
-static __initdata char ddebug_setup_string[1024];
+#define DDEBUG_STRING_SIZE 1024
+static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE];
+
 static __init int ddebug_setup_query(char *str)
 {
-	if (strlen(str) >= 1024) {
+	if (strlen(str) >= DDEBUG_STRING_SIZE) {
 		pr_warn("ddebug boot param string too large\n");
 		return 0;
 	}
-	strcpy(ddebug_setup_string, str);
+	strlcpy(ddebug_setup_string, str, DDEBUG_STRING_SIZE);
 	return 1;
 }
 
@@ -544,25 +632,33 @@ __setup("ddebug_query=", ddebug_setup_query);
  * File_ops->write method for <debugfs>/dynamic_debug/conrol.  Gathers the
  * command text from userspace, parses and executes it.
  */
+#define USER_BUF_PAGE 4096
 static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
 				  size_t len, loff_t *offp)
 {
-	char tmpbuf[256];
+	char *tmpbuf;
 	int ret;
 
 	if (len == 0)
 		return 0;
-	/* we don't check *offp -- multiple writes() are allowed */
-	if (len > sizeof(tmpbuf)-1)
+	if (len > USER_BUF_PAGE - 1) {
+		pr_warn("expected <%d bytes into control\n", USER_BUF_PAGE);
 		return -E2BIG;
-	if (copy_from_user(tmpbuf, ubuf, len))
+	}
+	tmpbuf = kmalloc(len + 1, GFP_KERNEL);
+	if (!tmpbuf)
+		return -ENOMEM;
+	if (copy_from_user(tmpbuf, ubuf, len)) {
+		kfree(tmpbuf);
 		return -EFAULT;
+	}
 	tmpbuf[len] = '\0';
 	if (verbose)
 		pr_info("read %d bytes from userspace\n", (int)len);
 
-	ret = ddebug_exec_query(tmpbuf);
-	if (ret)
+	ret = ddebug_exec_queries(tmpbuf);
+	kfree(tmpbuf);
+	if (ret < 0)
 		return ret;
 
 	*offp += len;
@@ -668,7 +764,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
 {
 	struct ddebug_iter *iter = m->private;
 	struct _ddebug *dp = p;
-	char flagsbuf[8];
+	char flagsbuf[10];
 
 	if (verbose)
 		pr_info("called m=%p p=%p\n", m, p);
@@ -679,10 +775,10 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
 		return 0;
 	}
 
-	seq_printf(m, "%s:%u [%s]%s %s \"",
-		   dp->filename, dp->lineno,
-		   iter->table->mod_name, dp->function,
-		   ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
+	seq_printf(m, "%s:%u [%s]%s =%s \"",
+		trim_prefix(dp->filename), dp->lineno,
+		iter->table->mod_name, dp->function,
+		ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
 	seq_escape(m, dp->format, "\t\r\n\"");
 	seq_puts(m, "\"\n");
 
@@ -708,10 +804,11 @@ static const struct seq_operations ddebug_proc_seqops = {
 };
 
 /*
- * File_ops->open method for <debugfs>/dynamic_debug/control.  Does the seq_file
- * setup dance, and also creates an iterator to walk the _ddebugs.
- * Note that we create a seq_file always, even for O_WRONLY files
- * where it's not needed, as doing so simplifies the ->release method.
+ * File_ops->open method for <debugfs>/dynamic_debug/control.  Does
+ * the seq_file setup dance, and also creates an iterator to walk the
+ * _ddebugs.  Note that we create a seq_file always, even for O_WRONLY
+ * files where it's not needed, as doing so simplifies the ->release
+ * method.
  */
 static int ddebug_proc_open(struct inode *inode, struct file *file)
 {
@@ -846,33 +943,40 @@ static int __init dynamic_debug_init(void)
 	int ret = 0;
 	int n = 0;
 
-	if (__start___verbose != __stop___verbose) {
-		iter = __start___verbose;
-		modname = iter->modname;
-		iter_start = iter;
-		for (; iter < __stop___verbose; iter++) {
-			if (strcmp(modname, iter->modname)) {
-				ret = ddebug_add_module(iter_start, n, modname);
-				if (ret)
-					goto out_free;
-				n = 0;
-				modname = iter->modname;
-				iter_start = iter;
-			}
-			n++;
+	if (__start___verbose == __stop___verbose) {
+		pr_warn("_ddebug table is empty in a "
+			"CONFIG_DYNAMIC_DEBUG build");
+		return 1;
+	}
+	iter = __start___verbose;
+	modname = iter->modname;
+	iter_start = iter;
+	for (; iter < __stop___verbose; iter++) {
+		if (strcmp(modname, iter->modname)) {
+			ret = ddebug_add_module(iter_start, n, modname);
+			if (ret)
+				goto out_free;
+			n = 0;
+			modname = iter->modname;
+			iter_start = iter;
 		}
-		ret = ddebug_add_module(iter_start, n, modname);
+		n++;
 	}
+	ret = ddebug_add_module(iter_start, n, modname);
+	if (ret)
+		goto out_free;
 
 	/* ddebug_query boot param got passed -> set it up */
 	if (ddebug_setup_string[0] != '\0') {
-		ret = ddebug_exec_query(ddebug_setup_string);
-		if (ret)
+		ret = ddebug_exec_queries(ddebug_setup_string);
+		if (ret < 0)
 			pr_warn("Invalid ddebug boot param %s",
 				ddebug_setup_string);
 		else
-			pr_info("ddebug initialized with string %s",
-				ddebug_setup_string);
+			pr_info("%d changes by ddebug_query\n", ret);
+
+		/* keep tables even on ddebug_query parse error */
+		ret = 0;
 	}
 
 out_free:
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index 3d1bdcdd7db4..6ab4587d052b 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
 #include <linux/dynamic_queue_limits.h>
 
 #define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index b4801f51b607..6805453c18e7 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -5,7 +5,7 @@
 #include <linux/stat.h>
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/stacktrace.h>
 #include <linux/fault-inject.h>
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index d903959ad695..91ca09fbf6f9 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/bitops.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
 
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 4bd75a73ba00..0cbfc0b4398f 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/bitops.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/types.h>
 #include <asm/byteorder.h>
 
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 9b8b89458c4c..6948a6692fc4 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -23,7 +23,7 @@
 #include <linux/flex_array.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/reciprocal_div.h>
 
 struct flex_array_part {
diff --git a/lib/gcd.c b/lib/gcd.c
index f879033d9822..cce4f3cd14b3 100644
--- a/lib/gcd.c
+++ b/lib/gcd.c
@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/gcd.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /* Greatest common divisor */
 unsigned long gcd(unsigned long a, unsigned long b)
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 85d0e412a04f..8f8d5439e2d9 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -1,14 +1,29 @@
 #include <stdio.h>
+#include "../include/generated/autoconf.h"
 #include "crc32defs.h"
 #include <inttypes.h>
 
 #define ENTRIES_PER_LINE 4
 
-#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
-#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
+#if CRC_LE_BITS > 8
+# define LE_TABLE_ROWS (CRC_LE_BITS/8)
+# define LE_TABLE_SIZE 256
+#else
+# define LE_TABLE_ROWS 1
+# define LE_TABLE_SIZE (1 << CRC_LE_BITS)
+#endif
 
-static uint32_t crc32table_le[4][LE_TABLE_SIZE];
-static uint32_t crc32table_be[4][BE_TABLE_SIZE];
+#if CRC_BE_BITS > 8
+# define BE_TABLE_ROWS (CRC_BE_BITS/8)
+# define BE_TABLE_SIZE 256
+#else
+# define BE_TABLE_ROWS 1
+# define BE_TABLE_SIZE (1 << CRC_BE_BITS)
+#endif
+
+static uint32_t crc32table_le[LE_TABLE_ROWS][256];
+static uint32_t crc32table_be[BE_TABLE_ROWS][256];
+static uint32_t crc32ctable_le[LE_TABLE_ROWS][256];
 
 /**
  * crc32init_le() - allocate and initialize LE table data
@@ -17,27 +32,38 @@ static uint32_t crc32table_be[4][BE_TABLE_SIZE];
  * fact that crctable[i^j] = crctable[i] ^ crctable[j].
  *
  */
-static void crc32init_le(void)
+static void crc32init_le_generic(const uint32_t polynomial,
+				 uint32_t (*tab)[256])
 {
 	unsigned i, j;
 	uint32_t crc = 1;
 
-	crc32table_le[0][0] = 0;
+	tab[0][0] = 0;
 
-	for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
-		crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+	for (i = LE_TABLE_SIZE >> 1; i; i >>= 1) {
+		crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
 		for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
-			crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
+			tab[0][i + j] = crc ^ tab[0][j];
 	}
 	for (i = 0; i < LE_TABLE_SIZE; i++) {
-		crc = crc32table_le[0][i];
-		for (j = 1; j < 4; j++) {
-			crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
-			crc32table_le[j][i] = crc;
+		crc = tab[0][i];
+		for (j = 1; j < LE_TABLE_ROWS; j++) {
+			crc = tab[0][crc & 0xff] ^ (crc >> 8);
+			tab[j][i] = crc;
 		}
 	}
 }
 
+static void crc32init_le(void)
+{
+	crc32init_le_generic(CRCPOLY_LE, crc32table_le);
+}
+
+static void crc32cinit_le(void)
+{
+	crc32init_le_generic(CRC32C_POLY_LE, crc32ctable_le);
+}
+
 /**
  * crc32init_be() - allocate and initialize BE table data
  */
@@ -55,18 +81,18 @@ static void crc32init_be(void)
 	}
 	for (i = 0; i < BE_TABLE_SIZE; i++) {
 		crc = crc32table_be[0][i];
-		for (j = 1; j < 4; j++) {
+		for (j = 1; j < BE_TABLE_ROWS; j++) {
 			crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
 			crc32table_be[j][i] = crc;
 		}
 	}
 }
 
-static void output_table(uint32_t table[4][256], int len, char *trans)
+static void output_table(uint32_t (*table)[256], int rows, int len, char *trans)
 {
 	int i, j;
 
-	for (j = 0 ; j < 4; j++) {
+	for (j = 0 ; j < rows; j++) {
 		printf("{");
 		for (i = 0; i < len - 1; i++) {
 			if (i % ENTRIES_PER_LINE == 0)
@@ -83,15 +109,30 @@ int main(int argc, char** argv)
 
 	if (CRC_LE_BITS > 1) {
 		crc32init_le();
-		printf("static const u32 crc32table_le[4][256] = {");
-		output_table(crc32table_le, LE_TABLE_SIZE, "tole");
+		printf("static const u32 __cacheline_aligned "
+		       "crc32table_le[%d][%d] = {",
+		       LE_TABLE_ROWS, LE_TABLE_SIZE);
+		output_table(crc32table_le, LE_TABLE_ROWS,
+			     LE_TABLE_SIZE, "tole");
 		printf("};\n");
 	}
 
 	if (CRC_BE_BITS > 1) {
 		crc32init_be();
-		printf("static const u32 crc32table_be[4][256] = {");
-		output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
+		printf("static const u32 __cacheline_aligned "
+		       "crc32table_be[%d][%d] = {",
+		       BE_TABLE_ROWS, BE_TABLE_SIZE);
+		output_table(crc32table_be, LE_TABLE_ROWS,
+			     BE_TABLE_SIZE, "tobe");
+		printf("};\n");
+	}
+	if (CRC_LE_BITS > 1) {
+		crc32cinit_le();
+		printf("static const u32 __cacheline_aligned "
+		       "crc32ctable_le[%d][%d] = {",
+		       LE_TABLE_ROWS, LE_TABLE_SIZE);
+		output_table(crc32ctable_le, LE_TABLE_ROWS,
+			     LE_TABLE_SIZE, "tole");
 		printf("};\n");
 	}
 
diff --git a/lib/genalloc.c b/lib/genalloc.c
index f352cc42f4f8..6bc04aab6ec7 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -29,7 +29,7 @@
  */
 
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bitmap.h>
 #include <linux/rculist.h>
 #include <linux/interrupt.h>
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
index e11db26f8ae5..66d0ee8b7776 100644
--- a/lib/halfmd4.c
+++ b/lib/halfmd4.c
@@ -1,5 +1,5 @@
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cryptohash.h>
 
 /* F, G and H are basic MD4 functions: selection, majority, parity */
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 51d5ae210244..6540d657dca4 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -10,7 +10,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 const char hex_asc[] = "0123456789abcdef";
 EXPORT_SYMBOL(hex_asc);
diff --git a/lib/hweight.c b/lib/hweight.c
index 3c79d50814cf..b7d81ba143d1 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bitops.h>
 #include <asm/types.h>
 
diff --git a/lib/idr.c b/lib/idr.c
index ed055b297c81..4046e29c0a99 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -29,7 +29,7 @@
 #ifndef TEST                        // to test in user space...
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #endif
 #include <linux/err.h>
 #include <linux/string.h>
@@ -595,8 +595,10 @@ EXPORT_SYMBOL(idr_for_each);
  * Returns pointer to registered object with id, which is next number to
  * given id. After being looked up, *@nextidp will be updated for the next
  * iteration.
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
  */
-
 void *idr_get_next(struct idr *idp, int *nextidp)
 {
 	struct idr_layer *p, *pa[MAX_LEVEL];
@@ -605,11 +607,11 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	int n, max;
 
 	/* find first ent */
-	n = idp->layers * IDR_BITS;
-	max = 1 << n;
 	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
+	n = (p->layer + 1) * IDR_BITS;
+	max = 1 << n;
 
 	while (id < max) {
 		while (n > 0 && p) {
diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
index fd355a99327c..fc2eeb7cb2ea 100644
--- a/lib/int_sqrt.c
+++ b/lib/int_sqrt.c
@@ -1,6 +1,6 @@
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /**
  * int_sqrt - rough approximation to sqrt
diff --git a/lib/iomap.c b/lib/iomap.c
index 5dbcb4b2d864..2c08f36862eb 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -6,7 +6,7 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * Read/write from/to an (offsettable) iomem cookie. It might be a PIO
@@ -242,45 +242,11 @@ EXPORT_SYMBOL(ioport_unmap);
 #endif /* CONFIG_HAS_IOPORT */
 
 #ifdef CONFIG_PCI
-/**
- * pci_iomap - create a virtual mapping cookie for a PCI BAR
- * @dev: PCI device that owns the BAR
- * @bar: BAR number
- * @maxlen: length of the memory to map
- *
- * Using this function you will get a __iomem address to your device BAR.
- * You can access it using ioread*() and iowrite*(). These functions hide
- * the details if this is a MMIO or PIO address space and will just do what
- * you expect from them in the correct way.
- *
- * @maxlen specifies the maximum length to map. If you want to get access to
- * the complete BAR without checking for its length first, pass %0 here.
- * */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
-{
-	resource_size_t start = pci_resource_start(dev, bar);
-	resource_size_t len = pci_resource_len(dev, bar);
-	unsigned long flags = pci_resource_flags(dev, bar);
-
-	if (!len || !start)
-		return NULL;
-	if (maxlen && len > maxlen)
-		len = maxlen;
-	if (flags & IORESOURCE_IO)
-		return ioport_map(start, len);
-	if (flags & IORESOURCE_MEM) {
-		if (flags & IORESOURCE_CACHEABLE)
-			return ioremap(start, len);
-		return ioremap_nocache(start, len);
-	}
-	/* What? */
-	return NULL;
-}
-
+/* Hide the details if this is a MMIO or PIO address space and just do what
+ * you expect in the correct way. */
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
 	IO_COND(addr, /* nothing */, iounmap(addr));
 }
-EXPORT_SYMBOL(pci_iomap);
 EXPORT_SYMBOL(pci_iounmap);
 #endif /* CONFIG_PCI */
diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c
index 864fc5ea398c..4527e751b5e0 100644
--- a/lib/iomap_copy.c
+++ b/lib/iomap_copy.c
@@ -15,7 +15,7 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/io.h>
 
 /**
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index da053313ee5c..c27e269210c4 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -2,8 +2,9 @@
  * IOMMU helper functions for the free area management
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bitmap.h>
+#include <linux/bug.h>
 
 int iommu_is_span_boundary(unsigned int index, unsigned int nr,
 			   unsigned long shift,
diff --git a/lib/ioremap.c b/lib/ioremap.c
index da4e2ad74b68..0c9216c48762 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -9,7 +9,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/io.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
diff --git a/lib/irq_regs.c b/lib/irq_regs.c
index 753880a5440c..9c0a1d70fbe8 100644
--- a/lib/irq_regs.c
+++ b/lib/irq_regs.c
@@ -8,7 +8,8 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
 #include <asm/irq_regs.h>
 
 #ifndef ARCH_HAS_OWN_IRQ_REGS
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index 9c4233b23783..ae0de80c1c88 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -5,7 +5,7 @@
  */
 
 #include <stdarg.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/lib/klist.c b/lib/klist.c
index 573d6068a42e..0874e41609a6 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -35,7 +35,7 @@
  */
 
 #include <linux/klist.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 
 /*
diff --git a/lib/kobject.c b/lib/kobject.c
index c33d7a18d635..21dee7c19afd 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -14,7 +14,7 @@
 
 #include <linux/kobject.h>
 #include <linux/string.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index e66e9b632617..1a91efa6d121 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -17,7 +17,8 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/kobject.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
 #include <linux/socket.h>
@@ -29,16 +30,17 @@
 
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
-static DEFINE_SPINLOCK(sequence_lock);
 #ifdef CONFIG_NET
 struct uevent_sock {
 	struct list_head list;
 	struct sock *sk;
 };
 static LIST_HEAD(uevent_sock_list);
-static DEFINE_MUTEX(uevent_sock_mutex);
 #endif
 
+/* This lock protects uevent_seqnum and uevent_sock_list */
+static DEFINE_MUTEX(uevent_sock_mutex);
+
 /* the strings here must match the enum in include/linux/kobject.h */
 static const char *kobject_actions[] = {
 	[KOBJ_ADD] =		"add",
@@ -136,7 +138,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	struct kobject *top_kobj;
 	struct kset *kset;
 	const struct kset_uevent_ops *uevent_ops;
-	u64 seq;
 	int i = 0;
 	int retval = 0;
 #ifdef CONFIG_NET
@@ -243,17 +244,16 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	else if (action == KOBJ_REMOVE)
 		kobj->state_remove_uevent_sent = 1;
 
+	mutex_lock(&uevent_sock_mutex);
 	/* we will send an event, so request a new sequence number */
-	spin_lock(&sequence_lock);
-	seq = ++uevent_seqnum;
-	spin_unlock(&sequence_lock);
-	retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq);
-	if (retval)
+	retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)++uevent_seqnum);
+	if (retval) {
+		mutex_unlock(&uevent_sock_mutex);
 		goto exit;
+	}
 
 #if defined(CONFIG_NET)
 	/* send netlink message */
-	mutex_lock(&uevent_sock_mutex);
 	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
 		struct sock *uevent_sock = ue_sk->sk;
 		struct sk_buff *skb;
@@ -290,8 +290,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 		} else
 			retval = -ENOMEM;
 	}
-	mutex_unlock(&uevent_sock_mutex);
 #endif
+	mutex_unlock(&uevent_sock_mutex);
 
 	/* call uevent_helper, usually only enabled during early boot */
 	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index 7a94c8f14e29..c3615eab0cc3 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/types.h>
 #include <asm/uaccess.h>
 #include "kstrtox.h"
@@ -44,12 +44,13 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
  *
  * Don't you dare use this function.
  */
-unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res)
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
 {
+	unsigned long long res;
 	unsigned int rv;
 	int overflow;
 
-	*res = 0;
+	res = 0;
 	rv = 0;
 	overflow = 0;
 	while (*s) {
@@ -64,12 +65,19 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 
 		if (val >= base)
 			break;
-		if (*res > div_u64(ULLONG_MAX - val, base))
-			overflow = 1;
-		*res = *res * base + val;
+		/*
+		 * Check for overflow only if we are within range of
+		 * it in the max base we support (16)
+		 */
+		if (unlikely(res & (~0ull << 60))) {
+			if (res > div_u64(ULLONG_MAX - val, base))
+				overflow = 1;
+		}
+		res = res * base + val;
 		rv++;
 		s++;
 	}
+	*p = res;
 	if (overflow)
 		rv |= KSTRTOX_OVERFLOW;
 	return rv;
diff --git a/lib/lcm.c b/lib/lcm.c
index 10b5cfcacf6b..b9c8de461e9e 100644
--- a/lib/lcm.c
+++ b/lib/lcm.c
@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/gcd.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/lcm.h>
 
 /* Lowest common multiple */
diff --git a/lib/list_debug.c b/lib/list_debug.c
index b8029a5583ff..982b850d4e7a 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -6,8 +6,10 @@
  * DEBUG_LIST.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
 
 /*
  * Insert a new entry between two known consecutive entries.
diff --git a/lib/llist.c b/lib/llist.c
index 700cff77a387..4a15115e90f8 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -23,11 +23,10 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/llist.h>
 
-#include <asm/system.h>
 
 /**
  * llist_add_batch - add several linked entries in batch
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 507a22fab738..7aae0f2a5e0a 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -14,7 +14,6 @@
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
-#include <linux/module.h>
 #include <linux/lockdep.h>
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
diff --git a/lib/md5.c b/lib/md5.c
index c777180e1f2f..958a3c15923c 100644
--- a/lib/md5.c
+++ b/lib/md5.c
@@ -1,5 +1,5 @@
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cryptohash.h>
 
 #define F1(x, y, z)	(z ^ (x & (y ^ z)))
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
new file mode 100644
index 000000000000..567d52e74d77
--- /dev/null
+++ b/lib/mpi/Makefile
@@ -0,0 +1,32 @@
+#
+# MPI multiprecision maths library (from gpg)
+#
+
+obj-$(CONFIG_MPILIB) = mpi.o
+
+mpi-y = \
+	generic_mpih-lshift.o		\
+	generic_mpih-mul1.o		\
+	generic_mpih-mul2.o		\
+	generic_mpih-mul3.o		\
+	generic_mpih-rshift.o		\
+	generic_mpih-sub1.o		\
+	generic_mpih-add1.o		\
+	mpicoder.o			\
+	mpi-bit.o			\
+	mpih-cmp.o			\
+	mpih-div.o			\
+	mpih-mul.o			\
+	mpi-pow.o			\
+	mpiutil.o
+
+mpi-$(CONFIG_MPILIB_EXTRA) += \
+	mpi-add.o			\
+	mpi-div.o			\
+	mpi-cmp.o			\
+	mpi-gcd.o			\
+	mpi-inline.o			\
+	mpi-inv.o			\
+	mpi-mpow.o			\
+	mpi-mul.o			\
+	mpi-scan.o
diff --git a/lib/mpi/generic_mpi-asm-defs.h b/lib/mpi/generic_mpi-asm-defs.h
new file mode 100644
index 000000000000..047d1f5a7249
--- /dev/null
+++ b/lib/mpi/generic_mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery.  We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here.  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c
new file mode 100644
index 000000000000..c94c7dd344b3
--- /dev/null
+++ b/lib/mpi/generic_mpih-add1.c
@@ -0,0 +1,61 @@
+/* mpihelp-add_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+	mpi_limb_t x, y, cy;
+	mpi_size_t j;
+
+	/* The loop counter and index J goes from -SIZE to -1.  This way
+	   the loop becomes faster.  */
+	j = -size;
+
+	/* Offset the base pointers to compensate for the negative indices. */
+	s1_ptr -= j;
+	s2_ptr -= j;
+	res_ptr -= j;
+
+	cy = 0;
+	do {
+		y = s2_ptr[j];
+		x = s1_ptr[j];
+		y += cy;	/* add previous carry to one addend */
+		cy = y < cy;	/* get out carry from that addition */
+		y += x;		/* add other addend */
+		cy += y < x;	/* get out carry from that add, combine */
+		res_ptr[j] = y;
+	} while (++j);
+
+	return cy;
+}
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c
new file mode 100644
index 000000000000..86318927231a
--- /dev/null
+++ b/lib/mpi/generic_mpih-lshift.c
@@ -0,0 +1,63 @@
+/* mpihelp-lshift.c  -	MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
+
+mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+	mpi_limb_t high_limb, low_limb;
+	unsigned sh_1, sh_2;
+	mpi_size_t i;
+	mpi_limb_t retval;
+
+	sh_1 = cnt;
+	wp += 1;
+	sh_2 = BITS_PER_MPI_LIMB - sh_1;
+	i = usize - 1;
+	low_limb = up[i];
+	retval = low_limb >> sh_2;
+	high_limb = low_limb;
+	while (--i >= 0) {
+		low_limb = up[i];
+		wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+		high_limb = low_limb;
+	}
+	wp[i] = high_limb << sh_1;
+
+	return retval;
+}
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c
new file mode 100644
index 000000000000..1668dfd9092c
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul1.c
@@ -0,0 +1,57 @@
+/* mpihelp-mul_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	      mpi_limb_t s2_limb)
+{
+	mpi_limb_t cy_limb;
+	mpi_size_t j;
+	mpi_limb_t prod_high, prod_low;
+
+	/* The loop counter and index J goes from -S1_SIZE to -1.  This way
+	 * the loop becomes faster.  */
+	j = -s1_size;
+
+	/* Offset the base pointers to compensate for the negative indices.  */
+	s1_ptr -= j;
+	res_ptr -= j;
+
+	cy_limb = 0;
+	do {
+		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+		prod_low += cy_limb;
+		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+		res_ptr[j] = prod_low;
+	} while (++j);
+
+	return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c
new file mode 100644
index 000000000000..8a7b29ee1740
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul2.c
@@ -0,0 +1,60 @@
+/* mpihelp-mul_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+		 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t cy_limb;
+	mpi_size_t j;
+	mpi_limb_t prod_high, prod_low;
+	mpi_limb_t x;
+
+	/* The loop counter and index J goes from -SIZE to -1.  This way
+	 * the loop becomes faster.  */
+	j = -s1_size;
+	res_ptr -= j;
+	s1_ptr -= j;
+
+	cy_limb = 0;
+	do {
+		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+		prod_low += cy_limb;
+		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+		x = res_ptr[j];
+		prod_low = x + prod_low;
+		cy_limb += prod_low < x ? 1 : 0;
+		res_ptr[j] = prod_low;
+	} while (++j);
+	return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c
new file mode 100644
index 000000000000..f96df327be63
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul3.c
@@ -0,0 +1,61 @@
+/* mpihelp-mul_3.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+		 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t cy_limb;
+	mpi_size_t j;
+	mpi_limb_t prod_high, prod_low;
+	mpi_limb_t x;
+
+	/* The loop counter and index J goes from -SIZE to -1.  This way
+	 * the loop becomes faster.  */
+	j = -s1_size;
+	res_ptr -= j;
+	s1_ptr -= j;
+
+	cy_limb = 0;
+	do {
+		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+		prod_low += cy_limb;
+		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+		x = res_ptr[j];
+		prod_low = x - prod_low;
+		cy_limb += prod_low > x ? 1 : 0;
+		res_ptr[j] = prod_low;
+	} while (++j);
+
+	return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c
new file mode 100644
index 000000000000..ffa328818ca6
--- /dev/null
+++ b/lib/mpi/generic_mpih-rshift.c
@@ -0,0 +1,63 @@
+/* mpih-rshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
+
+mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+	mpi_limb_t high_limb, low_limb;
+	unsigned sh_1, sh_2;
+	mpi_size_t i;
+	mpi_limb_t retval;
+
+	sh_1 = cnt;
+	wp -= 1;
+	sh_2 = BITS_PER_MPI_LIMB - sh_1;
+	high_limb = up[0];
+	retval = high_limb << sh_2;
+	low_limb = high_limb;
+	for (i = 1; i < usize; i++) {
+		high_limb = up[i];
+		wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+		low_limb = high_limb;
+	}
+	wp[i] = low_limb >> sh_1;
+
+	return retval;
+}
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c
new file mode 100644
index 000000000000..5d98ab7d6853
--- /dev/null
+++ b/lib/mpi/generic_mpih-sub1.c
@@ -0,0 +1,60 @@
+/* mpihelp-add_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+	mpi_limb_t x, y, cy;
+	mpi_size_t j;
+
+	/* The loop counter and index J goes from -SIZE to -1.  This way
+	   the loop becomes faster.  */
+	j = -size;
+
+	/* Offset the base pointers to compensate for the negative indices.  */
+	s1_ptr -= j;
+	s2_ptr -= j;
+	res_ptr -= j;
+
+	cy = 0;
+	do {
+		y = s2_ptr[j];
+		x = s1_ptr[j];
+		y += cy;	/* add previous carry to subtrahend */
+		cy = y < cy;	/* get out carry from that addition */
+		y = x - y;	/* main subtract */
+		cy += y > x;	/* get out carry from the subtract, combine */
+		res_ptr[j] = y;
+	} while (++j);
+
+	return cy;
+}
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..29f98624ef93
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1500 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large a UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+	that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product.  This is just a variant of umul_ppmm.
+
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X.  This is the number of
+ * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.	Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below!	*/
+
+#if defined(__GNUC__) && !defined(NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+	understood by gcc1.	Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+/***************************************
+	**************  A29K  *****************
+	***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %1,%4,%5\n" \
+		"addc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %1,%4,%5\n" \
+		"subc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+		USItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("multiplu %0,%1,%2" \
+		: "=r" ((USItype)(xl)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+		__asm__ ("multmu %0,%1,%2" \
+		: "=r" ((USItype)(xh)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("dividu %0,%3,%4" \
+	: "=r" ((USItype)(q)), \
+		"=q" ((USItype)(r)) \
+	: "1" ((USItype)(n1)), \
+		"r" ((USItype)(n0)), \
+		"r" ((USItype)(d)))
+
+#define count_leading_zeros(count, x) \
+	__asm__ ("clz %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+		UDItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("umulh %r1,%2,%0" \
+		: "=r" ((UDItype) ph) \
+		: "%rJ" (__m0), \
+			"rI" (__m1)); \
+		(pl) = __m0 * __m1; \
+	} while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+
+/***************************************
+	**************  ARM  ******************
+	***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("adds %1, %4, %5\n" \
+		"adc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subs %1, %4, %5\n" \
+		"sbc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"mov	%|r0, %2, lsr #16		@ AAAA\n" \
+		"mov	%|r2, %3, lsr #16		@ BBBB\n" \
+		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \
+		"bic	%0, %3, %|r2, lsl #16		@ bbbb\n" \
+		"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n" \
+		"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n" \
+		"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n" \
+		"mul	%0, %|r0, %0			@ AAAA * bbbb\n" \
+		"adds	%|r0, %1, %0			@ central sum\n" \
+		"addcs	%|r2, %|r2, #65536\n" \
+		"adds	%1, %|r1, %|r0, lsl #16\n" \
+		"adc	%0, %|r2, %|r0, lsr #16" \
+	: "=&r" ((USItype)(xh)), \
+		"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+		"r" ((USItype)(b)) \
+	: "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"umull %r1, %r0, %r2, %r3" \
+	: "=&r" ((USItype)(xh)), \
+			"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+			"r" ((USItype)(b)) \
+	: "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+/***************************************
+	**************  CLIPPER  **************
+	***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+		struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+	({union {DItype __ll; \
+		struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwx %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((SItype)(u)), \
+		"r" ((SItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__w) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	__w; })
+#endif /* __clipper__ */
+
+/***************************************
+	**************  GMICRO  ***************
+	***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add.w %5,%1\n" \
+		"addx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"%1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub.w %5,%1\n" \
+		"subx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+	__asm__ ("mulx %3,%0,%1" \
+	: "=g" ((USItype)(ph)), \
+		"=r" ((USItype)(pl)) \
+	: "%0" ((USItype)(m0)), \
+		"g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("divx %4,%0,%1" \
+	: "=g" ((USItype)(q)), \
+		"=r" ((USItype)(r)) \
+	: "1" ((USItype)(nh)), \
+		"0" ((USItype)(nl)), \
+		"g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bsch/1 %1,%0" \
+	: "=g" (count) \
+	: "g" ((USItype)(x)), \
+	     "0" ((USItype)0))
+#endif
+
+/***************************************
+	**************  HPPA  *****************
+	***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %4,%5,%1\n" \
+		   "addc %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "%rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %4,%5,%1\n" \
+	   "subb %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("xmpyu %1,%2,%0" \
+	: "=*f" (__xx.__ll) \
+	: "*f" ((USItype)(u)), \
+	       "*f" ((USItype)(v))); \
+	(wh) = __xx.__i.__h; \
+	(wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __tmp; \
+	__asm__ ( \
+	"ldi             1,%0\n" \
+	"extru,=	%1,15,16,%%r0  ; Bits 31..16 zero?\n" \
+	"extru,tr	%1,15,16,%1    ; No.  Shift down, skip add.\n" \
+	"ldo		16(%0),%0      ; Yes.	Perform add.\n" \
+	"extru,=	%1,23,8,%%r0   ; Bits 15..8 zero?\n" \
+	"extru,tr	%1,23,8,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		8(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,27,4,%%r0   ; Bits 7..4 zero?\n" \
+	"extru,tr	%1,27,4,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		4(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,29,2,%%r0   ; Bits 3..2 zero?\n" \
+	"extru,tr	%1,29,2,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		2(%0),%0       ; Yes.	Perform add.\n" \
+	"extru		%1,30,1,%1     ; Extract bit 1.\n" \
+	"sub		%0,%1,%0       ; Subtract it.              " \
+	: "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+
+/***************************************
+	**************  I370  *****************
+	***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	       "r" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	     + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (m0), \
+	       "r" (m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("dr %0,%2" \
+	: "=r" (__xx.__ll) \
+	: "0" (__xx.__ll), "r" (d)); \
+	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+
+/***************************************
+	**************  I386  *****************
+	***************************************/
+#undef __i386__
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl %5,%1\n" \
+	   "adcl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl %5,%1\n" \
+	   "sbbl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mull %3" \
+	: "=a" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divl %4" \
+	: "=a" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("bsrl %1,%0" \
+	: "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+	__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+/***************************************
+	**************  I860  *****************
+	***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) \
+	__asm__ ("shr %3,r0,r0\n" \
+	"shrd %1,%2,%0" \
+	   "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+/***************************************
+	**************  I960  *****************
+	***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 1,0\n" \
+	"addc %5,%4,%1\n" \
+	"addc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "%dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 0,0\n" \
+	"subc %5,%4,%1\n" \
+	"subc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("emul        %2,%1,%0" \
+	: "=d" (__xx.__ll) \
+	: "%dI" ((USItype)(u)), \
+	     "dI" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("emul      %2,%1,%0" \
+	: "=d" (__w) \
+	: "%dI" ((USItype)(u)), \
+	       "dI" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn; \
+	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+	__asm__ ("ediv %d,%n,%0" \
+	: "=d" (__rq.__ll) \
+	: "dI" (__nn.__ll), \
+	     "dI" ((USItype)(d))); \
+	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("scanbit %1,%0" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32)	/* sic */
+#if defined(__i960mx)		/* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn; \
+	__nn.__i.__h = (h); __nn.__i.__l = (l); \
+	__asm__ ("shre %2,%1,%0" \
+	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+}
+#endif /* i960mx */
+#endif /* i960 */
+
+/***************************************
+	**************  68000	****************
+	***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add%.l %5,%1\n" \
+	   "addx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub%.l %5,%1\n" \
+	   "subx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mulu%.l %3,%1:%0" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divu%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divs%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bfffo %1{%b2:%b2},%0" \
+	: "=d" ((USItype)(count)) \
+	: "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+	__asm__ ("| Inlined umul_ppmm\n" \
+	"move%.l %5,%3\n" \
+	"move%.l %2,%0\n" \
+	"move%.w %3,%1\n" \
+	"swap	%3\n" \
+	"swap	%0\n" \
+	"mulu	%2,%1\n" \
+	"mulu	%3,%0\n" \
+	"mulu	%2,%3\n" \
+	"swap	%2\n" \
+	"mulu	%5,%2\n" \
+	"add%.l	%3,%2\n" \
+	"jcc	1f\n" \
+	"add%.l	%#0x10000,%0\n" \
+	"1:	move%.l %2,%3\n" \
+	"clr%.w	%2\n" \
+	"swap	%2\n" \
+	"swap	%3\n" \
+	"clr%.w	%3\n" \
+	"add%.l	%3,%1\n" \
+	"addx%.l %2,%0\n" \
+	"| End inlined umul_ppmm" \
+	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+		"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+
+/***************************************
+	**************  88000	****************
+	***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addu.co %1,%r4,%r5\n" \
+	   "addu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "%rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subu.co %1,%r4,%r5\n" \
+	   "subu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("ff1 %0,%1" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63	/* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x; \
+	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+	(wh) = __x.__i.__h; \
+	(wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	({union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x, __q; \
+	__x.__i.__h = (n1); __x.__i.__l = (n0); \
+	__asm__ ("divu.d %0,%1,%2" \
+	: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+	(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+/***************************************
+	**************  MIPS  *****************
+	***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3" \
+	: "=l" ((USItype)(w0)), \
+	     "=h" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+/***************************************
+	**************  MIPS/64  **************
+	***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3" \
+	: "=l" ((UDItype)(w0)), \
+	     "=h" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((UDItype)(w0)), \
+	     "=d" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+
+/***************************************
+	**************  32000	****************
+	***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+	     "g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__w) \
+	: "%0" ((USItype)(u)), \
+	       "g" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+	__asm__ ("deid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "0" (__xx.__ll), \
+	     "g" ((USItype)(d))); \
+	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+	__asm__("ffsd      %2,%0" \
+	: "=r"((USItype) (count)) \
+	: "0"((USItype) 0), "r"((USItype) (x))); \
+	} while (0)
+#endif /* __ns32000__ */
+
+/***************************************
+	**************  PPC  ******************
+	***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else \
+		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(ah) && (ah) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else \
+		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+	__asm__ ("{cntlz|cntlzw} %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhwu %0,%1,%2" \
+	: "=r" ((USItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+	SItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhw %0,%1,%2" \
+	: "=r" ((SItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((USItype)(xh)), \
+	"=q" ((USItype)(xl)) \
+	: "r" (__m0), \
+	"r" (__m1)); \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((SItype)(xh)), \
+	"=q" ((SItype)(xl)) \
+	: "r" (m0), \
+	"r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("div %0,%2,%4" \
+	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants.  */
+
+/***************************************
+	**************  PYR  ******************
+	***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addw        %5,%1\n" \
+	"addwc	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subw        %5,%1\n" \
+	"subwb	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("movw %1,%R0\n" \
+	"uemul %2,%0" \
+	: "=&r" (__xx.__ll) \
+	: "g" ((USItype) (u)), \
+	"g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+
+/***************************************
+	**************  RT/ROMP  **************
+	***************************************/
+#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("a %1,%5\n" \
+	"ae %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("s %1,%5\n" \
+	"se %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ( \
+	"s       r2,r2\n" \
+	"mts	r10,%2\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"cas	%0,r2,r0\n" \
+	"mfs	r10,%1" \
+	: "=r" ((USItype)(ph)), \
+	"=r" ((USItype)(pl)) \
+	: "%r" (__m0), \
+	"r" (__m1) \
+	: "r2"); \
+	(ph) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+	if ((x) >= 0x10000) \
+		__asm__ ("clz     %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x) >> 16)); \
+	else { \
+		__asm__ ("clz   %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x))); \
+		(count) += 16; \
+	} \
+} while (0)
+#endif /* RT/ROMP */
+
+/***************************************
+	**************  SH2  ******************
+	***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+	&& W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ( \
+	"dmulu.l %2,%3\n" \
+	"sts	macl,%1\n" \
+	"sts	mach,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/***************************************
+	**************  SPARC	****************
+	***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addcc %r4,%5,%1\n" \
+	"addx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"%rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subcc %r4,%5,%1\n" \
+	"subx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+	2) the 'I' flag thinks of the range as a 13 bit signed interval,
+	while we want to match a 13 bit interval, sign extended to 32 bits,
+	but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	USItype __q; \
+	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+	: "=r" ((USItype)(__q)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"r" ((USItype)(d))); \
+	(r) = (n0) - __q * (d); \
+	(q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+	instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("! Inlined udiv_qrnnd\n" \
+	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
+	"tst	%%g0\n" \
+	"divscc	%3,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%0\n" \
+	"rd	%%y,%1\n" \
+	"bl,a 1f\n" \
+	"add	%1,%4,%1\n" \
+	"1:	! End of inline udiv_qrnnd" \
+	: "=r" ((USItype)(q)), \
+	"=r" ((USItype)(r)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"rI" ((USItype)(d)) \
+	: "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+	__asm__ ("scan %1,0,%0" \
+	: "=r" ((USItype)(x)) \
+	: "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+	implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+	undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("! Inlined umul_ppmm\n" \
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
+	"sra	%3,31,%%g2	! Don't move this insn\n" \
+	"and	%2,%%g2,%%g2	! Don't move this insn\n" \
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,0,%%g1\n" \
+	"add	%%g1,%%g2,%0\n" \
+	"rd	%%y,%1" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "%rI" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
+	   "mov	32,%%g1\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "1:	bcs	5f\n\t"						\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "2:	bne	1b\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "bcs	3f\n\t"							\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "b		3f\n\t"						\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "4:	sub	%1,%2,%1\n\t"					\
+	   "5:	addxcc	%1,%1,%1\n\t"					\
+	   "bcc	2b\n\t"							\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
+	   "bne	4b\n\t"							\
+	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
+	   "sub	%1,%2,%1\n\t"						\
+	   "3:	xnor	%0,0,%0\n\t"					\
+	   "! End of inline udiv_qrnnd\n"				\
+	   : "=&r" ((USItype)(q)),					\
+	     "=&r" ((USItype)(r))					\
+	   : "r" ((USItype)(d)),					\
+	     "1" ((USItype)(n1)),					\
+	     "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
+#endif
+#endif /* __sparc__ */
+
+/***************************************
+	**************  VAX  ******************
+	***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl2 %5,%1\n" \
+	"adwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl2 %5,%1\n" \
+	"sbwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __l, __h; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("emul %1,%2,$0,%0" \
+	: "=g" (__xx.__ll) \
+	: "g" (__m0), \
+	"g" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("ediv %3,%2,%0,%1" \
+	: "=g" (q), "=g" (r) \
+	: "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+
+/***************************************
+	**************  Z8000	****************
+	***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "%0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"%1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {long int __ll; \
+	struct {unsigned int __h, __l; } __i; \
+	} __xx; \
+	unsigned int __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mult      %S0,%H3" \
+	: "=r" (__xx.__i.__h), \
+	"=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	"rQR" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((signed int) __m0 >> 15) & __m1) \
+	+ (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/***************************************
+	***********  Generic Versions	********
+	***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+	UDWtype __ll = __umulsidi3(m0, m1); \
+	ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+	pl = (UWtype) __ll; \
+}
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+	({UWtype __hi, __lo; \
+	umul_ppmm(__hi, __lo, u, v); \
+	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+	/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) + (bl); \
+	(sh) = (ah) + (bh) + (__x < (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) - (bl); \
+	(sh) = (ah) - (bh) - (__x > (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __x0, __x1, __x2, __x3; \
+	UHWtype __ul, __vl, __uh, __vh; \
+	UWtype __u = (u), __v = (v); \
+	\
+	__ul = __ll_lowpart(__u); \
+	__uh = __ll_highpart(__u); \
+	__vl = __ll_lowpart(__v); \
+	__vh = __ll_highpart(__v); \
+	\
+	__x0 = (UWtype) __ul * __vl; \
+	__x1 = (UWtype) __ul * __vh; \
+	__x2 = (UWtype) __uh * __vl; \
+	__x3 = (UWtype) __uh * __vh; \
+	\
+	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
+	__x1 += __x2;		/* but this indeed can */ \
+	if (__x1 < __x2)		/* did we get it? */ \
+	__x3 += __ll_B;		/* yes, add it in the proper pos. */ \
+	\
+	(w1) = __x3 + __ll_highpart(__x1); \
+	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __w1; \
+	UWtype __m0 = (u), __m1 = (v); \
+	umul_ppmm(__w1, w0, __m0, __m1); \
+	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+
+	/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+	__d1 = __ll_highpart(d); \
+	__d0 = __ll_lowpart(d); \
+	\
+	__r1 = (n1) % __d1; \
+	__q1 = (n1) / __d1; \
+	__m = (UWtype) __q1 * __d0; \
+	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
+	if (__r1 < __m) { \
+		__q1--, __r1 += (d); \
+		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+		if (__r1 < __m) \
+			__q1--, __r1 += (d); \
+	} \
+	__r1 -= __m; \
+	\
+	__r0 = __r1 % __d1; \
+	__q0 = __r1 / __d1; \
+	__m = (UWtype) __q0 * __d0; \
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+	if (__r0 < __m) { \
+		__q0--, __r0 += (d); \
+		if (__r0 >= (d)) \
+			if (__r0 < __m) \
+				__q0--, __r0 += (d); \
+	} \
+	__r0 -= __m; \
+	\
+	(q) = (UWtype) __q1 * __ll_B | __q0; \
+	(r) = __r0; \
+} while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+	__udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	UWtype __r; \
+	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+	(r) = __r; \
+} while (0)
+#endif
+
+	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#undef count_leading_zeros
+#if !defined(count_leading_zeros)
+	extern
+#ifdef __STDC__
+			const
+#endif
+			unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+	UWtype __xr = (x); \
+	UWtype __a; \
+	\
+	if (W_TYPE_SIZE <= 32) { \
+		__a = __xr < ((UWtype) 1 << 2*__BITS4) \
+		? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+		: (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
+	} \
+	else { \
+		for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+			if (((__xr >> __a) & 0xff) != 0) \
+				break; \
+	} \
+	\
+	(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+	/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+	defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+do { \
+	UWtype __ctz_x = (x); \
+	UWtype __ctz_c; \
+	count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+	(count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
new file mode 100644
index 000000000000..f56b9ba295e6
--- /dev/null
+++ b/lib/mpi/mpi-add.c
@@ -0,0 +1,234 @@
+/* mpi-add.c  -  MPI functions
+ *	Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. U and V may be the same.
+ */
+int mpi_add_ui(MPI w, const MPI u, unsigned long v)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize, wsize;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+	wsign = 0;
+
+	/* If not space for W (and possible carry), increase space.  */
+	wsize = usize + 1;
+	if (w->alloced < wsize)
+		if (mpi_resize(w, wsize) < 0)
+			return -ENOMEM;
+
+	/* These must be after realloc (U may be the same as W).  */
+	up = u->d;
+	wp = w->d;
+
+	if (!usize) {		/* simple */
+		wp[0] = v;
+		wsize = v ? 1 : 0;
+	} else if (!usign) {	/* mpi is not negative */
+		mpi_limb_t cy;
+		cy = mpihelp_add_1(wp, up, usize, v);
+		wp[usize] = cy;
+		wsize = usize + cy;
+	} else {		/* The signs are different.  Need exact comparison to determine
+				 * which operand to subtract from which.  */
+		if (usize == 1 && up[0] < v) {
+			wp[0] = v - up[0];
+			wsize = 1;
+		} else {
+			mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease with at most one limb. */
+			wsize = usize - (wp[usize - 1] == 0);
+			wsign = 1;
+		}
+	}
+
+	w->nlimbs = wsize;
+	w->sign = wsign;
+	return 0;
+}
+
+int mpi_add(MPI w, MPI u, MPI v)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t usize, vsize, wsize;
+	int usign, vsign, wsign;
+
+	if (u->nlimbs < v->nlimbs) {	/* Swap U and V. */
+		usize = v->nlimbs;
+		usign = v->sign;
+		vsize = u->nlimbs;
+		vsign = u->sign;
+		wsize = usize + 1;
+		if (RESIZE_IF_NEEDED(w, wsize) < 0)
+			return -ENOMEM;
+		/* These must be after realloc (u or v may be the same as w).  */
+		up = v->d;
+		vp = u->d;
+	} else {
+		usize = u->nlimbs;
+		usign = u->sign;
+		vsize = v->nlimbs;
+		vsign = v->sign;
+		wsize = usize + 1;
+		if (RESIZE_IF_NEEDED(w, wsize) < 0)
+			return -ENOMEM;
+		/* These must be after realloc (u or v may be the same as w).  */
+		up = u->d;
+		vp = v->d;
+	}
+	wp = w->d;
+	wsign = 0;
+
+	if (!vsize) {		/* simple */
+		MPN_COPY(wp, up, usize);
+		wsize = usize;
+		wsign = usign;
+	} else if (usign != vsign) {	/* different sign */
+		/* This test is right since USIZE >= VSIZE */
+		if (usize != vsize) {
+			mpihelp_sub(wp, up, usize, vp, vsize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			wsign = usign;
+		} else if (mpihelp_cmp(up, vp, usize) < 0) {
+			mpihelp_sub_n(wp, vp, up, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (!usign)
+				wsign = 1;
+		} else {
+			mpihelp_sub_n(wp, up, vp, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (usign)
+				wsign = 1;
+		}
+	} else {		/* U and V have same sign. Add them. */
+		mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+		wp[usize] = cy;
+		wsize = usize + cy;
+		if (usign)
+			wsign = 1;
+	}
+
+	w->nlimbs = wsize;
+	w->sign = wsign;
+	return 0;
+}
+
+/****************
+ * Subtract the unsigned integer V from the mpi-integer U and store the
+ * result in W.
+ */
+int mpi_sub_ui(MPI w, MPI u, unsigned long v)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize, wsize;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+	wsign = 0;
+
+	/* If not space for W (and possible carry), increase space.  */
+	wsize = usize + 1;
+	if (w->alloced < wsize)
+		if (mpi_resize(w, wsize) < 0)
+			return -ENOMEM;
+
+	/* These must be after realloc (U may be the same as W).  */
+	up = u->d;
+	wp = w->d;
+
+	if (!usize) {		/* simple */
+		wp[0] = v;
+		wsize = v ? 1 : 0;
+		wsign = 1;
+	} else if (usign) {	/* mpi and v are negative */
+		mpi_limb_t cy;
+		cy = mpihelp_add_1(wp, up, usize, v);
+		wp[usize] = cy;
+		wsize = usize + cy;
+	} else {		/* The signs are different.  Need exact comparison to determine
+				 * which operand to subtract from which.  */
+		if (usize == 1 && up[0] < v) {
+			wp[0] = v - up[0];
+			wsize = 1;
+			wsign = 1;
+		} else {
+			mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease with at most one limb. */
+			wsize = usize - (wp[usize - 1] == 0);
+		}
+	}
+
+	w->nlimbs = wsize;
+	w->sign = wsign;
+	return 0;
+}
+
+int mpi_sub(MPI w, MPI u, MPI v)
+{
+	int rc;
+
+	if (w == v) {
+		MPI vv;
+		if (mpi_copy(&vv, v) < 0)
+			return -ENOMEM;
+		vv->sign = !vv->sign;
+		rc = mpi_add(w, u, vv);
+		mpi_free(vv);
+	} else {
+		/* fixme: this is not thread-save (we temp. modify v) */
+		v->sign = !v->sign;
+		rc = mpi_add(w, u, v);
+		v->sign = !v->sign;
+	}
+	return rc;
+}
+
+int mpi_addm(MPI w, MPI u, MPI v, MPI m)
+{
+	if (mpi_add(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+		return -ENOMEM;
+	return 0;
+}
+
+int mpi_subm(MPI w, MPI u, MPI v, MPI m)
+{
+	if (mpi_sub(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+		return -ENOMEM;
+	return 0;
+}
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
new file mode 100644
index 000000000000..2f526627e4f5
--- /dev/null
+++ b/lib/mpi/mpi-bit.c
@@ -0,0 +1,217 @@
+/* mpi-bit.c  -  MPI bit level fucntions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define A_LIMB_1 ((mpi_limb_t) 1)
+
+/****************
+ * Sometimes we have MSL (most significant limbs) which are 0;
+ * this is for some reasons not good, so this function removes them.
+ */
+void mpi_normalize(MPI a)
+{
+	for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+		;
+}
+
+/****************
+ * Return the number of bits in A.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+	unsigned n;
+
+	mpi_normalize(a);
+
+	if (a->nlimbs) {
+		mpi_limb_t alimb = a->d[a->nlimbs - 1];
+		if (alimb)
+			count_leading_zeros(n, alimb);
+		else
+			n = BITS_PER_MPI_LIMB;
+		n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+	} else
+		n = 0;
+	return n;
+}
+EXPORT_SYMBOL_GPL(mpi_get_nbits);
+
+/****************
+ * Test whether bit N is set.
+ */
+int mpi_test_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+	mpi_limb_t limb;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return 0;	/* too far left: this is a 0 */
+	limb = a->d[limbno];
+	return (limb & (A_LIMB_1 << bitno)) ? 1 : 0;
+}
+
+/****************
+ * Set bit N of A.
+ */
+int mpi_set_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {	/* resize */
+		if (a->alloced >= limbno)
+			if (mpi_resize(a, limbno + 1) < 0)
+				return -ENOMEM;
+		a->nlimbs = limbno + 1;
+	}
+	a->d[limbno] |= (A_LIMB_1 << bitno);
+	return 0;
+}
+
+/****************
+ * Set bit N of A. and clear all bits above
+ */
+int mpi_set_highbit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {	/* resize */
+		if (a->alloced >= limbno)
+			if (mpi_resize(a, limbno + 1) < 0)
+				return -ENOMEM;
+		a->nlimbs = limbno + 1;
+	}
+	a->d[limbno] |= (A_LIMB_1 << bitno);
+	for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	a->nlimbs = limbno + 1;
+	return 0;
+}
+
+/****************
+ * clear bit N of A and all bits above
+ */
+void mpi_clear_highbit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return;		/* not allocated, so need to clear bits :-) */
+
+	for (; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	a->nlimbs = limbno + 1;
+}
+
+/****************
+ * Clear bit N of A.
+ */
+void mpi_clear_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return;		/* don't need to clear this bit, it's to far to left */
+	a->d[limbno] &= ~(A_LIMB_1 << bitno);
+}
+
+/****************
+ * Shift A by N bits to the right
+ * FIXME: should use alloc_limb if X and A are same.
+ */
+int mpi_rshift(MPI x, MPI a, unsigned n)
+{
+	mpi_ptr_t xp;
+	mpi_size_t xsize;
+
+	xsize = a->nlimbs;
+	x->sign = a->sign;
+	if (RESIZE_IF_NEEDED(x, (size_t) xsize) < 0)
+		return -ENOMEM;
+	xp = x->d;
+
+	if (xsize) {
+		mpihelp_rshift(xp, a->d, xsize, n);
+		MPN_NORMALIZE(xp, xsize);
+	}
+	x->nlimbs = xsize;
+	return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the left
+ * This is used only within the MPI library
+ */
+int mpi_lshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap = a->d;
+	int n = a->nlimbs;
+	int i;
+
+	if (!count || !n)
+		return 0;
+
+	if (RESIZE_IF_NEEDED(a, n + count) < 0)
+		return -ENOMEM;
+
+	for (i = n - 1; i >= 0; i--)
+		ap[i + count] = ap[i];
+	for (i = 0; i < count; i++)
+		ap[i] = 0;
+	a->nlimbs += count;
+	return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the right
+ * This is used only within the MPI library
+ */
+void mpi_rshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap = a->d;
+	mpi_size_t n = a->nlimbs;
+	unsigned int i;
+
+	if (count >= n) {
+		a->nlimbs = 0;
+		return;
+	}
+
+	for (i = 0; i < n - count; i++)
+		ap[i] = ap[i + count];
+	ap[i] = 0;
+	a->nlimbs -= count;
+}
diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c
new file mode 100644
index 000000000000..914bc42a8a80
--- /dev/null
+++ b/lib/mpi/mpi-cmp.c
@@ -0,0 +1,68 @@
+/* mpi-cmp.c  -  MPI functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+int mpi_cmp_ui(MPI u, unsigned long v)
+{
+	mpi_limb_t limb = v;
+
+	mpi_normalize(u);
+	if (!u->nlimbs && !limb)
+		return 0;
+	if (u->sign)
+		return -1;
+	if (u->nlimbs > 1)
+		return 1;
+
+	if (u->d[0] == limb)
+		return 0;
+	else if (u->d[0] > limb)
+		return 1;
+	else
+		return -1;
+}
+
+int mpi_cmp(MPI u, MPI v)
+{
+	mpi_size_t usize, vsize;
+	int cmp;
+
+	mpi_normalize(u);
+	mpi_normalize(v);
+	usize = u->nlimbs;
+	vsize = v->nlimbs;
+	if (!u->sign && v->sign)
+		return 1;
+	if (u->sign && !v->sign)
+		return -1;
+	if (usize != vsize && !u->sign && !v->sign)
+		return usize - vsize;
+	if (usize != vsize && u->sign && v->sign)
+		return vsize + usize;
+	if (!usize)
+		return 0;
+	cmp = mpihelp_cmp(u->d, v->d, usize);
+	if (!cmp)
+		return 0;
+	if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0))
+		return 1;
+	return -1;
+}
diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
new file mode 100644
index 000000000000..f68cbbb4d4a4
--- /dev/null
+++ b/lib/mpi/mpi-div.c
@@ -0,0 +1,338 @@
+/* mpi-div.c  -  MPI functions
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor)
+{
+	int rc = -ENOMEM;
+	int divisor_sign = divisor->sign;
+	MPI temp_divisor = NULL;
+
+	/* We need the original value of the divisor after the remainder has been
+	 * preliminary calculated.      We have to copy it to temporary space if it's
+	 * the same variable as REM.  */
+	if (rem == divisor) {
+		if (mpi_copy(&temp_divisor, divisor) < 0)
+			goto nomem;
+		divisor = temp_divisor;
+	}
+
+	if (mpi_tdiv_qr(NULL, rem, dividend, divisor) < 0)
+		goto nomem;
+	if (((divisor_sign ? 1 : 0) ^ (dividend->sign ? 1 : 0)) && rem->nlimbs)
+		if (mpi_add(rem, rem, divisor) < 0)
+			goto nomem;
+
+	rc = 0;
+
+nomem:
+	if (temp_divisor)
+		mpi_free(temp_divisor);
+	return rc;
+}
+
+/****************
+ * Division rounding the quotient towards -infinity.
+ * The remainder gets the same sign as the denominator.
+ * rem is optional
+ */
+
+ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor)
+{
+	mpi_limb_t rlimb;
+
+	rlimb = mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+	if (rlimb && dividend->sign)
+		rlimb = divisor - rlimb;
+
+	if (rem) {
+		rem->d[0] = rlimb;
+		rem->nlimbs = rlimb ? 1 : 0;
+	}
+	return rlimb;
+}
+
+int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor)
+{
+	MPI tmp = mpi_alloc(mpi_get_nlimbs(quot));
+	if (!tmp)
+		return -ENOMEM;
+	mpi_fdiv_qr(quot, tmp, dividend, divisor);
+	mpi_free(tmp);
+	return 0;
+}
+
+int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor)
+{
+	int divisor_sign = divisor->sign;
+	MPI temp_divisor = NULL;
+
+	if (quot == divisor || rem == divisor) {
+		if (mpi_copy(&temp_divisor, divisor) < 0)
+			return -ENOMEM;
+		divisor = temp_divisor;
+	}
+
+	if (mpi_tdiv_qr(quot, rem, dividend, divisor) < 0)
+		goto nomem;
+
+	if ((divisor_sign ^ dividend->sign) && rem->nlimbs) {
+		if (mpi_sub_ui(quot, quot, 1) < 0)
+			goto nomem;
+		if (mpi_add(rem, rem, divisor) < 0)
+			goto nomem;
+	}
+
+	if (temp_divisor)
+		mpi_free(temp_divisor);
+
+	return 0;
+
+nomem:
+	mpi_free(temp_divisor);
+	return -ENOMEM;
+}
+
+/* If den == quot, den needs temporary storage.
+ * If den == rem, den needs temporary storage.
+ * If num == quot, num needs temporary storage.
+ * If den has temporary storage, it can be normalized while being copied,
+ *   i.e no extra storage should be allocated.
+ */
+
+int mpi_tdiv_r(MPI rem, MPI num, MPI den)
+{
+	return mpi_tdiv_qr(NULL, rem, num, den);
+}
+
+int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
+{
+	int rc = -ENOMEM;
+	mpi_ptr_t np, dp;
+	mpi_ptr_t qp, rp;
+	mpi_size_t nsize = num->nlimbs;
+	mpi_size_t dsize = den->nlimbs;
+	mpi_size_t qsize, rsize;
+	mpi_size_t sign_remainder = num->sign;
+	mpi_size_t sign_quotient = num->sign ^ den->sign;
+	unsigned normalization_steps;
+	mpi_limb_t q_limb;
+	mpi_ptr_t marker[5];
+	int markidx = 0;
+
+	if (!dsize)
+		return -EINVAL;
+
+	memset(marker, 0, sizeof(marker));
+
+	/* Ensure space is enough for quotient and remainder.
+	 * We need space for an extra limb in the remainder, because it's
+	 * up-shifted (normalized) below.  */
+	rsize = nsize + 1;
+	if (mpi_resize(rem, rsize) < 0)
+		goto nomem;
+
+	qsize = rsize - dsize;	/* qsize cannot be bigger than this.  */
+	if (qsize <= 0) {
+		if (num != rem) {
+			rem->nlimbs = num->nlimbs;
+			rem->sign = num->sign;
+			MPN_COPY(rem->d, num->d, nsize);
+		}
+		if (quot) {
+			/* This needs to follow the assignment to rem, in case the
+			 * numerator and quotient are the same.  */
+			quot->nlimbs = 0;
+			quot->sign = 0;
+		}
+		return 0;
+	}
+
+	if (quot)
+		if (mpi_resize(quot, qsize) < 0)
+			goto nomem;
+
+	/* Read pointers here, when reallocation is finished.  */
+	np = num->d;
+	dp = den->d;
+	rp = rem->d;
+
+	/* Optimize division by a single-limb divisor.  */
+	if (dsize == 1) {
+		mpi_limb_t rlimb;
+		if (quot) {
+			qp = quot->d;
+			rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]);
+			qsize -= qp[qsize - 1] == 0;
+			quot->nlimbs = qsize;
+			quot->sign = sign_quotient;
+		} else
+			rlimb = mpihelp_mod_1(np, nsize, dp[0]);
+		rp[0] = rlimb;
+		rsize = rlimb != 0 ? 1 : 0;
+		rem->nlimbs = rsize;
+		rem->sign = sign_remainder;
+		return 0;
+	}
+
+	if (quot) {
+		qp = quot->d;
+		/* Make sure QP and NP point to different objects.  Otherwise the
+		 * numerator would be gradually overwritten by the quotient limbs.  */
+		if (qp == np) {	/* Copy NP object to temporary space.  */
+			np = marker[markidx++] = mpi_alloc_limb_space(nsize);
+			if (!np)
+				goto nomem;
+			MPN_COPY(np, qp, nsize);
+		}
+	} else			/* Put quotient at top of remainder. */
+		qp = rp + dsize;
+
+	count_leading_zeros(normalization_steps, dp[dsize - 1]);
+
+	/* Normalize the denominator, i.e. make its most significant bit set by
+	 * shifting it NORMALIZATION_STEPS bits to the left.  Also shift the
+	 * numerator the same number of steps (to keep the quotient the same!).
+	 */
+	if (normalization_steps) {
+		mpi_ptr_t tp;
+		mpi_limb_t nlimb;
+
+		/* Shift up the denominator setting the most significant bit of
+		 * the most significant word.  Use temporary storage not to clobber
+		 * the original contents of the denominator.  */
+		tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+		if (!tp)
+			goto nomem;
+		mpihelp_lshift(tp, dp, dsize, normalization_steps);
+		dp = tp;
+
+		/* Shift up the numerator, possibly introducing a new most
+		 * significant word.  Move the shifted numerator in the remainder
+		 * meanwhile.  */
+		nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
+		if (nlimb) {
+			rp[nsize] = nlimb;
+			rsize = nsize + 1;
+		} else
+			rsize = nsize;
+	} else {
+		/* The denominator is already normalized, as required.  Copy it to
+		 * temporary space if it overlaps with the quotient or remainder.  */
+		if (dp == rp || (quot && (dp == qp))) {
+			mpi_ptr_t tp;
+
+			tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+			if (!tp)
+				goto nomem;
+			MPN_COPY(tp, dp, dsize);
+			dp = tp;
+		}
+
+		/* Move the numerator to the remainder.  */
+		if (rp != np)
+			MPN_COPY(rp, np, nsize);
+
+		rsize = nsize;
+	}
+
+	q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize);
+
+	if (quot) {
+		qsize = rsize - dsize;
+		if (q_limb) {
+			qp[qsize] = q_limb;
+			qsize += 1;
+		}
+
+		quot->nlimbs = qsize;
+		quot->sign = sign_quotient;
+	}
+
+	rsize = dsize;
+	MPN_NORMALIZE(rp, rsize);
+
+	if (normalization_steps && rsize) {
+		mpihelp_rshift(rp, rp, rsize, normalization_steps);
+		rsize -= rp[rsize - 1] == 0 ? 1 : 0;
+	}
+
+	rem->nlimbs = rsize;
+	rem->sign = sign_remainder;
+
+	rc = 0;
+nomem:
+	while (markidx)
+		mpi_free_limb_space(marker[--markidx]);
+	return rc;
+}
+
+int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count)
+{
+	mpi_size_t usize, wsize;
+	mpi_size_t limb_cnt;
+
+	usize = u->nlimbs;
+	limb_cnt = count / BITS_PER_MPI_LIMB;
+	wsize = usize - limb_cnt;
+	if (limb_cnt >= usize)
+		w->nlimbs = 0;
+	else {
+		mpi_ptr_t wp;
+		mpi_ptr_t up;
+
+		if (RESIZE_IF_NEEDED(w, wsize) < 0)
+			return -ENOMEM;
+		wp = w->d;
+		up = u->d;
+
+		count %= BITS_PER_MPI_LIMB;
+		if (count) {
+			mpihelp_rshift(wp, up + limb_cnt, wsize, count);
+			wsize -= !wp[wsize - 1];
+		} else {
+			MPN_COPY_INCR(wp, up + limb_cnt, wsize);
+		}
+
+		w->nlimbs = wsize;
+	}
+	return 0;
+}
+
+/****************
+ * Check whether dividend is divisible by divisor
+ * (note: divisor must fit into a limb)
+ */
+int mpi_divisible_ui(MPI dividend, ulong divisor)
+{
+	return !mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+}
diff --git a/lib/mpi/mpi-gcd.c b/lib/mpi/mpi-gcd.c
new file mode 100644
index 000000000000..13c48aef9c4e
--- /dev/null
+++ b/lib/mpi/mpi-gcd.c
@@ -0,0 +1,59 @@
+/* mpi-gcd.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Find the greatest common divisor G of A and B.
+ * Return: true if this 1, false in all other cases
+ */
+int mpi_gcd(MPI g, const MPI xa, const MPI xb)
+{
+	MPI a = NULL, b = NULL;
+
+	if (mpi_copy(&a, xa) < 0)
+		goto nomem;
+
+	if (mpi_copy(&b, xb) < 0)
+		goto nomem;
+
+	/* TAOCP Vol II, 4.5.2, Algorithm A */
+	a->sign = 0;
+	b->sign = 0;
+	while (mpi_cmp_ui(b, 0)) {
+		if (mpi_fdiv_r(g, a, b) < 0)	/* g used as temorary variable */
+			goto nomem;
+		if (mpi_set(a, b) < 0)
+			goto nomem;
+		if (mpi_set(b, g) < 0)
+			goto nomem;
+	}
+	if (mpi_set(g, a) < 0)
+		goto nomem;
+
+	mpi_free(a);
+	mpi_free(b);
+	return !mpi_cmp_ui(g, 1);
+
+nomem:
+	mpi_free(a);
+	mpi_free(b);
+	return -ENOMEM;
+}
diff --git a/lib/mpi/mpi-inline.c b/lib/mpi/mpi-inline.c
new file mode 100644
index 000000000000..654f68aeed8b
--- /dev/null
+++ b/lib/mpi/mpi-inline.c
@@ -0,0 +1,31 @@
+/* mpi-inline.c
+ * Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* put the inline functions as real functions into the lib */
+#define G10_MPI_INLINE_DECL
+
+#include "mpi-internal.h"
+
+/* always include the header becuase it is only
+ * included by mpi-internal if __GCC__ is defined but we
+ * need it here in all cases and the above definition of
+ * of the macro allows us to do so
+ */
+#include "mpi-inline.h"
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL  extern inline
+#endif
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;
+
+	x = *s1_ptr++;
+	s2_limb += x;
+	*res_ptr++ = s2_limb;
+	if (s2_limb < x) {	/* sum is less than the left operand: handle carry */
+		while (--s1_size) {
+			x = *s1_ptr++ + 1;	/* add carry */
+			*res_ptr++ = x;	/* and store */
+			if (x)	/* not 0 (no overflow): we can stop */
+				goto leave;
+		}
+		return 1;	/* return carry (size of s1 to small) */
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {	/* not the same variable */
+		mpi_size_t i;	/* copy the rest */
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;		/* no carry */
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_limb_t cy = 0;
+
+	if (s2_size)
+		cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+	if (s1_size - s2_size)
+		cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+				   s1_size - s2_size, cy);
+	return cy;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;
+
+	x = *s1_ptr++;
+	s2_limb = x - s2_limb;
+	*res_ptr++ = s2_limb;
+	if (s2_limb > x) {
+		while (--s1_size) {
+			x = *s1_ptr++;
+			*res_ptr++ = x - 1;
+			if (x)
+				goto leave;
+		}
+		return 1;
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {
+		mpi_size_t i;
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_limb_t cy = 0;
+
+	if (s2_size)
+		cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+	if (s1_size - s2_size)
+		cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+				   s1_size - s2_size, cy);
+	return cy;
+}
+
+#endif /*G10_MPI_INLINE_H */
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+
+#define log_debug printk
+#define log_bug printk
+
+#define assert(x) \
+	do { \
+		if (!x) \
+			log_bug("failed assertion\n"); \
+	} while (0);
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines.  */
+
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+
+typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
+typedef int mpi_size_t;		/* (must be a signed type) */
+
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+	if (a->alloced < b)
+		return mpi_resize(a, b);
+	return 0;
+}
+
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+#define MPN_COPY_INCR(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (d)[_i];	\
+	} while (0)
+
+#define MPN_COPY_DECR(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = (n)-1; _i >= 0; _i--) \
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+	do {					\
+		int  _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = 0;		\
+	} while (0)
+
+#define MPN_NORMALIZE(d, n)  \
+	do {					\
+		while ((n) > 0) {		\
+			if ((d)[(n)-1])		\
+				break;		\
+			(n)--;			\
+		}				\
+	} while (0)
+
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+	do {				\
+		for (;;) {		\
+			if ((d)[(n)-1])	\
+				break;	\
+			(n)--;		\
+		}			\
+	} while (0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mul_n_basecase(prodp, up, vp, size);	\
+		else						\
+			mul_n(prodp, up, vp, size, tspace);	\
+	} while (0);
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+	do {								\
+		mpi_limb_t _q, _ql, _r;					\
+		mpi_limb_t _xh, _xl;					\
+		umul_ppmm(_q, _ql, (nh), (di));				\
+		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
+		umul_ppmm(_xh, _xl, _q, (d));				\
+		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
+		if (_xh) {						\
+			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
+			_q++;						\
+			if (_xh) {					\
+				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
+				_q++;					\
+			}						\
+		}							\
+		if (_r >= (d)) {					\
+			_r -= (d);					\
+			_q++;						\
+		}							\
+		(r) = _r;						\
+		(q) = _q;						\
+	} while (0)
+
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+
+/*-- mpihelp-mul.c --*/
+
+struct karatsuba_ctx {
+	struct karatsuba_ctx *next;
+	mpi_ptr_t tspace;
+	mpi_size_t tspace_size;
+	mpi_ptr_t tp;
+	mpi_size_t tp_size;
+};
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+		mpi_ptr_t tspace);
+
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+			       mpi_ptr_t up, mpi_size_t usize,
+			       mpi_ptr_t vp, mpi_size_t vsize,
+			       struct karatsuba_ctx *ctx);
+
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			 mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+			  mpi_ptr_t np, mpi_size_t nsize,
+			  mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			    mpi_limb_t divisor_limb);
+
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+
+/* Define stuff for longlong.h.  */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+
+#endif /*G10_MPI_INTERNAL_H */
diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c
new file mode 100644
index 000000000000..0951f9847745
--- /dev/null
+++ b/lib/mpi/mpi-inv.c
@@ -0,0 +1,187 @@
+/* mpi-inv.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Calculate the multiplicative inverse X of A mod N
+ * That is: Find the solution x for
+ *		1 = (a*x) mod n
+ */
+int mpi_invm(MPI x, const MPI a, const MPI n)
+{
+	/* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X)
+	 * modified according to Michael Penk's solution for Exercice 35
+	 * with further enhancement */
+	MPI u = NULL, v = NULL;
+	MPI u1 = NULL, u2 = NULL, u3 = NULL;
+	MPI v1 = NULL, v2 = NULL, v3 = NULL;
+	MPI t1 = NULL, t2 = NULL, t3 = NULL;
+	unsigned k;
+	int sign;
+	int odd = 0;
+	int rc = -ENOMEM;
+
+	if (mpi_copy(&u, a) < 0)
+		goto cleanup;
+	if (mpi_copy(&v, n) < 0)
+		goto cleanup;
+
+	for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) {
+		if (mpi_rshift(u, u, 1) < 0)
+			goto cleanup;
+		if (mpi_rshift(v, v, 1) < 0)
+			goto cleanup;
+	}
+	odd = mpi_test_bit(v, 0);
+
+	u1 = mpi_alloc_set_ui(1);
+	if (!u1)
+		goto cleanup;
+	if (!odd) {
+		u2 = mpi_alloc_set_ui(0);
+		if (!u2)
+			goto cleanup;
+	}
+	if (mpi_copy(&u3, u) < 0)
+		goto cleanup;
+	if (mpi_copy(&v1, v) < 0)
+		goto cleanup;
+	if (!odd) {
+		v2 = mpi_alloc(mpi_get_nlimbs(u));
+		if (!v2)
+			goto cleanup;
+		if (mpi_sub(v2, u1, u) < 0)
+			goto cleanup;	/* U is used as const 1 */
+	}
+	if (mpi_copy(&v3, v) < 0)
+		goto cleanup;
+	if (mpi_test_bit(u, 0)) {	/* u is odd */
+		t1 = mpi_alloc_set_ui(0);
+		if (!t1)
+			goto cleanup;
+		if (!odd) {
+			t2 = mpi_alloc_set_ui(1);
+			if (!t2)
+				goto cleanup;
+			t2->sign = 1;
+		}
+		if (mpi_copy(&t3, v) < 0)
+			goto cleanup;
+		t3->sign = !t3->sign;
+		goto Y4;
+	} else {
+		t1 = mpi_alloc_set_ui(1);
+		if (!t1)
+			goto cleanup;
+		if (!odd) {
+			t2 = mpi_alloc_set_ui(0);
+			if (!t2)
+				goto cleanup;
+		}
+		if (mpi_copy(&t3, u) < 0)
+			goto cleanup;
+	}
+	do {
+		do {
+			if (!odd) {
+				if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) {	/* one is odd */
+					if (mpi_add(t1, t1, v) < 0)
+						goto cleanup;
+					if (mpi_sub(t2, t2, u) < 0)
+						goto cleanup;
+				}
+				if (mpi_rshift(t1, t1, 1) < 0)
+					goto cleanup;
+				if (mpi_rshift(t2, t2, 1) < 0)
+					goto cleanup;
+				if (mpi_rshift(t3, t3, 1) < 0)
+					goto cleanup;
+			} else {
+				if (mpi_test_bit(t1, 0))
+					if (mpi_add(t1, t1, v) < 0)
+						goto cleanup;
+				if (mpi_rshift(t1, t1, 1) < 0)
+					goto cleanup;
+				if (mpi_rshift(t3, t3, 1) < 0)
+					goto cleanup;
+			}
+Y4:
+			;
+		} while (!mpi_test_bit(t3, 0));	/* while t3 is even */
+
+		if (!t3->sign) {
+			if (mpi_set(u1, t1) < 0)
+				goto cleanup;
+			if (!odd)
+				if (mpi_set(u2, t2) < 0)
+					goto cleanup;
+			if (mpi_set(u3, t3) < 0)
+				goto cleanup;
+		} else {
+			if (mpi_sub(v1, v, t1) < 0)
+				goto cleanup;
+			sign = u->sign;
+			u->sign = !u->sign;
+			if (!odd)
+				if (mpi_sub(v2, u, t2) < 0)
+					goto cleanup;
+			u->sign = sign;
+			sign = t3->sign;
+			t3->sign = !t3->sign;
+			if (mpi_set(v3, t3) < 0)
+				goto cleanup;
+			t3->sign = sign;
+		}
+		if (mpi_sub(t1, u1, v1) < 0)
+			goto cleanup;
+		if (!odd)
+			if (mpi_sub(t2, u2, v2) < 0)
+				goto cleanup;
+		if (mpi_sub(t3, u3, v3) < 0)
+			goto cleanup;
+		if (t1->sign) {
+			if (mpi_add(t1, t1, v) < 0)
+				goto cleanup;
+			if (!odd)
+				if (mpi_sub(t2, t2, u) < 0)
+					goto cleanup;
+		}
+	} while (mpi_cmp_ui(t3, 0));	/* while t3 != 0 */
+	/* mpi_lshift( u3, k ); */
+	rc = mpi_set(x, u1);
+
+cleanup:
+	mpi_free(u1);
+	mpi_free(v1);
+	mpi_free(t1);
+	if (!odd) {
+		mpi_free(u2);
+		mpi_free(v2);
+		mpi_free(t2);
+	}
+	mpi_free(u3);
+	mpi_free(v3);
+	mpi_free(t3);
+
+	mpi_free(u);
+	mpi_free(v);
+	return rc;
+}
diff --git a/lib/mpi/mpi-mpow.c b/lib/mpi/mpi-mpow.c
new file mode 100644
index 000000000000..7328d0d6c748
--- /dev/null
+++ b/lib/mpi/mpi-mpow.c
@@ -0,0 +1,134 @@
+/* mpi-mpow.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+static int build_index(const MPI *exparray, int k, int i, int t)
+{
+	int j, bitno;
+	int index = 0;
+
+	bitno = t - i;
+	for (j = k - 1; j >= 0; j--) {
+		index <<= 1;
+		if (mpi_test_bit(exparray[j], bitno))
+			index |= 1;
+	}
+	return index;
+}
+
+/****************
+ * RES = (BASE[0] ^ EXP[0]) *  (BASE[1] ^ EXP[1]) * ... * mod M
+ */
+int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI m)
+{
+	int rc = -ENOMEM;
+	int k;			/* number of elements */
+	int t;			/* bit size of largest exponent */
+	int i, j, idx;
+	MPI *G = NULL;		/* table with precomputed values of size 2^k */
+	MPI tmp = NULL;
+
+	for (k = 0; basearray[k]; k++)
+		;
+	if (!k) {
+		pr_emerg("mpi_mulpowm: assert(k) failed\n");
+		BUG();
+	}
+	for (t = 0, i = 0; (tmp = exparray[i]); i++) {
+		j = mpi_get_nbits(tmp);
+		if (j > t)
+			t = j;
+	}
+	if (i != k) {
+		pr_emerg("mpi_mulpowm: assert(i==k) failed\n");
+		BUG();
+	}
+	if (!t) {
+		pr_emerg("mpi_mulpowm: assert(t) failed\n");
+		BUG();
+	}
+	if (k >= 10) {
+		pr_emerg("mpi_mulpowm: assert(k<10) failed\n");
+		BUG();
+	}
+
+	G = kzalloc((1 << k) * sizeof *G, GFP_KERNEL);
+	if (!G)
+		goto err_out;
+
+	/* and calculate */
+	tmp = mpi_alloc(mpi_get_nlimbs(m) + 1);
+	if (!tmp)
+		goto nomem;
+	if (mpi_set_ui(res, 1) < 0)
+		goto nomem;
+	for (i = 1; i <= t; i++) {
+		if (mpi_mulm(tmp, res, res, m) < 0)
+			goto nomem;
+		idx = build_index(exparray, k, i, t);
+		if (!(idx >= 0 && idx < (1 << k))) {
+			pr_emerg("mpi_mulpowm: assert(idx >= 0 && idx < (1<<k)) failed\n");
+			BUG();
+		}
+		if (!G[idx]) {
+			if (!idx) {
+				G[0] = mpi_alloc_set_ui(1);
+				if (!G[0])
+					goto nomem;
+			} else {
+				for (j = 0; j < k; j++) {
+					if ((idx & (1 << j))) {
+						if (!G[idx]) {
+							if (mpi_copy
+							    (&G[idx],
+							     basearray[j]) < 0)
+								goto nomem;
+						} else {
+							if (mpi_mulm
+							    (G[idx], G[idx],
+							     basearray[j],
+							     m) < 0)
+								goto nomem;
+						}
+					}
+				}
+				if (!G[idx]) {
+					G[idx] = mpi_alloc(0);
+					if (!G[idx])
+						goto nomem;
+				}
+			}
+		}
+		if (mpi_mulm(res, tmp, G[idx], m) < 0)
+			goto nomem;
+	}
+
+	rc = 0;
+nomem:
+	/* cleanup */
+	mpi_free(tmp);
+	for (i = 0; i < (1 << k); i++)
+		mpi_free(G[i]);
+	kfree(G);
+err_out:
+	return rc;
+}
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
new file mode 100644
index 000000000000..1f3219e27292
--- /dev/null
+++ b/lib/mpi/mpi-mul.c
@@ -0,0 +1,194 @@
+/* mpi-mul.c  -  MPI functions
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+int mpi_mul_ui(MPI prod, MPI mult, unsigned long small_mult)
+{
+	mpi_size_t size, prod_size;
+	mpi_ptr_t prod_ptr;
+	mpi_limb_t cy;
+	int sign;
+
+	size = mult->nlimbs;
+	sign = mult->sign;
+
+	if (!size || !small_mult) {
+		prod->nlimbs = 0;
+		prod->sign = 0;
+		return 0;
+	}
+
+	prod_size = size + 1;
+	if (prod->alloced < prod_size)
+		if (mpi_resize(prod, prod_size) < 0)
+			return -ENOMEM;
+	prod_ptr = prod->d;
+
+	cy = mpihelp_mul_1(prod_ptr, mult->d, size, (mpi_limb_t) small_mult);
+	if (cy)
+		prod_ptr[size++] = cy;
+	prod->nlimbs = size;
+	prod->sign = sign;
+	return 0;
+}
+
+int mpi_mul_2exp(MPI w, MPI u, unsigned long cnt)
+{
+	mpi_size_t usize, wsize, limb_cnt;
+	mpi_ptr_t wp;
+	mpi_limb_t wlimb;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+
+	if (!usize) {
+		w->nlimbs = 0;
+		w->sign = 0;
+		return 0;
+	}
+
+	limb_cnt = cnt / BITS_PER_MPI_LIMB;
+	wsize = usize + limb_cnt + 1;
+	if (w->alloced < wsize)
+		if (mpi_resize(w, wsize) < 0)
+			return -ENOMEM;
+	wp = w->d;
+	wsize = usize + limb_cnt;
+	wsign = usign;
+
+	cnt %= BITS_PER_MPI_LIMB;
+	if (cnt) {
+		wlimb = mpihelp_lshift(wp + limb_cnt, u->d, usize, cnt);
+		if (wlimb) {
+			wp[wsize] = wlimb;
+			wsize++;
+		}
+	} else {
+		MPN_COPY_DECR(wp + limb_cnt, u->d, usize);
+	}
+
+	/* Zero all whole limbs at low end.  Do it here and not before calling
+	 * mpn_lshift, not to lose for U == W.  */
+	MPN_ZERO(wp, limb_cnt);
+
+	w->nlimbs = wsize;
+	w->sign = wsign;
+	return 0;
+}
+
+int mpi_mul(MPI w, MPI u, MPI v)
+{
+	int rc = -ENOMEM;
+	mpi_size_t usize, vsize, wsize;
+	mpi_ptr_t up, vp, wp;
+	mpi_limb_t cy;
+	int usign, vsign, sign_product;
+	int assign_wp = 0;
+	mpi_ptr_t tmp_limb = NULL;
+
+	if (u->nlimbs < v->nlimbs) {	/* Swap U and V. */
+		usize = v->nlimbs;
+		usign = v->sign;
+		up = v->d;
+		vsize = u->nlimbs;
+		vsign = u->sign;
+		vp = u->d;
+	} else {
+		usize = u->nlimbs;
+		usign = u->sign;
+		up = u->d;
+		vsize = v->nlimbs;
+		vsign = v->sign;
+		vp = v->d;
+	}
+	sign_product = usign ^ vsign;
+	wp = w->d;
+
+	/* Ensure W has space enough to store the result.  */
+	wsize = usize + vsize;
+	if (w->alloced < (size_t) wsize) {
+		if (wp == up || wp == vp) {
+			wp = mpi_alloc_limb_space(wsize);
+			if (!wp)
+				goto nomem;
+			assign_wp = 1;
+		} else {
+			if (mpi_resize(w, wsize) < 0)
+				goto nomem;
+			wp = w->d;
+		}
+	} else {		/* Make U and V not overlap with W.      */
+		if (wp == up) {
+			/* W and U are identical.  Allocate temporary space for U.      */
+			up = tmp_limb = mpi_alloc_limb_space(usize);
+			if (!up)
+				goto nomem;
+			/* Is V identical too?  Keep it identical with U.  */
+			if (wp == vp)
+				vp = up;
+			/* Copy to the temporary space.  */
+			MPN_COPY(up, wp, usize);
+		} else if (wp == vp) {
+			/* W and V are identical.  Allocate temporary space for V.      */
+			vp = tmp_limb = mpi_alloc_limb_space(vsize);
+			if (!vp)
+				goto nomem;
+			/* Copy to the temporary space.  */
+			MPN_COPY(vp, wp, vsize);
+		}
+	}
+
+	if (!vsize)
+		wsize = 0;
+	else {
+		if (mpihelp_mul(wp, up, usize, vp, vsize, &cy) < 0)
+			goto nomem;
+		wsize -= cy ? 0 : 1;
+	}
+
+	if (assign_wp)
+		mpi_assign_limb_space(w, wp, wsize);
+
+	w->nlimbs = wsize;
+	w->sign = sign_product;
+	rc = 0;
+nomem:
+	if (tmp_limb)
+		mpi_free_limb_space(tmp_limb);
+	return rc;
+}
+
+int mpi_mulm(MPI w, MPI u, MPI v, MPI m)
+{
+	if (mpi_mul(w, u, v) < 0)
+		return -ENOMEM;
+	return mpi_fdiv_r(w, w, m);
+}
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
new file mode 100644
index 000000000000..67f3e79af914
--- /dev/null
+++ b/lib/mpi/mpi-pow.c
@@ -0,0 +1,323 @@
+/* mpi-pow.c  -  MPI functions
+ *	Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
+{
+	mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+	mpi_ptr_t xp_marker = NULL;
+	mpi_ptr_t tspace = NULL;
+	mpi_ptr_t rp, ep, mp, bp;
+	mpi_size_t esize, msize, bsize, rsize;
+	int esign, msign, bsign, rsign;
+	mpi_size_t size;
+	int mod_shift_cnt;
+	int negative_result;
+	int assign_rp = 0;
+	mpi_size_t tsize = 0;	/* to avoid compiler warning */
+	/* fixme: we should check that the warning is void */
+	int rc = -ENOMEM;
+
+	esize = exp->nlimbs;
+	msize = mod->nlimbs;
+	size = 2 * msize;
+	esign = exp->sign;
+	msign = mod->sign;
+
+	rp = res->d;
+	ep = exp->d;
+
+	if (!msize)
+		return -EINVAL;
+
+	if (!esize) {
+		/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+		 * depending on if MOD equals 1.  */
+		rp[0] = 1;
+		res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+		res->sign = 0;
+		goto leave;
+	}
+
+	/* Normalize MOD (i.e. make its most significant bit set) as required by
+	 * mpn_divrem.  This will make the intermediate values in the calculation
+	 * slightly larger, but the correct result is obtained after a final
+	 * reduction using the original MOD value.  */
+	mp = mp_marker = mpi_alloc_limb_space(msize);
+	if (!mp)
+		goto enomem;
+	count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]);
+	if (mod_shift_cnt)
+		mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
+	else
+		MPN_COPY(mp, mod->d, msize);
+
+	bsize = base->nlimbs;
+	bsign = base->sign;
+	if (bsize > msize) {	/* The base is larger than the module. Reduce it. */
+		/* Allocate (BSIZE + 1) with space for remainder and quotient.
+		 * (The quotient is (bsize - msize + 1) limbs.)  */
+		bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
+		if (!bp)
+			goto enomem;
+		MPN_COPY(bp, base->d, bsize);
+		/* We don't care about the quotient, store it above the remainder,
+		 * at BP + MSIZE.  */
+		mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
+		bsize = msize;
+		/* Canonicalize the base, since we are going to multiply with it
+		 * quite a few times.  */
+		MPN_NORMALIZE(bp, bsize);
+	} else
+		bp = base->d;
+
+	if (!bsize) {
+		res->nlimbs = 0;
+		res->sign = 0;
+		goto leave;
+	}
+
+	if (res->alloced < size) {
+		/* We have to allocate more space for RES.  If any of the input
+		 * parameters are identical to RES, defer deallocation of the old
+		 * space.  */
+		if (rp == ep || rp == mp || rp == bp) {
+			rp = mpi_alloc_limb_space(size);
+			if (!rp)
+				goto enomem;
+			assign_rp = 1;
+		} else {
+			if (mpi_resize(res, size) < 0)
+				goto enomem;
+			rp = res->d;
+		}
+	} else {		/* Make BASE, EXP and MOD not overlap with RES.  */
+		if (rp == bp) {
+			/* RES and BASE are identical.  Allocate temp. space for BASE.  */
+			BUG_ON(bp_marker);
+			bp = bp_marker = mpi_alloc_limb_space(bsize);
+			if (!bp)
+				goto enomem;
+			MPN_COPY(bp, rp, bsize);
+		}
+		if (rp == ep) {
+			/* RES and EXP are identical.  Allocate temp. space for EXP.  */
+			ep = ep_marker = mpi_alloc_limb_space(esize);
+			if (!ep)
+				goto enomem;
+			MPN_COPY(ep, rp, esize);
+		}
+		if (rp == mp) {
+			/* RES and MOD are identical.  Allocate temporary space for MOD. */
+			BUG_ON(mp_marker);
+			mp = mp_marker = mpi_alloc_limb_space(msize);
+			if (!mp)
+				goto enomem;
+			MPN_COPY(mp, rp, msize);
+		}
+	}
+
+	MPN_COPY(rp, bp, bsize);
+	rsize = bsize;
+	rsign = bsign;
+
+	{
+		mpi_size_t i;
+		mpi_ptr_t xp;
+		int c;
+		mpi_limb_t e;
+		mpi_limb_t carry_limb;
+		struct karatsuba_ctx karactx;
+
+		xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
+		if (!xp)
+			goto enomem;
+
+		memset(&karactx, 0, sizeof karactx);
+		negative_result = (ep[0] & 1) && base->sign;
+
+		i = esize - 1;
+		e = ep[i];
+		count_leading_zeros(c, e);
+		e = (e << c) << 1;	/* shift the exp bits to the left, lose msb */
+		c = BITS_PER_MPI_LIMB - 1 - c;
+
+		/* Main loop.
+		 *
+		 * Make the result be pointed to alternately by XP and RP.  This
+		 * helps us avoid block copying, which would otherwise be necessary
+		 * with the overlap restrictions of mpihelp_divmod. With 50% probability
+		 * the result after this loop will be in the area originally pointed
+		 * by RP (==RES->d), and with 50% probability in the area originally
+		 * pointed to by XP.
+		 */
+
+		for (;;) {
+			while (c) {
+				mpi_ptr_t tp;
+				mpi_size_t xsize;
+
+				/*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */
+				if (rsize < KARATSUBA_THRESHOLD)
+					mpih_sqr_n_basecase(xp, rp, rsize);
+				else {
+					if (!tspace) {
+						tsize = 2 * rsize;
+						tspace =
+						    mpi_alloc_limb_space(tsize);
+						if (!tspace)
+							goto enomem;
+					} else if (tsize < (2 * rsize)) {
+						mpi_free_limb_space(tspace);
+						tsize = 2 * rsize;
+						tspace =
+						    mpi_alloc_limb_space(tsize);
+						if (!tspace)
+							goto enomem;
+					}
+					mpih_sqr_n(xp, rp, rsize, tspace);
+				}
+
+				xsize = 2 * rsize;
+				if (xsize > msize) {
+					mpihelp_divrem(xp + msize, 0, xp, xsize,
+						       mp, msize);
+					xsize = msize;
+				}
+
+				tp = rp;
+				rp = xp;
+				xp = tp;
+				rsize = xsize;
+
+				if ((mpi_limb_signed_t) e < 0) {
+					/*mpihelp_mul( xp, rp, rsize, bp, bsize ); */
+					if (bsize < KARATSUBA_THRESHOLD) {
+						mpi_limb_t tmp;
+						if (mpihelp_mul
+						    (xp, rp, rsize, bp, bsize,
+						     &tmp) < 0)
+							goto enomem;
+					} else {
+						if (mpihelp_mul_karatsuba_case
+						    (xp, rp, rsize, bp, bsize,
+						     &karactx) < 0)
+							goto enomem;
+					}
+
+					xsize = rsize + bsize;
+					if (xsize > msize) {
+						mpihelp_divrem(xp + msize, 0,
+							       xp, xsize, mp,
+							       msize);
+						xsize = msize;
+					}
+
+					tp = rp;
+					rp = xp;
+					xp = tp;
+					rsize = xsize;
+				}
+				e <<= 1;
+				c--;
+			}
+
+			i--;
+			if (i < 0)
+				break;
+			e = ep[i];
+			c = BITS_PER_MPI_LIMB;
+		}
+
+		/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+		 * steps.  Adjust the result by reducing it with the original MOD.
+		 *
+		 * Also make sure the result is put in RES->d (where it already
+		 * might be, see above).
+		 */
+		if (mod_shift_cnt) {
+			carry_limb =
+			    mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
+			rp = res->d;
+			if (carry_limb) {
+				rp[rsize] = carry_limb;
+				rsize++;
+			}
+		} else {
+			MPN_COPY(res->d, rp, rsize);
+			rp = res->d;
+		}
+
+		if (rsize >= msize) {
+			mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+			rsize = msize;
+		}
+
+		/* Remove any leading zero words from the result.  */
+		if (mod_shift_cnt)
+			mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
+		MPN_NORMALIZE(rp, rsize);
+
+		mpihelp_release_karatsuba_ctx(&karactx);
+	}
+
+	if (negative_result && rsize) {
+		if (mod_shift_cnt)
+			mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
+		mpihelp_sub(rp, mp, msize, rp, rsize);
+		rsize = msize;
+		rsign = msign;
+		MPN_NORMALIZE(rp, rsize);
+	}
+	res->nlimbs = rsize;
+	res->sign = rsign;
+
+leave:
+	rc = 0;
+enomem:
+	if (assign_rp)
+		mpi_assign_limb_space(res, rp, size);
+	if (mp_marker)
+		mpi_free_limb_space(mp_marker);
+	if (bp_marker)
+		mpi_free_limb_space(bp_marker);
+	if (ep_marker)
+		mpi_free_limb_space(ep_marker);
+	if (xp_marker)
+		mpi_free_limb_space(xp_marker);
+	if (tspace)
+		mpi_free_limb_space(tspace);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(mpi_powm);
diff --git a/lib/mpi/mpi-scan.c b/lib/mpi/mpi-scan.c
new file mode 100644
index 000000000000..b2da5ad96199
--- /dev/null
+++ b/lib/mpi/mpi-scan.c
@@ -0,0 +1,136 @@
+/* mpi-scan.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/****************
+ * Scan through an mpi and return byte for byte. a -1 is returned to indicate
+ * the end of the mpi. Scanning is done from the lsb to the msb, returned
+ * values are in the range of 0 .. 255.
+ *
+ * FIXME: This code is VERY ugly!
+ */
+int mpi_getbyte(const MPI a, unsigned idx)
+{
+	int i, j;
+	unsigned n;
+	mpi_ptr_t ap;
+	mpi_limb_t limb;
+
+	ap = a->d;
+	for (n = 0, i = 0; i < a->nlimbs; i++) {
+		limb = ap[i];
+		for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+			if (n == idx)
+				return (limb >> j * 8) & 0xff;
+	}
+	return -1;
+}
+
+/****************
+ * Put a value at position IDX into A. idx counts from lsb to msb
+ */
+void mpi_putbyte(MPI a, unsigned idx, int xc)
+{
+	int i, j;
+	unsigned n;
+	mpi_ptr_t ap;
+	mpi_limb_t limb, c;
+
+	c = xc & 0xff;
+	ap = a->d;
+	for (n = 0, i = 0; i < a->alloced; i++) {
+		limb = ap[i];
+		for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+			if (n == idx) {
+#if BYTES_PER_MPI_LIMB == 4
+				if (j == 0)
+					limb = (limb & 0xffffff00) | c;
+				else if (j == 1)
+					limb = (limb & 0xffff00ff) | (c << 8);
+				else if (j == 2)
+					limb = (limb & 0xff00ffff) | (c << 16);
+				else
+					limb = (limb & 0x00ffffff) | (c << 24);
+#elif BYTES_PER_MPI_LIMB == 8
+				if (j == 0)
+					limb = (limb & 0xffffffffffffff00) | c;
+				else if (j == 1)
+					limb =
+					    (limb & 0xffffffffffff00ff) | (c <<
+									   8);
+				else if (j == 2)
+					limb =
+					    (limb & 0xffffffffff00ffff) | (c <<
+									   16);
+				else if (j == 3)
+					limb =
+					    (limb & 0xffffffff00ffffff) | (c <<
+									   24);
+				else if (j == 4)
+					limb =
+					    (limb & 0xffffff00ffffffff) | (c <<
+									   32);
+				else if (j == 5)
+					limb =
+					    (limb & 0xffff00ffffffffff) | (c <<
+									   40);
+				else if (j == 6)
+					limb =
+					    (limb & 0xff00ffffffffffff) | (c <<
+									   48);
+				else
+					limb =
+					    (limb & 0x00ffffffffffffff) | (c <<
+									   56);
+#else
+#error please enhance this function, its ugly - i know.
+#endif
+				if (a->nlimbs <= i)
+					a->nlimbs = i + 1;
+				ap[i] = limb;
+				return;
+			}
+	}
+	log_bug("index out of range\n");
+}
+
+/****************
+ * Count the number of zerobits at the low end of A
+ */
+unsigned mpi_trailing_zeros(const MPI a)
+{
+	unsigned n, count = 0;
+
+	for (n = 0; n < a->nlimbs; n++) {
+		if (a->d[n]) {
+			unsigned nn;
+			mpi_limb_t alimb = a->d[n];
+
+			count_trailing_zeros(nn, alimb);
+			count += nn;
+			break;
+		}
+		count += BITS_PER_MPI_LIMB;
+	}
+	return count;
+
+}
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
new file mode 100644
index 000000000000..f26b41fcb48c
--- /dev/null
+++ b/lib/mpi/mpicoder.c
@@ -0,0 +1,280 @@
+/* mpicoder.c  -  Coder for the external representation of MPIs
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+#define MAX_EXTERN_MPI_BITS 16384
+
+MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
+{
+	const uint8_t *buffer = xbuffer;
+	int i, j;
+	unsigned nbits, nbytes, nlimbs, nread = 0;
+	mpi_limb_t a;
+	MPI val = NULL;
+
+	if (*ret_nread < 2)
+		goto leave;
+	nbits = buffer[0] << 8 | buffer[1];
+
+	if (nbits > MAX_EXTERN_MPI_BITS) {
+		pr_info("MPI: mpi too large (%u bits)\n", nbits);
+		goto leave;
+	}
+	buffer += 2;
+	nread = 2;
+
+	nbytes = (nbits + 7) / 8;
+	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+	val = mpi_alloc(nlimbs);
+	if (!val)
+		return NULL;
+	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+	i %= BYTES_PER_MPI_LIMB;
+	val->nbits = nbits;
+	j = val->nlimbs = nlimbs;
+	val->sign = 0;
+	for (; j > 0; j--) {
+		a = 0;
+		for (; i < BYTES_PER_MPI_LIMB; i++) {
+			if (++nread > *ret_nread) {
+				printk
+				    ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
+				     nread, *ret_nread);
+				goto leave;
+			}
+			a <<= 8;
+			a |= *buffer++;
+		}
+		i = 0;
+		val->d[j - 1] = a;
+	}
+
+leave:
+	*ret_nread = nread;
+	return val;
+}
+EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
+
+/****************
+ * Make an mpi from a character string.
+ */
+int mpi_fromstr(MPI val, const char *str)
+{
+	int hexmode = 0, sign = 0, prepend_zero = 0, i, j, c, c1, c2;
+	unsigned nbits, nbytes, nlimbs;
+	mpi_limb_t a;
+
+	if (*str == '-') {
+		sign = 1;
+		str++;
+	}
+	if (*str == '0' && str[1] == 'x')
+		hexmode = 1;
+	else
+		return -EINVAL;	/* other bases are not yet supported */
+	str += 2;
+
+	nbits = strlen(str) * 4;
+	if (nbits % 8)
+		prepend_zero = 1;
+	nbytes = (nbits + 7) / 8;
+	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+	if (val->alloced < nlimbs)
+		if (!mpi_resize(val, nlimbs))
+			return -ENOMEM;
+	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+	i %= BYTES_PER_MPI_LIMB;
+	j = val->nlimbs = nlimbs;
+	val->sign = sign;
+	for (; j > 0; j--) {
+		a = 0;
+		for (; i < BYTES_PER_MPI_LIMB; i++) {
+			if (prepend_zero) {
+				c1 = '0';
+				prepend_zero = 0;
+			} else
+				c1 = *str++;
+			assert(c1);
+			c2 = *str++;
+			assert(c2);
+			if (c1 >= '0' && c1 <= '9')
+				c = c1 - '0';
+			else if (c1 >= 'a' && c1 <= 'f')
+				c = c1 - 'a' + 10;
+			else if (c1 >= 'A' && c1 <= 'F')
+				c = c1 - 'A' + 10;
+			else {
+				mpi_clear(val);
+				return 1;
+			}
+			c <<= 4;
+			if (c2 >= '0' && c2 <= '9')
+				c |= c2 - '0';
+			else if (c2 >= 'a' && c2 <= 'f')
+				c |= c2 - 'a' + 10;
+			else if (c2 >= 'A' && c2 <= 'F')
+				c |= c2 - 'A' + 10;
+			else {
+				mpi_clear(val);
+				return 1;
+			}
+			a <<= 8;
+			a |= c;
+		}
+		i = 0;
+
+		val->d[j - 1] = a;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_fromstr);
+
+/****************
+ * Return an allocated buffer with the MPI (msb first).
+ * NBYTES receives the length of this buffer. Caller must free the
+ * return string (This function does return a 0 byte buffer with NBYTES
+ * set to zero if the value of A is zero. If sign is not NULL, it will
+ * be set to the sign of the A.
+ */
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
+{
+	uint8_t *p, *buffer;
+	mpi_limb_t alimb;
+	int i;
+	unsigned int n;
+
+	if (sign)
+		*sign = a->sign;
+	*nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB;
+	if (!n)
+		n++;		/* avoid zero length allocation */
+	p = buffer = kmalloc(n, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	for (i = a->nlimbs - 1; i >= 0; i--) {
+		alimb = a->d[i];
+#if BYTES_PER_MPI_LIMB == 4
+		*p++ = alimb >> 24;
+		*p++ = alimb >> 16;
+		*p++ = alimb >> 8;
+		*p++ = alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+		*p++ = alimb >> 56;
+		*p++ = alimb >> 48;
+		*p++ = alimb >> 40;
+		*p++ = alimb >> 32;
+		*p++ = alimb >> 24;
+		*p++ = alimb >> 16;
+		*p++ = alimb >> 8;
+		*p++ = alimb;
+#else
+#error please implement for this limb size.
+#endif
+	}
+
+	/* this is sub-optimal but we need to do the shift operation
+	 * because the caller has to free the returned buffer */
+	for (p = buffer; !*p && *nbytes; p++, --*nbytes)
+		;
+	if (p != buffer)
+		memmove(buffer, p, *nbytes);
+
+	return buffer;
+}
+EXPORT_SYMBOL_GPL(mpi_get_buffer);
+
+/****************
+ * Use BUFFER to update MPI.
+ */
+int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
+{
+	const uint8_t *buffer = xbuffer, *p;
+	mpi_limb_t alimb;
+	int nlimbs;
+	int i;
+
+	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+	if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
+		return -ENOMEM;
+	a->sign = sign;
+
+	for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
+#if BYTES_PER_MPI_LIMB == 4
+		alimb = (mpi_limb_t) *p--;
+		alimb |= (mpi_limb_t) *p-- << 8;
+		alimb |= (mpi_limb_t) *p-- << 16;
+		alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+		alimb = (mpi_limb_t) *p--;
+		alimb |= (mpi_limb_t) *p-- << 8;
+		alimb |= (mpi_limb_t) *p-- << 16;
+		alimb |= (mpi_limb_t) *p-- << 24;
+		alimb |= (mpi_limb_t) *p-- << 32;
+		alimb |= (mpi_limb_t) *p-- << 40;
+		alimb |= (mpi_limb_t) *p-- << 48;
+		alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+		a->d[i++] = alimb;
+	}
+	if (p >= buffer) {
+#if BYTES_PER_MPI_LIMB == 4
+		alimb = *p--;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 8;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 16;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+		alimb = (mpi_limb_t) *p--;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 8;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 16;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 24;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 32;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 40;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 48;
+		if (p >= buffer)
+			alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+		a->d[i++] = alimb;
+	}
+	a->nlimbs = i;
+
+	if (i != nlimbs) {
+		pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
+		       nlimbs);
+		BUG();
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_set_buffer);
diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c
new file mode 100644
index 000000000000..b2fd39677f1b
--- /dev/null
+++ b/lib/mpi/mpih-cmp.c
@@ -0,0 +1,56 @@
+/* mpihelp-sub.c  -  MPI helper functions
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
+{
+	mpi_size_t i;
+	mpi_limb_t op1_word, op2_word;
+
+	for (i = size - 1; i >= 0; i--) {
+		op1_word = op1_ptr[i];
+		op2_word = op2_ptr[i];
+		if (op1_word != op2_word)
+			goto diff;
+	}
+	return 0;
+
+diff:
+	/* This can *not* be simplified to
+	 *   op2_word - op2_word
+	 * since that expression might give signed overflow.  */
+	return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
new file mode 100644
index 000000000000..cde1aaec18da
--- /dev/null
+++ b/lib/mpi/mpih-div.c
@@ -0,0 +1,545 @@
+/* mpihelp-div.c  -  MPI helper functions
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+ * here (not udiv_qrnnd).
+ */
+
+mpi_limb_t
+mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+	      mpi_limb_t divisor_limb)
+{
+	mpi_size_t i;
+	mpi_limb_t n1, n0, r;
+	int dummy;
+
+	/* Botch: Should this be handled at all?  Rely on callers?  */
+	if (!dividend_size)
+		return 0;
+
+	/* If multiplication is much faster than division, and the
+	 * dividend is large, pre-invert the divisor, and use
+	 * only multiplications in the inner loop.
+	 *
+	 * This test should be read:
+	 *   Does it ever help to use udiv_qrnnd_preinv?
+	 *     && Does what we save compensate for the inversion overhead?
+	 */
+	if (UDIV_TIME > (2 * UMUL_TIME + 6)
+	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+		int normalization_steps;
+
+		count_leading_zeros(normalization_steps, divisor_limb);
+		if (normalization_steps) {
+			mpi_limb_t divisor_limb_inverted;
+
+			divisor_limb <<= normalization_steps;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 *
+			 * Special case for DIVISOR_LIMB == 100...000.
+			 */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t) 0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+					   -divisor_limb, 0, divisor_limb);
+
+			n1 = dividend_ptr[dividend_size - 1];
+			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+			/* Possible optimization:
+			 * if (r == 0
+			 * && divisor_limb > ((n1 << normalization_steps)
+			 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+			 * ...one division less...
+			 */
+			for (i = dividend_size - 2; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(dummy, r, r,
+						  ((n1 << normalization_steps)
+						   | (n0 >>
+						      (BITS_PER_MPI_LIMB -
+						       normalization_steps))),
+						  divisor_limb,
+						  divisor_limb_inverted);
+				n1 = n0;
+			}
+			UDIV_QRNND_PREINV(dummy, r, r,
+					  n1 << normalization_steps,
+					  divisor_limb, divisor_limb_inverted);
+			return r >> normalization_steps;
+		} else {
+			mpi_limb_t divisor_limb_inverted;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 *
+			 * Special case for DIVISOR_LIMB == 100...000.
+			 */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t) 0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+					   -divisor_limb, 0, divisor_limb);
+
+			i = dividend_size - 1;
+			r = dividend_ptr[i];
+
+			if (r >= divisor_limb)
+				r = 0;
+			else
+				i--;
+
+			for (; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(dummy, r, r,
+						  n0, divisor_limb,
+						  divisor_limb_inverted);
+			}
+			return r;
+		}
+	} else {
+		if (UDIV_NEEDS_NORMALIZATION) {
+			int normalization_steps;
+
+			count_leading_zeros(normalization_steps, divisor_limb);
+			if (normalization_steps) {
+				divisor_limb <<= normalization_steps;
+
+				n1 = dividend_ptr[dividend_size - 1];
+				r = n1 >> (BITS_PER_MPI_LIMB -
+					   normalization_steps);
+
+				/* Possible optimization:
+				 * if (r == 0
+				 * && divisor_limb > ((n1 << normalization_steps)
+				 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+				 * ...one division less...
+				 */
+				for (i = dividend_size - 2; i >= 0; i--) {
+					n0 = dividend_ptr[i];
+					udiv_qrnnd(dummy, r, r,
+						   ((n1 << normalization_steps)
+						    | (n0 >>
+						       (BITS_PER_MPI_LIMB -
+							normalization_steps))),
+						   divisor_limb);
+					n1 = n0;
+				}
+				udiv_qrnnd(dummy, r, r,
+					   n1 << normalization_steps,
+					   divisor_limb);
+				return r >> normalization_steps;
+			}
+		}
+		/* No normalization needed, either because udiv_qrnnd doesn't require
+		 * it, or because DIVISOR_LIMB is already normalized.  */
+		i = dividend_size - 1;
+		r = dividend_ptr[i];
+
+		if (r >= divisor_limb)
+			r = 0;
+		else
+			i--;
+
+		for (; i >= 0; i--) {
+			n0 = dividend_ptr[i];
+			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
+		}
+		return r;
+	}
+}
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ *    QP + DSIZE >= NP must hold true.	(This means that it's
+ *    possible to put the quotient in the high part of NUM, right after the
+ *    remainder in NUM.
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+
+mpi_limb_t
+mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+	       mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
+{
+	mpi_limb_t most_significant_q_limb = 0;
+
+	switch (dsize) {
+	case 0:
+		/* We are asked to divide by zero, so go ahead and do it!  (To make
+		   the compiler not remove this statement, return the value.)  */
+		/*
+		 * existing clients of this function have been modified
+		 * not to call it with dsize == 0, so this should not happen
+		 */
+		return 1 / dsize;
+
+	case 1:
+		{
+			mpi_size_t i;
+			mpi_limb_t n1;
+			mpi_limb_t d;
+
+			d = dp[0];
+			n1 = np[nsize - 1];
+
+			if (n1 >= d) {
+				n1 -= d;
+				most_significant_q_limb = 1;
+			}
+
+			qp += qextra_limbs;
+			for (i = nsize - 2; i >= 0; i--)
+				udiv_qrnnd(qp[i], n1, n1, np[i], d);
+			qp -= qextra_limbs;
+
+			for (i = qextra_limbs - 1; i >= 0; i--)
+				udiv_qrnnd(qp[i], n1, n1, 0, d);
+
+			np[0] = n1;
+		}
+		break;
+
+	case 2:
+		{
+			mpi_size_t i;
+			mpi_limb_t n1, n0, n2;
+			mpi_limb_t d1, d0;
+
+			np += nsize - 2;
+			d1 = dp[1];
+			d0 = dp[0];
+			n1 = np[1];
+			n0 = np[0];
+
+			if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
+				sub_ddmmss(n1, n0, n1, n0, d1, d0);
+				most_significant_q_limb = 1;
+			}
+
+			for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
+				mpi_limb_t q;
+				mpi_limb_t r;
+
+				if (i >= qextra_limbs)
+					np--;
+				else
+					np[0] = 0;
+
+				if (n1 == d1) {
+					/* Q should be either 111..111 or 111..110.  Need special
+					 * treatment of this rare case as normal division would
+					 * give overflow.  */
+					q = ~(mpi_limb_t) 0;
+
+					r = n0 + d1;
+					if (r < d1) {	/* Carry in the addition? */
+						add_ssaaaa(n1, n0, r - d0,
+							   np[0], 0, d0);
+						qp[i] = q;
+						continue;
+					}
+					n1 = d0 - (d0 != 0 ? 1 : 0);
+					n0 = -d0;
+				} else {
+					udiv_qrnnd(q, r, n1, n0, d1);
+					umul_ppmm(n1, n0, d0, q);
+				}
+
+				n2 = np[0];
+q_test:
+				if (n1 > r || (n1 == r && n0 > n2)) {
+					/* The estimated Q was too large.  */
+					q--;
+					sub_ddmmss(n1, n0, n1, n0, 0, d0);
+					r += d1;
+					if (r >= d1)	/* If not carry, test Q again.  */
+						goto q_test;
+				}
+
+				qp[i] = q;
+				sub_ddmmss(n1, n0, r, n2, n1, n0);
+			}
+			np[1] = n1;
+			np[0] = n0;
+		}
+		break;
+
+	default:
+		{
+			mpi_size_t i;
+			mpi_limb_t dX, d1, n0;
+
+			np += nsize - dsize;
+			dX = dp[dsize - 1];
+			d1 = dp[dsize - 2];
+			n0 = np[dsize - 1];
+
+			if (n0 >= dX) {
+				if (n0 > dX
+				    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
+					mpihelp_sub_n(np, np, dp, dsize);
+					n0 = np[dsize - 1];
+					most_significant_q_limb = 1;
+				}
+			}
+
+			for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
+				mpi_limb_t q;
+				mpi_limb_t n1, n2;
+				mpi_limb_t cy_limb;
+
+				if (i >= qextra_limbs) {
+					np--;
+					n2 = np[dsize];
+				} else {
+					n2 = np[dsize - 1];
+					MPN_COPY_DECR(np + 1, np, dsize - 1);
+					np[0] = 0;
+				}
+
+				if (n0 == dX) {
+					/* This might over-estimate q, but it's probably not worth
+					 * the extra code here to find out.  */
+					q = ~(mpi_limb_t) 0;
+				} else {
+					mpi_limb_t r;
+
+					udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+					umul_ppmm(n1, n0, d1, q);
+
+					while (n1 > r
+					       || (n1 == r
+						   && n0 > np[dsize - 2])) {
+						q--;
+						r += dX;
+						if (r < dX)	/* I.e. "carry in previous addition?" */
+							break;
+						n1 -= n0 < d1;
+						n0 -= d1;
+					}
+				}
+
+				/* Possible optimization: We already have (q * n0) and (1 * n1)
+				 * after the calculation of q.  Taking advantage of that, we
+				 * could make this loop make two iterations less.  */
+				cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+
+				if (n2 != cy_limb) {
+					mpihelp_add_n(np, np, dp, dsize);
+					q--;
+				}
+
+				qp[i] = q;
+				n0 = np[dsize - 1];
+			}
+		}
+	}
+
+	return most_significant_q_limb;
+}
+
+/****************
+ * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ * Return the single-limb remainder.
+ * There are no constraints on the value of the divisor.
+ *
+ * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */
+
+mpi_limb_t
+mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+		 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+		 mpi_limb_t divisor_limb)
+{
+	mpi_size_t i;
+	mpi_limb_t n1, n0, r;
+	int dummy;
+
+	if (!dividend_size)
+		return 0;
+
+	/* If multiplication is much faster than division, and the
+	 * dividend is large, pre-invert the divisor, and use
+	 * only multiplications in the inner loop.
+	 *
+	 * This test should be read:
+	 * Does it ever help to use udiv_qrnnd_preinv?
+	 * && Does what we save compensate for the inversion overhead?
+	 */
+	if (UDIV_TIME > (2 * UMUL_TIME + 6)
+	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+		int normalization_steps;
+
+		count_leading_zeros(normalization_steps, divisor_limb);
+		if (normalization_steps) {
+			mpi_limb_t divisor_limb_inverted;
+
+			divisor_limb <<= normalization_steps;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 */
+			/* Special case for DIVISOR_LIMB == 100...000.  */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t) 0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+					   -divisor_limb, 0, divisor_limb);
+
+			n1 = dividend_ptr[dividend_size - 1];
+			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+			/* Possible optimization:
+			 * if (r == 0
+			 * && divisor_limb > ((n1 << normalization_steps)
+			 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+			 * ...one division less...
+			 */
+			for (i = dividend_size - 2; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
+						  ((n1 << normalization_steps)
+						   | (n0 >>
+						      (BITS_PER_MPI_LIMB -
+						       normalization_steps))),
+						  divisor_limb,
+						  divisor_limb_inverted);
+				n1 = n0;
+			}
+			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
+					  n1 << normalization_steps,
+					  divisor_limb, divisor_limb_inverted);
+			return r >> normalization_steps;
+		} else {
+			mpi_limb_t divisor_limb_inverted;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 */
+			/* Special case for DIVISOR_LIMB == 100...000.  */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t) 0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+					   -divisor_limb, 0, divisor_limb);
+
+			i = dividend_size - 1;
+			r = dividend_ptr[i];
+
+			if (r >= divisor_limb)
+				r = 0;
+			else
+				quot_ptr[i--] = 0;
+
+			for (; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
+						  n0, divisor_limb,
+						  divisor_limb_inverted);
+			}
+			return r;
+		}
+	} else {
+		if (UDIV_NEEDS_NORMALIZATION) {
+			int normalization_steps;
+
+			count_leading_zeros(normalization_steps, divisor_limb);
+			if (normalization_steps) {
+				divisor_limb <<= normalization_steps;
+
+				n1 = dividend_ptr[dividend_size - 1];
+				r = n1 >> (BITS_PER_MPI_LIMB -
+					   normalization_steps);
+
+				/* Possible optimization:
+				 * if (r == 0
+				 * && divisor_limb > ((n1 << normalization_steps)
+				 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+				 * ...one division less...
+				 */
+				for (i = dividend_size - 2; i >= 0; i--) {
+					n0 = dividend_ptr[i];
+					udiv_qrnnd(quot_ptr[i + 1], r, r,
+						   ((n1 << normalization_steps)
+						    | (n0 >>
+						       (BITS_PER_MPI_LIMB -
+							normalization_steps))),
+						   divisor_limb);
+					n1 = n0;
+				}
+				udiv_qrnnd(quot_ptr[0], r, r,
+					   n1 << normalization_steps,
+					   divisor_limb);
+				return r >> normalization_steps;
+			}
+		}
+		/* No normalization needed, either because udiv_qrnnd doesn't require
+		 * it, or because DIVISOR_LIMB is already normalized.  */
+		i = dividend_size - 1;
+		r = dividend_ptr[i];
+
+		if (r >= divisor_limb)
+			r = 0;
+		else
+			quot_ptr[i--] = 0;
+
+		for (; i >= 0; i--) {
+			n0 = dividend_ptr[i];
+			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
+		}
+		return r;
+	}
+}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c
new file mode 100644
index 000000000000..c69c5eef233b
--- /dev/null
+++ b/lib/mpi/mpih-mul.c
@@ -0,0 +1,527 @@
+/* mpihelp-mul.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mul_n_basecase(prodp, up, vp, size);	\
+		else						\
+			mul_n(prodp, up, vp, size, tspace);	\
+	} while (0);
+
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mpih_sqr_n_basecase(prodp, up, size);	\
+		else						\
+			mpih_sqr_n(prodp, up, size, tspace);	\
+	} while (0);
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+ * always stored.  Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication.  All multiplies rely
+ * on this, both small and huge.  Small ones arrive here immediately.  Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+	mpi_size_t i;
+	mpi_limb_t cy;
+	mpi_limb_t v_limb;
+
+	/* Multiply by the first limb in V separately, as the result can be
+	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+	v_limb = vp[0];
+	if (v_limb <= 1) {
+		if (v_limb == 1)
+			MPN_COPY(prodp, up, size);
+		else
+			MPN_ZERO(prodp, size);
+		cy = 0;
+	} else
+		cy = mpihelp_mul_1(prodp, up, size, v_limb);
+
+	prodp[size] = cy;
+	prodp++;
+
+	/* For each iteration in the outer loop, multiply one limb from
+	 * U with one limb from V, and add it to PROD.  */
+	for (i = 1; i < size; i++) {
+		v_limb = vp[i];
+		if (v_limb <= 1) {
+			cy = 0;
+			if (v_limb == 1)
+				cy = mpihelp_add_n(prodp, prodp, up, size);
+		} else
+			cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+		prodp[size] = cy;
+		prodp++;
+	}
+
+	return cy;
+}
+
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+		mpi_size_t size, mpi_ptr_t tspace)
+{
+	if (size & 1) {
+		/* The size is odd, and the code below doesn't handle that.
+		 * Multiply the least significant (size - 1) limbs with a recursive
+		 * call, and handle the most significant limb of S1 and S2
+		 * separately.
+		 * A slightly faster way to do this would be to make the Karatsuba
+		 * code below behave as if the size were even, and let it check for
+		 * odd size in the end.  I.e., in essence move this code to the end.
+		 * Doing so would save us a recursive call, and potentially make the
+		 * stack grow a lot less.
+		 */
+		mpi_size_t esize = size - 1;	/* even size */
+		mpi_limb_t cy_limb;
+
+		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+		prodp[esize + esize] = cy_limb;
+		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+		prodp[esize + size] = cy_limb;
+	} else {
+		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+		 *
+		 * Split U in two pieces, U1 and U0, such that
+		 * U = U0 + U1*(B**n),
+		 * and V in V1 and V0, such that
+		 * V = V0 + V1*(B**n).
+		 *
+		 * UV is then computed recursively using the identity
+		 *
+		 *        2n   n          n                     n
+		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+		 *                1 1        1  0   0  1              0 0
+		 *
+		 * Where B = 2**BITS_PER_MP_LIMB.
+		 */
+		mpi_size_t hsize = size >> 1;
+		mpi_limb_t cy;
+		int negflg;
+
+		/* Product H.      ________________  ________________
+		 *                |_____U1 x V1____||____U0 x V0_____|
+		 * Put result in upper part of PROD and pass low part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+				  tspace);
+
+		/* Product M.      ________________
+		 *                |_(U1-U0)(V0-V1)_|
+		 */
+		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+			mpihelp_sub_n(prodp, up + hsize, up, hsize);
+			negflg = 0;
+		} else {
+			mpihelp_sub_n(prodp, up, up + hsize, hsize);
+			negflg = 1;
+		}
+		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+			negflg ^= 1;
+		} else {
+			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+			/* No change of NEGFLG.  */
+		}
+		/* Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+				  tspace + size);
+
+		/* Add/copy product H. */
+		MPN_COPY(prodp + hsize, prodp + size, hsize);
+		cy = mpihelp_add_n(prodp + size, prodp + size,
+				   prodp + size + hsize, hsize);
+
+		/* Add product M (if NEGFLG M is a negative number) */
+		if (negflg)
+			cy -=
+			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+					  size);
+		else
+			cy +=
+			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+					  size);
+
+		/* Product L.      ________________  ________________
+		 *                |________________||____U0 x V0_____|
+		 * Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+
+		/* Add/copy Product L (twice) */
+
+		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+		if (cy)
+			mpihelp_add_1(prodp + hsize + size,
+				      prodp + hsize + size, hsize, cy);
+
+		MPN_COPY(prodp, tspace, hsize);
+		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+				   hsize);
+		if (cy)
+			mpihelp_add_1(prodp + size, prodp + size, size, 1);
+	}
+}
+
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+	mpi_size_t i;
+	mpi_limb_t cy_limb;
+	mpi_limb_t v_limb;
+
+	/* Multiply by the first limb in V separately, as the result can be
+	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+	v_limb = up[0];
+	if (v_limb <= 1) {
+		if (v_limb == 1)
+			MPN_COPY(prodp, up, size);
+		else
+			MPN_ZERO(prodp, size);
+		cy_limb = 0;
+	} else
+		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+
+	prodp[size] = cy_limb;
+	prodp++;
+
+	/* For each iteration in the outer loop, multiply one limb from
+	 * U with one limb from V, and add it to PROD.  */
+	for (i = 1; i < size; i++) {
+		v_limb = up[i];
+		if (v_limb <= 1) {
+			cy_limb = 0;
+			if (v_limb == 1)
+				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+		} else
+			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+		prodp[size] = cy_limb;
+		prodp++;
+	}
+}
+
+void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+	if (size & 1) {
+		/* The size is odd, and the code below doesn't handle that.
+		 * Multiply the least significant (size - 1) limbs with a recursive
+		 * call, and handle the most significant limb of S1 and S2
+		 * separately.
+		 * A slightly faster way to do this would be to make the Karatsuba
+		 * code below behave as if the size were even, and let it check for
+		 * odd size in the end.  I.e., in essence move this code to the end.
+		 * Doing so would save us a recursive call, and potentially make the
+		 * stack grow a lot less.
+		 */
+		mpi_size_t esize = size - 1;	/* even size */
+		mpi_limb_t cy_limb;
+
+		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+		prodp[esize + esize] = cy_limb;
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+
+		prodp[esize + size] = cy_limb;
+	} else {
+		mpi_size_t hsize = size >> 1;
+		mpi_limb_t cy;
+
+		/* Product H.      ________________  ________________
+		 *                |_____U1 x U1____||____U0 x U0_____|
+		 * Put result in upper part of PROD and pass low part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+
+		/* Product M.      ________________
+		 *                |_(U1-U0)(U0-U1)_|
+		 */
+		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+			mpihelp_sub_n(prodp, up + hsize, up, hsize);
+		else
+			mpihelp_sub_n(prodp, up, up + hsize, hsize);
+
+		/* Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.  */
+		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+
+		/* Add/copy product H  */
+		MPN_COPY(prodp + hsize, prodp + size, hsize);
+		cy = mpihelp_add_n(prodp + size, prodp + size,
+				   prodp + size + hsize, hsize);
+
+		/* Add product M (if NEGFLG M is a negative number).  */
+		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+
+		/* Product L.      ________________  ________________
+		 *                |________________||____U0 x U0_____|
+		 * Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.  */
+		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+
+		/* Add/copy Product L (twice).  */
+		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+		if (cy)
+			mpihelp_add_1(prodp + hsize + size,
+				      prodp + hsize + size, hsize, cy);
+
+		MPN_COPY(prodp, tspace, hsize);
+		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+				   hsize);
+		if (cy)
+			mpihelp_add_1(prodp + size, prodp + size, size, 1);
+	}
+}
+
+/* This should be made into an inline function in gmp.h.  */
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+	if (up == vp) {
+		if (size < KARATSUBA_THRESHOLD)
+			mpih_sqr_n_basecase(prodp, up, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			if (!tspace)
+				return -ENOMEM;
+			mpih_sqr_n(prodp, up, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	} else {
+		if (size < KARATSUBA_THRESHOLD)
+			mul_n_basecase(prodp, up, vp, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			if (!tspace)
+				return -ENOMEM;
+			mul_n(prodp, up, vp, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	}
+
+	return 0;
+}
+
+int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+			   mpi_ptr_t up, mpi_size_t usize,
+			   mpi_ptr_t vp, mpi_size_t vsize,
+			   struct karatsuba_ctx *ctx)
+{
+	mpi_limb_t cy;
+
+	if (!ctx->tspace || ctx->tspace_size < vsize) {
+		if (ctx->tspace)
+			mpi_free_limb_space(ctx->tspace);
+		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+		if (!ctx->tspace)
+			return -ENOMEM;
+		ctx->tspace_size = vsize;
+	}
+
+	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+
+	prodp += vsize;
+	up += vsize;
+	usize -= vsize;
+	if (usize >= vsize) {
+		if (!ctx->tp || ctx->tp_size < vsize) {
+			if (ctx->tp)
+				mpi_free_limb_space(ctx->tp);
+			ctx->tp = mpi_alloc_limb_space(2 * vsize);
+			if (!ctx->tp) {
+				if (ctx->tspace)
+					mpi_free_limb_space(ctx->tspace);
+				ctx->tspace = NULL;
+				return -ENOMEM;
+			}
+			ctx->tp_size = vsize;
+		}
+
+		do {
+			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+				      cy);
+			prodp += vsize;
+			up += vsize;
+			usize -= vsize;
+		} while (usize >= vsize);
+	}
+
+	if (usize) {
+		if (usize < KARATSUBA_THRESHOLD) {
+			mpi_limb_t tmp;
+			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+			    < 0)
+				return -ENOMEM;
+		} else {
+			if (!ctx->next) {
+				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
+				if (!ctx->next)
+					return -ENOMEM;
+			}
+			if (mpihelp_mul_karatsuba_case(ctx->tspace,
+						       vp, vsize,
+						       up, usize,
+						       ctx->next) < 0)
+				return -ENOMEM;
+		}
+
+		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+	}
+
+	return 0;
+}
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+	struct karatsuba_ctx *ctx2;
+
+	if (ctx->tp)
+		mpi_free_limb_space(ctx->tp);
+	if (ctx->tspace)
+		mpi_free_limb_space(ctx->tspace);
+	for (ctx = ctx->next; ctx; ctx = ctx2) {
+		ctx2 = ctx->next;
+		if (ctx->tp)
+			mpi_free_limb_space(ctx->tp);
+		if (ctx->tspace)
+			mpi_free_limb_space(ctx->tspace);
+		kfree(ctx);
+	}
+}
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+ * operands are normalized.  Return the most significant limb of the
+ * result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before finished
+ * with U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ */
+
+int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+	mpi_limb_t cy;
+	struct karatsuba_ctx ctx;
+
+	if (vsize < KARATSUBA_THRESHOLD) {
+		mpi_size_t i;
+		mpi_limb_t v_limb;
+
+		if (!vsize) {
+			*_result = 0;
+			return 0;
+		}
+
+		/* Multiply by the first limb in V separately, as the result can be
+		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+		v_limb = vp[0];
+		if (v_limb <= 1) {
+			if (v_limb == 1)
+				MPN_COPY(prodp, up, usize);
+			else
+				MPN_ZERO(prodp, usize);
+			cy = 0;
+		} else
+			cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+
+		prodp[usize] = cy;
+		prodp++;
+
+		/* For each iteration in the outer loop, multiply one limb from
+		 * U with one limb from V, and add it to PROD.  */
+		for (i = 1; i < vsize; i++) {
+			v_limb = vp[i];
+			if (v_limb <= 1) {
+				cy = 0;
+				if (v_limb == 1)
+					cy = mpihelp_add_n(prodp, prodp, up,
+							   usize);
+			} else
+				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+
+			prodp[usize] = cy;
+			prodp++;
+		}
+
+		*_result = cy;
+		return 0;
+	}
+
+	memset(&ctx, 0, sizeof ctx);
+	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
+		return -ENOMEM;
+	mpihelp_release_karatsuba_ctx(&ctx);
+	*_result = *prod_endp;
+	return 0;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
new file mode 100644
index 000000000000..26e4ed31e256
--- /dev/null
+++ b/lib/mpi/mpiutil.c
@@ -0,0 +1,211 @@
+/* mpiutil.ac  -  Utility functions for MPI
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Note:  It was a bad idea to use the number of limbs to allocate
+ *	  because on a alpha the limbs are large but we normally need
+ *	  integers of n bits - So we should chnage this to bits (or bytes).
+ *
+ *	  But mpi_alloc is used in a lot of places :-)
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+	MPI a;
+
+	a = kmalloc(sizeof *a, GFP_KERNEL);
+	if (!a)
+		return a;
+
+	if (nlimbs) {
+		a->d = mpi_alloc_limb_space(nlimbs);
+		if (!a->d) {
+			kfree(a);
+			return NULL;
+		}
+	} else {
+		a->d = NULL;
+	}
+
+	a->alloced = nlimbs;
+	a->nlimbs = 0;
+	a->sign = 0;
+	a->flags = 0;
+	a->nbits = 0;
+	return a;
+}
+EXPORT_SYMBOL_GPL(mpi_alloc);
+
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+	size_t len = nlimbs * sizeof(mpi_limb_t);
+
+	if (!len)
+		return NULL;
+
+	return kmalloc(len, GFP_KERNEL);
+}
+
+void mpi_free_limb_space(mpi_ptr_t a)
+{
+	if (!a)
+		return;
+
+	kfree(a);
+}
+
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+	mpi_free_limb_space(a->d);
+	a->d = ap;
+	a->alloced = nlimbs;
+}
+
+/****************
+ * Resize the array of A to NLIMBS. the additional space is cleared
+ * (set to 0) [done by m_realloc()]
+ */
+int mpi_resize(MPI a, unsigned nlimbs)
+{
+	void *p;
+
+	if (nlimbs <= a->alloced)
+		return 0;	/* no need to do it */
+
+	if (a->d) {
+		p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+		kfree(a->d);
+		a->d = p;
+	} else {
+		a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		if (!a->d)
+			return -ENOMEM;
+	}
+	a->alloced = nlimbs;
+	return 0;
+}
+
+void mpi_clear(MPI a)
+{
+	a->nlimbs = 0;
+	a->nbits = 0;
+	a->flags = 0;
+}
+
+void mpi_free(MPI a)
+{
+	if (!a)
+		return;
+
+	if (a->flags & 4)
+		kfree(a->d);
+	else
+		mpi_free_limb_space(a->d);
+
+	if (a->flags & ~7)
+		pr_info("invalid flag value in mpi\n");
+	kfree(a);
+}
+EXPORT_SYMBOL_GPL(mpi_free);
+
+/****************
+ * Note: This copy function should not interpret the MPI
+ *	 but copy it transparently.
+ */
+int mpi_copy(MPI *copied, const MPI a)
+{
+	size_t i;
+	MPI b;
+
+	*copied = NULL;
+
+	if (a) {
+		b = mpi_alloc(a->nlimbs);
+		if (!b)
+			return -ENOMEM;
+
+		b->nlimbs = a->nlimbs;
+		b->sign = a->sign;
+		b->flags = a->flags;
+		b->nbits = a->nbits;
+
+		for (i = 0; i < b->nlimbs; i++)
+			b->d[i] = a->d[i];
+
+		*copied = b;
+	}
+
+	return 0;
+}
+
+int mpi_set(MPI w, const MPI u)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize = u->nlimbs;
+	int usign = u->sign;
+
+	if (RESIZE_IF_NEEDED(w, (size_t) usize) < 0)
+		return -ENOMEM;
+
+	wp = w->d;
+	up = u->d;
+	MPN_COPY(wp, up, usize);
+	w->nlimbs = usize;
+	w->nbits = u->nbits;
+	w->flags = u->flags;
+	w->sign = usign;
+	return 0;
+}
+
+int mpi_set_ui(MPI w, unsigned long u)
+{
+	if (RESIZE_IF_NEEDED(w, 1) < 0)
+		return -ENOMEM;
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	w->nbits = 0;
+	w->flags = 0;
+	return 0;
+}
+
+MPI mpi_alloc_set_ui(unsigned long u)
+{
+	MPI w = mpi_alloc(1);
+	if (!w)
+		return w;
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	return w;
+}
+
+void mpi_swap(MPI a, MPI b)
+{
+	struct gcry_mpi tmp;
+
+	tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index a8408b6cacdf..4226dfeb5178 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -5,7 +5,7 @@
  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/jiffies.h>
diff --git a/lib/parser.c b/lib/parser.c
index dcbaaef6cf11..c43410084838 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -6,7 +6,8 @@
  */
 
 #include <linux/ctype.h>
-#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/export.h>
 #include <linux/parser.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c
new file mode 100644
index 000000000000..0d83ea8a9605
--- /dev/null
+++ b/lib/pci_iomap.c
@@ -0,0 +1,48 @@
+/*
+ * Implement the default iomap interfaces
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/pci.h>
+#include <linux/io.h>
+
+#include <linux/export.h>
+
+#ifdef CONFIG_PCI
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	resource_size_t start = pci_resource_start(dev, bar);
+	resource_size_t len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len || !start)
+		return NULL;
+	if (maxlen && len > maxlen)
+		len = maxlen;
+	if (flags & IORESOURCE_IO)
+		return __pci_ioport_map(dev, start, len);
+	if (flags & IORESOURCE_MEM) {
+		if (flags & IORESOURCE_CACHEABLE)
+			return ioremap(start, len);
+		return ioremap_nocache(start, len);
+	}
+	/* What? */
+	return NULL;
+}
+
+EXPORT_SYMBOL(pci_iomap);
+#endif /* CONFIG_PCI */
diff --git a/lib/plist.c b/lib/plist.c
index a0a4da489c22..6ab0e521c48b 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -23,6 +23,7 @@
  * information.
  */
 
+#include <linux/bug.h>
 #include <linux/plist.h>
 #include <linux/spinlock.h>
 
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
index ccfd850b0dec..8d443af03b4c 100644
--- a/lib/prio_tree.c
+++ b/lib/prio_tree.c
@@ -85,6 +85,17 @@ static inline unsigned long prio_tree_maxindex(unsigned int bits)
 	return index_bits_to_maxindex[bits - 1];
 }
 
+static void prio_set_parent(struct prio_tree_node *parent,
+			    struct prio_tree_node *child, bool left)
+{
+	if (left)
+		parent->left = child;
+	else
+		parent->right = child;
+
+	child->parent = parent;
+}
+
 /*
  * Extend a priority search tree so that it can store a node with heap_index
  * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
@@ -94,45 +105,32 @@ static inline unsigned long prio_tree_maxindex(unsigned int bits)
 static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
 		struct prio_tree_node *node, unsigned long max_heap_index)
 {
-	struct prio_tree_node *first = NULL, *prev, *last = NULL;
+	struct prio_tree_node *prev;
 
 	if (max_heap_index > prio_tree_maxindex(root->index_bits))
 		root->index_bits++;
 
+	prev = node;
+	INIT_PRIO_TREE_NODE(node);
+
 	while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
+		struct prio_tree_node *tmp = root->prio_tree_node;
+
 		root->index_bits++;
 
 		if (prio_tree_empty(root))
 			continue;
 
-		if (first == NULL) {
-			first = root->prio_tree_node;
-			prio_tree_remove(root, root->prio_tree_node);
-			INIT_PRIO_TREE_NODE(first);
-			last = first;
-		} else {
-			prev = last;
-			last = root->prio_tree_node;
-			prio_tree_remove(root, root->prio_tree_node);
-			INIT_PRIO_TREE_NODE(last);
-			prev->left = last;
-			last->parent = prev;
-		}
-	}
-
-	INIT_PRIO_TREE_NODE(node);
-
-	if (first) {
-		node->left = first;
-		first->parent = node;
-	} else
-		last = node;
+		prio_tree_remove(root, root->prio_tree_node);
+		INIT_PRIO_TREE_NODE(tmp);
 
-	if (!prio_tree_empty(root)) {
-		last->left = root->prio_tree_node;
-		last->left->parent = last;
+		prio_set_parent(prev, tmp, true);
+		prev = tmp;
 	}
 
+	if (!prio_tree_empty(root))
+		prio_set_parent(prev, root->prio_tree_node, true);
+
 	root->prio_tree_node = node;
 	return node;
 }
@@ -151,25 +149,15 @@ struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
 		 * We can reduce root->index_bits here. However, it is complex
 		 * and does not help much to improve performance (IMO).
 		 */
-		node->parent = node;
 		root->prio_tree_node = node;
-	} else {
-		node->parent = old->parent;
-		if (old->parent->left == old)
-			old->parent->left = node;
-		else
-			old->parent->right = node;
-	}
+	} else
+		prio_set_parent(old->parent, node, old->parent->left == old);
 
-	if (!prio_tree_left_empty(old)) {
-		node->left = old->left;
-		old->left->parent = node;
-	}
+	if (!prio_tree_left_empty(old))
+		prio_set_parent(node, old->left, true);
 
-	if (!prio_tree_right_empty(old)) {
-		node->right = old->right;
-		old->right->parent = node;
-	}
+	if (!prio_tree_right_empty(old))
+		prio_set_parent(node, old->right, false);
 
 	return old;
 }
@@ -229,16 +217,14 @@ struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
 		if (index & mask) {
 			if (prio_tree_right_empty(cur)) {
 				INIT_PRIO_TREE_NODE(node);
-				cur->right = node;
-				node->parent = cur;
+				prio_set_parent(cur, node, false);
 				return res;
 			} else
 				cur = cur->right;
 		} else {
 			if (prio_tree_left_empty(cur)) {
 				INIT_PRIO_TREE_NODE(node);
-				cur->left = node;
-				node->parent = cur;
+				prio_set_parent(cur, node, true);
 				return res;
 			} else
 				cur = cur->left;
@@ -305,6 +291,40 @@ void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
 		cur = prio_tree_replace(root, cur->parent, cur);
 }
 
+static void iter_walk_down(struct prio_tree_iter *iter)
+{
+	iter->mask >>= 1;
+	if (iter->mask) {
+		if (iter->size_level)
+			iter->size_level++;
+		return;
+	}
+
+	if (iter->size_level) {
+		BUG_ON(!prio_tree_left_empty(iter->cur));
+		BUG_ON(!prio_tree_right_empty(iter->cur));
+		iter->size_level++;
+		iter->mask = ULONG_MAX;
+	} else {
+		iter->size_level = 1;
+		iter->mask = 1UL << (BITS_PER_LONG - 1);
+	}
+}
+
+static void iter_walk_up(struct prio_tree_iter *iter)
+{
+	if (iter->mask == ULONG_MAX)
+		iter->mask = 1UL;
+	else if (iter->size_level == 1)
+		iter->mask = 1UL;
+	else
+		iter->mask <<= 1;
+	if (iter->size_level)
+		iter->size_level--;
+	if (!iter->size_level && (iter->value & iter->mask))
+		iter->value ^= iter->mask;
+}
+
 /*
  * Following functions help to enumerate all prio_tree_nodes in the tree that
  * overlap with the input interval X [radix_index, heap_index]. The enumeration
@@ -323,21 +343,7 @@ static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
 
 	if (iter->r_index <= *h_index) {
 		iter->cur = iter->cur->left;
-		iter->mask >>= 1;
-		if (iter->mask) {
-			if (iter->size_level)
-				iter->size_level++;
-		} else {
-			if (iter->size_level) {
-				BUG_ON(!prio_tree_left_empty(iter->cur));
-				BUG_ON(!prio_tree_right_empty(iter->cur));
-				iter->size_level++;
-				iter->mask = ULONG_MAX;
-			} else {
-				iter->size_level = 1;
-				iter->mask = 1UL << (BITS_PER_LONG - 1);
-			}
-		}
+		iter_walk_down(iter);
 		return iter->cur;
 	}
 
@@ -364,22 +370,7 @@ static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
 
 	if (iter->r_index <= *h_index) {
 		iter->cur = iter->cur->right;
-		iter->mask >>= 1;
-		iter->value = value;
-		if (iter->mask) {
-			if (iter->size_level)
-				iter->size_level++;
-		} else {
-			if (iter->size_level) {
-				BUG_ON(!prio_tree_left_empty(iter->cur));
-				BUG_ON(!prio_tree_right_empty(iter->cur));
-				iter->size_level++;
-				iter->mask = ULONG_MAX;
-			} else {
-				iter->size_level = 1;
-				iter->mask = 1UL << (BITS_PER_LONG - 1);
-			}
-		}
+		iter_walk_down(iter);
 		return iter->cur;
 	}
 
@@ -389,16 +380,7 @@ static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
 static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
 {
 	iter->cur = iter->cur->parent;
-	if (iter->mask == ULONG_MAX)
-		iter->mask = 1UL;
-	else if (iter->size_level == 1)
-		iter->mask = 1UL;
-	else
-		iter->mask <<= 1;
-	if (iter->size_level)
-		iter->size_level--;
-	if (!iter->size_level && (iter->value & iter->mask))
-		iter->value ^= iter->mask;
+	iter_walk_up(iter);
 	return iter->cur;
 }
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d9df7454519c..86516f5588e3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -3,6 +3,7 @@
  * Portions Copyright (C) 2001 Christoph Hellwig
  * Copyright (C) 2005 SGI, Christoph Lameter
  * Copyright (C) 2006 Nick Piggin
+ * Copyright (C) 2012 Konstantin Khlebnikov
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -22,7 +23,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/radix-tree.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
@@ -48,16 +49,14 @@
 struct radix_tree_node {
 	unsigned int	height;		/* Height from the bottom */
 	unsigned int	count;
-	struct rcu_head	rcu_head;
+	union {
+		struct radix_tree_node *parent;	/* Used when ascending tree */
+		struct rcu_head	rcu_head;	/* Used when freeing node */
+	};
 	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-struct radix_tree_path {
-	struct radix_tree_node *node;
-	int offset;
-};
-
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
@@ -148,6 +147,43 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
 	}
 	return 0;
 }
+
+/**
+ * radix_tree_find_next_bit - find the next set bit in a memory region
+ *
+ * @addr: The address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Unrollable variant of find_next_bit() for constant size arrays.
+ * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
+ * Returns next bit offset, or size if nothing found.
+ */
+static __always_inline unsigned long
+radix_tree_find_next_bit(const unsigned long *addr,
+			 unsigned long size, unsigned long offset)
+{
+	if (!__builtin_constant_p(size))
+		return find_next_bit(addr, size, offset);
+
+	if (offset < size) {
+		unsigned long tmp;
+
+		addr += offset / BITS_PER_LONG;
+		tmp = *addr >> (offset % BITS_PER_LONG);
+		if (tmp)
+			return __ffs(tmp) + offset;
+		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
+		while (offset < size) {
+			tmp = *++addr;
+			if (tmp)
+				return __ffs(tmp) + offset;
+			offset += BITS_PER_LONG;
+		}
+	}
+	return size;
+}
+
 /*
  * This assumes that the caller has performed appropriate preallocation, and
  * that the caller has pinned this thread of control to the current CPU.
@@ -256,6 +292,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 {
 	struct radix_tree_node *node;
+	struct radix_tree_node *slot;
 	unsigned int height;
 	int tag;
 
@@ -274,18 +311,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 		if (!(node = radix_tree_node_alloc(root)))
 			return -ENOMEM;
 
-		/* Increase the height.  */
-		node->slots[0] = indirect_to_ptr(root->rnode);
-
 		/* Propagate the aggregated tag info into the new root */
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 			if (root_tag_get(root, tag))
 				tag_set(node, tag, 0);
 		}
 
+		/* Increase the height.  */
 		newheight = root->height+1;
 		node->height = newheight;
 		node->count = 1;
+		node->parent = NULL;
+		slot = root->rnode;
+		if (newheight > 1) {
+			slot = indirect_to_ptr(slot);
+			slot->parent = node;
+		}
+		node->slots[0] = slot;
 		node = ptr_to_indirect(node);
 		rcu_assign_pointer(root->rnode, node);
 		root->height = newheight;
@@ -331,6 +373,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 			if (!(slot = radix_tree_node_alloc(root)))
 				return -ENOMEM;
 			slot->height = height;
+			slot->parent = node;
 			if (node) {
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
@@ -504,47 +547,41 @@ EXPORT_SYMBOL(radix_tree_tag_set);
 void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag)
 {
-	/*
-	 * The radix tree path needs to be one longer than the maximum path
-	 * since the "list" is null terminated.
-	 */
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot = NULL;
 	unsigned int height, shift;
+	int uninitialized_var(offset);
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		goto out;
 
-	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	pathp->node = NULL;
+	shift = height * RADIX_TREE_MAP_SHIFT;
 	slot = indirect_to_ptr(root->rnode);
 
-	while (height > 0) {
-		int offset;
-
+	while (shift) {
 		if (slot == NULL)
 			goto out;
 
+		shift -= RADIX_TREE_MAP_SHIFT;
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		pathp[1].offset = offset;
-		pathp[1].node = slot;
+		node = slot;
 		slot = slot->slots[offset];
-		pathp++;
-		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
 	}
 
 	if (slot == NULL)
 		goto out;
 
-	while (pathp->node) {
-		if (!tag_get(pathp->node, tag, pathp->offset))
+	while (node) {
+		if (!tag_get(node, tag, offset))
 			goto out;
-		tag_clear(pathp->node, tag, pathp->offset);
-		if (any_tag_set(pathp->node, tag))
+		tag_clear(node, tag, offset);
+		if (any_tag_set(node, tag))
 			goto out;
-		pathp--;
+
+		index >>= RADIX_TREE_MAP_SHIFT;
+		offset = index & RADIX_TREE_MAP_MASK;
+		node = node->parent;
 	}
 
 	/* clear the root's tag bit */
@@ -614,6 +651,119 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_tag_get);
 
 /**
+ * radix_tree_next_chunk - find next chunk of slots for iteration
+ *
+ * @root:	radix tree root
+ * @iter:	iterator state
+ * @flags:	RADIX_TREE_ITER_* flags and tag index
+ * Returns:	pointer to chunk first slot, or NULL if iteration is over
+ */
+void **radix_tree_next_chunk(struct radix_tree_root *root,
+			     struct radix_tree_iter *iter, unsigned flags)
+{
+	unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
+	struct radix_tree_node *rnode, *node;
+	unsigned long index, offset;
+
+	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
+		return NULL;
+
+	/*
+	 * Catch next_index overflow after ~0UL. iter->index never overflows
+	 * during iterating; it can be zero only at the beginning.
+	 * And we cannot overflow iter->next_index in a single step,
+	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
+	 */
+	index = iter->next_index;
+	if (!index && iter->index)
+		return NULL;
+
+	rnode = rcu_dereference_raw(root->rnode);
+	if (radix_tree_is_indirect_ptr(rnode)) {
+		rnode = indirect_to_ptr(rnode);
+	} else if (rnode && !index) {
+		/* Single-slot tree */
+		iter->index = 0;
+		iter->next_index = 1;
+		iter->tags = 1;
+		return (void **)&root->rnode;
+	} else
+		return NULL;
+
+restart:
+	shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
+	offset = index >> shift;
+
+	/* Index outside of the tree */
+	if (offset >= RADIX_TREE_MAP_SIZE)
+		return NULL;
+
+	node = rnode;
+	while (1) {
+		if ((flags & RADIX_TREE_ITER_TAGGED) ?
+				!test_bit(offset, node->tags[tag]) :
+				!node->slots[offset]) {
+			/* Hole detected */
+			if (flags & RADIX_TREE_ITER_CONTIG)
+				return NULL;
+
+			if (flags & RADIX_TREE_ITER_TAGGED)
+				offset = radix_tree_find_next_bit(
+						node->tags[tag],
+						RADIX_TREE_MAP_SIZE,
+						offset + 1);
+			else
+				while (++offset	< RADIX_TREE_MAP_SIZE) {
+					if (node->slots[offset])
+						break;
+				}
+			index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
+			index += offset << shift;
+			/* Overflow after ~0UL */
+			if (!index)
+				return NULL;
+			if (offset == RADIX_TREE_MAP_SIZE)
+				goto restart;
+		}
+
+		/* This is leaf-node */
+		if (!shift)
+			break;
+
+		node = rcu_dereference_raw(node->slots[offset]);
+		if (node == NULL)
+			goto restart;
+		shift -= RADIX_TREE_MAP_SHIFT;
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+	}
+
+	/* Update the iterator state */
+	iter->index = index;
+	iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
+
+	/* Construct iter->tags bit-mask from node->tags[tag] array */
+	if (flags & RADIX_TREE_ITER_TAGGED) {
+		unsigned tag_long, tag_bit;
+
+		tag_long = offset / BITS_PER_LONG;
+		tag_bit  = offset % BITS_PER_LONG;
+		iter->tags = node->tags[tag][tag_long] >> tag_bit;
+		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
+		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
+			/* Pick tags from next element */
+			if (tag_bit)
+				iter->tags |= node->tags[tag][tag_long + 1] <<
+						(BITS_PER_LONG - tag_bit);
+			/* Clip chunk size, here only BITS_PER_LONG tags */
+			iter->next_index = index + BITS_PER_LONG;
+		}
+	}
+
+	return node->slots + offset;
+}
+EXPORT_SYMBOL(radix_tree_next_chunk);
+
+/**
  * radix_tree_range_tag_if_tagged - for each item in given range set given
  *				   tag if item has another tag set
  * @root:		radix tree root
@@ -646,8 +796,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 		unsigned int iftag, unsigned int settag)
 {
 	unsigned int height = root->height;
-	struct radix_tree_path path[height];
-	struct radix_tree_path *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot;
 	unsigned int shift;
 	unsigned long tagged = 0;
@@ -671,14 +820,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	slot = indirect_to_ptr(root->rnode);
 
-	/*
-	 * we fill the path from (root->height - 2) to 0, leaving the index at
-	 * (root->height - 1) as a terminator. Zero the node in the terminator
-	 * so that we can use this to end walk loops back up the path.
-	 */
-	path[height - 1].node = NULL;
-
 	for (;;) {
+		unsigned long upindex;
 		int offset;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -686,12 +829,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 			goto next;
 		if (!tag_get(slot, iftag, offset))
 			goto next;
-		if (height > 1) {
+		if (shift) {
 			/* Go down one level */
-			height--;
 			shift -= RADIX_TREE_MAP_SHIFT;
-			path[height - 1].node = slot;
-			path[height - 1].offset = offset;
+			node = slot;
 			slot = slot->slots[offset];
 			continue;
 		}
@@ -701,15 +842,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
 		tag_set(slot, settag, offset);
 
 		/* walk back up the path tagging interior nodes */
-		pathp = &path[0];
-		while (pathp->node) {
+		upindex = index;
+		while (node) {
+			upindex >>= RADIX_TREE_MAP_SHIFT;
+			offset = upindex & RADIX_TREE_MAP_MASK;
+
 			/* stop if we find a node with the tag already set */
-			if (tag_get(pathp->node, settag, pathp->offset))
+			if (tag_get(node, settag, offset))
 				break;
-			tag_set(pathp->node, settag, pathp->offset);
-			pathp++;
+			tag_set(node, settag, offset);
+			node = node->parent;
 		}
 
+		/*
+		 * Small optimization: now clear that node pointer.
+		 * Since all of this slot's ancestors now have the tag set
+		 * from setting it above, we have no further need to walk
+		 * back up the tree setting tags, until we update slot to
+		 * point to another radix_tree_node.
+		 */
+		node = NULL;
+
 next:
 		/* Go to next item at level determined by 'shift' */
 		index = ((index >> shift) + 1) << shift;
@@ -724,8 +877,7 @@ next:
 			 * last_index is guaranteed to be in the tree, what
 			 * we do below cannot wander astray.
 			 */
-			slot = path[height - 1].node;
-			height++;
+			slot = slot->parent;
 			shift += RADIX_TREE_MAP_SHIFT;
 		}
 	}
@@ -816,57 +968,6 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_prev_hole);
 
-static unsigned int
-__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
-	unsigned long index, unsigned int max_items, unsigned long *next_index)
-{
-	unsigned int nr_found = 0;
-	unsigned int shift, height;
-	unsigned long i;
-
-	height = slot->height;
-	if (height == 0)
-		goto out;
-	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
-	for ( ; height > 1; height--) {
-		i = (index >> shift) & RADIX_TREE_MAP_MASK;
-		for (;;) {
-			if (slot->slots[i] != NULL)
-				break;
-			index &= ~((1UL << shift) - 1);
-			index += 1UL << shift;
-			if (index == 0)
-				goto out;	/* 32-bit wraparound */
-			i++;
-			if (i == RADIX_TREE_MAP_SIZE)
-				goto out;
-		}
-
-		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference_raw(slot->slots[i]);
-		if (slot == NULL)
-			goto out;
-	}
-
-	/* Bottom level: grab some items */
-	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
-		if (slot->slots[i]) {
-			results[nr_found] = &(slot->slots[i]);
-			if (indices)
-				indices[nr_found] = index;
-			if (++nr_found == max_items) {
-				index++;
-				goto out;
-			}
-		}
-		index++;
-	}
-out:
-	*next_index = index;
-	return nr_found;
-}
-
 /**
  *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
  *	@root:		radix tree root
@@ -890,48 +991,19 @@ unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items)
 {
-	unsigned long max_index;
-	struct radix_tree_node *node;
-	unsigned long cur_index = first_index;
-	unsigned int ret;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int ret = 0;
 
-	node = rcu_dereference_raw(root->rnode);
-	if (!node)
+	if (unlikely(!max_items))
 		return 0;
 
-	if (!radix_tree_is_indirect_ptr(node)) {
-		if (first_index > 0)
-			return 0;
-		results[0] = node;
-		return 1;
-	}
-	node = indirect_to_ptr(node);
-
-	max_index = radix_tree_maxindex(node->height);
-
-	ret = 0;
-	while (ret < max_items) {
-		unsigned int nr_found, slots_found, i;
-		unsigned long next_index;	/* Index of next search */
-
-		if (cur_index > max_index)
-			break;
-		slots_found = __lookup(node, (void ***)results + ret, NULL,
-				cur_index, max_items - ret, &next_index);
-		nr_found = 0;
-		for (i = 0; i < slots_found; i++) {
-			struct radix_tree_node *slot;
-			slot = *(((void ***)results)[ret + i]);
-			if (!slot)
-				continue;
-			results[ret + nr_found] =
-				indirect_to_ptr(rcu_dereference_raw(slot));
-			nr_found++;
-		}
-		ret += nr_found;
-		if (next_index == 0)
+	radix_tree_for_each_slot(slot, root, &iter, first_index) {
+		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+		if (!results[ret])
+			continue;
+		if (++ret == max_items)
 			break;
-		cur_index = next_index;
 	}
 
 	return ret;
@@ -961,112 +1033,25 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root,
 			void ***results, unsigned long *indices,
 			unsigned long first_index, unsigned int max_items)
 {
-	unsigned long max_index;
-	struct radix_tree_node *node;
-	unsigned long cur_index = first_index;
-	unsigned int ret;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int ret = 0;
 
-	node = rcu_dereference_raw(root->rnode);
-	if (!node)
+	if (unlikely(!max_items))
 		return 0;
 
-	if (!radix_tree_is_indirect_ptr(node)) {
-		if (first_index > 0)
-			return 0;
-		results[0] = (void **)&root->rnode;
+	radix_tree_for_each_slot(slot, root, &iter, first_index) {
+		results[ret] = slot;
 		if (indices)
-			indices[0] = 0;
-		return 1;
-	}
-	node = indirect_to_ptr(node);
-
-	max_index = radix_tree_maxindex(node->height);
-
-	ret = 0;
-	while (ret < max_items) {
-		unsigned int slots_found;
-		unsigned long next_index;	/* Index of next search */
-
-		if (cur_index > max_index)
-			break;
-		slots_found = __lookup(node, results + ret,
-				indices ? indices + ret : NULL,
-				cur_index, max_items - ret, &next_index);
-		ret += slots_found;
-		if (next_index == 0)
+			indices[ret] = iter.index;
+		if (++ret == max_items)
 			break;
-		cur_index = next_index;
 	}
 
 	return ret;
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
 
-/*
- * FIXME: the two tag_get()s here should use find_next_bit() instead of
- * open-coding the search.
- */
-static unsigned int
-__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
-	unsigned int max_items, unsigned long *next_index, unsigned int tag)
-{
-	unsigned int nr_found = 0;
-	unsigned int shift, height;
-
-	height = slot->height;
-	if (height == 0)
-		goto out;
-	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
-	while (height > 0) {
-		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
-
-		for (;;) {
-			if (tag_get(slot, tag, i))
-				break;
-			index &= ~((1UL << shift) - 1);
-			index += 1UL << shift;
-			if (index == 0)
-				goto out;	/* 32-bit wraparound */
-			i++;
-			if (i == RADIX_TREE_MAP_SIZE)
-				goto out;
-		}
-		height--;
-		if (height == 0) {	/* Bottom level: grab some items */
-			unsigned long j = index & RADIX_TREE_MAP_MASK;
-
-			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
-				index++;
-				if (!tag_get(slot, tag, j))
-					continue;
-				/*
-				 * Even though the tag was found set, we need to
-				 * recheck that we have a non-NULL node, because
-				 * if this lookup is lockless, it may have been
-				 * subsequently deleted.
-				 *
-				 * Similar care must be taken in any place that
-				 * lookup ->slots[x] without a lock (ie. can't
-				 * rely on its value remaining the same).
-				 */
-				if (slot->slots[j]) {
-					results[nr_found++] = &(slot->slots[j]);
-					if (nr_found == max_items)
-						goto out;
-				}
-			}
-		}
-		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference_raw(slot->slots[i]);
-		if (slot == NULL)
-			break;
-	}
-out:
-	*next_index = index;
-	return nr_found;
-}
-
 /**
  *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
  *	                             based on a tag
@@ -1085,52 +1070,19 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag)
 {
-	struct radix_tree_node *node;
-	unsigned long max_index;
-	unsigned long cur_index = first_index;
-	unsigned int ret;
-
-	/* check the root's tag bit */
-	if (!root_tag_get(root, tag))
-		return 0;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int ret = 0;
 
-	node = rcu_dereference_raw(root->rnode);
-	if (!node)
+	if (unlikely(!max_items))
 		return 0;
 
-	if (!radix_tree_is_indirect_ptr(node)) {
-		if (first_index > 0)
-			return 0;
-		results[0] = node;
-		return 1;
-	}
-	node = indirect_to_ptr(node);
-
-	max_index = radix_tree_maxindex(node->height);
-
-	ret = 0;
-	while (ret < max_items) {
-		unsigned int nr_found, slots_found, i;
-		unsigned long next_index;	/* Index of next search */
-
-		if (cur_index > max_index)
-			break;
-		slots_found = __lookup_tag(node, (void ***)results + ret,
-				cur_index, max_items - ret, &next_index, tag);
-		nr_found = 0;
-		for (i = 0; i < slots_found; i++) {
-			struct radix_tree_node *slot;
-			slot = *(((void ***)results)[ret + i]);
-			if (!slot)
-				continue;
-			results[ret + nr_found] =
-				indirect_to_ptr(rcu_dereference_raw(slot));
-			nr_found++;
-		}
-		ret += nr_found;
-		if (next_index == 0)
+	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
+		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+		if (!results[ret])
+			continue;
+		if (++ret == max_items)
 			break;
-		cur_index = next_index;
 	}
 
 	return ret;
@@ -1155,42 +1107,17 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag)
 {
-	struct radix_tree_node *node;
-	unsigned long max_index;
-	unsigned long cur_index = first_index;
-	unsigned int ret;
-
-	/* check the root's tag bit */
-	if (!root_tag_get(root, tag))
-		return 0;
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned int ret = 0;
 
-	node = rcu_dereference_raw(root->rnode);
-	if (!node)
+	if (unlikely(!max_items))
 		return 0;
 
-	if (!radix_tree_is_indirect_ptr(node)) {
-		if (first_index > 0)
-			return 0;
-		results[0] = (void **)&root->rnode;
-		return 1;
-	}
-	node = indirect_to_ptr(node);
-
-	max_index = radix_tree_maxindex(node->height);
-
-	ret = 0;
-	while (ret < max_items) {
-		unsigned int slots_found;
-		unsigned long next_index;	/* Index of next search */
-
-		if (cur_index > max_index)
+	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
+		results[ret] = slot;
+		if (++ret == max_items)
 			break;
-		slots_found = __lookup_tag(node, results + ret,
-				cur_index, max_items - ret, &next_index, tag);
-		ret += slots_found;
-		if (next_index == 0)
-			break;
-		cur_index = next_index;
 	}
 
 	return ret;
@@ -1299,7 +1226,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 	/* try to shrink tree height */
 	while (root->height > 0) {
 		struct radix_tree_node *to_free = root->rnode;
-		void *newptr;
+		struct radix_tree_node *slot;
 
 		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
 		to_free = indirect_to_ptr(to_free);
@@ -1320,10 +1247,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 		 * (to_free->slots[0]), it will be safe to dereference the new
 		 * one (root->rnode) as far as dependent read barriers go.
 		 */
-		newptr = to_free->slots[0];
-		if (root->height > 1)
-			newptr = ptr_to_indirect(newptr);
-		root->rnode = newptr;
+		slot = to_free->slots[0];
+		if (root->height > 1) {
+			slot->parent = NULL;
+			slot = ptr_to_indirect(slot);
+		}
+		root->rnode = slot;
 		root->height--;
 
 		/*
@@ -1363,16 +1292,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
  */
 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
-	/*
-	 * The radix tree path needs to be one longer than the maximum path
-	 * since the "list" is null terminated.
-	 */
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot = NULL;
 	struct radix_tree_node *to_free;
 	unsigned int height, shift;
 	int tag;
-	int offset;
+	int uninitialized_var(offset);
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
@@ -1385,39 +1310,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		goto out;
 	}
 	slot = indirect_to_ptr(slot);
-
-	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	pathp->node = NULL;
+	shift = height * RADIX_TREE_MAP_SHIFT;
 
 	do {
 		if (slot == NULL)
 			goto out;
 
-		pathp++;
+		shift -= RADIX_TREE_MAP_SHIFT;
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		pathp->offset = offset;
-		pathp->node = slot;
+		node = slot;
 		slot = slot->slots[offset];
-		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
-	} while (height > 0);
+	} while (shift);
 
 	if (slot == NULL)
 		goto out;
 
 	/*
-	 * Clear all tags associated with the just-deleted item
+	 * Clear all tags associated with the item to be deleted.
+	 * This way of doing it would be inefficient, but seldom is any set.
 	 */
 	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-		if (tag_get(pathp->node, tag, pathp->offset))
+		if (tag_get(node, tag, offset))
 			radix_tree_tag_clear(root, index, tag);
 	}
 
 	to_free = NULL;
 	/* Now free the nodes we do not need anymore */
-	while (pathp->node) {
-		pathp->node->slots[pathp->offset] = NULL;
-		pathp->node->count--;
+	while (node) {
+		node->slots[offset] = NULL;
+		node->count--;
 		/*
 		 * Queue the node for deferred freeing after the
 		 * last reference to it disappears (set NULL, above).
@@ -1425,17 +1346,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		if (to_free)
 			radix_tree_node_free(to_free);
 
-		if (pathp->node->count) {
-			if (pathp->node == indirect_to_ptr(root->rnode))
+		if (node->count) {
+			if (node == indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
 
 		/* Node with zero slots in use so free it */
-		to_free = pathp->node;
-		pathp--;
+		to_free = node;
 
+		index >>= RADIX_TREE_MAP_SHIFT;
+		offset = index & RADIX_TREE_MAP_MASK;
+		node = node->parent;
 	}
+
 	root_tag_clear_all(root);
 	root->height = 0;
 	root->rnode = NULL;
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 2654d5c854be..b71012b756f4 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -28,8 +28,8 @@
 
 #include <altivec.h>
 #ifdef __KERNEL__
-# include <asm/system.h>
 # include <asm/cputable.h>
+# include <asm/switch_to.h>
 #endif
 
 /*
diff --git a/lib/random32.c b/lib/random32.c
index fc3545a32771..938bde5876ac 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -35,7 +35,7 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/jiffies.h>
 #include <linux/random.h>
 
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index c96d500577de..40e03ea2a967 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -11,7 +11,7 @@
 
 #include <linux/ratelimit.h>
 #include <linux/jiffies.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * __ratelimit - rate limiting
diff --git a/lib/rational.c b/lib/rational.c
index 3ed247b80662..d326da3976f5 100644
--- a/lib/rational.c
+++ b/lib/rational.c
@@ -7,7 +7,8 @@
  */
 
 #include <linux/rational.h>
-#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
 
 /*
  * calculate best rational approximation for a given fraction
diff --git a/lib/rbtree.c b/lib/rbtree.c
index a16be19a1305..d4175565dc2c 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -21,7 +21,7 @@
 */
 
 #include <linux/rbtree.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 {
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index f2393c21fe85..7e0d6a58fc83 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -7,7 +7,7 @@
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 struct rwsem_waiter {
 	struct list_head list;
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 410aa1189b13..8337e1b9bb8d 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -6,7 +6,7 @@
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * Initialize an rwsem:
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 4ceb05d772ae..6096e89bee55 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -6,7 +6,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2. See the file COPYING for more details.
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/highmem.h>
@@ -390,7 +390,7 @@ bool sg_miter_next(struct sg_mapping_iter *miter)
 	miter->consumed = miter->length;
 
 	if (miter->__flags & SG_MITER_ATOMIC)
-		miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off;
+		miter->addr = kmap_atomic(miter->page) + off;
 	else
 		miter->addr = kmap(miter->page) + off;
 
@@ -424,7 +424,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 
 		if (miter->__flags & SG_MITER_ATOMIC) {
 			WARN_ON(!irqs_disabled());
-			kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
+			kunmap_atomic(miter->addr);
 		} else
 			kunmap(miter->page);
 
diff --git a/lib/sha1.c b/lib/sha1.c
index 1de509a159c8..1df191e04a24 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bitops.h>
 #include <linux/cryptohash.h>
 #include <asm/unaligned.h>
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 503f087382a4..4c0d0e51d49e 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -3,7 +3,7 @@
  *
  * DEBUG_PREEMPT variant of smp_processor_id().
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/sched.h>
 
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 5f3eacdd6178..525d160d44f0 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
 			  struct lock_class_key *key)
diff --git a/lib/string.c b/lib/string.c
index dc4a86341f91..e5878de4f101 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -22,7 +22,10 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
-#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bug.h>
+#include <linux/errno.h>
 
 #ifndef __HAVE_ARCH_STRNICMP
 /**
@@ -785,12 +788,24 @@ void *memchr_inv(const void *start, int c, size_t bytes)
 	if (bytes <= 16)
 		return check_bytes8(start, value, bytes);
 
-	value64 = value | value << 8 | value << 16 | value << 24;
-	value64 = (value64 & 0xffffffff) | value64 << 32;
-	prefix = 8 - ((unsigned long)start) % 8;
+	value64 = value;
+#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
+	value64 *= 0x0101010101010101;
+#elif defined(ARCH_HAS_FAST_MULTIPLIER)
+	value64 *= 0x01010101;
+	value64 |= value64 << 32;
+#else
+	value64 |= value64 << 8;
+	value64 |= value64 << 16;
+	value64 |= value64 << 32;
+#endif
 
+	prefix = (unsigned long)start % 8;
 	if (prefix) {
-		u8 *r = check_bytes8(start, value, prefix);
+		u8 *r;
+
+		prefix = 8 - prefix;
+		r = check_bytes8(start, value, prefix);
 		if (r)
 			return r;
 		start += prefix;
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ab431d4cc970..dd4ece372699 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -5,7 +5,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/math64.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/string_helpers.h>
 
 /**
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 99093b396145..414f46ed1dcd 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -20,7 +20,7 @@
 #include <linux/cache.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/swiotlb.h>
@@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
 {
 	return io_tlb_nslabs;
 }
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
@@ -321,6 +321,7 @@ void __init swiotlb_free(void)
 		free_bootmem_late(__pa(io_tlb_start),
 				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
+	io_tlb_nslabs = 0;
 }
 
 static int is_swiotlb_buffer(phys_addr_t paddr)
@@ -348,13 +349,12 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 			sz = min_t(size_t, PAGE_SIZE - offset, size);
 
 			local_irq_save(flags);
-			buffer = kmap_atomic(pfn_to_page(pfn),
-					     KM_BOUNCE_READ);
+			buffer = kmap_atomic(pfn_to_page(pfn));
 			if (dir == DMA_TO_DEVICE)
 				memcpy(dma_addr, buffer + offset, sz);
 			else
 				memcpy(buffer + offset, dma_addr, sz);
-			kunmap_atomic(buffer, KM_BOUNCE_READ);
+			kunmap_atomic(buffer);
 			local_irq_restore(flags);
 
 			size -= sz;
diff --git a/lib/syscall.c b/lib/syscall.c
index a4f7067f72fa..58710eefeac8 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -1,6 +1,6 @@
 #include <linux/ptrace.h>
 #include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/syscall.h>
 
 static int collect_syscall(struct task_struct *target, long *callno,
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 191176a43e9a..a382e4a32609 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -22,9 +22,10 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/bug.h>
 #include <linux/timerqueue.h>
 #include <linux/rbtree.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /**
  * timerqueue_add - Adds timer to timerqueue.
diff --git a/lib/uuid.c b/lib/uuid.c
index 8fadd7cef46c..52a6fe6387de 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/uuid.h>
 #include <linux/random.h>
 
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 8e75003d62f6..abbabec9720a 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -17,7 +17,7 @@
  */
 
 #include <stdarg.h>
-#include <linux/module.h>
+#include <linux/module.h>	/* for KSYM_SYMBOL_LEN */
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -212,6 +212,26 @@ char *put_dec(char *buf, unsigned long long num)
 	}
 }
 
+/*
+ * Convert passed number to decimal string.
+ * Returns the length of string.  On buffer overflow, returns 0.
+ *
+ * If speed is not important, use snprintf(). It's easy to read the code.
+ */
+int num_to_str(char *buf, int size, unsigned long long num)
+{
+	char tmp[21];		/* Enough for 2^64 in decimal */
+	int idx, len;
+
+	len = put_dec(tmp, num) - tmp;
+
+	if (len > size)
+		return 0;
+	for (idx = 0; idx < len; ++idx)
+		buf[idx] = tmp[len - idx - 1];
+	return  len;
+}
+
 #define ZEROPAD	1		/* pad with zero */
 #define SIGN	2		/* unsigned/signed long */
 #define PLUS	4		/* show plus */
@@ -891,9 +911,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 	case 'U':
 		return uuid_string(buf, end, ptr, spec, fmt);
 	case 'V':
-		return buf + vsnprintf(buf, end > buf ? end - buf : 0,
-				       ((struct va_format *)ptr)->fmt,
-				       *(((struct va_format *)ptr)->va));
+		{
+			va_list va;
+
+			va_copy(va, *((struct va_format *)ptr)->va);
+			buf += vsnprintf(buf, end > buf ? end - buf : 0,
+					 ((struct va_format *)ptr)->fmt, va);
+			va_end(va);
+			return buf;
+		}
 	case 'K':
 		/*
 		 * %pK cannot be used in IRQ context because its test