diff options
Diffstat (limited to 'lib')
114 files changed, 8149 insertions, 1051 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 63b5782732ed..4a8aba2e5cc0 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -19,6 +19,20 @@ config RATIONAL config GENERIC_FIND_FIRST_BIT bool +config NO_GENERIC_PCI_IOPORT_MAP + bool + +config GENERIC_PCI_IOMAP + bool + +config GENERIC_IOMAP + bool + select GENERIC_PCI_IOMAP + +config GENERIC_IO + boolean + default n + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -51,14 +65,71 @@ config CRC_ITU_T functions require M here. config CRC32 - tristate "CRC32 functions" + tristate "CRC32/CRC32c functions" default y select BITREVERSE help This option is provided for the case where no in-kernel-tree - modules require CRC32 functions, but a module built outside the - kernel tree does. Such modules that use library CRC32 functions - require M here. + modules require CRC32/CRC32c functions, but a module built outside + the kernel tree does. Such modules that use library CRC32/CRC32c + functions require M here. + +config CRC32_SELFTEST + bool "CRC32 perform self test on init" + default n + depends on CRC32 + help + This option enables the CRC32 library functions to perform a + self test on initialization. The self test computes crc32_le + and crc32_be over byte strings with random alignment and length + and computes the total elapsed time and number of bytes processed. + +choice + prompt "CRC32 implementation" + depends on CRC32 + default CRC32_SLICEBY8 + help + This option allows a kernel builder to override the default choice + of CRC32 algorithm. Choose the default ("slice by 8") unless you + know that you need one of the others. + +config CRC32_SLICEBY8 + bool "Slice by 8 bytes" + help + Calculate checksum 8 bytes at a time with a clever slicing algorithm. + This is the fastest algorithm, but comes with a 8KiB lookup table. + Most modern processors have enough cache to hold this table without + thrashing the cache. + + This is the default implementation choice. Choose this one unless + you have a good reason not to. 
+ +config CRC32_SLICEBY4 + bool "Slice by 4 bytes" + help + Calculate checksum 4 bytes at a time with a clever slicing algorithm. + This is a bit slower than slice by 8, but has a smaller 4KiB lookup + table. + + Only choose this option if you know what you are doing. + +config CRC32_SARWATE + bool "Sarwate's Algorithm (one byte at a time)" + help + Calculate checksum a byte at a time using Sarwate's algorithm. This + is not particularly fast, but has a small 256 byte lookup table. + + Only choose this option if you know what you are doing. + +config CRC32_BIT + bool "Classic Algorithm (one bit at a time)" + help + Calculate checksum one bit at a time. This is VERY slow, but has + no lookup table. This is provided as a debugging option. + + Only choose this option if you are debugging crc32. + +endchoice config CRC7 tristate "CRC7 functions" @@ -214,6 +285,7 @@ config BTREE config HAS_IOMEM boolean depends on !NO_IOMEM + select GENERIC_IO default y config HAS_IOPORT @@ -272,11 +344,38 @@ config AVERAGE If unsure, say N. +config CLZ_TAB + bool + config CORDIC - tristate "Cordic function" + tristate "CORDIC algorithm" + help + This option provides an implementation of the CORDIC algorithm; + calculations are in fixed point. Module will be called cordic. + +config MPILIB + tristate + select CLZ_TAB + help + Multiprecision maths library from GnuPG. + It is used to implement RSA digital signature verification, + which is used by IMA/EVM digital signature extension. + +config MPILIB_EXTRA + bool + depends on MPILIB + help + Additional sources of multiprecision maths library from GnuPG. + This code is unnecessary for RSA digital signature verification, + but can be compiled if needed. + +config SIGNATURE + tristate + depends on KEYS && CRYPTO + select CRYPTO_SHA1 + select MPILIB help - The option provides arithmetic function using cordic algorithm - so its calculations are in fixed point. Modules can select this - when they require this function. 
Module will be called cordic. + Digital signature verification. Currently only RSA is supported. + Implementation is done using GnuPG MPI library endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 82928f5ea049..6777153f18f3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -166,22 +166,25 @@ config LOCKUP_DETECTOR hard and soft lockups. Softlockups are bugs that cause the kernel to loop in kernel - mode for more than 60 seconds, without giving other tasks a + mode for more than 20 seconds, without giving other tasks a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. Hardlockups are bugs that cause the CPU to loop in kernel mode - for more than 60 seconds, without letting other interrupts have a + for more than 10 seconds, without letting other interrupts have a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. The overhead should be minimal. A periodic hrtimer runs to - generate interrupts and kick the watchdog task every 10-12 seconds. - An NMI is generated every 60 seconds or so to check for hardlockups. + generate interrupts and kick the watchdog task every 4 seconds. + An NMI is generated every 10 seconds or so to check for hardlockups. + + The frequency of hrtimer and NMI events and the soft and hard lockup + thresholds can be controlled through the sysctl watchdog_thresh. config HARDLOCKUP_DETECTOR def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ - !ARCH_HAS_NMI_WATCHDOG + !HAVE_NMI_WATCHDOG config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" @@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC help Say Y here to enable the kernel to panic on "hard lockups", which are bugs that cause the kernel to loop in kernel - mode with interrupts disabled for more than 60 seconds. + mode with interrupts disabled for more than 10 seconds (configurable + using the watchdog_thresh sysctl). Say N if unsure. 
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel - mode for more than 60 seconds, without giving other tasks a - chance to run. + mode for more than 20 seconds (configurable using the watchdog_thresh + sysctl), without giving other tasks a chance to run. The panic can be used in combination with panic_timeout, to cause the system to reboot automatically after a @@ -414,7 +418,7 @@ config SLUB_STATS config DEBUG_KMEMLEAK bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ + depends on DEBUG_KERNEL && EXPERIMENTAL && \ (X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE) select DEBUG_FS @@ -495,6 +499,7 @@ config RT_MUTEX_TESTER config DEBUG_SPINLOCK bool "Spinlock and rw-lock debugging: basic checks" depends on DEBUG_KERNEL + select UNINLINE_SPIN_UNLOCK help Say Y here and build SMP to catch missing spinlock initialization and certain other kinds of spinlock errors commonly made. This is @@ -927,6 +932,30 @@ config RCU_CPU_STALL_VERBOSE Say Y if you want to enable such checks. +config RCU_CPU_STALL_INFO + bool "Print additional diagnostics on RCU CPU stall" + depends on (TREE_RCU || TREE_PREEMPT_RCU) && DEBUG_KERNEL + default n + help + For each stalled CPU that is aware of the current RCU grace + period, print out additional per-CPU diagnostic information + regarding scheduling-clock ticks, idle state, and, + for RCU_FAST_NO_HZ kernels, idle-entry state. + + Say N if you are unsure. + + Say Y if you want to enable such diagnostics. + +config RCU_TRACE + bool "Enable tracing for RCU" + depends on DEBUG_KERNEL + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. 
+ config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL @@ -1113,14 +1142,6 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. -config SYSCTL_SYSCALL_CHECK - bool "Sysctl checks" - depends on SYSCTL - ---help--- - sys_sysctl uses binary paths that have been found challenging - to properly maintain and use. This enables checks that help - you to keep things correct. - source mm/Kconfig.debug source kernel/trace/Kconfig diff --git a/lib/Makefile b/lib/Makefile index c0ffaaff6534..18515f0267c4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -33,6 +33,7 @@ endif lib-$(CONFIG_HOTPLUG) += kobject_uevent.o obj-$(CONFIG_GENERIC_IOMAP) += iomap.o +obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o @@ -117,6 +118,11 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o +obj-$(CONFIG_MPILIB) += mpi/ +obj-$(CONFIG_SIGNATURE) += digsig.o + +obj-$(CONFIG_CLZ_TAB) += clz_tab.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/argv_split.c b/lib/argv_split.c index 4b1b083f219c..1e9a6cbc3689 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -6,7 +6,7 @@ #include <linux/ctype.h> #include <linux/string.h> #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> static const char *skip_arg(const char *cp) { diff --git a/lib/atomic64.c b/lib/atomic64.c index 3975470caf4f..978537809d84 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -13,7 +13,7 @@ #include <linux/cache.h> #include <linux/spinlock.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/atomic.h> /* diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 0c33cde2a1e6..cb99b91c3a1d 100644 --- a/lib/atomic64_test.c +++ 
b/lib/atomic64_test.c @@ -9,6 +9,7 @@ * (at your option) any later version. */ #include <linux/init.h> +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/atomic.h> diff --git a/lib/average.c b/lib/average.c index 5576c2841496..99a67e662b3c 100644 --- a/lib/average.c +++ b/lib/average.c @@ -5,8 +5,9 @@ * Version 2. See the file COPYING for more details. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/average.h> +#include <linux/kernel.h> #include <linux/bug.h> #include <linux/log2.h> diff --git a/lib/bcd.c b/lib/bcd.c index d74257fd0fe7..55efaf742346 100644 --- a/lib/bcd.c +++ b/lib/bcd.c @@ -1,5 +1,5 @@ #include <linux/bcd.h> -#include <linux/module.h> +#include <linux/export.h> unsigned bcd2bin(unsigned char val) { diff --git a/lib/bitmap.c b/lib/bitmap.c index 0d4a127dd9b3..b5a8b6ad2454 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -5,11 +5,13 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/thread_info.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/bitmap.h> #include <linux/bitops.h> +#include <linux/bug.h> #include <asm/uaccess.h> /* diff --git a/lib/bsearch.c b/lib/bsearch.c index 5b54758e2afb..e33c179089db 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -9,7 +9,7 @@ * published by the Free Software Foundation; version 2. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/bsearch.h> /* diff --git a/lib/btree.c b/lib/btree.c index 2a34392bcecc..e5ec1e9c1aa5 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -357,6 +357,7 @@ miss: } return NULL; } +EXPORT_SYMBOL_GPL(btree_get_prev); static int getpos(struct btree_geo *geo, unsigned long *node, unsigned long *key) diff --git a/lib/bug.c b/lib/bug.c index 19552096d16b..a28c1415357c 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -169,7 +169,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_WARN; } - printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_DEFAULT "------------[ cut here ]------------\n"); if (file) printk(KERN_CRIT "kernel BUG at %s:%u!\n", diff --git a/lib/check_signature.c b/lib/check_signature.c index fd6af199247b..6b49797980c4 100644 --- a/lib/check_signature.c +++ b/lib/check_signature.c @@ -1,5 +1,5 @@ #include <linux/io.h> -#include <linux/module.h> +#include <linux/export.h> /** * check_signature - find BIOS signatures diff --git a/lib/checksum.c b/lib/checksum.c index 8df2f91e6d98..12dceb27ff20 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -32,7 +32,7 @@ /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most of the assembly has to go. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <net/checksum.h> #include <asm/byteorder.h> diff --git a/lib/clz_tab.c b/lib/clz_tab.c new file mode 100644 index 000000000000..7287b4a991a7 --- /dev/null +++ b/lib/clz_tab.c @@ -0,0 +1,18 @@ +const unsigned char __clz_tab[] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, +}; diff --git a/lib/cmdline.c b/lib/cmdline.c index f5f3ad8b62ff..eb6791188cf5 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -12,7 +12,7 @@ * */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/lib/cordic.c b/lib/cordic.c index aa27a88d7e04..6cf477839ebd 100644 --- a/lib/cordic.c +++ b/lib/cordic.c @@ -96,6 +96,6 @@ struct cordic_iq cordic_calc_iq(s32 theta) } EXPORT_SYMBOL(cordic_calc_iq); -MODULE_DESCRIPTION("Cordic functions"); +MODULE_DESCRIPTION("CORDIC algorithm"); MODULE_AUTHOR("Broadcom Corporation"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 987acfafeb83..145dec5267c9 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -11,7 +11,7 @@ #ifdef CONFIG_GENERIC_HARDIRQS #include <linux/interrupt.h> #endif -#include <linux/module.h> +#include <linux/export.h> /* * These 
functions maintain a mapping from CPUs to some ordered set of diff --git a/lib/cpumask.c b/lib/cpumask.c index af3e5817de98..402a54ac35cb 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -2,7 +2,7 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/bootmem.h> int __first_cpu(const cpumask_t *srcp) @@ -26,18 +26,6 @@ int __next_cpu_nr(int n, const cpumask_t *srcp) EXPORT_SYMBOL(__next_cpu_nr); #endif -int __any_online_cpu(const cpumask_t *mask) -{ - int cpu; - - for_each_cpu(cpu, mask) { - if (cpu_online(cpu)) - break; - } - return cpu; -} -EXPORT_SYMBOL(__any_online_cpu); - /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (ie. return will be > @n) diff --git a/lib/crc32.c b/lib/crc32.c index a6e633a48cea..b0d278fb1d91 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -1,4 +1,8 @@ /* + * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin + * cleaned up code to current version of sparse and added the slicing-by-8 + * algorithm to the closely similar existing slicing-by-4 algorithm. + * * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! * Code was from the public domain, copyright abandoned. Code was @@ -20,51 +24,58 @@ * Version 2. See the file COPYING for more details. 
*/ +/* see: Documentation/crc32.txt for a description of algorithms */ + #include <linux/crc32.h> -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/compiler.h> #include <linux/types.h> -#include <linux/init.h> -#include <linux/atomic.h> #include "crc32defs.h" -#if CRC_LE_BITS == 8 -# define tole(x) __constant_cpu_to_le32(x) + +#if CRC_LE_BITS > 8 +# define tole(x) ((__force u32) __constant_cpu_to_le32(x)) #else # define tole(x) (x) #endif -#if CRC_BE_BITS == 8 -# define tobe(x) __constant_cpu_to_be32(x) +#if CRC_BE_BITS > 8 +# define tobe(x) ((__force u32) __constant_cpu_to_be32(x)) #else # define tobe(x) (x) #endif + #include "crc32table.h" MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); -MODULE_DESCRIPTION("Ethernet CRC32 calculations"); +MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 +#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 +/* implements slicing-by-4 or slicing-by-8 algorithm */ static inline u32 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN -# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) -# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ - tab[2][(crc >> 8) & 255] ^ \ - tab[1][(crc >> 16) & 255] ^ \ - tab[0][(crc >> 24) & 255] +# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) +# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ + t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) +# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ + t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) # else -# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) -# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ - tab[1][(crc >> 8) & 255] ^ \ - tab[2][(crc >> 16) & 255] ^ \ - tab[3][(crc >> 24) & 255] +# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ + t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) +# define DO_CRC8 
(t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ + t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) # endif const u32 *b; size_t rem_len; +# ifdef CONFIG_X86 + size_t i; +# endif + const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; + const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; + u32 q; /* Align it */ if (unlikely((long)buf & 3 && len)) { @@ -72,27 +83,51 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) DO_CRC(*buf++); } while ((--len) && ((long)buf)&3); } + +# if CRC_LE_BITS == 32 rem_len = len & 3; - /* load data 32 bits wide, xor data 32 bits wide. */ len = len >> 2; +# else + rem_len = len & 7; + len = len >> 3; +# endif + b = (const u32 *)buf; +# ifdef CONFIG_X86 + --b; + for (i = 0; i < len; i++) { +# else for (--b; len; --len) { - crc ^= *++b; /* use pre increment for speed */ - DO_CRC4; +# endif + q = crc ^ *++b; /* use pre increment for speed */ +# if CRC_LE_BITS == 32 + crc = DO_CRC4; +# else + crc = DO_CRC8; + q = *++b; + crc ^= DO_CRC4; +# endif } len = rem_len; /* And the last few bytes */ if (len) { u8 *p = (u8 *)(b + 1) - 1; +# ifdef CONFIG_X86 + for (i = 0; i < len; i++) + DO_CRC(*++p); /* use pre increment for speed */ +# else do { DO_CRC(*++p); /* use pre increment for speed */ } while (--len); +# endif } return crc; #undef DO_CRC #undef DO_CRC4 +#undef DO_CRC8 } #endif + /** * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for @@ -100,53 +135,66 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) * @p: pointer to buffer over which CRC is run * @len: length of buffer @p */ -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); - -#if CRC_LE_BITS == 1 -/* - * In fact, the table-based code will work in this case, but it can be - * simplified by inlining the table in ?: form. 
- */ - -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, + size_t len, const u32 (*tab)[256], + u32 polynomial) { +#if CRC_LE_BITS == 1 int i; while (len--) { crc ^= *p++; for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); + } +# elif CRC_LE_BITS == 2 + while (len--) { + crc ^= *p++; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; + crc = (crc >> 2) ^ tab[0][crc & 3]; } - return crc; -} -#else /* Table-based approach */ - -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) -{ -# if CRC_LE_BITS == 8 - const u32 (*tab)[] = crc32table_le; - - crc = __cpu_to_le32(crc); - crc = crc32_body(crc, p, len, tab); - return __le32_to_cpu(crc); # elif CRC_LE_BITS == 4 while (len--) { crc ^= *p++; - crc = (crc >> 4) ^ crc32table_le[crc & 15]; - crc = (crc >> 4) ^ crc32table_le[crc & 15]; + crc = (crc >> 4) ^ tab[0][crc & 15]; + crc = (crc >> 4) ^ tab[0][crc & 15]; } - return crc; -# elif CRC_LE_BITS == 2 +# elif CRC_LE_BITS == 8 + /* aka Sarwate algorithm */ while (len--) { crc ^= *p++; - crc = (crc >> 2) ^ crc32table_le[crc & 3]; - crc = (crc >> 2) ^ crc32table_le[crc & 3]; - crc = (crc >> 2) ^ crc32table_le[crc & 3]; - crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 8) ^ tab[0][crc & 255]; } +# else + crc = (__force u32) __cpu_to_le32(crc); + crc = crc32_body(crc, p, len, tab); + crc = __le32_to_cpu((__force __le32)crc); +#endif return crc; -# endif +} + +#if CRC_LE_BITS == 1 +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE); +} +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); +} +#else +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + return 
crc32_le_generic(crc, p, len, crc32table_le, CRCPOLY_LE); +} +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(__crc32c_le); /** * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 @@ -155,317 +203,913 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) * @p: pointer to buffer over which CRC is run * @len: length of buffer @p */ -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); - -#if CRC_BE_BITS == 1 -/* - * In fact, the table-based code will work in this case, but it can be - * simplified by inlining the table in ?: form. - */ - -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, + size_t len, const u32 (*tab)[256], + u32 polynomial) { +#if CRC_BE_BITS == 1 int i; while (len--) { crc ^= *p++ << 24; for (i = 0; i < 8; i++) crc = - (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : + (crc << 1) ^ ((crc & 0x80000000) ? 
polynomial : 0); } - return crc; -} - -#else /* Table-based approach */ -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) -{ -# if CRC_BE_BITS == 8 - const u32 (*tab)[] = crc32table_be; - - crc = __cpu_to_be32(crc); - crc = crc32_body(crc, p, len, tab); - return __be32_to_cpu(crc); +# elif CRC_BE_BITS == 2 + while (len--) { + crc ^= *p++ << 24; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + crc = (crc << 2) ^ tab[0][crc >> 30]; + } # elif CRC_BE_BITS == 4 while (len--) { crc ^= *p++ << 24; - crc = (crc << 4) ^ crc32table_be[crc >> 28]; - crc = (crc << 4) ^ crc32table_be[crc >> 28]; + crc = (crc << 4) ^ tab[0][crc >> 28]; + crc = (crc << 4) ^ tab[0][crc >> 28]; } - return crc; -# elif CRC_BE_BITS == 2 +# elif CRC_BE_BITS == 8 while (len--) { crc ^= *p++ << 24; - crc = (crc << 2) ^ crc32table_be[crc >> 30]; - crc = (crc << 2) ^ crc32table_be[crc >> 30]; - crc = (crc << 2) ^ crc32table_be[crc >> 30]; - crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 8) ^ tab[0][crc >> 24]; } - return crc; +# else + crc = (__force u32) __cpu_to_be32(crc); + crc = crc32_body(crc, p, len, tab); + crc = __be32_to_cpu((__force __be32)crc); # endif + return crc; } -#endif -EXPORT_SYMBOL(crc32_le); +#if CRC_LE_BITS == 1 +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE); +} +#else +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_be_generic(crc, p, len, crc32table_be, CRCPOLY_BE); +} +#endif EXPORT_SYMBOL(crc32_be); -/* - * A brief CRC tutorial. - * - * A CRC is a long-division remainder. You add the CRC to the message, - * and the whole thing (message+CRC) is a multiple of the given - * CRC polynomial. To check the CRC, you can either check that the - * CRC matches the recomputed value, *or* you can check that the - * remainder computed on the message+CRC is 0. 
This latter approach - * is used by a lot of hardware implementations, and is why so many - * protocols put the end-of-frame flag after the CRC. - * - * It's actually the same long division you learned in school, except that - * - We're working in binary, so the digits are only 0 and 1, and - * - When dividing polynomials, there are no carries. Rather than add and - * subtract, we just xor. Thus, we tend to get a bit sloppy about - * the difference between adding and subtracting. - * - * A 32-bit CRC polynomial is actually 33 bits long. But since it's - * 33 bits long, bit 32 is always going to be set, so usually the CRC - * is written in hex with the most significant bit omitted. (If you're - * familiar with the IEEE 754 floating-point format, it's the same idea.) - * - * Note that a CRC is computed over a string of *bits*, so you have - * to decide on the endianness of the bits within each byte. To get - * the best error-detecting properties, this should correspond to the - * order they're actually sent. For example, standard RS-232 serial is - * little-endian; the most significant bit (sometimes used for parity) - * is sent last. And when appending a CRC word to a message, you should - * do it in the right order, matching the endianness. - * - * Just like with ordinary division, the remainder is always smaller than - * the divisor (the CRC polynomial) you're dividing by. Each step of the - * division, you take one more digit (bit) of the dividend and append it - * to the current remainder. Then you figure out the appropriate multiple - * of the divisor to subtract to being the remainder back into range. - * In binary, it's easy - it has to be either 0 or 1, and to make the - * XOR cancel, it's just a copy of bit 32 of the remainder. 
- * - * When computing a CRC, we don't care about the quotient, so we can - * throw the quotient bit away, but subtract the appropriate multiple of - * the polynomial from the remainder and we're back to where we started, - * ready to process the next bit. - * - * A big-endian CRC written this way would be coded like: - * for (i = 0; i < input_bits; i++) { - * multiple = remainder & 0x80000000 ? CRCPOLY : 0; - * remainder = (remainder << 1 | next_input_bit()) ^ multiple; - * } - * Notice how, to get at bit 32 of the shifted remainder, we look - * at bit 31 of the remainder *before* shifting it. - * - * But also notice how the next_input_bit() bits we're shifting into - * the remainder don't actually affect any decision-making until - * 32 bits later. Thus, the first 32 cycles of this are pretty boring. - * Also, to add the CRC to a message, we need a 32-bit-long hole for it at - * the end, so we have to add 32 extra cycles shifting in zeros at the - * end of every message, - * - * So the standard trick is to rearrage merging in the next_input_bit() - * until the moment it's needed. Then the first 32 cycles can be precomputed, - * and merging in the final 32 zero bits to make room for the CRC can be - * skipped entirely. - * This changes the code to: - * for (i = 0; i < input_bits; i++) { - * remainder ^= next_input_bit() << 31; - * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * With this optimization, the little-endian code is simpler: - * for (i = 0; i < input_bits; i++) { - * remainder ^= next_input_bit(); - * multiple = (remainder & 1) ? CRCPOLY : 0; - * remainder = (remainder >> 1) ^ multiple; - * } - * - * Note that the other details of endianness have been hidden in CRCPOLY - * (which must be bit-reversed) and next_input_bit(). 
- * - * However, as long as next_input_bit is returning the bits in a sensible - * order, we can actually do the merging 8 or more bits at a time rather - * than one bit at a time: - * for (i = 0; i < input_bytes; i++) { - * remainder ^= next_input_byte() << 24; - * for (j = 0; j < 8; j++) { - * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * } - * Or in little-endian: - * for (i = 0; i < input_bytes; i++) { - * remainder ^= next_input_byte(); - * for (j = 0; j < 8; j++) { - * multiple = (remainder & 1) ? CRCPOLY : 0; - * remainder = (remainder << 1) ^ multiple; - * } - * } - * If the input is a multiple of 32 bits, you can even XOR in a 32-bit - * word at a time and increase the inner loop count to 32. - * - * You can also mix and match the two loop styles, for example doing the - * bulk of a message byte-at-a-time and adding bit-at-a-time processing - * for any fractional bytes at the end. - * - * The only remaining optimization is to the byte-at-a-time table method. - * Here, rather than just shifting one bit of the remainder to decide - * in the correct multiple to subtract, we can shift a byte at a time. - * This produces a 40-bit (rather than a 33-bit) intermediate remainder, - * but again the multiple of the polynomial to subtract depends only on - * the high bits, the high 8 bits in this case. - * - * The multiple we need in that case is the low 32 bits of a 40-bit - * value whose high 8 bits are given, and which is a multiple of the - * generator polynomial. This is simply the CRC-32 of the given - * one-byte message. - * - * Two more details: normally, appending zero bits to a message which - * is already a multiple of a polynomial produces a larger multiple of that - * polynomial. To enable a CRC to detect this condition, it's common to - * invert the CRC before appending it. This makes the remainder of the - * message+crc come out not as zero, but some fixed non-zero value. 
- * - * The same problem applies to zero bits prepended to the message, and - * a similar solution is used. Instead of starting with a remainder of - * 0, an initial remainder of all ones is used. As long as you start - * the same way on decoding, it doesn't make a difference. - */ - -#ifdef UNITTEST +#ifdef CONFIG_CRC32_SELFTEST -#include <stdlib.h> -#include <stdio.h> +/* 4096 random bytes */ +static u8 __attribute__((__aligned__(8))) test_buf[] = +{ + 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, + 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, + 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, + 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, + 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, + 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, + 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, + 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, + 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, + 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, + 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, + 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, + 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, + 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, + 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, + 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, + 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, + 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, + 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, + 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, + 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, + 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, + 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, + 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, + 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, + 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, + 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, + 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, + 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, + 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, + 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 
0xce, 0x22, + 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, + 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, + 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, + 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, + 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, + 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, + 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, + 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, + 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, + 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, + 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, + 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, + 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, + 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, + 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, + 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, + 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, + 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, + 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, + 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, + 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, + 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, + 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, + 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, + 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, + 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, + 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, + 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, + 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, + 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, + 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, + 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, + 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, + 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, + 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, + 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, + 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, + 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, + 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, + 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 
0xb1, 0xf1, + 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, + 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, + 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, + 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, + 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, + 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, + 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, + 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, + 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, + 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, + 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, + 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, + 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, + 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, + 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, + 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, + 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, + 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, + 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, + 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, + 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, + 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, + 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, + 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, + 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, + 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, + 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, + 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, + 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, + 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, + 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, + 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, + 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, + 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, + 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, + 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, + 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, + 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, + 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, + 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 
0x01, 0x4c, + 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, + 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, + 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, + 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, + 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, + 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, + 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, + 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, + 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, + 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, + 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, + 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, + 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, + 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, + 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, + 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, + 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, + 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, + 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, + 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, + 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, + 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, + 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, + 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, + 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, + 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, + 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, + 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, + 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, + 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, + 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, + 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, + 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, + 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, + 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, + 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, + 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, + 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, + 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, + 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 
0x7d, 0x3d, + 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, + 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, + 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, + 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, + 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, + 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, + 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, + 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, + 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, + 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, + 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, + 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, + 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, + 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, + 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, + 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, + 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, + 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, + 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, + 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, + 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, + 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, + 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, + 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, + 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, + 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, + 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, + 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, + 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, + 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, + 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, + 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, + 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, + 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, + 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, + 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, + 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, + 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, + 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, + 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 
0x98, 0xd3, + 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, + 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, + 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, + 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, + 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, + 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, + 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, + 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, + 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, + 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, + 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, + 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, + 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, + 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, + 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, + 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, + 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, + 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, + 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, + 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, + 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, + 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, + 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, + 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, + 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, + 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, + 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, + 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, + 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, + 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, + 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, + 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, + 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, + 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, + 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, + 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, + 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, + 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, + 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, + 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 
0xd5, 0x90, + 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, + 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, + 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, + 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, + 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, + 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, + 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, + 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, + 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, + 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, + 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, + 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, + 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, + 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, + 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, + 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, + 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, + 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, + 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, + 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, + 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, + 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, + 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, + 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, + 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, + 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, + 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, + 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, + 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, + 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, + 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, + 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, + 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, + 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, + 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, + 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, + 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, + 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, + 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, + 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 
0x7e, 0x3f, + 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, + 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, + 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, + 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, + 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, + 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, + 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, + 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, + 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, + 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, + 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, + 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, + 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, + 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, + 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, + 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, + 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, + 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, + 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, + 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, + 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, + 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, + 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, + 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, + 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, + 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, + 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, + 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, + 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, + 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, + 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, + 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, + 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, + 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, + 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, + 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, + 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, + 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, + 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, + 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 
0x35, 0x59, + 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, + 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, + 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, + 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, + 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, + 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, + 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, + 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, + 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, + 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, + 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, + 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, + 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, + 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, + 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, + 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, + 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, + 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, + 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, + 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, + 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, + 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, + 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, + 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, + 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, + 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, + 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, + 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, + 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, + 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, + 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, + 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, + 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, + 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, + 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, + 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, + 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, + 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, + 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, + 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 
0x3f, 0x06, + 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, + 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, + 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, + 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, + 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, + 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, + 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, + 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, + 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, + 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, + 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, + 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, + 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, + 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, + 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, + 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, + 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, + 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, + 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, + 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, + 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, + 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, + 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, + 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, + 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, + 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, + 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, + 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, + 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, + 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, + 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, + 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, + 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, + 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, + 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, + 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, + 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, + 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, + 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, + 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 
0x9d, 0x8d, + 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, + 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, + 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, + 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, + 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, + 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, + 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, + 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, + 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, + 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, + 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, + 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, + 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, + 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, + 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, + 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, + 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, + 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, + 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, + 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, + 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, + 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, + 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, + 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, + 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, + 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, + 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, + 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, + 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, + 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, + 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, + 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, + 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, + 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, + 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, + 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, + 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, + 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, + 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, + 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 
0x2a, 0x39, + 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, + 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, + 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, + 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, + 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, + 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, + 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, + 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, + 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, + 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, + 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, + 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, + 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, + 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, + 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, + 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, + 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, + 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, + 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, + 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, + 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, + 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, + 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, + 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, + 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, + 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, + 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, + 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, + 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, + 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, + 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, + 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, + 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, + 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, + 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, + 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, + 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, + 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, + 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, + 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 
0xed, 0xb4, + 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, + 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, + 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, + 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, + 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, + 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, + 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, + 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, + 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, + 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, + 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, + 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, + 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, + 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, + 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, + 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, + 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, + 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, + 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, + 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, + 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, + 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, + 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, + 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, + 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, + 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, + 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, + 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, + 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, + 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, + 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, + 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, + 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, + 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, + 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, + 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, + 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, + 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, + 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, + 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 
0xba, 0xbc, + 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, +}; -#if 0 /*Not used at present */ -static void -buf_dump(char const *prefix, unsigned char const *buf, size_t len) +/* 100 test cases */ +static struct crc_test { + u32 crc; /* random starting crc */ + u32 start; /* random 6 bit offset in buf */ + u32 length; /* random 11 bit length of test */ + u32 crc_le; /* expected crc32_le result */ + u32 crc_be; /* expected crc32_be result */ + u32 crc32c_le; /* expected crc32c_le result */ +} test[] = { - fputs(prefix, stdout); - while (len--) - printf(" %02x", *buf++); - putchar('\n'); + {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, + 0xf6e93d6c}, + {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, + 0x0fe92aca}, + {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, + 0x52e1ebb8}, + {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, + 0x0798af9a}, + {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, + 0x18eb3152}, + {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, + 0xd00d08c7}, + {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, + 0x8ba966bc}, + {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, + 0x11d694a2}, + {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, + 0x6ab3208d}, + {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, + 0xba4603c5}, + {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, + 0xe6071c6f}, + {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, + 0x179ec30a}, + {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, + 0x0903beb8}, + {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, + 0x6a7cb4fa}, + {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, + 0xdb535801}, + {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, + 0x92bed597}, + {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, + 0x192a3f1b}, + {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, + 
0xccbaec1a}, + {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, + 0x7eabae4d}, + {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, + 0x28c72982}, + {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, + 0xc3cd4d18}, + {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, + 0xbca8f0e7}, + {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, + 0x713f60b3}, + {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, + 0xebd08fd5}, + {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, + 0x64406c59}, + {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, + 0x7421890e}, + {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, + 0xe9347603}, + {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, + 0x1bef9060}, + {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, + 0x34720072}, + {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, + 0x48310f59}, + {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, + 0x783a4213}, + {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, + 0x9e8efd41}, + {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, + 0xfc3d34a5}, + {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, + 0x17a52ae2}, + {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, + 0x886d935a}, + {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, + 0xeaaeaeb2}, + {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, + 0x8e900a4b}, + {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, + 0xd74662b1}, + {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, + 0xd26752ba}, + {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, + 0x8b1fcd62}, + {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, + 0xf54342fe}, + {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, + 0x5b95b988}, + {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, + 0x2e1176be}, + {0x91b444f9, 0x0000002e, 
0x000007f8, 0x84e9a983, 0x5676756f, + 0x66120546}, + {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, + 0xf256a5cc}, + {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, + 0x4af1dd69}, + {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, + 0x56f0a04a}, + {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, + 0x74f6b6b2}, + {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, + 0x085951fd}, + {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, + 0xc65387eb}, + {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, + 0x1ca9257b}, + {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, + 0xfd196d76}, + {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, + 0x5ef88339}, + {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, + 0x2c3714d9}, + {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, + 0x58576548}, + {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, + 0xfd7c57de}, + {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, + 0xd5fedd59}, + {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, + 0x1cc3b17b}, + {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, + 0x270eed73}, + {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, + 0x91ecbb11}, + {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, + 0x05ed8d0c}, + {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, + 0x0b09ad5b}, + {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, + 0xf8d511fb}, + {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, + 0x5ad832cc}, + {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, + 0x1214d196}, + {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, + 0x5747218a}, + {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, + 0xde8f14de}, + {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, + 0x3563b7b9}, + {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, + 
0x071475d0}, + {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, + 0x54c79d60}, + {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, + 0x4c53eee6}, + {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, + 0x10137a3c}, + {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, + 0xaa9d6c73}, + {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, + 0xb63d23e7}, + {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, + 0x7f53e9cf}, + {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, + 0x13c1cd83}, + {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, + 0x49ff5867}, + {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, + 0x8467f211}, + {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, + 0x3f9683b2}, + {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, + 0x76a3f874}, + {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, + 0x863b702f}, + {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, + 0xdc6c58ff}, + {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, + 0x0622cc95}, + {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, + 0xe85605cd}, + {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, + 0x31da5f06}, + {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, + 0xa1f2e784}, + {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, + 0xb07cc616}, + {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, + 0xbf943b6c}, + {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, + 0x2c01af1c}, + {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, + 0x0fe5f56d}, + {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, + 0xf8943b2d}, + {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, + 0xe4d89272}, + {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, + 0x7c2f6bbb}, + {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, + 0xabbf388b}, + {0x5d8c9506, 0x00000020, 
0x00000470, 0x4c62378e, 0x31d92640, + 0x1dca1f4e}, + {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, + 0x5c170e23}, + {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, + 0xc0e9d672}, + {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, + 0xc18bdc86}, + {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, + 0xa874fcdd}, + {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, + 0x9dc0bb48}, +}; -} -#endif +#include <linux/time.h> -static void bytereverse(unsigned char *buf, size_t len) +static int __init crc32c_test(void) { - while (len--) { - unsigned char x = bitrev8(*buf); - *buf++ = x; + int i; + int errors = 0; + int bytes = 0; + struct timespec start, stop; + u64 nsec; + unsigned long flags; + + /* keep static to prevent cache warming code from + * getting eliminated by the compiler */ + static u32 crc; + + /* pre-warm the cache */ + for (i = 0; i < 100; i++) { + bytes += 2*test[i].length; + + crc ^= __crc32c_le(test[i].crc, test_buf + + test[i].start, test[i].length); } -} -static void random_garbage(unsigned char *buf, size_t len) -{ - while (len--) - *buf++ = (unsigned char) random(); -} + /* reduce OS noise */ + local_irq_save(flags); + local_irq_disable(); -#if 0 /* Not used at present */ -static void store_le(u32 x, unsigned char *buf) -{ - buf[0] = (unsigned char) x; - buf[1] = (unsigned char) (x >> 8); - buf[2] = (unsigned char) (x >> 16); - buf[3] = (unsigned char) (x >> 24); -} -#endif + getnstimeofday(&start); + for (i = 0; i < 100; i++) { + if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; + } + getnstimeofday(&stop); -static void store_be(u32 x, unsigned char *buf) -{ - buf[0] = (unsigned char) (x >> 24); - buf[1] = (unsigned char) (x >> 16); - buf[2] = (unsigned char) (x >> 8); - buf[3] = (unsigned char) x; + local_irq_restore(flags); + local_irq_enable(); + + nsec = stop.tv_nsec - start.tv_nsec + + 1000000000 * (stop.tv_sec - start.tv_sec); + 
+ pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); + + if (errors) + pr_warn("crc32c: %d self tests failed\n", errors); + else { + pr_info("crc32c: self tests passed, processed %d bytes in %lld nsec\n", + bytes, nsec); + } + + return 0; } -/* - * This checks that CRC(buf + CRC(buf)) = 0, and that - * CRC commutes with bit-reversal. This has the side effect - * of bytewise bit-reversing the input buffer, and returns - * the CRC of the reversed buffer. - */ -static u32 test_step(u32 init, unsigned char *buf, size_t len) +static int __init crc32_test(void) { - u32 crc1, crc2; - size_t i; + int i; + int errors = 0; + int bytes = 0; + struct timespec start, stop; + u64 nsec; + unsigned long flags; + + /* keep static to prevent cache warming code from + * getting eliminated by the compiler */ + static u32 crc; + + /* pre-warm the cache */ + for (i = 0; i < 100; i++) { + bytes += 2*test[i].length; - crc1 = crc32_be(init, buf, len); - store_be(crc1, buf + len); - crc2 = crc32_be(init, buf, len + 4); - if (crc2) - printf("\nCRC cancellation fail: 0x%08x should be 0\n", - crc2); - - for (i = 0; i <= len + 4; i++) { - crc2 = crc32_be(init, buf, i); - crc2 = crc32_be(crc2, buf + i, len + 4 - i); - if (crc2) - printf("\nCRC split fail: 0x%08x\n", crc2); + crc ^= crc32_le(test[i].crc, test_buf + + test[i].start, test[i].length); + + crc ^= crc32_be(test[i].crc, test_buf + + test[i].start, test[i].length); } - /* Now swap it around for the other test */ - - bytereverse(buf, len + 4); - init = bitrev32(init); - crc2 = bitrev32(crc1); - if (crc1 != bitrev32(crc2)) - printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", - crc1, crc2, bitrev32(crc2)); - crc1 = crc32_le(init, buf, len); - if (crc1 != crc2) - printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, - crc2); - crc2 = crc32_le(init, buf, len + 4); - if (crc2) - printf("\nCRC cancellation fail: 0x%08x should be 0\n", - crc2); - - for (i = 0; i <= len + 4; i++) { - crc2 = crc32_le(init, buf, i); - crc2 = 
crc32_le(crc2, buf + i, len + 4 - i); - if (crc2) - printf("\nCRC split fail: 0x%08x\n", crc2); + /* reduce OS noise */ + local_irq_save(flags); + local_irq_disable(); + + getnstimeofday(&start); + for (i = 0; i < 100; i++) { + if (test[i].crc_le != crc32_le(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; + + if (test[i].crc_be != crc32_be(test[i].crc, test_buf + + test[i].start, test[i].length)) + errors++; } + getnstimeofday(&stop); - return crc1; -} + local_irq_restore(flags); + local_irq_enable(); -#define SIZE 64 -#define INIT1 0 -#define INIT2 0 + nsec = stop.tv_nsec - start.tv_nsec + + 1000000000 * (stop.tv_sec - start.tv_sec); -int main(void) -{ - unsigned char buf1[SIZE + 4]; - unsigned char buf2[SIZE + 4]; - unsigned char buf3[SIZE + 4]; - int i, j; - u32 crc1, crc2, crc3; - - for (i = 0; i <= SIZE; i++) { - printf("\rTesting length %d...", i); - fflush(stdout); - random_garbage(buf1, i); - random_garbage(buf2, i); - for (j = 0; j < i; j++) - buf3[j] = buf1[j] ^ buf2[j]; - - crc1 = test_step(INIT1, buf1, i); - crc2 = test_step(INIT2, buf2, i); - /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ - crc3 = test_step(INIT1 ^ INIT2, buf3, i); - if (crc3 != (crc1 ^ crc2)) - printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", - crc3, crc1, crc2); + pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", + CRC_LE_BITS, CRC_BE_BITS); + + if (errors) + pr_warn("crc32: %d self tests failed\n", errors); + else { + pr_info("crc32: self tests passed, processed %d bytes in %lld nsec\n", + bytes, nsec); } - printf("\nAll test complete. 
No failures expected.\n"); + return 0; } -#endif /* UNITTEST */ +static int __init crc32test_init(void) +{ + crc32_test(); + crc32c_test(); + return 0; +} + +static void __exit crc32_exit(void) +{ +} + +module_init(crc32test_init); +module_exit(crc32_exit); +#endif /* CONFIG_CRC32_SELFTEST */ diff --git a/lib/crc32defs.h b/lib/crc32defs.h index 9b6773d73749..64cba2c3c700 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h @@ -6,27 +6,67 @@ #define CRCPOLY_LE 0xedb88320 #define CRCPOLY_BE 0x04c11db7 -/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */ -/* For less performance-sensitive, use 4 */ -#ifndef CRC_LE_BITS +/* + * This is the CRC32c polynomial, as outlined by Castagnoli. + * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ + * x^8+x^6+x^0 + */ +#define CRC32C_POLY_LE 0x82F63B78 + +/* Try to choose an implementation variant via Kconfig */ +#ifdef CONFIG_CRC32_SLICEBY8 +# define CRC_LE_BITS 64 +# define CRC_BE_BITS 64 +#endif +#ifdef CONFIG_CRC32_SLICEBY4 +# define CRC_LE_BITS 32 +# define CRC_BE_BITS 32 +#endif +#ifdef CONFIG_CRC32_SARWATE # define CRC_LE_BITS 8 +# define CRC_BE_BITS 8 +#endif +#ifdef CONFIG_CRC32_BIT +# define CRC_LE_BITS 1 +# define CRC_BE_BITS 1 +#endif + +/* + * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. + * For less performance-sensitive, use 4 or 8 to save table size. + * For larger systems choose same as CPU architecture as default. + * This works well on X86_64, SPARC64 systems. This may require some + * elaboration after experiments with other architectures. + */ +#ifndef CRC_LE_BITS +# ifdef CONFIG_64BIT +# define CRC_LE_BITS 64 +# else +# define CRC_LE_BITS 32 +# endif #endif #ifndef CRC_BE_BITS -# define CRC_BE_BITS 8 +# ifdef CONFIG_64BIT +# define CRC_BE_BITS 64 +# else +# define CRC_BE_BITS 32 +# endif #endif /* * Little-endian CRC computation. Used with serial bit streams sent * lsbit-first. 
Be sure to use cpu_to_le32() to append the computed CRC. */ -#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1 -# error CRC_LE_BITS must be a power of 2 between 1 and 8 +#if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ + CRC_LE_BITS & CRC_LE_BITS-1 +# error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}" #endif /* * Big-endian CRC computation. Used with serial bit streams sent * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. */ -#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1 -# error CRC_BE_BITS must be a power of 2 between 1 and 8 +#if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ + CRC_BE_BITS & CRC_BE_BITS-1 +# error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}" #endif diff --git a/lib/ctype.c b/lib/ctype.c index 26baa620e95b..c646df91a2f7 100644 --- a/lib/ctype.c +++ b/lib/ctype.c @@ -5,7 +5,8 @@ */ #include <linux/ctype.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> const unsigned char _ctype[] = { _C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ diff --git a/lib/debug_locks.c b/lib/debug_locks.c index b1c177307677..f2fa60c59343 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -10,7 +10,7 @@ */ #include <linux/rwsem.h> #include <linux/mutex.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/debug_locks.h> diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 77cb245f8e7b..0ab9ae8057f0 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -818,17 +818,9 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state) if (obj->static_init == 1) { debug_object_init(obj, &descr_type_test); debug_object_activate(obj, &descr_type_test); - /* - * Real code should return 0 here ! This is - * not a fixup of some bad behaviour. We - * merily call the debug_init function to keep - * track of the object. 
- */ - return 1; - } else { - /* Real code needs to emit a warning here */ + return 0; } - return 0; + return 1; case ODEBUG_STATE_ACTIVE: debug_object_deactivate(obj, &descr_type_test); @@ -967,7 +959,7 @@ static void __init debug_objects_selftest(void) obj.static_init = 1; debug_object_activate(&obj, &descr_type_test); - if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings)) + if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings)) goto out; debug_object_init(&obj, &descr_type_test); if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings)) diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index b5257725daad..e26278576b31 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/atomic.h> diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index a7b80c1d6a0d..31c5f7675fbf 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -1,4 +1,3 @@ -/* vi: set sw = 4 ts = 4: */ /* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). 
Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), @@ -691,7 +690,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, outbuf = malloc(BZIP2_IOBUF_SIZE); if (!outbuf) { - error("Could not allocate output bufer"); + error("Could not allocate output buffer"); return RETVAL_OUT_OF_MEMORY; } if (buf) @@ -699,7 +698,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len, else inbuf = malloc(BZIP2_IOBUF_SIZE); if (!inbuf) { - error("Could not allocate input bufer"); + error("Could not allocate input buffer"); i = RETVAL_OUT_OF_MEMORY; goto exit_0; } diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 476c65af9709..32adb73a9038 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -562,7 +562,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len, else inbuf = malloc(LZMA_IOBUF_SIZE); if (!inbuf) { - error("Could not allocate input bufer"); + error("Could not allocate input buffer"); goto exit_0; } diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 5a7a2adf4c4c..4531294fa62f 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -279,7 +279,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len, ret = 0; exit_2: if (!input) - free(in_buf); + free(in_buf_save); exit_1: if (!output) free(out_buf); diff --git a/lib/devres.c b/lib/devres.c index 4fbc09e6e9e6..80b9c76d436a 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -1,7 +1,7 @@ #include <linux/pci.h> #include <linux/io.h> #include <linux/gfp.h> -#include <linux/module.h> +#include <linux/export.h> void devm_ioremap_release(struct device *dev, void *res) { @@ -304,7 +304,7 @@ EXPORT_SYMBOL(pcim_iounmap); * * Request and iomap regions specified by @mask. 
*/ -int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name) +int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name) { void __iomem * const *iomap; int i, rc; @@ -357,7 +357,7 @@ EXPORT_SYMBOL(pcim_iomap_regions); * * Request all PCI BARs and iomap regions specified by @mask. */ -int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask, +int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, const char *name) { int request_mask = ((1 << 6) - 1) & ~mask; @@ -381,7 +381,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); * * Unmap and release regions specified by @mask. */ -void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) +void pcim_iounmap_regions(struct pci_dev *pdev, int mask) { void __iomem * const *iomap; int i; diff --git a/lib/digsig.c b/lib/digsig.c new file mode 100644 index 000000000000..286d558033e2 --- /dev/null +++ b/lib/digsig.c @@ -0,0 +1,278 @@ +/* + * Copyright (C) 2011 Nokia Corporation + * Copyright (C) 2011 Intel Corporation + * + * Author: + * Dmitry Kasatkin <dmitry.kasatkin@nokia.com> + * <dmitry.kasatkin@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. 
+ * + * File: sign.c + * implements signature (RSA) verification + * pkcs decoding is based on LibTomCrypt code + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/key.h> +#include <linux/crypto.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <keys/user-type.h> +#include <linux/mpi.h> +#include <linux/digsig.h> + +static struct crypto_shash *shash; + +static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg, + unsigned long msglen, + unsigned long modulus_bitlen, + unsigned char *out, + unsigned long *outlen) +{ + unsigned long modulus_len, ps_len, i; + + modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0); + + /* test message size */ + if ((msglen > modulus_len) || (modulus_len < 11)) + return -EINVAL; + + /* separate encoded message */ + if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) + return -EINVAL; + + for (i = 2; i < modulus_len - 1; i++) + if (msg[i] != 0xFF) + break; + + /* separator check */ + if (msg[i] != 0) + /* There was no octet with hexadecimal value 0x00 + to separate ps from m. 
*/ + return -EINVAL; + + ps_len = i - 2; + + if (*outlen < (msglen - (2 + ps_len + 1))) { + *outlen = msglen - (2 + ps_len + 1); + return -EOVERFLOW; + } + + *outlen = (msglen - (2 + ps_len + 1)); + memcpy(out, &msg[2 + ps_len + 1], *outlen); + + return 0; +} + +/* + * RSA Signature verification with public key + */ +static int digsig_verify_rsa(struct key *key, + const char *sig, int siglen, + const char *h, int hlen) +{ + int err = -EINVAL; + unsigned long len; + unsigned long mlen, mblen; + unsigned nret, l; + int head, i; + unsigned char *out1 = NULL, *out2 = NULL; + MPI in = NULL, res = NULL, pkey[2]; + uint8_t *p, *datap, *endp; + struct user_key_payload *ukp; + struct pubkey_hdr *pkh; + + down_read(&key->sem); + ukp = key->payload.data; + + if (ukp->datalen < sizeof(*pkh)) + goto err1; + + pkh = (struct pubkey_hdr *)ukp->data; + + if (pkh->version != 1) + goto err1; + + if (pkh->algo != PUBKEY_ALGO_RSA) + goto err1; + + if (pkh->nmpi != 2) + goto err1; + + datap = pkh->mpi; + endp = ukp->data + ukp->datalen; + + err = -ENOMEM; + + for (i = 0; i < pkh->nmpi; i++) { + unsigned int remaining = endp - datap; + pkey[i] = mpi_read_from_buffer(datap, &remaining); + if (!pkey[i]) + goto err; + datap += remaining; + } + + mblen = mpi_get_nbits(pkey[0]); + mlen = (mblen + 7)/8; + + if (mlen == 0) + goto err; + + out1 = kzalloc(mlen, GFP_KERNEL); + if (!out1) + goto err; + + out2 = kzalloc(mlen, GFP_KERNEL); + if (!out2) + goto err; + + nret = siglen; + in = mpi_read_from_buffer(sig, &nret); + if (!in) + goto err; + + res = mpi_alloc(mpi_get_nlimbs(in) * 2); + if (!res) + goto err; + + err = mpi_powm(res, in, pkey[1], pkey[0]); + if (err) + goto err; + + if (mpi_get_nlimbs(res) * BYTES_PER_MPI_LIMB > mlen) { + err = -EINVAL; + goto err; + } + + p = mpi_get_buffer(res, &l, NULL); + if (!p) { + err = -EINVAL; + goto err; + } + + len = mlen; + head = len - l; + memset(out1, 0, head); + memcpy(out1 + head, p, l); + + err = pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, 
&len); + + if (!err && len == hlen) + err = memcmp(out2, h, hlen); + +err: + mpi_free(in); + mpi_free(res); + kfree(out1); + kfree(out2); + while (--i >= 0) + mpi_free(pkey[i]); +err1: + up_read(&key->sem); + + return err; +} + +/** + * digsig_verify() - digital signature verification with public key + * @keyring: keyring to search key in + * @sig: digital signature + * @sigen: length of the signature + * @data: data + * @datalen: length of the data + * @return: 0 on success, -EINVAL otherwise + * + * Verifies data integrity against digital signature. + * Currently only RSA is supported. + * Normally hash of the content is used as a data for this function. + * + */ +int digsig_verify(struct key *keyring, const char *sig, int siglen, + const char *data, int datalen) +{ + int err = -ENOMEM; + struct signature_hdr *sh = (struct signature_hdr *)sig; + struct shash_desc *desc = NULL; + unsigned char hash[SHA1_DIGEST_SIZE]; + struct key *key; + char name[20]; + + if (siglen < sizeof(*sh) + 2) + return -EINVAL; + + if (sh->algo != PUBKEY_ALGO_RSA) + return -ENOTSUPP; + + sprintf(name, "%llX", __be64_to_cpup((uint64_t *)sh->keyid)); + + if (keyring) { + /* search in specific keyring */ + key_ref_t kref; + kref = keyring_search(make_key_ref(keyring, 1UL), + &key_type_user, name); + if (IS_ERR(kref)) + key = ERR_PTR(PTR_ERR(kref)); + else + key = key_ref_to_ptr(kref); + } else { + key = request_key(&key_type_user, name, NULL); + } + if (IS_ERR(key)) { + pr_err("key not found, id: %s\n", name); + return PTR_ERR(key); + } + + desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash), + GFP_KERNEL); + if (!desc) + goto err; + + desc->tfm = shash; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + crypto_shash_init(desc); + crypto_shash_update(desc, data, datalen); + crypto_shash_update(desc, sig, sizeof(*sh)); + crypto_shash_final(desc, hash); + + kfree(desc); + + /* pass signature mpis address */ + err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh), + hash, 
sizeof(hash)); + +err: + key_put(key); + + return err ? -EINVAL : 0; +} +EXPORT_SYMBOL_GPL(digsig_verify); + +static int __init digsig_init(void) +{ + shash = crypto_alloc_shash("sha1", 0, 0); + if (IS_ERR(shash)) { + pr_err("shash allocation failed\n"); + return PTR_ERR(shash); + } + + return 0; + +} + +static void __exit digsig_cleanup(void) +{ + crypto_free_shash(shash); +} + +module_init(digsig_init); +module_exit(digsig_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/lib/div64.c b/lib/div64.c index 5b4919191778..3ea24907d52e 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -16,7 +16,8 @@ * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S. */ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/kernel.h> #include <linux/math64.h> /* Not needed on 64bit architectures */ diff --git a/lib/dma-debug.c b/lib/dma-debug.c index fea790a2b176..13ef2338be41 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -170,7 +170,7 @@ static bool driver_filter(struct device *dev) return false; /* driver filter on but not yet initialized */ - drv = get_driver(dev->driver); + drv = dev->driver; if (!drv) return false; @@ -185,7 +185,6 @@ static bool driver_filter(struct device *dev) } read_unlock_irqrestore(&driver_name_lock, flags); - put_driver(drv); return ret; } diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 53bff4c8452b..42f4f55c9458 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -4,7 +4,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> void dump_stack(void) { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dcdade39e47f..310c753cf83e 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -60,6 +60,7 @@ struct ddebug_iter { static DEFINE_MUTEX(ddebug_lock); static LIST_HEAD(ddebug_tables); static int verbose = 0; +module_param(verbose, int, 0644); /* Return the last part of a pathname */ static inline const char *basename(const char *path) @@ -68,12 +69,24 @@ static 
inline const char *basename(const char *path) return tail ? tail+1 : path; } +/* Return the path relative to source root */ +static inline const char *trim_prefix(const char *path) +{ + int skip = strlen(__FILE__) - strlen("lib/dynamic_debug.c"); + + if (strncmp(path, __FILE__, skip)) + skip = 0; /* prefix mismatch, don't skip */ + + return path + skip; +} + static struct { unsigned flag:8; char opt_char; } opt_array[] = { { _DPRINTK_FLAGS_PRINT, 'p' }, { _DPRINTK_FLAGS_INCL_MODNAME, 'm' }, { _DPRINTK_FLAGS_INCL_FUNCNAME, 'f' }, { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, { _DPRINTK_FLAGS_INCL_TID, 't' }, + { _DPRINTK_FLAGS_NONE, '_' }, }; /* format a string into buf[] which describes the _ddebug's flags */ @@ -83,58 +96,74 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, char *p = buf; int i; - BUG_ON(maxlen < 4); + BUG_ON(maxlen < 6); for (i = 0; i < ARRAY_SIZE(opt_array); ++i) if (dp->flags & opt_array[i].flag) *p++ = opt_array[i].opt_char; if (p == buf) - *p++ = '-'; + *p++ = '_'; *p = '\0'; return buf; } +#define vpr_info_dq(q, msg) \ +do { \ + if (verbose) \ + /* trim last char off format print */ \ + pr_info("%s: func=\"%s\" file=\"%s\" " \ + "module=\"%s\" format=\"%.*s\" " \ + "lineno=%u-%u", \ + msg, \ + q->function ? q->function : "", \ + q->filename ? q->filename : "", \ + q->module ? q->module : "", \ + (int)(q->format ? strlen(q->format) - 1 : 0), \ + q->format ? q->format : "", \ + q->first_lineno, q->last_lineno); \ +} while (0) + /* - * Search the tables for _ddebug's which match the given - * `query' and apply the `flags' and `mask' to them. Tells - * the user which ddebug's were changed, or whether none - * were matched. + * Search the tables for _ddebug's which match the given `query' and + * apply the `flags' and `mask' to them. Returns number of matching + * callsites, normally the same as number of changes. If verbose, + * logs the changes. Takes ddebug_lock. 
*/ -static void ddebug_change(const struct ddebug_query *query, - unsigned int flags, unsigned int mask) +static int ddebug_change(const struct ddebug_query *query, + unsigned int flags, unsigned int mask) { int i; struct ddebug_table *dt; unsigned int newflags; unsigned int nfound = 0; - char flagbuf[8]; + char flagbuf[10]; /* search for matching ddebugs */ mutex_lock(&ddebug_lock); list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module != NULL && - strcmp(query->module, dt->mod_name)) + if (query->module && strcmp(query->module, dt->mod_name)) continue; for (i = 0 ; i < dt->num_ddebugs ; i++) { struct _ddebug *dp = &dt->ddebugs[i]; /* match against the source filename */ - if (query->filename != NULL && + if (query->filename && strcmp(query->filename, dp->filename) && - strcmp(query->filename, basename(dp->filename))) + strcmp(query->filename, basename(dp->filename)) && + strcmp(query->filename, trim_prefix(dp->filename))) continue; /* match against the function */ - if (query->function != NULL && + if (query->function && strcmp(query->function, dp->function)) continue; /* match against the format */ - if (query->format != NULL && - strstr(dp->format, query->format) == NULL) + if (query->format && + !strstr(dp->format, query->format)) continue; /* match against the line number range */ @@ -151,13 +180,9 @@ static void ddebug_change(const struct ddebug_query *query, if (newflags == dp->flags) continue; dp->flags = newflags; - if (newflags) - dp->enabled = 1; - else - dp->enabled = 0; if (verbose) - pr_info("changed %s:%d [%s]%s %s\n", - dp->filename, dp->lineno, + pr_info("changed %s:%d [%s]%s =%s\n", + trim_prefix(dp->filename), dp->lineno, dt->mod_name, dp->function, ddebug_describe_flags(dp, flagbuf, sizeof(flagbuf))); @@ -167,6 +192,8 @@ static void ddebug_change(const struct ddebug_query *query, if (!nfound && verbose) pr_info("no matches for query\n"); + + return nfound; } /* @@ -186,8 +213,10 @@ static int 
ddebug_tokenize(char *buf, char *words[], int maxwords) buf = skip_spaces(buf); if (!*buf) break; /* oh, it was trailing whitespace */ + if (*buf == '#') + break; /* token starts comment, skip rest of line */ - /* Run `end' over a word, either whitespace separated or quoted */ + /* find `end' of word, whitespace separated or quoted */ if (*buf == '"' || *buf == '\'') { int quote = *buf++; for (end = buf ; *end && *end != quote ; end++) @@ -199,8 +228,8 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) ; BUG_ON(end == buf); } - /* Here `buf' is the start of the word, `end' is one past the end */ + /* `buf' is start of word, `end' is one past its end */ if (nwords == maxwords) return -EINVAL; /* ran out of words[] before bytes */ if (*end) @@ -279,6 +308,19 @@ static char *unescape(char *str) return str; } +static int check_set(const char **dest, char *src, char *name) +{ + int rc = 0; + + if (*dest) { + rc = -EINVAL; + pr_err("match-spec:%s val:%s overridden by %s", + name, *dest, src); + } + *dest = src; + return rc; +} + /* * Parse words[] as a ddebug query specification, which is a series * of (keyword, value) pairs chosen from these possibilities: @@ -290,11 +332,15 @@ static char *unescape(char *str) * format <escaped-string-to-find-in-format> * line <lineno> * line <first-lineno>-<last-lineno> // where either may be empty + * + * Only 1 of each type is allowed. + * Returns 0 on success, <0 on error. 
*/ static int ddebug_parse_query(char *words[], int nwords, struct ddebug_query *query) { unsigned int i; + int rc; /* check we have an even number of words */ if (nwords % 2 != 0) @@ -303,41 +349,43 @@ static int ddebug_parse_query(char *words[], int nwords, for (i = 0 ; i < nwords ; i += 2) { if (!strcmp(words[i], "func")) - query->function = words[i+1]; + rc = check_set(&query->function, words[i+1], "func"); else if (!strcmp(words[i], "file")) - query->filename = words[i+1]; + rc = check_set(&query->filename, words[i+1], "file"); else if (!strcmp(words[i], "module")) - query->module = words[i+1]; + rc = check_set(&query->module, words[i+1], "module"); else if (!strcmp(words[i], "format")) - query->format = unescape(words[i+1]); + rc = check_set(&query->format, unescape(words[i+1]), + "format"); else if (!strcmp(words[i], "line")) { char *first = words[i+1]; char *last = strchr(first, '-'); + if (query->first_lineno || query->last_lineno) { + pr_err("match-spec:line given 2 times\n"); + return -EINVAL; + } if (last) *last++ = '\0'; if (parse_lineno(first, &query->first_lineno) < 0) return -EINVAL; - if (last != NULL) { + if (last) { /* range <first>-<last> */ - if (parse_lineno(last, &query->last_lineno) < 0) + if (parse_lineno(last, &query->last_lineno) + < query->first_lineno) { + pr_err("last-line < 1st-line\n"); return -EINVAL; + } } else { query->last_lineno = query->first_lineno; } } else { - if (verbose) - pr_err("unknown keyword \"%s\"\n", words[i]); + pr_err("unknown keyword \"%s\"\n", words[i]); return -EINVAL; } + if (rc) + return rc; } - - if (verbose) - pr_info("q->function=\"%s\" q->filename=\"%s\" " - "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n", - query->function, query->filename, - query->module, query->format, query->first_lineno, - query->last_lineno); - + vpr_info_dq(query, "parsed"); return 0; } @@ -375,8 +423,6 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, if (i < 0) return -EINVAL; } - if (flags == 0) - 
return -EINVAL; if (verbose) pr_info("flags=0x%x\n", flags); @@ -405,7 +451,7 @@ static int ddebug_exec_query(char *query_string) unsigned int flags = 0, mask = 0; struct ddebug_query query; #define MAXWORDS 9 - int nwords; + int nwords, nfound; char *words[MAXWORDS]; nwords = ddebug_tokenize(query_string, words, MAXWORDS); @@ -417,8 +463,47 @@ static int ddebug_exec_query(char *query_string) return -EINVAL; /* actually go and implement the change */ - ddebug_change(&query, flags, mask); - return 0; + nfound = ddebug_change(&query, flags, mask); + vpr_info_dq((&query), (nfound) ? "applied" : "no-match"); + + return nfound; +} + +/* handle multiple queries in query string, continue on error, return + last error or number of matching callsites. Module name is either + in param (for boot arg) or perhaps in query string. +*/ +static int ddebug_exec_queries(char *query) +{ + char *split; + int i, errs = 0, exitcode = 0, rc, nfound = 0; + + for (i = 0; query; query = split) { + split = strpbrk(query, ";\n"); + if (split) + *split++ = '\0'; + + query = skip_spaces(query); + if (!query || !*query || *query == '#') + continue; + + if (verbose) + pr_info("query %d: \"%s\"\n", i, query); + + rc = ddebug_exec_query(query); + if (rc < 0) { + errs++; + exitcode = rc; + } else + nfound += rc; + i++; + } + pr_info("processed %d queries, with %d matches, %d errs\n", + i, nfound, errs); + + if (exitcode) + return exitcode; + return nfound; } #define PREFIX_SIZE 64 @@ -452,7 +537,8 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf) pos += snprintf(buf + pos, remaining(pos), "%s:", desc->function); if (desc->flags & _DPRINTK_FLAGS_INCL_LINENO) - pos += snprintf(buf + pos, remaining(pos), "%d:", desc->lineno); + pos += snprintf(buf + pos, remaining(pos), "%d:", + desc->lineno); if (pos - pos_after_tid) pos += snprintf(buf + pos, remaining(pos), " "); if (pos >= PREFIX_SIZE) @@ -527,14 +613,16 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif -static __initdata char 
ddebug_setup_string[1024]; +#define DDEBUG_STRING_SIZE 1024 +static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; + static __init int ddebug_setup_query(char *str) { - if (strlen(str) >= 1024) { + if (strlen(str) >= DDEBUG_STRING_SIZE) { pr_warn("ddebug boot param string too large\n"); return 0; } - strcpy(ddebug_setup_string, str); + strlcpy(ddebug_setup_string, str, DDEBUG_STRING_SIZE); return 1; } @@ -544,25 +632,33 @@ __setup("ddebug_query=", ddebug_setup_query); * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the * command text from userspace, parses and executes it. */ +#define USER_BUF_PAGE 4096 static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { - char tmpbuf[256]; + char *tmpbuf; int ret; if (len == 0) return 0; - /* we don't check *offp -- multiple writes() are allowed */ - if (len > sizeof(tmpbuf)-1) + if (len > USER_BUF_PAGE - 1) { + pr_warn("expected <%d bytes into control\n", USER_BUF_PAGE); return -E2BIG; - if (copy_from_user(tmpbuf, ubuf, len)) + } + tmpbuf = kmalloc(len + 1, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + if (copy_from_user(tmpbuf, ubuf, len)) { + kfree(tmpbuf); return -EFAULT; + } tmpbuf[len] = '\0'; if (verbose) pr_info("read %d bytes from userspace\n", (int)len); - ret = ddebug_exec_query(tmpbuf); - if (ret) + ret = ddebug_exec_queries(tmpbuf); + kfree(tmpbuf); + if (ret < 0) return ret; *offp += len; @@ -668,7 +764,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) { struct ddebug_iter *iter = m->private; struct _ddebug *dp = p; - char flagsbuf[8]; + char flagsbuf[10]; if (verbose) pr_info("called m=%p p=%p\n", m, p); @@ -679,10 +775,10 @@ static int ddebug_proc_show(struct seq_file *m, void *p) return 0; } - seq_printf(m, "%s:%u [%s]%s %s \"", - dp->filename, dp->lineno, - iter->table->mod_name, dp->function, - ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + seq_printf(m, "%s:%u [%s]%s =%s \"", + trim_prefix(dp->filename), 
dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); seq_escape(m, dp->format, "\t\r\n\""); seq_puts(m, "\"\n"); @@ -708,10 +804,11 @@ static const struct seq_operations ddebug_proc_seqops = { }; /* - * File_ops->open method for <debugfs>/dynamic_debug/control. Does the seq_file - * setup dance, and also creates an iterator to walk the _ddebugs. - * Note that we create a seq_file always, even for O_WRONLY files - * where it's not needed, as doing so simplifies the ->release method. + * File_ops->open method for <debugfs>/dynamic_debug/control. Does + * the seq_file setup dance, and also creates an iterator to walk the + * _ddebugs. Note that we create a seq_file always, even for O_WRONLY + * files where it's not needed, as doing so simplifies the ->release + * method. */ static int ddebug_proc_open(struct inode *inode, struct file *file) { @@ -846,33 +943,40 @@ static int __init dynamic_debug_init(void) int ret = 0; int n = 0; - if (__start___verbose != __stop___verbose) { - iter = __start___verbose; - modname = iter->modname; - iter_start = iter; - for (; iter < __stop___verbose; iter++) { - if (strcmp(modname, iter->modname)) { - ret = ddebug_add_module(iter_start, n, modname); - if (ret) - goto out_free; - n = 0; - modname = iter->modname; - iter_start = iter; - } - n++; + if (__start___verbose == __stop___verbose) { + pr_warn("_ddebug table is empty in a " + "CONFIG_DYNAMIC_DEBUG build"); + return 1; + } + iter = __start___verbose; + modname = iter->modname; + iter_start = iter; + for (; iter < __stop___verbose; iter++) { + if (strcmp(modname, iter->modname)) { + ret = ddebug_add_module(iter_start, n, modname); + if (ret) + goto out_free; + n = 0; + modname = iter->modname; + iter_start = iter; } - ret = ddebug_add_module(iter_start, n, modname); + n++; } + ret = ddebug_add_module(iter_start, n, modname); + if (ret) + goto out_free; /* ddebug_query boot param got passed -> set it up */ if 
(ddebug_setup_string[0] != '\0') { - ret = ddebug_exec_query(ddebug_setup_string); - if (ret) + ret = ddebug_exec_queries(ddebug_setup_string); + if (ret < 0) pr_warn("Invalid ddebug boot param %s", ddebug_setup_string); else - pr_info("ddebug initialized with string %s", - ddebug_setup_string); + pr_info("%d changes by ddebug_query\n", ret); + + /* keep tables even on ddebug_query parse error */ + ret = 0; } out_free: diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index 3d1bdcdd7db4..6ab4587d052b 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/ctype.h> #include <linux/kernel.h> +#include <linux/jiffies.h> #include <linux/dynamic_queue_limits.h> #define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index b4801f51b607..6805453c18e7 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -5,7 +5,7 @@ #include <linux/stat.h> #include <linux/types.h> #include <linux/fs.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/stacktrace.h> #include <linux/fault-inject.h> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index d903959ad695..91ca09fbf6f9 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -11,7 +11,7 @@ */ #include <linux/bitops.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/types.h> #include <asm/byteorder.h> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 4bd75a73ba00..0cbfc0b4398f 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -10,7 +10,7 @@ */ #include <linux/bitops.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/types.h> #include <asm/byteorder.h> diff --git a/lib/flex_array.c b/lib/flex_array.c index 9b8b89458c4c..6948a6692fc4 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -23,7 +23,7 @@ #include <linux/flex_array.h> #include <linux/slab.h> #include 
<linux/stddef.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/reciprocal_div.h> struct flex_array_part { diff --git a/lib/gcd.c b/lib/gcd.c index f879033d9822..cce4f3cd14b3 100644 --- a/lib/gcd.c +++ b/lib/gcd.c @@ -1,6 +1,6 @@ #include <linux/kernel.h> #include <linux/gcd.h> -#include <linux/module.h> +#include <linux/export.h> /* Greatest common divisor */ unsigned long gcd(unsigned long a, unsigned long b) diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 85d0e412a04f..8f8d5439e2d9 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -1,14 +1,29 @@ #include <stdio.h> +#include "../include/generated/autoconf.h" #include "crc32defs.h" #include <inttypes.h> #define ENTRIES_PER_LINE 4 -#define LE_TABLE_SIZE (1 << CRC_LE_BITS) -#define BE_TABLE_SIZE (1 << CRC_BE_BITS) +#if CRC_LE_BITS > 8 +# define LE_TABLE_ROWS (CRC_LE_BITS/8) +# define LE_TABLE_SIZE 256 +#else +# define LE_TABLE_ROWS 1 +# define LE_TABLE_SIZE (1 << CRC_LE_BITS) +#endif -static uint32_t crc32table_le[4][LE_TABLE_SIZE]; -static uint32_t crc32table_be[4][BE_TABLE_SIZE]; +#if CRC_BE_BITS > 8 +# define BE_TABLE_ROWS (CRC_BE_BITS/8) +# define BE_TABLE_SIZE 256 +#else +# define BE_TABLE_ROWS 1 +# define BE_TABLE_SIZE (1 << CRC_BE_BITS) +#endif + +static uint32_t crc32table_le[LE_TABLE_ROWS][256]; +static uint32_t crc32table_be[BE_TABLE_ROWS][256]; +static uint32_t crc32ctable_le[LE_TABLE_ROWS][256]; /** * crc32init_le() - allocate and initialize LE table data @@ -17,27 +32,38 @@ static uint32_t crc32table_be[4][BE_TABLE_SIZE]; * fact that crctable[i^j] = crctable[i] ^ crctable[j]. * */ -static void crc32init_le(void) +static void crc32init_le_generic(const uint32_t polynomial, + uint32_t (*tab)[256]) { unsigned i, j; uint32_t crc = 1; - crc32table_le[0][0] = 0; + tab[0][0] = 0; - for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { - crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + for (i = LE_TABLE_SIZE >> 1; i; i >>= 1) { + crc = (crc >> 1) ^ ((crc & 1) ? 
polynomial : 0); for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) - crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + tab[0][i + j] = crc ^ tab[0][j]; } for (i = 0; i < LE_TABLE_SIZE; i++) { - crc = crc32table_le[0][i]; - for (j = 1; j < 4; j++) { - crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); - crc32table_le[j][i] = crc; + crc = tab[0][i]; + for (j = 1; j < LE_TABLE_ROWS; j++) { + crc = tab[0][crc & 0xff] ^ (crc >> 8); + tab[j][i] = crc; } } } +static void crc32init_le(void) +{ + crc32init_le_generic(CRCPOLY_LE, crc32table_le); +} + +static void crc32cinit_le(void) +{ + crc32init_le_generic(CRC32C_POLY_LE, crc32ctable_le); +} + /** * crc32init_be() - allocate and initialize BE table data */ @@ -55,18 +81,18 @@ static void crc32init_be(void) } for (i = 0; i < BE_TABLE_SIZE; i++) { crc = crc32table_be[0][i]; - for (j = 1; j < 4; j++) { + for (j = 1; j < BE_TABLE_ROWS; j++) { crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); crc32table_be[j][i] = crc; } } } -static void output_table(uint32_t table[4][256], int len, char *trans) +static void output_table(uint32_t (*table)[256], int rows, int len, char *trans) { int i, j; - for (j = 0 ; j < 4; j++) { + for (j = 0 ; j < rows; j++) { printf("{"); for (i = 0; i < len - 1; i++) { if (i % ENTRIES_PER_LINE == 0) @@ -83,15 +109,30 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static const u32 crc32table_le[4][256] = {"); - output_table(crc32table_le, LE_TABLE_SIZE, "tole"); + printf("static const u32 __cacheline_aligned " + "crc32table_le[%d][%d] = {", + LE_TABLE_ROWS, LE_TABLE_SIZE); + output_table(crc32table_le, LE_TABLE_ROWS, + LE_TABLE_SIZE, "tole"); printf("};\n"); } if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static const u32 crc32table_be[4][256] = {"); - output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); + printf("static const u32 __cacheline_aligned " + "crc32table_be[%d][%d] = {", + BE_TABLE_ROWS, BE_TABLE_SIZE); + output_table(crc32table_be, LE_TABLE_ROWS, + 
BE_TABLE_SIZE, "tobe"); + printf("};\n"); + } + if (CRC_LE_BITS > 1) { + crc32cinit_le(); + printf("static const u32 __cacheline_aligned " + "crc32ctable_le[%d][%d] = {", + LE_TABLE_ROWS, LE_TABLE_SIZE); + output_table(crc32ctable_le, LE_TABLE_ROWS, + LE_TABLE_SIZE, "tole"); printf("};\n"); } diff --git a/lib/genalloc.c b/lib/genalloc.c index f352cc42f4f8..6bc04aab6ec7 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -29,7 +29,7 @@ */ #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/bitmap.h> #include <linux/rculist.h> #include <linux/interrupt.h> diff --git a/lib/halfmd4.c b/lib/halfmd4.c index e11db26f8ae5..66d0ee8b7776 100644 --- a/lib/halfmd4.c +++ b/lib/halfmd4.c @@ -1,5 +1,5 @@ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/cryptohash.h> /* F, G and H are basic MD4 functions: selection, majority, parity */ diff --git a/lib/hexdump.c b/lib/hexdump.c index 51d5ae210244..6540d657dca4 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -10,7 +10,7 @@ #include <linux/types.h> #include <linux/ctype.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); diff --git a/lib/hweight.c b/lib/hweight.c index 3c79d50814cf..b7d81ba143d1 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -1,4 +1,4 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/bitops.h> #include <asm/types.h> diff --git a/lib/idr.c b/lib/idr.c index ed055b297c81..4046e29c0a99 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -29,7 +29,7 @@ #ifndef TEST // to test in user space... #include <linux/slab.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #endif #include <linux/err.h> #include <linux/string.h> @@ -595,8 +595,10 @@ EXPORT_SYMBOL(idr_for_each); * Returns pointer to registered object with id, which is next number to * given id. 
After being looked up, *@nextidp will be updated for the next * iteration. + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. */ - void *idr_get_next(struct idr *idp, int *nextidp) { struct idr_layer *p, *pa[MAX_LEVEL]; @@ -605,11 +607,11 @@ void *idr_get_next(struct idr *idp, int *nextidp) int n, max; /* find first ent */ - n = idp->layers * IDR_BITS; - max = 1 << n; p = rcu_dereference_raw(idp->top); if (!p) return NULL; + n = (p->layer + 1) * IDR_BITS; + max = 1 << n; while (id < max) { while (n > 0 && p) { diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index fd355a99327c..fc2eeb7cb2ea 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -1,6 +1,6 @@ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> /** * int_sqrt - rough approximation to sqrt diff --git a/lib/iomap.c b/lib/iomap.c index 5dbcb4b2d864..2c08f36862eb 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -6,7 +6,7 @@ #include <linux/pci.h> #include <linux/io.h> -#include <linux/module.h> +#include <linux/export.h> /* * Read/write from/to an (offsettable) iomem cookie. It might be a PIO @@ -242,45 +242,11 @@ EXPORT_SYMBOL(ioport_unmap); #endif /* CONFIG_HAS_IOPORT */ #ifdef CONFIG_PCI -/** - * pci_iomap - create a virtual mapping cookie for a PCI BAR - * @dev: PCI device that owns the BAR - * @bar: BAR number - * @maxlen: length of the memory to map - * - * Using this function you will get a __iomem address to your device BAR. - * You can access it using ioread*() and iowrite*(). These functions hide - * the details if this is a MMIO or PIO address space and will just do what - * you expect from them in the correct way. - * - * @maxlen specifies the maximum length to map. If you want to get access to - * the complete BAR without checking for its length first, pass %0 here. 
- * */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len || !start) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - return ioremap_nocache(start, len); - } - /* What? */ - return NULL; -} - +/* Hide the details if this is a MMIO or PIO address space and just do what + * you expect in the correct way. */ void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { IO_COND(addr, /* nothing */, iounmap(addr)); } -EXPORT_SYMBOL(pci_iomap); EXPORT_SYMBOL(pci_iounmap); #endif /* CONFIG_PCI */ diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c index 864fc5ea398c..4527e751b5e0 100644 --- a/lib/iomap_copy.c +++ b/lib/iomap_copy.c @@ -15,7 +15,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/io.h> /** diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index da053313ee5c..c27e269210c4 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -2,8 +2,9 @@ * IOMMU helper functions for the free area management */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/bitmap.h> +#include <linux/bug.h> int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, diff --git a/lib/ioremap.c b/lib/ioremap.c index da4e2ad74b68..0c9216c48762 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -9,7 +9,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/io.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> diff --git a/lib/irq_regs.c b/lib/irq_regs.c index 753880a5440c..9c0a1d70fbe8 100644 --- a/lib/irq_regs.c +++ b/lib/irq_regs.c @@ -8,7 +8,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/percpu.h> #include <asm/irq_regs.h> #ifndef ARCH_HAS_OWN_IRQ_REGS diff --git a/lib/kasprintf.c b/lib/kasprintf.c index 9c4233b23783..ae0de80c1c88 100644 --- a/lib/kasprintf.c +++ b/lib/kasprintf.c @@ -5,7 +5,7 @@ */ #include <stdarg.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> diff --git a/lib/klist.c b/lib/klist.c index 573d6068a42e..0874e41609a6 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -35,7 +35,7 @@ */ #include <linux/klist.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> /* diff --git a/lib/kobject.c b/lib/kobject.c index c33d7a18d635..21dee7c19afd 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -14,7 +14,7 @@ #include <linux/kobject.h> #include <linux/string.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index e66e9b632617..1a91efa6d121 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -17,7 +17,8 @@ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/kobject.h> -#include <linux/module.h> +#include <linux/export.h> +#include <linux/kmod.h> #include <linux/slab.h> #include <linux/user_namespace.h> #include <linux/socket.h> @@ -29,16 +30,17 @@ u64 uevent_seqnum; char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; -static DEFINE_SPINLOCK(sequence_lock); #ifdef CONFIG_NET struct uevent_sock { struct list_head list; struct sock *sk; }; static LIST_HEAD(uevent_sock_list); -static DEFINE_MUTEX(uevent_sock_mutex); #endif +/* This lock protects uevent_seqnum and uevent_sock_list */ +static DEFINE_MUTEX(uevent_sock_mutex); + /* the strings here must match the enum in include/linux/kobject.h */ static const char *kobject_actions[] = { [KOBJ_ADD] = "add", @@ -136,7 +138,6 @@ int kobject_uevent_env(struct 
kobject *kobj, enum kobject_action action, struct kobject *top_kobj; struct kset *kset; const struct kset_uevent_ops *uevent_ops; - u64 seq; int i = 0; int retval = 0; #ifdef CONFIG_NET @@ -243,17 +244,16 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, else if (action == KOBJ_REMOVE) kobj->state_remove_uevent_sent = 1; + mutex_lock(&uevent_sock_mutex); /* we will send an event, so request a new sequence number */ - spin_lock(&sequence_lock); - seq = ++uevent_seqnum; - spin_unlock(&sequence_lock); - retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq); - if (retval) + retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)++uevent_seqnum); + if (retval) { + mutex_unlock(&uevent_sock_mutex); goto exit; + } #if defined(CONFIG_NET) /* send netlink message */ - mutex_lock(&uevent_sock_mutex); list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; struct sk_buff *skb; @@ -290,8 +290,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } else retval = -ENOMEM; } - mutex_unlock(&uevent_sock_mutex); #endif + mutex_unlock(&uevent_sock_mutex); /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 7a94c8f14e29..c3615eab0cc3 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -15,7 +15,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/math64.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/types.h> #include <asm/uaccess.h> #include "kstrtox.h" @@ -44,12 +44,13 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base) * * Don't you dare use this function. 
*/ -unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res) +unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p) { + unsigned long long res; unsigned int rv; int overflow; - *res = 0; + res = 0; rv = 0; overflow = 0; while (*s) { @@ -64,12 +65,19 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long if (val >= base) break; - if (*res > div_u64(ULLONG_MAX - val, base)) - overflow = 1; - *res = *res * base + val; + /* + * Check for overflow only if we are within range of + * it in the max base we support (16) + */ + if (unlikely(res & (~0ull << 60))) { + if (res > div_u64(ULLONG_MAX - val, base)) + overflow = 1; + } + res = res * base + val; rv++; s++; } + *p = res; if (overflow) rv |= KSTRTOX_OVERFLOW; return rv; diff --git a/lib/lcm.c b/lib/lcm.c index 10b5cfcacf6b..b9c8de461e9e 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -1,6 +1,6 @@ #include <linux/kernel.h> #include <linux/gcd.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/lcm.h> /* Lowest common multiple */ diff --git a/lib/list_debug.c b/lib/list_debug.c index b8029a5583ff..982b850d4e7a 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -6,8 +6,10 @@ * DEBUG_LIST. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/list.h> +#include <linux/bug.h> +#include <linux/kernel.h> /* * Insert a new entry between two known consecutive entries. 
diff --git a/lib/llist.c b/lib/llist.c index 700cff77a387..4a15115e90f8 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -23,11 +23,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/llist.h> -#include <asm/system.h> /** * llist_add_batch - add several linked entries in batch diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 507a22fab738..7aae0f2a5e0a 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -14,7 +14,6 @@ #include <linux/mutex.h> #include <linux/sched.h> #include <linux/delay.h> -#include <linux/module.h> #include <linux/lockdep.h> #include <linux/spinlock.h> #include <linux/kallsyms.h> diff --git a/lib/md5.c b/lib/md5.c index c777180e1f2f..958a3c15923c 100644 --- a/lib/md5.c +++ b/lib/md5.c @@ -1,5 +1,5 @@ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/cryptohash.h> #define F1(x, y, z) (z ^ (x & (y ^ z))) diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile new file mode 100644 index 000000000000..567d52e74d77 --- /dev/null +++ b/lib/mpi/Makefile @@ -0,0 +1,32 @@ +# +# MPI multiprecision maths library (from gpg) +# + +obj-$(CONFIG_MPILIB) = mpi.o + +mpi-y = \ + generic_mpih-lshift.o \ + generic_mpih-mul1.o \ + generic_mpih-mul2.o \ + generic_mpih-mul3.o \ + generic_mpih-rshift.o \ + generic_mpih-sub1.o \ + generic_mpih-add1.o \ + mpicoder.o \ + mpi-bit.o \ + mpih-cmp.o \ + mpih-div.o \ + mpih-mul.o \ + mpi-pow.o \ + mpiutil.o + +mpi-$(CONFIG_MPILIB_EXTRA) += \ + mpi-add.o \ + mpi-div.o \ + mpi-cmp.o \ + mpi-gcd.o \ + mpi-inline.o \ + mpi-inv.o \ + mpi-mpow.o \ + mpi-mul.o \ + mpi-scan.o diff --git a/lib/mpi/generic_mpi-asm-defs.h b/lib/mpi/generic_mpi-asm-defs.h new file mode 100644 index 000000000000..047d1f5a7249 --- /dev/null +++ b/lib/mpi/generic_mpi-asm-defs.h @@ -0,0 +1,4 @@ +/* This file defines some basic constants 
for the MPI machinery. We + * need to define the types on a per-CPU basis, so it is done with + * this file here. */ +#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG) diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c new file mode 100644 index 000000000000..c94c7dd344b3 --- /dev/null +++ b/lib/mpi/generic_mpih-add1.c @@ -0,0 +1,61 @@ +/* mpihelp-add_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, + * 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. 
*/ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = y < cy; /* get out carry from that addition */ + y += x; /* add other addend */ + cy += y < x; /* get out carry from that add, combine */ + res_ptr[j] = y; + } while (++j); + + return cy; +} diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c new file mode 100644 index 000000000000..86318927231a --- /dev/null +++ b/lib/mpi/generic_mpih-lshift.c @@ -0,0 +1,63 @@ +/* mpihelp-lshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
+ */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + * and store the USIZE least significant digits of the result at WP. + * Return the bits shifted out from the most significant digit. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be >= UP. + */ + +mpi_limb_t +mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp += 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c new file mode 100644 index 000000000000..1668dfd9092c --- /dev/null +++ b/lib/mpi/generic_mpih-mul1.c @@ -0,0 +1,57 @@ +/* mpihelp-mul_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + res_ptr[j] = prod_low; + } while (++j); + + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c new file mode 100644 index 000000000000..8a7b29ee1740 --- /dev/null +++ b/lib/mpi/generic_mpih-mul2.c @@ -0,0 +1,60 @@ +/* mpihelp-mul_2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += prod_low < x ? 1 : 0; + res_ptr[j] = prod_low; + } while (++j); + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c new file mode 100644 index 000000000000..f96df327be63 --- /dev/null +++ b/lib/mpi/generic_mpih-mul3.c @@ -0,0 +1,61 @@ +/* mpihelp-mul_3.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. 
+ * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += prod_low > x ? 
1 : 0; + res_ptr[j] = prod_low; + } while (++j); + + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c new file mode 100644 index 000000000000..ffa328818ca6 --- /dev/null +++ b/lib/mpi/generic_mpih-rshift.c @@ -0,0 +1,63 @@ +/* mpih-rshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GNUPG + * + * GNUPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNUPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + * and store the USIZE least significant limbs of the result at WP. + * The bits shifted out to the right are returned. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be <= UP. 
+ */ + +mpi_limb_t +mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp -= 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + for (i = 1; i < usize; i++) { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; + + return retval; +} diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c new file mode 100644 index 000000000000..5d98ab7d6853 --- /dev/null +++ b/lib/mpi/generic_mpih-sub1.c @@ -0,0 +1,60 @@ +/* mpihelp-add_2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = y < cy; /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy += y > x; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } while (++j); + + return cy; +} diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h new file mode 100644 index 000000000000..29f98624ef93 --- /dev/null +++ b/lib/mpi/longlong.h @@ -0,0 +1,1500 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + * Note: I added some stuff for use with gnupg + * + * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, + * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + * License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this file; see the file COPYING.LIB. 
If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large as UWtype.
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)	/* a quarter of the word width, in bits */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))	/* 2^(W_TYPE_SIZE/2) */
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))	/* low half-word of t */
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))	/* high half-word of t */
+
+/* This is used to make sure no undesirable sharing between different libraries
+   that use this file takes place. */
+#ifndef __MPN
+#define __MPN(x) __##x	/* default: prefix the name with two underscores */
+#endif
+
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product. This is just a variant of umul_ppmm.
+ *
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation. If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator). Like udiv_qrnnd but the numbers are signed. The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X. This is the number of
+ * steps X needs to be shifted left to set the msb. Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
+ * by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ * LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used. */
+
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below! */
+
+#if defined(__GNUC__) && !defined(NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1. Use cpp to avoid major code duplication.
*/ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +/*************************************** + ************** A29K ***************** + ***************************************/ +#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n" \ + "addc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n" \ + "subc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" ((USItype)(xh)) \ + : "r" (__m0), \ + "r" (__m1)); \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" ((USItype)(q)), \ + "=q" ((USItype)(r)) \ + : "1" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))) + +#define count_leading_zeros(count, x) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __a29k__ */ + +#if defined(__alpha) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh %r1,%2,%0" \ + : "=r" ((UDItype) ph) \ + : "%rJ" (__m0), \ + "rI" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { UDItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern 
UDItype __udiv_qrnnd(); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha */ + +/*************************************** + ************** ARM ****************** + ***************************************/ +#if defined(__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n" \ + "adc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n" \ + "sbc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + "mov %|r0, %2, lsr #16 @ AAAA\n" \ + "mov %|r2, %3, lsr #16 @ BBBB\n" \ + "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ + "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ + "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ + "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ + "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ + "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ + "adds %|r0, %1, %0 @ central sum\n" \ + "addcs %|r2, %|r2, #65536\n" \ + "adds %1, %|r1, %|r0, lsl #16\n" \ + "adc %0, %|r2, %|r0, lsr #16" \ + : "=&r" ((USItype)(xh)), \ + "=r" ((USItype)(xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1", "r2") +#else +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + "umull %r1, %r0, %r2, %r3" \ + : "=&r" ((USItype)(xh)), \ + "=r" ((USItype)(xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1") +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +/*************************************** + ************** CLIPPER ************** + ***************************************/ +#if defined(__clipper__) && W_TYPE_SIZE == 32 +#define 
umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((SItype)(u)), \ + "r" ((SItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +/*************************************** + ************** GMICRO *************** + ***************************************/ +#if defined(__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n" \ + "addx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n" \ + "subx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), \ + "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), \ + "0" ((USItype)(nl)), \ + "g" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bsch/1 %1,%0" \ + : "=g" (count) \ + : "g" ((USItype)(x)), \ + "0" ((USItype)0)) +#endif + +/*************************************** + ************** HPPA ***************** + ***************************************/ +#if 
defined(__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1\n" \ + "addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1\n" \ + "subb %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#if defined(_PA_RISC1_1) +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("xmpyu %1,%2,%0" \ + : "=*f" (__xx.__ll) \ + : "*f" ((USItype)(u)), \ + "*f" ((USItype)(v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ +} while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { USItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern USItype __udiv_qrnnd(); +#endif /* LONGLONG_STANDALONE */ +#define count_leading_zeros(count, x) \ +do { \ + USItype __tmp; \ + __asm__ ( \ + "ldi 1,%0\n" \ + "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ + "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ + "ldo 16(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ + "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ + "ldo 8(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ + "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ + "ldo 4(%0),%0 ; Yes. Perform add.\n" \ + "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ + "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ + "ldo 2(%0),%0 ; Yes. Perform add.\n" \ + "extru %1,30,1,%1 ; Extract bit 1.\n" \ + "sub %0,%1,%0 ; Subtract it. 
" \ + : "=r" (count), "=r" (__tmp) : "1" (x)); \ +} while (0) +#endif /* hppa */ + +/*************************************** + ************** I370 ***************** + ***************************************/ +#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "r" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define smul_ppmm(xh, xl, m0, m1) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (m0), \ + "r" (m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__xx.__ll) \ + : "0" (__xx.__ll), "r" (d)); \ + (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ +} while (0) +#endif + +/*************************************** + ************** I386 ***************** + ***************************************/ +#undef __i386__ +#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1\n" \ + "adcl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1\n" \ + "sbbl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define 
umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "rm" ((USItype)(d))) +#define count_leading_zeros(count, x) \ +do { \ + USItype __cbtmp; \ + __asm__ ("bsrl %1,%0" \ + : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ +} while (0) +#define count_trailing_zeros(count, x) \ + __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#ifndef UMUL_TIME +#define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +/*************************************** + ************** I860 ***************** + ***************************************/ +#if defined(__i860__) && W_TYPE_SIZE == 32 +#define rshift_rhlc(r, h, l, c) \ + __asm__ ("shr %3,r0,r0\n" \ + "shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +/*************************************** + ************** I960 ***************** + ***************************************/ +#if defined(__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\n" \ + "addc %5,%4,%1\n" \ + "addc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "%dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\n" \ + "subc %5,%4,%1\n" \ + "subc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + 
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) \ + : "dI" (__nn.__ll), \ + "dI" ((USItype)(d))); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ +} while (0) +#define count_leading_zeros(count, x) \ +do { \ + USItype __cbtmp; \ + __asm__ ("scanbit %1,%0" \ + : "=r" (__cbtmp) \ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ +} while (0) +#define COUNT_LEADING_ZEROS_0 (-32) /* sic */ +#if defined(__i960mx) /* what is the proper symbol to test??? */ +#define rshift_rhlc(r, h, l, c) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" \ + : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ +} +#endif /* i960mx */ +#endif /* i960 */ + +/*************************************** + ************** 68000 **************** + ***************************************/ +#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n" \ + "addx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n" \ + "subx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" 
((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" ((USItype)(count)) \ + : "od" ((USItype)(x)), "n" (0)) +#define COUNT_LEADING_ZEROS_0 32 +#else /* not mc68020 */ +#define umul_ppmm(xh, xl, a, b) \ +do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm\n" \ + "move%.l %5,%3\n" \ + "move%.l %2,%0\n" \ + "move%.w %3,%1\n" \ + "swap %3\n" \ + "swap %0\n" \ + "mulu %2,%1\n" \ + "mulu %3,%0\n" \ + "mulu %2,%3\n" \ + "swap %2\n" \ + "mulu %5,%2\n" \ + "add%.l %3,%2\n" \ + "jcc 1f\n" \ + "add%.l %#0x10000,%0\n" \ + "1: move%.l %2,%3\n" \ + "clr%.w %2\n" \ + "swap %2\n" \ + "swap %3\n" \ + "clr%.w %3\n" \ + "add%.l %3,%1\n" \ + "addx%.l %2,%0\n" \ + "| End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ +} while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +#endif /* mc68000 */ + +/*************************************** + ************** 88000 **************** + ***************************************/ +#if defined(__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n" \ + "addu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, 
bl) \ + __asm__ ("subu.co %1,%r4,%r5\n" \ + "subu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define count_leading_zeros(count, x) \ +do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" \ + : "=r" (__cbtmp) \ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ +} while (0) +#define COUNT_LEADING_ZEROS_0 63 /* sic */ +#if defined(__m88110__) +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +/*************************************** + ************** MIPS ***************** + ***************************************/ +#if defined(__mips__) && W_TYPE_SIZE == 32 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3\n" \ + "mflo %0\n" \ + "mfhi %1" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#endif +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips__ */ + +/*************************************** + ************** MIPS/64 ************** + ***************************************/ +#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 
64 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3\n" \ + "mflo %0\n" \ + "mfhi %1" \ + : "=d" ((UDItype)(w0)), \ + "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips__ */ + +/*************************************** + ************** 32000 **************** + ***************************************/ +#if defined(__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("meid %2,%0" \ + : "=g" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__xx.__ll) \ + : "0" (__xx.__ll), \ + "g" ((USItype)(d))); \ + (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +#define count_trailing_zeros(count, x) \ +do { \ + __asm__("ffsd %2,%0" \ + : "=r"((USItype) (count)) \ + : "0"((USItype) 0), "r"((USItype) (x))); \ + } while (0) +#endif /* __ns32000__ */ + +/*************************************** + ************** PPC ****************** + ***************************************/ +#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), 
\ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ +} while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ +} while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("{cntlz|cntlzw} %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))) 
+#define COUNT_LEADING_ZEROS_0 32 +#if defined(_ARCH_PPC) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" ((USItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ +do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" ((SItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((USItype)(xh)), \ + "=q" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((SItype)(xh)), \ + "=q" ((SItype)(xl)) \ + : "r" (m0), \ + "r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" \ + : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ + : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +#define UDIV_TIME 100 +#endif +#endif /* Power architecture variants. 
*/ + +/*************************************** + ************** PYR ****************** + ***************************************/ +#if defined(__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1\n" \ + "addwc %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1\n" \ + "subwb %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) + /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("movw %1,%R0\n" \ + "uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#endif /* __pyr__ */ + +/*************************************** + ************** RT/ROMP ************** + ***************************************/ +#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n" \ + "ae %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n" \ + "se %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2\n" \ + "mts r10,%2\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m 
r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "cas %0,r2,r0\n" \ + "mfs r10,%1" \ + : "=r" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#define count_leading_zeros(count, x) \ +do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x) >> 16)); \ + else { \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))); \ + (count) += 16; \ + } \ +} while (0) +#endif /* RT/ROMP */ + +/*************************************** + ************** SH2 ****************** + ***************************************/ +#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ + && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3\n" \ + "sts macl,%1\n" \ + "sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + +/*************************************** + ************** SPARC **************** + ***************************************/ +#if defined(__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n" \ + "addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n" \ + "subx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#if defined(__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, + 2) 
the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ +} while (0) +#define UDIV_TIME 25 +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ +#if defined(__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ + "wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ + "tst %%g0\n" \ + "divscc %3,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%0\n" \ + "rd %%y,%1\n" \ + "bl,a 1f\n" \ + "add %1,%4,%1\n" \ + "1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "rI" ((USItype)(d)) \ + : "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#define count_leading_zeros(count, x) \ + __asm__ ("scan %1,0,%0" \ + : "=r" ((USItype)(x)) \ + : "r" ((USItype)(count))) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ + /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ + "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ + "sra %3,31,%%g2 ! Don't move this insn\n" \ + "and %2,%%g2,%%g2 ! Don't move this insn\n" \ + "andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,0,%%g1\n" \ + "add %%g1,%%g2,%0\n" \ + "rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n\t" \ + "mov 32,%%g1\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "1: bcs 5f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "addx %1,%1,%1 ! so this can't give carry\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "2: bne 1b\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "bcs 3f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "b 3f\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "4: sub %1,%2,%1\n\t" \ + "5: addxcc %1,%1,%1\n\t" \ + "bcc 2b\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "! Got carry from n. 
Subtract next step to cancel this carry.\n\t" \ + "bne 4b\n\t" \ + "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ + "sub %1,%2,%1\n\t" \ + "3: xnor %0,0,%0\n\t" \ + "! End of inline udiv_qrnnd\n" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif +#endif /* __sparc__ */ + +/*************************************** + ************** VAX ****************** + ***************************************/ +#if defined(__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n" \ + "adwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n" \ + "sbwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ +} while (0) +#endif /* __vax__ */ + +/*************************************** + ************** Z8000 **************** + ***************************************/ +#if defined(__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, 
al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {long int __ll; \ + struct {unsigned int __h, __l; } __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ +} while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +/*************************************** + *********** Generic Versions ******** + ***************************************/ +#if !defined(umul_ppmm) && defined(__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ +{ \ + UDWtype __ll = __umulsidi3(m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ +} +#endif + +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm(__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + + /* If this machine has no inline assembler, use C macros. 
*/
+
+/* Two-word addition: (sh:sl) = (ah:al) + (bh:bl).  The carry out of the
+ * low word is recovered from the unsigned wrap test (__x < (al)). */
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) + (bl); \
+	(sh) = (ah) + (bh) + (__x < (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+/* Two-word subtraction: (sh:sl) = (ah:al) - (bh:bl).  The borrow out of
+ * the low word is recovered from the unsigned wrap test (__x > (al)). */
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) - (bl); \
+	(sh) = (ah) - (bh) - (__x > (al)); \
+	(sl) = __x; \
+} while (0)
+#endif
+
+/* Unsigned word x word -> two-word product (w1:w0), built from the four
+ * half-word partial products of u and v. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __x0, __x1, __x2, __x3; \
+	UHWtype __ul, __vl, __uh, __vh; \
+	UWtype __u = (u), __v = (v); \
+	\
+	__ul = __ll_lowpart(__u); \
+	__uh = __ll_highpart(__u); \
+	__vl = __ll_lowpart(__v); \
+	__vh = __ll_highpart(__v); \
+	\
+	__x0 = (UWtype) __ul * __vl; \
+	__x1 = (UWtype) __ul * __vh; \
+	__x2 = (UWtype) __uh * __vl; \
+	__x3 = (UWtype) __uh * __vh; \
+	\
+	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
+	__x1 += __x2; /* but this indeed can */ \
+	if (__x1 < __x2) /* did we get it? */ \
+		__x3 += __ll_B; /* yes, add it in the proper pos. */ \
+	\
+	(w1) = __x3 + __ll_highpart(__x1); \
+	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+
+/* Signed two-word product derived from umul_ppmm: the high word is
+ * corrected by subtracting each operand when the other one has its top
+ * bit set.
+ * The guard must test smul_ppmm itself: umul_ppmm is always defined by
+ * this point, so guarding on umul_ppmm would never provide the fallback. */
+#if !defined(smul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __w1; \
+	UWtype __m0 = (u), __m1 = (v); \
+	umul_ppmm(__w1, w0, __m0, __m1); \
+	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+		- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+
+	/* Define this unconditionally, so it can be used for debugging. */
+/* Portable two-word by one-word division: (n1:n0) / d -> quotient q and
+ * remainder r, computed as two half-word quotient steps with at most two
+ * corrections each.
+ * NOTE(review): presumably requires d to have its top bit set (see
+ * UDIV_NEEDS_NORMALIZATION below) and n1 < d - confirm against callers. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+	__d1 = __ll_highpart(d); \
+	__d0 = __ll_lowpart(d); \
+	\
+	__r1 = (n1) % __d1; \
+	__q1 = (n1) / __d1; \
+	__m = (UWtype) __q1 * __d0; \
+	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
+	if (__r1 < __m) { \
+		__q1--, __r1 += (d); \
+		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+			if (__r1 < __m) \
+				__q1--, __r1 += (d); \
+	} \
+	__r1 -= __m; \
+	\
+	__r0 = __r1 % __d1; \
+	__q0 = __r1 / __d1; \
+	__m = (UWtype) __q0 * __d0; \
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+	if (__r0 < __m) { \
+		__q0--, __r0 += (d); \
+		if (__r0 >= (d)) \
+			if (__r0 < __m) \
+				__q0--, __r0 += (d); \
+	} \
+	__r0 -= __m; \
+	\
+	(q) = (UWtype) __q1 * __ll_B | __q0; \
+	(r) = __r0; \
+} while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere). */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	UWtype __r; \
+	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+	(r) = __r; \
+} while (0)
+#endif
+
+	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+/* Any architecture-specific count_leading_zeros is dropped here, so the
+ * table-driven C version below is always the one in effect.  Its companion
+ * COUNT_LEADING_ZEROS_0 is dropped too: it described the discarded
+ * implementation and would otherwise be redefined with a different value. */
+#undef count_leading_zeros
+#undef COUNT_LEADING_ZEROS_0
+#if !defined(count_leading_zeros)
+	extern
+#ifdef __STDC__
+	const
+#endif
+	unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+	UWtype __xr = (x); \
+	UWtype __a; \
+	\
+	if (W_TYPE_SIZE <= 32) { \
+		__a = __xr < ((UWtype) 1 << 2*__BITS4) \
+		? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+		: (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
+	} \
+	else { \
+		for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+			if (((__xr >> __a) & 0xff) != 0) \
+				break; \
+	} \
+	\
+	(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+	/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros. The latter might be
+   defined in asm, but if it is not, the C version above is good enough.
*/
+/* count = bit index of the lowest set bit of x: (x & -x) keeps only that
+ * bit, count_leading_zeros locates it from the top, and the result is
+ * mirrored with W_TYPE_SIZE - 1 - clz.
+ * NOTE(review): for x == 0 the result depends on what count_leading_zeros
+ * yields for 0 - confirm callers never pass 0. */
+#define count_trailing_zeros(count, x) \
+do { \
+	UWtype __ctz_x = (x); \
+	UWtype __ctz_c; \
+	count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+	(count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+
+/* Default: udiv_qrnnd accepts any divisor unless a definition above said
+ * otherwise. */
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
new file mode 100644
index 000000000000..f56b9ba295e6
--- /dev/null
+++ b/lib/mpi/mpi-add.c
@@ -0,0 +1,234 @@
+/* mpi-add.c - MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. W and U may be the same.
+ * Returns 0 on success or -ENOMEM if W cannot be enlarged.
+ */
+int mpi_add_ui(MPI w, const MPI u, unsigned long v)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize, wsize;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+	wsign = 0;
+
+	/* If not space for W (and possible carry), increase space. */
+	wsize = usize + 1;
+	if (w->alloced < wsize)
+		if (mpi_resize(w, wsize) < 0)
+			return -ENOMEM;
+
+	/* These must be after realloc (U may be the same as W). */
+	up = u->d;
+	wp = w->d;
+
+	if (!usize) {	/* simple */
+		wp[0] = v;
+		wsize = v ? 1 : 0;
+	} else if (!usign) {	/* mpi is not negative */
+		mpi_limb_t cy;
+		cy = mpihelp_add_1(wp, up, usize, v);
+		wp[usize] = cy;
+		wsize = usize + cy;
+	} else {	/* The signs are different. Need exact comparison to determine
+			 * which operand to subtract from which. */
+		if (usize == 1 && up[0] < v) {
+			wp[0] = v - up[0];
+			wsize = 1;
+		} else {
+			mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease with at most one limb. */
+			wsize = usize - (wp[usize - 1] == 0);
+			wsign = 1;
+		}
+	}
+
+	w->nlimbs = wsize;
+	/* A zero result is never flagged negative (U == -V lands here). */
+	w->sign = wsize ? wsign : 0;
+	return 0;
+}
+
+/****************
+ * W = U + V.  W may be the same object as U and/or V.
+ * Returns 0 on success or -ENOMEM if W cannot be enlarged.
+ */
+int mpi_add(MPI w, MPI u, MPI v)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t usize, vsize, wsize;
+	int usign, vsign, wsign;
+
+	if (u->nlimbs < v->nlimbs) {	/* Swap U and V. */
+		usize = v->nlimbs;
+		usign = v->sign;
+		vsize = u->nlimbs;
+		vsign = u->sign;
+		wsize = usize + 1;
+		if (RESIZE_IF_NEEDED(w, wsize) < 0)
+			return -ENOMEM;
+		/* These must be after realloc (u or v may be the same as w). */
+		up = v->d;
+		vp = u->d;
+	} else {
+		usize = u->nlimbs;
+		usign = u->sign;
+		vsize = v->nlimbs;
+		vsign = v->sign;
+		wsize = usize + 1;
+		if (RESIZE_IF_NEEDED(w, wsize) < 0)
+			return -ENOMEM;
+		/* These must be after realloc (u or v may be the same as w). */
+		up = u->d;
+		vp = v->d;
+	}
+	wp = w->d;
+	wsign = 0;
+
+	if (!vsize) {	/* simple */
+		MPN_COPY(wp, up, usize);
+		wsize = usize;
+		wsign = usign;
+	} else if (usign != vsign) {	/* different sign */
+		/* This test is right since USIZE >= VSIZE */
+		if (usize != vsize) {
+			mpihelp_sub(wp, up, usize, vp, vsize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			wsign = usign;
+		} else if (mpihelp_cmp(up, vp, usize) < 0) {
+			mpihelp_sub_n(wp, vp, up, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (!usign)
+				wsign = 1;
+		} else {
+			mpihelp_sub_n(wp, up, vp, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (usign)
+				wsign = 1;
+		}
+	} else {	/* U and V have same sign. Add them. */
+		mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+		wp[usize] = cy;
+		wsize = usize + cy;
+		if (usign)
+			wsign = 1;
+	}
+
+	w->nlimbs = wsize;
+	/* Operands of equal magnitude and opposite sign cancel to zero, which
+	 * must not keep a negative sign. */
+	w->sign = wsize ? wsign : 0;
+	return 0;
+}
+
+/****************
+ * Subtract the unsigned integer V from the mpi-integer U and store the
+ * result in W.  W and U may be the same.
+ * Returns 0 on success or -ENOMEM if W cannot be enlarged.
+ */
+int mpi_sub_ui(MPI w, MPI u, unsigned long v)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize, wsize;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+	wsign = 0;
+
+	/* If not space for W (and possible carry), increase space. */
+	wsize = usize + 1;
+	if (w->alloced < wsize)
+		if (mpi_resize(w, wsize) < 0)
+			return -ENOMEM;
+
+	/* These must be after realloc (U may be the same as W). */
+	up = u->d;
+	wp = w->d;
+
+	if (!usize) {	/* simple */
+		wp[0] = v;
+		wsize = v ? 1 : 0;
+		wsign = 1;
+	} else if (usign) {	/* mpi is negative: -(|U| + V) */
+		mpi_limb_t cy;
+		cy = mpihelp_add_1(wp, up, usize, v);
+		wp[usize] = cy;
+		wsize = usize + cy;
+		/* The magnitudes add up but the result stays negative. */
+		wsign = 1;
+	} else {	/* The signs are different. Need exact comparison to determine
+			 * which operand to subtract from which. */
+		if (usize == 1 && up[0] < v) {
+			wp[0] = v - up[0];
+			wsize = 1;
+			wsign = 1;
+		} else {
+			mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease with at most one limb. */
+			wsize = usize - (wp[usize - 1] == 0);
+		}
+	}
+
+	w->nlimbs = wsize;
+	/* 0 - 0 must not produce a negative zero. */
+	w->sign = wsize ? wsign : 0;
+	return 0;
+}
+
+/****************
+ * W = U - V, implemented as U + (-V).  Any of W, U and V may be the same
+ * object.  Returns 0 on success or -ENOMEM.
+ */
+int mpi_sub(MPI w, MPI u, MPI v)
+{
+	int rc;
+
+	/* V has to be negated on a private copy when it aliases W (the result
+	 * would be clobbered by restoring the sign) or U (flipping V in place
+	 * would flip U as well and yield -(U + V) instead of zero). */
+	if (w == v || u == v) {
+		MPI vv;
+		if (mpi_copy(&vv, v) < 0)
+			return -ENOMEM;
+		vv->sign = !vv->sign;
+		rc = mpi_add(w, u, vv);
+		mpi_free(vv);
+	} else {
+		/* fixme: this is not thread-safe (we temp. modify v) */
+		v->sign = !v->sign;
+		rc = mpi_add(w, u, v);
+		v->sign = !v->sign;
+	}
+	return rc;
+}
+
+/* W = (U + V) mod M, with the remainder taking the sign handling of
+ * mpi_fdiv_r.  Returns 0 on success or -ENOMEM. */
+int mpi_addm(MPI w, MPI u, MPI v, MPI m)
+{
+	if (mpi_add(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+		return -ENOMEM;
+	return 0;
+}
+
+/* W = (U - V) mod M, with the remainder taking the sign handling of
+ * mpi_fdiv_r.  Returns 0 on success or -ENOMEM. */
+int mpi_subm(MPI w, MPI u, MPI v, MPI m)
+{
+	if (mpi_sub(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+		return -ENOMEM;
+	return 0;
+}
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
new file mode 100644
index 000000000000..2f526627e4f5
--- /dev/null
+++ b/lib/mpi/mpi-bit.c
@@ -0,0 +1,217 @@
+/* mpi-bit.c - MPI bit level functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define A_LIMB_1 ((mpi_limb_t) 1)
+
+/****************
+ * Sometimes we have MSL (most significant limbs) which are 0;
+ * this is for some reasons not good, so this function removes them.
+ */
+void mpi_normalize(MPI a)
+{
+	for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+		;
+}
+
+/****************
+ * Return the number of bits in A.
+ * A is normalized first, so leading zero limbs do not count; an MPI with
+ * no limbs has 0 bits.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+	unsigned n;
+
+	mpi_normalize(a);
+
+	if (a->nlimbs) {
+		mpi_limb_t alimb = a->d[a->nlimbs - 1];
+		if (alimb)
+			count_leading_zeros(n, alimb);
+		else
+			n = BITS_PER_MPI_LIMB;
+		n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+	} else
+		n = 0;
+	return n;
+}
+EXPORT_SYMBOL_GPL(mpi_get_nbits);
+
+/****************
+ * Test whether bit N is set.
+ * Returns 1 if it is, 0 otherwise (including bits beyond the used limbs).
+ */
+int mpi_test_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+	mpi_limb_t limb;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return 0;	/* too far left: this is a 0 */
+	limb = a->d[limbno];
+	return (limb & (A_LIMB_1 << bitno)) ? 1 : 0;
+}
+
+/****************
+ * Set bit N of A.
+ * Grows A when the bit lies beyond the limbs currently in use.
+ * Returns 0 on success or -ENOMEM if A cannot be enlarged.
+ */
+int mpi_set_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno, i;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {	/* resize */
+		/* Limb index limbno needs at least limbno + 1 allocated limbs. */
+		if (a->alloced <= limbno)
+			if (mpi_resize(a, limbno + 1) < 0)
+				return -ENOMEM;
+		/* Limbs above the old nlimbs are not guaranteed to be zero
+		 * (nlimbs can be lowered without clearing them). */
+		for (i = a->nlimbs; i <= limbno; i++)
+			a->d[i] = 0;
+		a->nlimbs = limbno + 1;
+	}
+	a->d[limbno] |= (A_LIMB_1 << bitno);
+	return 0;
+}
+
+/****************
+ * Set bit N of A.
and clear all bits above
+ * Returns 0 on success or -ENOMEM if A cannot be enlarged.
+ */
+int mpi_set_highbit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno, i;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {	/* resize */
+		/* Limb index limbno needs at least limbno + 1 allocated limbs. */
+		if (a->alloced <= limbno)
+			if (mpi_resize(a, limbno + 1) < 0)
+				return -ENOMEM;
+		/* Limbs above the old nlimbs are not guaranteed to be zero
+		 * (nlimbs can be lowered without clearing them). */
+		for (i = a->nlimbs; i <= limbno; i++)
+			a->d[i] = 0;
+		a->nlimbs = limbno + 1;
+	}
+	a->d[limbno] |= (A_LIMB_1 << bitno);
+	/* Clear the remaining higher bits of this limb ... */
+	for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	/* ... and drop every limb above it. */
+	a->nlimbs = limbno + 1;
+	return 0;
+}
+
+/****************
+ * clear bit N of A and all bits above
+ */
+void mpi_clear_highbit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return;		/* not in use, so no need to clear bits :-) */
+
+	for (; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	/* Limbs above limbno are dropped by shrinking nlimbs. */
+	a->nlimbs = limbno + 1;
+}
+
+/****************
+ * Clear bit N of A.
+ */
+void mpi_clear_bit(MPI a, unsigned n)
+{
+	unsigned limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return;		/* don't need to clear this bit, it's too far to the left */
+	a->d[limbno] &= ~(A_LIMB_1 << bitno);
+}
+
+/****************
+ * Shift A by N bits to the right
+ * FIXME: should use alloc_limb if X and A are same.
+ * The result is stored in X with the sign of A.
+ * NOTE(review): N is handed to mpihelp_rshift unchanged - presumably it
+ * must be smaller than BITS_PER_MPI_LIMB; confirm against that helper.
+ * Returns 0 on success or -ENOMEM if X cannot be enlarged.
+ */
+int mpi_rshift(MPI x, MPI a, unsigned n)
+{
+	mpi_ptr_t xp;
+	mpi_size_t xsize;
+
+	xsize = a->nlimbs;
+	x->sign = a->sign;
+	if (RESIZE_IF_NEEDED(x, (size_t) xsize) < 0)
+		return -ENOMEM;
+	xp = x->d;
+
+	if (xsize) {
+		mpihelp_rshift(xp, a->d, xsize, n);
+		MPN_NORMALIZE(xp, xsize);
+	}
+	x->nlimbs = xsize;
+	return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the left
+ * This is used only within the MPI library
+ * Returns 0 on success or -ENOMEM if A cannot be enlarged.
+ */
+int mpi_lshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap;
+	int n = a->nlimbs;
+	int i;
+
+	if (!count || !n)
+		return 0;
+
+	if (RESIZE_IF_NEEDED(a, n + count) < 0)
+		return -ENOMEM;
+
+	/* Must be read after the resize: the limb array may have moved. */
+	ap = a->d;
+
+	/* Move from the top down so the source is not overwritten. */
+	for (i = n - 1; i >= 0; i--)
+		ap[i + count] = ap[i];
+	for (i = 0; i < count; i++)
+		ap[i] = 0;
+	a->nlimbs += count;
+	return 0;
+}
+
+/****************
+ * Shift A by COUNT limbs to the right
+ * This is used only within the MPI library
+ */
+void mpi_rshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap = a->d;
+	mpi_size_t n = a->nlimbs;
+	unsigned int i;
+
+	/* Shifting out every limb leaves zero. */
+	if (count >= n) {
+		a->nlimbs = 0;
+		return;
+	}
+
+	for (i = 0; i < n - count; i++)
+		ap[i] = ap[i + count];
+	ap[i] = 0;
+	a->nlimbs -= count;
+}
diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c
new file mode 100644
index 000000000000..914bc42a8a80
--- /dev/null
+++ b/lib/mpi/mpi-cmp.c
@@ -0,0 +1,68 @@
+/* mpi-cmp.c - MPI functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/* Compare U with the unsigned value V.
+ * Returns 0 if equal, a positive value if U > V, a negative value if
+ * U < V.  U is normalized as a side effect. */
+int mpi_cmp_ui(MPI u, unsigned long v)
+{
+	mpi_limb_t limb = v;
+
+	mpi_normalize(u);
+	/* U is zero: there is no limb to look at, so decide on V alone. */
+	if (!u->nlimbs)
+		return limb ? -1 : 0;
+	if (u->sign)
+		return -1;
+	if (u->nlimbs > 1)
+		return 1;
+
+	if (u->d[0] == limb)
+		return 0;
+	else if (u->d[0] > limb)
+		return 1;
+	else
+		return -1;
+}
+
+/* Compare U with V.
+ * Returns 0 if equal, a positive value if U > V, a negative value if
+ * U < V.  Both operands are normalized as a side effect. */
+int mpi_cmp(MPI u, MPI v)
+{
+	mpi_size_t usize, vsize;
+	int cmp;
+
+	mpi_normalize(u);
+	mpi_normalize(v);
+	usize = u->nlimbs;
+	vsize = v->nlimbs;
+	if (!u->sign && v->sign)
+		return 1;
+	if (u->sign && !v->sign)
+		return -1;
+	if (usize != vsize && !u->sign && !v->sign)
+		return usize - vsize;
+	/* Both negative: the operand with more limbs is the smaller one. */
+	if (usize != vsize && u->sign && v->sign)
+		return vsize - usize;
+	if (!usize)
+		return 0;
+	cmp = mpihelp_cmp(u->d, v->d, usize);
+	if (!cmp)
+		return 0;
+	/* Same sign and same size here: a smaller magnitude means a larger
+	 * value exactly when both operands are negative. */
+	if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0))
+		return 1;
+	return -1;
+}
diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
new file mode 100644
index 000000000000..f68cbbb4d4a4
--- /dev/null
+++ b/lib/mpi/mpi-div.c
@@ -0,0 +1,338 @@
+/* mpi-div.c - MPI functions
+ * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor) +{ + int rc = -ENOMEM; + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + /* We need the original value of the divisor after the remainder has been + * preliminary calculated. We have to copy it to temporary space if it's + * the same variable as REM. */ + if (rem == divisor) { + if (mpi_copy(&temp_divisor, divisor) < 0) + goto nomem; + divisor = temp_divisor; + } + + if (mpi_tdiv_qr(NULL, rem, dividend, divisor) < 0) + goto nomem; + if (((divisor_sign ? 1 : 0) ^ (dividend->sign ? 1 : 0)) && rem->nlimbs) + if (mpi_add(rem, rem, divisor) < 0) + goto nomem; + + rc = 0; + +nomem: + if (temp_divisor) + mpi_free(temp_divisor); + return rc; +} + +/**************** + * Division rounding the quotient towards -infinity. + * The remainder gets the same sign as the denominator. + * rem is optional + */ + +ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor) +{ + mpi_limb_t rlimb; + + rlimb = mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor); + if (rlimb && dividend->sign) + rlimb = divisor - rlimb; + + if (rem) { + rem->d[0] = rlimb; + rem->nlimbs = rlimb ? 
1 : 0; + } + return rlimb; +} + +int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor) +{ + MPI tmp = mpi_alloc(mpi_get_nlimbs(quot)); + if (!tmp) + return -ENOMEM; + mpi_fdiv_qr(quot, tmp, dividend, divisor); + mpi_free(tmp); + return 0; +} + +int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + if (quot == divisor || rem == divisor) { + if (mpi_copy(&temp_divisor, divisor) < 0) + return -ENOMEM; + divisor = temp_divisor; + } + + if (mpi_tdiv_qr(quot, rem, dividend, divisor) < 0) + goto nomem; + + if ((divisor_sign ^ dividend->sign) && rem->nlimbs) { + if (mpi_sub_ui(quot, quot, 1) < 0) + goto nomem; + if (mpi_add(rem, rem, divisor) < 0) + goto nomem; + } + + if (temp_divisor) + mpi_free(temp_divisor); + + return 0; + +nomem: + mpi_free(temp_divisor); + return -ENOMEM; +} + +/* If den == quot, den needs temporary storage. + * If den == rem, den needs temporary storage. + * If num == quot, num needs temporary storage. + * If den has temporary storage, it can be normalized while being copied, + * i.e no extra storage should be allocated. + */ + +int mpi_tdiv_r(MPI rem, MPI num, MPI den) +{ + return mpi_tdiv_qr(NULL, rem, num, den); +} + +int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den) +{ + int rc = -ENOMEM; + mpi_ptr_t np, dp; + mpi_ptr_t qp, rp; + mpi_size_t nsize = num->nlimbs; + mpi_size_t dsize = den->nlimbs; + mpi_size_t qsize, rsize; + mpi_size_t sign_remainder = num->sign; + mpi_size_t sign_quotient = num->sign ^ den->sign; + unsigned normalization_steps; + mpi_limb_t q_limb; + mpi_ptr_t marker[5]; + int markidx = 0; + + if (!dsize) + return -EINVAL; + + memset(marker, 0, sizeof(marker)); + + /* Ensure space is enough for quotient and remainder. + * We need space for an extra limb in the remainder, because it's + * up-shifted (normalized) below. */ + rsize = nsize + 1; + if (mpi_resize(rem, rsize) < 0) + goto nomem; + + qsize = rsize - dsize; /* qsize cannot be bigger than this. 
*/ + if (qsize <= 0) { + if (num != rem) { + rem->nlimbs = num->nlimbs; + rem->sign = num->sign; + MPN_COPY(rem->d, num->d, nsize); + } + if (quot) { + /* This needs to follow the assignment to rem, in case the + * numerator and quotient are the same. */ + quot->nlimbs = 0; + quot->sign = 0; + } + return 0; + } + + if (quot) + if (mpi_resize(quot, qsize) < 0) + goto nomem; + + /* Read pointers here, when reallocation is finished. */ + np = num->d; + dp = den->d; + rp = rem->d; + + /* Optimize division by a single-limb divisor. */ + if (dsize == 1) { + mpi_limb_t rlimb; + if (quot) { + qp = quot->d; + rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]); + qsize -= qp[qsize - 1] == 0; + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } else + rlimb = mpihelp_mod_1(np, nsize, dp[0]); + rp[0] = rlimb; + rsize = rlimb != 0 ? 1 : 0; + rem->nlimbs = rsize; + rem->sign = sign_remainder; + return 0; + } + + if (quot) { + qp = quot->d; + /* Make sure QP and NP point to different objects. Otherwise the + * numerator would be gradually overwritten by the quotient limbs. */ + if (qp == np) { /* Copy NP object to temporary space. */ + np = marker[markidx++] = mpi_alloc_limb_space(nsize); + if (!np) + goto nomem; + MPN_COPY(np, qp, nsize); + } + } else /* Put quotient at top of remainder. */ + qp = rp + dsize; + + count_leading_zeros(normalization_steps, dp[dsize - 1]); + + /* Normalize the denominator, i.e. make its most significant bit set by + * shifting it NORMALIZATION_STEPS bits to the left. Also shift the + * numerator the same number of steps (to keep the quotient the same!). + */ + if (normalization_steps) { + mpi_ptr_t tp; + mpi_limb_t nlimb; + + /* Shift up the denominator setting the most significant bit of + * the most significant word. Use temporary storage not to clobber + * the original contents of the denominator. 
*/ + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + if (!tp) + goto nomem; + mpihelp_lshift(tp, dp, dsize, normalization_steps); + dp = tp; + + /* Shift up the numerator, possibly introducing a new most + * significant word. Move the shifted numerator in the remainder + * meanwhile. */ + nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps); + if (nlimb) { + rp[nsize] = nlimb; + rsize = nsize + 1; + } else + rsize = nsize; + } else { + /* The denominator is already normalized, as required. Copy it to + * temporary space if it overlaps with the quotient or remainder. */ + if (dp == rp || (quot && (dp == qp))) { + mpi_ptr_t tp; + + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + if (!tp) + goto nomem; + MPN_COPY(tp, dp, dsize); + dp = tp; + } + + /* Move the numerator to the remainder. */ + if (rp != np) + MPN_COPY(rp, np, nsize); + + rsize = nsize; + } + + q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize); + + if (quot) { + qsize = rsize - dsize; + if (q_limb) { + qp[qsize] = q_limb; + qsize += 1; + } + + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + + rsize = dsize; + MPN_NORMALIZE(rp, rsize); + + if (normalization_steps && rsize) { + mpihelp_rshift(rp, rp, rsize, normalization_steps); + rsize -= rp[rsize - 1] == 0 ? 
1 : 0; + } + + rem->nlimbs = rsize; + rem->sign = sign_remainder; + + rc = 0; +nomem: + while (markidx) + mpi_free_limb_space(marker[--markidx]); + return rc; +} + +int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count) +{ + mpi_size_t usize, wsize; + mpi_size_t limb_cnt; + + usize = u->nlimbs; + limb_cnt = count / BITS_PER_MPI_LIMB; + wsize = usize - limb_cnt; + if (limb_cnt >= usize) + w->nlimbs = 0; + else { + mpi_ptr_t wp; + mpi_ptr_t up; + + if (RESIZE_IF_NEEDED(w, wsize) < 0) + return -ENOMEM; + wp = w->d; + up = u->d; + + count %= BITS_PER_MPI_LIMB; + if (count) { + mpihelp_rshift(wp, up + limb_cnt, wsize, count); + wsize -= !wp[wsize - 1]; + } else { + MPN_COPY_INCR(wp, up + limb_cnt, wsize); + } + + w->nlimbs = wsize; + } + return 0; +} + +/**************** + * Check whether dividend is divisible by divisor + * (note: divisor must fit into a limb) + */ +int mpi_divisible_ui(MPI dividend, ulong divisor) +{ + return !mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor); +} diff --git a/lib/mpi/mpi-gcd.c b/lib/mpi/mpi-gcd.c new file mode 100644 index 000000000000..13c48aef9c4e --- /dev/null +++ b/lib/mpi/mpi-gcd.c @@ -0,0 +1,59 @@ +/* mpi-gcd.c - MPI functions + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +/**************** + * Find the greatest common divisor G of A and B. + * Return: true if this 1, false in all other cases + */ +int mpi_gcd(MPI g, const MPI xa, const MPI xb) +{ + MPI a = NULL, b = NULL; + + if (mpi_copy(&a, xa) < 0) + goto nomem; + + if (mpi_copy(&b, xb) < 0) + goto nomem; + + /* TAOCP Vol II, 4.5.2, Algorithm A */ + a->sign = 0; + b->sign = 0; + while (mpi_cmp_ui(b, 0)) { + if (mpi_fdiv_r(g, a, b) < 0) /* g used as temorary variable */ + goto nomem; + if (mpi_set(a, b) < 0) + goto nomem; + if (mpi_set(b, g) < 0) + goto nomem; + } + if (mpi_set(g, a) < 0) + goto nomem; + + mpi_free(a); + mpi_free(b); + return !mpi_cmp_ui(g, 1); + +nomem: + mpi_free(a); + mpi_free(b); + return -ENOMEM; +} diff --git a/lib/mpi/mpi-inline.c b/lib/mpi/mpi-inline.c new file mode 100644 index 000000000000..654f68aeed8b --- /dev/null +++ b/lib/mpi/mpi-inline.c @@ -0,0 +1,31 @@ +/* mpi-inline.c + * Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* put the inline functions as real functions into the lib */ +#define G10_MPI_INLINE_DECL + +#include "mpi-internal.h" + +/* always include the header because it is only + * included by mpi-internal if __GNUC__ is defined but we + * need it here in all cases and the above definition + * of the macro allows us to do so + */ +#include "mpi-inline.h" diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h new file mode 100644 index 000000000000..e2b39852b30a --- /dev/null +++ b/lib/mpi/mpi-inline.h @@ -0,0 +1,122 @@ +/* mpi-inline.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INLINE_H +#define G10_MPI_INLINE_H + +#ifndef G10_MPI_INLINE_DECL +#define G10_MPI_INLINE_DECL extern inline +#endif + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb += x; + *res_ptr++ = s2_limb; + if (s2_limb < x) { /* sum is less than the left operand: handle carry */ + while (--s1_size) { + x = *s1_ptr++ + 1; /* add carry */ + *res_ptr++ = x; /* and store */ + if (x) /* not 0 (no overflow): we can stop */ + goto leave; + } + return 1; /* return carry (size of s1 to small) */ + } + +leave: + if (res_ptr != s1_ptr) { /* not the same variable */ + mpi_size_t i; /* copy the rest */ + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; /* no carry */ +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb = x - s2_limb; + *res_ptr++ = s2_limb; + if (s2_limb > x) { + while (--s1_size) { + x = *s1_ptr++; + *res_ptr++ = x - 1; + if (x) + goto leave; + } + return 1; + } + +leave: + if (res_ptr != s1_ptr) { + mpi_size_t i; + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = 
mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +#endif /*G10_MPI_INLINE_H */ diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h new file mode 100644 index 000000000000..77adcf6bc257 --- /dev/null +++ b/lib/mpi/mpi-internal.h @@ -0,0 +1,261 @@ +/* mpi-internal.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
+ */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/mpi.h> +#include <linux/errno.h> + +#define log_debug printk +#define log_bug printk + +#define assert(x) \ + do { \ + if (!x) \ + log_bug("failed assertion\n"); \ + } while (0); + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. + * (wk 22.12.97) */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 16 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + +typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ +typedef int mpi_size_t; /* (must be a signed type) */ + +#define ABS(x) (x >= 0 ? x : -x) +#define MIN(l, o) ((l) < (o) ? (l) : (o)) +#define MAX(h, i) ((h) > (i) ? (h) : (i)) + +static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) +{ + if (a->alloced < b) + return mpi_resize(a, b); + return 0; +} + +/* Copy N limbs from S to D. 
*/ +#define MPN_COPY(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +#define MPN_COPY_INCR(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = (d)[_i]; \ + } while (0) + +#define MPN_COPY_DECR(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = (n)-1; _i >= 0; _i--) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +/* Zero N limbs at D */ +#define MPN_ZERO(d, n) \ + do { \ + int _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = 0; \ + } while (0) + +#define MPN_NORMALIZE(d, n) \ + do { \ + while ((n) > 0) { \ + if ((d)[(n)-1]) \ + break; \ + (n)--; \ + } \ + } while (0) + +#define MPN_NORMALIZE_NOT_ZERO(d, n) \ + do { \ + for (;;) { \ + if ((d)[(n)-1]) \ + break; \ + (n)--; \ + } \ + } while (0) + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mul_n_basecase(prodp, up, vp, size); \ + else \ + mul_n(prodp, up, vp, size, tspace); \ + } while (0); + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones). For correct operation, the most significant bit of D + * has to be set. Put the quotient in Q and the remainder in R. 
+ */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ + do { \ + mpi_limb_t _q, _ql, _r; \ + mpi_limb_t _xh, _xl; \ + umul_ppmm(_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ + umul_ppmm(_xh, _xl, _q, (d)); \ + sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + } \ + } \ + if (_r >= (d)) { \ + _r -= (d); \ + _q++; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) + +/*-- mpiutil.c --*/ +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); +void mpi_free_limb_space(mpi_ptr_t a); +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); + +/*-- mpi-bit.c --*/ +void mpi_rshift_limbs(MPI a, unsigned int count); +int mpi_lshift_limbs(MPI a, unsigned int count); + +/*-- mpihelp-add.c --*/ +mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-sub.c --*/ +mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-cmp.c --*/ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); + +/*-- mpihelp-mul.c --*/ + +struct karatsuba_ctx { + struct karatsuba_ctx *next; + mpi_ptr_t tspace; + mpi_size_t tspace_size; + mpi_ptr_t tp; + mpi_size_t tp_size; +}; + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx); + +mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); 
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); +int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); +void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, + mpi_ptr_t tspace); + +int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx); + +/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ +mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); + +/*-- mpihelp-div.c --*/ +mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); +mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, + mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); + +/*-- mpihelp-shift.c --*/ +mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); +mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); + +/* Define stuff for longlong.h. 
*/ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB +typedef mpi_limb_t UWtype; +typedef unsigned int UHWtype; +#if defined(__GNUC__) +typedef unsigned int UQItype __attribute__ ((mode(QI))); +typedef int SItype __attribute__ ((mode(SI))); +typedef unsigned int USItype __attribute__ ((mode(SI))); +typedef int DItype __attribute__ ((mode(DI))); +typedef unsigned int UDItype __attribute__ ((mode(DI))); +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; +#endif + +#ifdef __GNUC__ +#include "mpi-inline.h" +#endif + +#endif /*G10_MPI_INTERNAL_H */ diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c new file mode 100644 index 000000000000..0951f9847745 --- /dev/null +++ b/lib/mpi/mpi-inv.c @@ -0,0 +1,187 @@ +/* mpi-inv.c - MPI functions + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +/**************** + * Calculate the multiplicative inverse X of A mod N + * That is: Find the solution x for + * 1 = (a*x) mod n + */ +int mpi_invm(MPI x, const MPI a, const MPI n) +{ + /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X) + * modified according to Michael Penk's solution for Exercice 35 + * with further enhancement */ + MPI u = NULL, v = NULL; + MPI u1 = NULL, u2 = NULL, u3 = NULL; + MPI v1 = NULL, v2 = NULL, v3 = NULL; + MPI t1 = NULL, t2 = NULL, t3 = NULL; + unsigned k; + int sign; + int odd = 0; + int rc = -ENOMEM; + + if (mpi_copy(&u, a) < 0) + goto cleanup; + if (mpi_copy(&v, n) < 0) + goto cleanup; + + for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) { + if (mpi_rshift(u, u, 1) < 0) + goto cleanup; + if (mpi_rshift(v, v, 1) < 0) + goto cleanup; + } + odd = mpi_test_bit(v, 0); + + u1 = mpi_alloc_set_ui(1); + if (!u1) + goto cleanup; + if (!odd) { + u2 = mpi_alloc_set_ui(0); + if (!u2) + goto cleanup; + } + if (mpi_copy(&u3, u) < 0) + goto cleanup; + if (mpi_copy(&v1, v) < 0) + goto cleanup; + if (!odd) { + v2 = mpi_alloc(mpi_get_nlimbs(u)); + if (!v2) + goto cleanup; + if (mpi_sub(v2, u1, u) < 0) + goto cleanup; /* U is used as const 1 */ + } + if (mpi_copy(&v3, v) < 0) + goto cleanup; + if (mpi_test_bit(u, 0)) { /* u is odd */ + t1 = mpi_alloc_set_ui(0); + if (!t1) + goto cleanup; + if (!odd) { + t2 = mpi_alloc_set_ui(1); + if (!t2) + goto cleanup; + t2->sign = 1; + } + if (mpi_copy(&t3, v) < 0) + goto cleanup; + t3->sign = !t3->sign; + goto Y4; + } else { + t1 = mpi_alloc_set_ui(1); + if (!t1) + goto cleanup; + if (!odd) { + t2 = mpi_alloc_set_ui(0); + if (!t2) + goto cleanup; + } + if (mpi_copy(&t3, u) < 0) + goto cleanup; + } + do { + do { + if (!odd) { + if 
(mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) { /* one is odd */ + if (mpi_add(t1, t1, v) < 0) + goto cleanup; + if (mpi_sub(t2, t2, u) < 0) + goto cleanup; + } + if (mpi_rshift(t1, t1, 1) < 0) + goto cleanup; + if (mpi_rshift(t2, t2, 1) < 0) + goto cleanup; + if (mpi_rshift(t3, t3, 1) < 0) + goto cleanup; + } else { + if (mpi_test_bit(t1, 0)) + if (mpi_add(t1, t1, v) < 0) + goto cleanup; + if (mpi_rshift(t1, t1, 1) < 0) + goto cleanup; + if (mpi_rshift(t3, t3, 1) < 0) + goto cleanup; + } +Y4: + ; + } while (!mpi_test_bit(t3, 0)); /* while t3 is even */ + + if (!t3->sign) { + if (mpi_set(u1, t1) < 0) + goto cleanup; + if (!odd) + if (mpi_set(u2, t2) < 0) + goto cleanup; + if (mpi_set(u3, t3) < 0) + goto cleanup; + } else { + if (mpi_sub(v1, v, t1) < 0) + goto cleanup; + sign = u->sign; + u->sign = !u->sign; + if (!odd) + if (mpi_sub(v2, u, t2) < 0) + goto cleanup; + u->sign = sign; + sign = t3->sign; + t3->sign = !t3->sign; + if (mpi_set(v3, t3) < 0) + goto cleanup; + t3->sign = sign; + } + if (mpi_sub(t1, u1, v1) < 0) + goto cleanup; + if (!odd) + if (mpi_sub(t2, u2, v2) < 0) + goto cleanup; + if (mpi_sub(t3, u3, v3) < 0) + goto cleanup; + if (t1->sign) { + if (mpi_add(t1, t1, v) < 0) + goto cleanup; + if (!odd) + if (mpi_sub(t2, t2, u) < 0) + goto cleanup; + } + } while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */ + /* mpi_lshift( u3, k ); */ + rc = mpi_set(x, u1); + +cleanup: + mpi_free(u1); + mpi_free(v1); + mpi_free(t1); + if (!odd) { + mpi_free(u2); + mpi_free(v2); + mpi_free(t2); + } + mpi_free(u3); + mpi_free(v3); + mpi_free(t3); + + mpi_free(u); + mpi_free(v); + return rc; +} diff --git a/lib/mpi/mpi-mpow.c b/lib/mpi/mpi-mpow.c new file mode 100644 index 000000000000..7328d0d6c748 --- /dev/null +++ b/lib/mpi/mpi-mpow.c @@ -0,0 +1,134 @@ +/* mpi-mpow.c - MPI functions + * Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. 
+ * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" +#include "longlong.h" + +static int build_index(const MPI *exparray, int k, int i, int t) +{ + int j, bitno; + int index = 0; + + bitno = t - i; + for (j = k - 1; j >= 0; j--) { + index <<= 1; + if (mpi_test_bit(exparray[j], bitno)) + index |= 1; + } + return index; +} + +/**************** + * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... 
* mod M + */ +int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI m) +{ + int rc = -ENOMEM; + int k; /* number of elements */ + int t; /* bit size of largest exponent */ + int i, j, idx; + MPI *G = NULL; /* table with precomputed values of size 2^k */ + MPI tmp = NULL; + + for (k = 0; basearray[k]; k++) + ; + if (!k) { + pr_emerg("mpi_mulpowm: assert(k) failed\n"); + BUG(); + } + for (t = 0, i = 0; (tmp = exparray[i]); i++) { + j = mpi_get_nbits(tmp); + if (j > t) + t = j; + } + if (i != k) { + pr_emerg("mpi_mulpowm: assert(i==k) failed\n"); + BUG(); + } + if (!t) { + pr_emerg("mpi_mulpowm: assert(t) failed\n"); + BUG(); + } + if (k >= 10) { + pr_emerg("mpi_mulpowm: assert(k<10) failed\n"); + BUG(); + } + + G = kzalloc((1 << k) * sizeof *G, GFP_KERNEL); + if (!G) + goto err_out; + + /* and calculate */ + tmp = mpi_alloc(mpi_get_nlimbs(m) + 1); + if (!tmp) + goto nomem; + if (mpi_set_ui(res, 1) < 0) + goto nomem; + for (i = 1; i <= t; i++) { + if (mpi_mulm(tmp, res, res, m) < 0) + goto nomem; + idx = build_index(exparray, k, i, t); + if (!(idx >= 0 && idx < (1 << k))) { + pr_emerg("mpi_mulpowm: assert(idx >= 0 && idx < (1<<k)) failed\n"); + BUG(); + } + if (!G[idx]) { + if (!idx) { + G[0] = mpi_alloc_set_ui(1); + if (!G[0]) + goto nomem; + } else { + for (j = 0; j < k; j++) { + if ((idx & (1 << j))) { + if (!G[idx]) { + if (mpi_copy + (&G[idx], + basearray[j]) < 0) + goto nomem; + } else { + if (mpi_mulm + (G[idx], G[idx], + basearray[j], + m) < 0) + goto nomem; + } + } + } + if (!G[idx]) { + G[idx] = mpi_alloc(0); + if (!G[idx]) + goto nomem; + } + } + } + if (mpi_mulm(res, tmp, G[idx], m) < 0) + goto nomem; + } + + rc = 0; +nomem: + /* cleanup */ + mpi_free(tmp); + for (i = 0; i < (1 << k); i++) + mpi_free(G[i]); + kfree(G); +err_out: + return rc; +} diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c new file mode 100644 index 000000000000..1f3219e27292 --- /dev/null +++ b/lib/mpi/mpi-mul.c @@ -0,0 +1,194 @@ +/* mpi-mul.c - MPI functions + * Copyright (C) 
1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
+ */ + +#include "mpi-internal.h" + +int mpi_mul_ui(MPI prod, MPI mult, unsigned long small_mult) +{ + mpi_size_t size, prod_size; + mpi_ptr_t prod_ptr; + mpi_limb_t cy; + int sign; + + size = mult->nlimbs; + sign = mult->sign; + + if (!size || !small_mult) { + prod->nlimbs = 0; + prod->sign = 0; + return 0; + } + + prod_size = size + 1; + if (prod->alloced < prod_size) + if (mpi_resize(prod, prod_size) < 0) + return -ENOMEM; + prod_ptr = prod->d; + + cy = mpihelp_mul_1(prod_ptr, mult->d, size, (mpi_limb_t) small_mult); + if (cy) + prod_ptr[size++] = cy; + prod->nlimbs = size; + prod->sign = sign; + return 0; +} + +int mpi_mul_2exp(MPI w, MPI u, unsigned long cnt) +{ + mpi_size_t usize, wsize, limb_cnt; + mpi_ptr_t wp; + mpi_limb_t wlimb; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + + if (!usize) { + w->nlimbs = 0; + w->sign = 0; + return 0; + } + + limb_cnt = cnt / BITS_PER_MPI_LIMB; + wsize = usize + limb_cnt + 1; + if (w->alloced < wsize) + if (mpi_resize(w, wsize) < 0) + return -ENOMEM; + wp = w->d; + wsize = usize + limb_cnt; + wsign = usign; + + cnt %= BITS_PER_MPI_LIMB; + if (cnt) { + wlimb = mpihelp_lshift(wp + limb_cnt, u->d, usize, cnt); + if (wlimb) { + wp[wsize] = wlimb; + wsize++; + } + } else { + MPN_COPY_DECR(wp + limb_cnt, u->d, usize); + } + + /* Zero all whole limbs at low end. Do it here and not before calling + * mpn_lshift, not to lose for U == W. */ + MPN_ZERO(wp, limb_cnt); + + w->nlimbs = wsize; + w->sign = wsign; + return 0; +} + +int mpi_mul(MPI w, MPI u, MPI v) +{ + int rc = -ENOMEM; + mpi_size_t usize, vsize, wsize; + mpi_ptr_t up, vp, wp; + mpi_limb_t cy; + int usign, vsign, sign_product; + int assign_wp = 0; + mpi_ptr_t tmp_limb = NULL; + + if (u->nlimbs < v->nlimbs) { /* Swap U and V. 
*/ + usize = v->nlimbs; + usign = v->sign; + up = v->d; + vsize = u->nlimbs; + vsign = u->sign; + vp = u->d; + } else { + usize = u->nlimbs; + usign = u->sign; + up = u->d; + vsize = v->nlimbs; + vsign = v->sign; + vp = v->d; + } + sign_product = usign ^ vsign; + wp = w->d; + + /* Ensure W has space enough to store the result. */ + wsize = usize + vsize; + if (w->alloced < (size_t) wsize) { + if (wp == up || wp == vp) { + wp = mpi_alloc_limb_space(wsize); + if (!wp) + goto nomem; + assign_wp = 1; + } else { + if (mpi_resize(w, wsize) < 0) + goto nomem; + wp = w->d; + } + } else { /* Make U and V not overlap with W. */ + if (wp == up) { + /* W and U are identical. Allocate temporary space for U. */ + up = tmp_limb = mpi_alloc_limb_space(usize); + if (!up) + goto nomem; + /* Is V identical too? Keep it identical with U. */ + if (wp == vp) + vp = up; + /* Copy to the temporary space. */ + MPN_COPY(up, wp, usize); + } else if (wp == vp) { + /* W and V are identical. Allocate temporary space for V. */ + vp = tmp_limb = mpi_alloc_limb_space(vsize); + if (!vp) + goto nomem; + /* Copy to the temporary space. */ + MPN_COPY(vp, wp, vsize); + } + } + + if (!vsize) + wsize = 0; + else { + if (mpihelp_mul(wp, up, usize, vp, vsize, &cy) < 0) + goto nomem; + wsize -= cy ? 0 : 1; + } + + if (assign_wp) + mpi_assign_limb_space(w, wp, wsize); + + w->nlimbs = wsize; + w->sign = sign_product; + rc = 0; +nomem: + if (tmp_limb) + mpi_free_limb_space(tmp_limb); + return rc; +} + +int mpi_mulm(MPI w, MPI u, MPI v, MPI m) +{ + if (mpi_mul(w, u, v) < 0) + return -ENOMEM; + return mpi_fdiv_r(w, w, m); +} diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c new file mode 100644 index 000000000000..67f3e79af914 --- /dev/null +++ b/lib/mpi/mpi-pow.c @@ -0,0 +1,323 @@ +/* mpi-pow.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. 
+ * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. 
+ */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +/**************** + * RES = BASE ^ EXP mod MOD + */ +int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) +{ + mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; + mpi_ptr_t xp_marker = NULL; + mpi_ptr_t tspace = NULL; + mpi_ptr_t rp, ep, mp, bp; + mpi_size_t esize, msize, bsize, rsize; + int esign, msign, bsign, rsign; + mpi_size_t size; + int mod_shift_cnt; + int negative_result; + int assign_rp = 0; + mpi_size_t tsize = 0; /* to avoid compiler warning */ + /* fixme: we should check that the warning is void */ + int rc = -ENOMEM; + + esize = exp->nlimbs; + msize = mod->nlimbs; + size = 2 * msize; + esign = exp->sign; + msign = mod->sign; + + rp = res->d; + ep = exp->d; + + if (!msize) + return -EINVAL; + + if (!esize) { + /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 + * depending on if MOD equals 1. */ + rp[0] = 1; + res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + res->sign = 0; + goto leave; + } + + /* Normalize MOD (i.e. make its most significant bit set) as required by + * mpn_divrem. This will make the intermediate values in the calculation + * slightly larger, but the correct result is obtained after a final + * reduction using the original MOD value. */ + mp = mp_marker = mpi_alloc_limb_space(msize); + if (!mp) + goto enomem; + count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]); + if (mod_shift_cnt) + mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); + else + MPN_COPY(mp, mod->d, msize); + + bsize = base->nlimbs; + bsign = base->sign; + if (bsize > msize) { /* The base is larger than the module. Reduce it. */ + /* Allocate (BSIZE + 1) with space for remainder and quotient. + * (The quotient is (bsize - msize + 1) limbs.) */ + bp = bp_marker = mpi_alloc_limb_space(bsize + 1); + if (!bp) + goto enomem; + MPN_COPY(bp, base->d, bsize); + /* We don't care about the quotient, store it above the remainder, + * at BP + MSIZE. 
*/ + mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize); + bsize = msize; + /* Canonicalize the base, since we are going to multiply with it + * quite a few times. */ + MPN_NORMALIZE(bp, bsize); + } else + bp = base->d; + + if (!bsize) { + res->nlimbs = 0; + res->sign = 0; + goto leave; + } + + if (res->alloced < size) { + /* We have to allocate more space for RES. If any of the input + * parameters are identical to RES, defer deallocation of the old + * space. */ + if (rp == ep || rp == mp || rp == bp) { + rp = mpi_alloc_limb_space(size); + if (!rp) + goto enomem; + assign_rp = 1; + } else { + if (mpi_resize(res, size) < 0) + goto enomem; + rp = res->d; + } + } else { /* Make BASE, EXP and MOD not overlap with RES. */ + if (rp == bp) { + /* RES and BASE are identical. Allocate temp. space for BASE. */ + BUG_ON(bp_marker); + bp = bp_marker = mpi_alloc_limb_space(bsize); + if (!bp) + goto enomem; + MPN_COPY(bp, rp, bsize); + } + if (rp == ep) { + /* RES and EXP are identical. Allocate temp. space for EXP. */ + ep = ep_marker = mpi_alloc_limb_space(esize); + if (!ep) + goto enomem; + MPN_COPY(ep, rp, esize); + } + if (rp == mp) { + /* RES and MOD are identical. Allocate temporary space for MOD. */ + BUG_ON(mp_marker); + mp = mp_marker = mpi_alloc_limb_space(msize); + if (!mp) + goto enomem; + MPN_COPY(mp, rp, msize); + } + } + + MPN_COPY(rp, bp, bsize); + rsize = bsize; + rsign = bsign; + + { + mpi_size_t i; + mpi_ptr_t xp; + int c; + mpi_limb_t e; + mpi_limb_t carry_limb; + struct karatsuba_ctx karactx; + + xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); + if (!xp) + goto enomem; + + memset(&karactx, 0, sizeof karactx); + negative_result = (ep[0] & 1) && base->sign; + + i = esize - 1; + e = ep[i]; + count_leading_zeros(c, e); + e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ + c = BITS_PER_MPI_LIMB - 1 - c; + + /* Main loop. + * + * Make the result be pointed to alternately by XP and RP. 
This + * helps us avoid block copying, which would otherwise be necessary + * with the overlap restrictions of mpihelp_divmod. With 50% probability + * the result after this loop will be in the area originally pointed + * by RP (==RES->d), and with 50% probability in the area originally + * pointed to by XP. + */ + + for (;;) { + while (c) { + mpi_ptr_t tp; + mpi_size_t xsize; + + /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */ + if (rsize < KARATSUBA_THRESHOLD) + mpih_sqr_n_basecase(xp, rp, rsize); + else { + if (!tspace) { + tsize = 2 * rsize; + tspace = + mpi_alloc_limb_space(tsize); + if (!tspace) + goto enomem; + } else if (tsize < (2 * rsize)) { + mpi_free_limb_space(tspace); + tsize = 2 * rsize; + tspace = + mpi_alloc_limb_space(tsize); + if (!tspace) + goto enomem; + } + mpih_sqr_n(xp, rp, rsize, tspace); + } + + xsize = 2 * rsize; + if (xsize > msize) { + mpihelp_divrem(xp + msize, 0, xp, xsize, + mp, msize); + xsize = msize; + } + + tp = rp; + rp = xp; + xp = tp; + rsize = xsize; + + if ((mpi_limb_signed_t) e < 0) { + /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */ + if (bsize < KARATSUBA_THRESHOLD) { + mpi_limb_t tmp; + if (mpihelp_mul + (xp, rp, rsize, bp, bsize, + &tmp) < 0) + goto enomem; + } else { + if (mpihelp_mul_karatsuba_case + (xp, rp, rsize, bp, bsize, + &karactx) < 0) + goto enomem; + } + + xsize = rsize + bsize; + if (xsize > msize) { + mpihelp_divrem(xp + msize, 0, + xp, xsize, mp, + msize); + xsize = msize; + } + + tp = rp; + rp = xp; + xp = tp; + rsize = xsize; + } + e <<= 1; + c--; + } + + i--; + if (i < 0) + break; + e = ep[i]; + c = BITS_PER_MPI_LIMB; + } + + /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT + * steps. Adjust the result by reducing it with the original MOD. + * + * Also make sure the result is put in RES->d (where it already + * might be, see above). 
+ */ + if (mod_shift_cnt) { + carry_limb = + mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt); + rp = res->d; + if (carry_limb) { + rp[rsize] = carry_limb; + rsize++; + } + } else { + MPN_COPY(res->d, rp, rsize); + rp = res->d; + } + + if (rsize >= msize) { + mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize); + rsize = msize; + } + + /* Remove any leading zero words from the result. */ + if (mod_shift_cnt) + mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); + MPN_NORMALIZE(rp, rsize); + + mpihelp_release_karatsuba_ctx(&karactx); + } + + if (negative_result && rsize) { + if (mod_shift_cnt) + mpihelp_rshift(mp, mp, msize, mod_shift_cnt); + mpihelp_sub(rp, mp, msize, rp, rsize); + rsize = msize; + rsign = msign; + MPN_NORMALIZE(rp, rsize); + } + res->nlimbs = rsize; + res->sign = rsign; + +leave: + rc = 0; +enomem: + if (assign_rp) + mpi_assign_limb_space(res, rp, size); + if (mp_marker) + mpi_free_limb_space(mp_marker); + if (bp_marker) + mpi_free_limb_space(bp_marker); + if (ep_marker) + mpi_free_limb_space(ep_marker); + if (xp_marker) + mpi_free_limb_space(xp_marker); + if (tspace) + mpi_free_limb_space(tspace); + return rc; +} +EXPORT_SYMBOL_GPL(mpi_powm); diff --git a/lib/mpi/mpi-scan.c b/lib/mpi/mpi-scan.c new file mode 100644 index 000000000000..b2da5ad96199 --- /dev/null +++ b/lib/mpi/mpi-scan.c @@ -0,0 +1,136 @@ +/* mpi-scan.c - MPI functions + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" +#include "longlong.h" + +/**************** + * Scan through an mpi and return byte for byte. a -1 is returned to indicate + * the end of the mpi. Scanning is done from the lsb to the msb, returned + * values are in the range of 0 .. 255. + * + * FIXME: This code is VERY ugly! + */ +int mpi_getbyte(const MPI a, unsigned idx) +{ + int i, j; + unsigned n; + mpi_ptr_t ap; + mpi_limb_t limb; + + ap = a->d; + for (n = 0, i = 0; i < a->nlimbs; i++) { + limb = ap[i]; + for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++) + if (n == idx) + return (limb >> j * 8) & 0xff; + } + return -1; +} + +/**************** + * Put a value at position IDX into A. idx counts from lsb to msb + */ +void mpi_putbyte(MPI a, unsigned idx, int xc) +{ + int i, j; + unsigned n; + mpi_ptr_t ap; + mpi_limb_t limb, c; + + c = xc & 0xff; + ap = a->d; + for (n = 0, i = 0; i < a->alloced; i++) { + limb = ap[i]; + for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++) + if (n == idx) { +#if BYTES_PER_MPI_LIMB == 4 + if (j == 0) + limb = (limb & 0xffffff00) | c; + else if (j == 1) + limb = (limb & 0xffff00ff) | (c << 8); + else if (j == 2) + limb = (limb & 0xff00ffff) | (c << 16); + else + limb = (limb & 0x00ffffff) | (c << 24); +#elif BYTES_PER_MPI_LIMB == 8 + if (j == 0) + limb = (limb & 0xffffffffffffff00) | c; + else if (j == 1) + limb = + (limb & 0xffffffffffff00ff) | (c << + 8); + else if (j == 2) + limb = + (limb & 0xffffffffff00ffff) | (c << + 16); + else if (j == 3) + limb = + (limb & 0xffffffff00ffffff) | (c << + 24); + else if (j == 4) + limb = + (limb & 0xffffff00ffffffff) | (c << + 32); + else if (j == 5) + limb = + (limb & 0xffff00ffffffffff) | (c << + 40); + else if (j == 6) + limb = + (limb & 0xff00ffffffffffff) | (c << + 48); + else + limb = + (limb & 
0x00ffffffffffffff) | (c << + 56); +#else +#error please enhance this function, its ugly - i know. +#endif + if (a->nlimbs <= i) + a->nlimbs = i + 1; + ap[i] = limb; + return; + } + } + log_bug("index out of range\n"); +} + +/**************** + * Count the number of zerobits at the low end of A + */ +unsigned mpi_trailing_zeros(const MPI a) +{ + unsigned n, count = 0; + + for (n = 0; n < a->nlimbs; n++) { + if (a->d[n]) { + unsigned nn; + mpi_limb_t alimb = a->d[n]; + + count_trailing_zeros(nn, alimb); + count += nn; + break; + } + count += BITS_PER_MPI_LIMB; + } + return count; + +} diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c new file mode 100644 index 000000000000..f26b41fcb48c --- /dev/null +++ b/lib/mpi/mpicoder.c @@ -0,0 +1,280 @@ +/* mpicoder.c - Coder for the external representation of MPIs + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +#define MAX_EXTERN_MPI_BITS 16384 + +MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) +{ + const uint8_t *buffer = xbuffer; + int i, j; + unsigned nbits, nbytes, nlimbs, nread = 0; + mpi_limb_t a; + MPI val = NULL; + + if (*ret_nread < 2) + goto leave; + nbits = buffer[0] << 8 | buffer[1]; + + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + goto leave; + } + buffer += 2; + nread = 2; + + nbytes = (nbits + 7) / 8; + nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + val->nbits = nbits; + j = val->nlimbs = nlimbs; + val->sign = 0; + for (; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + if (++nread > *ret_nread) { + printk + ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", + nread, *ret_nread); + goto leave; + } + a <<= 8; + a |= *buffer++; + } + i = 0; + val->d[j - 1] = a; + } + +leave: + *ret_nread = nread; + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_from_buffer); + +/**************** + * Make an mpi from a character string. 
+ */ +int mpi_fromstr(MPI val, const char *str) +{ + int hexmode = 0, sign = 0, prepend_zero = 0, i, j, c, c1, c2; + unsigned nbits, nbytes, nlimbs; + mpi_limb_t a; + + if (*str == '-') { + sign = 1; + str++; + } + if (*str == '0' && str[1] == 'x') + hexmode = 1; + else + return -EINVAL; /* other bases are not yet supported */ + str += 2; + + nbits = strlen(str) * 4; + if (nbits % 8) + prepend_zero = 1; + nbytes = (nbits + 7) / 8; + nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + if (val->alloced < nlimbs) + if (!mpi_resize(val, nlimbs)) + return -ENOMEM; + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + j = val->nlimbs = nlimbs; + val->sign = sign; + for (; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + if (prepend_zero) { + c1 = '0'; + prepend_zero = 0; + } else + c1 = *str++; + assert(c1); + c2 = *str++; + assert(c2); + if (c1 >= '0' && c1 <= '9') + c = c1 - '0'; + else if (c1 >= 'a' && c1 <= 'f') + c = c1 - 'a' + 10; + else if (c1 >= 'A' && c1 <= 'F') + c = c1 - 'A' + 10; + else { + mpi_clear(val); + return 1; + } + c <<= 4; + if (c2 >= '0' && c2 <= '9') + c |= c2 - '0'; + else if (c2 >= 'a' && c2 <= 'f') + c |= c2 - 'a' + 10; + else if (c2 >= 'A' && c2 <= 'F') + c |= c2 - 'A' + 10; + else { + mpi_clear(val); + return 1; + } + a <<= 8; + a |= c; + } + i = 0; + + val->d[j - 1] = a; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mpi_fromstr); + +/**************** + * Return an allocated buffer with the MPI (msb first). + * NBYTES receives the length of this buffer. Caller must free the + * return string (This function does return a 0 byte buffer with NBYTES + * set to zero if the value of A is zero. If sign is not NULL, it will + * be set to the sign of the A. 
+ */ +void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) +{ + uint8_t *p, *buffer; + mpi_limb_t alimb; + int i; + unsigned int n; + + if (sign) + *sign = a->sign; + *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB; + if (!n) + n++; /* avoid zero length allocation */ + p = buffer = kmalloc(n, GFP_KERNEL); + if (!p) + return NULL; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; +#if BYTES_PER_MPI_LIMB == 4 + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#elif BYTES_PER_MPI_LIMB == 8 + *p++ = alimb >> 56; + *p++ = alimb >> 48; + *p++ = alimb >> 40; + *p++ = alimb >> 32; + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#else +#error please implement for this limb size. +#endif + } + + /* this is sub-optimal but we need to do the shift operation + * because the caller has to free the returned buffer */ + for (p = buffer; !*p && *nbytes; p++, --*nbytes) + ; + if (p != buffer) + memmove(buffer, p, *nbytes); + + return buffer; +} +EXPORT_SYMBOL_GPL(mpi_get_buffer); + +/**************** + * Use BUFFER to update MPI. 
+ */ +int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) +{ + const uint8_t *buffer = xbuffer, *p; + mpi_limb_t alimb; + int nlimbs; + int i; + + nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB; + if (RESIZE_IF_NEEDED(a, nlimbs) < 0) + return -ENOMEM; + a->sign = sign; + + for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) { +#if BYTES_PER_MPI_LIMB == 4 + alimb = (mpi_limb_t) *p--; + alimb |= (mpi_limb_t) *p-- << 8; + alimb |= (mpi_limb_t) *p-- << 16; + alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t) *p--; + alimb |= (mpi_limb_t) *p-- << 8; + alimb |= (mpi_limb_t) *p-- << 16; + alimb |= (mpi_limb_t) *p-- << 24; + alimb |= (mpi_limb_t) *p-- << 32; + alimb |= (mpi_limb_t) *p-- << 40; + alimb |= (mpi_limb_t) *p-- << 48; + alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. +#endif + a->d[i++] = alimb; + } + if (p >= buffer) { +#if BYTES_PER_MPI_LIMB == 4 + alimb = *p--; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t) *p--; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 24; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 32; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 40; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 48; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. 
+#endif + a->d[i++] = alimb; + } + a->nlimbs = i; + + if (i != nlimbs) { + pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i, + nlimbs); + BUG(); + } + return 0; +} +EXPORT_SYMBOL_GPL(mpi_set_buffer); diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c new file mode 100644 index 000000000000..b2fd39677f1b --- /dev/null +++ b/lib/mpi/mpih-cmp.c @@ -0,0 +1,56 @@ +/* mpihelp-sub.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/**************** + * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + * There are no restrictions on the relative sizes of + * the two arguments. + * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. 
+ */
/* Both operands are SIZE limbs long; limbs are compared from the most
 * significant one downwards and the first difference decides. */
int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
{
	mpi_size_t idx = size;

	while (idx-- > 0) {
		mpi_limb_t lhs = op1_ptr[idx];
		mpi_limb_t rhs = op2_ptr[idx];

		if (lhs == rhs)
			continue;

		/* This can *not* be simplified to
		 *   op1_word - op2_word
		 * since that expression might give signed overflow.  */
		return (lhs > rhs) ? 1 : -1;
	}
	return 0;
}
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c new file mode 100644 index 000000000000..cde1aaec18da --- /dev/null +++ b/lib/mpi/mpih-div.c @@ -0,0 +1,545 @@ +/* mpihelp-div.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL.
+ */

#include "mpi-internal.h"
#include "longlong.h"

/* Fallback cost estimates, used only when longlong.h did not already
 * define them.  They feed the "is pre-inversion worth it?" tests below. */
#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif
#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif

/* FIXME: We should be using invert_limb (or invert_normalized_limb)
 * here (not udiv_qrnnd).
 */

/* Reduce the DIVIDEND_SIZE-limb number at DIVIDEND_PTR by the single
 * limb DIVISOR_LIMB and return the final value of R, the running
 * remainder.  The dividend is only read.  Returns 0 for an empty
 * dividend.
 */
mpi_limb_t
mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
	      mpi_limb_t divisor_limb)
{
	mpi_size_t i;
	mpi_limb_t n1, n0, r;
	int dummy;		/* receives the quotient limbs, which are discarded */

	/* Botch: Should this be handled at all?  Rely on callers?  */
	if (!dividend_size)
		return 0;

	/* If multiplication is much faster than division, and the
	 * dividend is large, pre-invert the divisor, and use
	 * only multiplications in the inner loop.
	 *
	 * This test should be read:
	 *	 Does it ever help to use udiv_qrnnd_preinv?
	 *	   && Does what we save compensate for the inversion overhead?
	 */
	if (UDIV_TIME > (2 * UMUL_TIME + 6)
	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
		int normalization_steps;

		count_leading_zeros(normalization_steps, divisor_limb);
		if (normalization_steps) {
			mpi_limb_t divisor_limb_inverted;

			divisor_limb <<= normalization_steps;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t) 0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			n1 = dividend_ptr[dividend_size - 1];
			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

			/* Possible optimization:
			 * if (r == 0
			 * && divisor_limb > ((n1 << normalization_steps)
			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
			 * ...one division less...
			 */
			/* Each step feeds the dividend shifted left by the
			 * same amount as the divisor: high bits from N1,
			 * low bits from the next limb N0. */
			for (i = dividend_size - 2; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						  ((n1 << normalization_steps)
						   | (n0 >>
						      (BITS_PER_MPI_LIMB -
						       normalization_steps))),
						  divisor_limb,
						  divisor_limb_inverted);
				n1 = n0;
			}
			UDIV_QRNND_PREINV(dummy, r, r,
					  n1 << normalization_steps,
					  divisor_limb, divisor_limb_inverted);
			/* Undo the normalization shift on the remainder. */
			return r >> normalization_steps;
		} else {
			mpi_limb_t divisor_limb_inverted;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 *
			 * Special case for DIVISOR_LIMB == 100...000.
			 */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t) 0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			i = dividend_size - 1;
			r = dividend_ptr[i];

			/* If the top limb is already below the divisor it
			 * serves as the initial remainder and is skipped. */
			if (r >= divisor_limb)
				r = 0;
			else
				i--;

			for (; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(dummy, r, r,
						  n0, divisor_limb,
						  divisor_limb_inverted);
			}
			return r;
		}
	} else {
		if (UDIV_NEEDS_NORMALIZATION) {
			int normalization_steps;

			count_leading_zeros(normalization_steps, divisor_limb);
			if (normalization_steps) {
				divisor_limb <<= normalization_steps;

				n1 = dividend_ptr[dividend_size - 1];
				r = n1 >> (BITS_PER_MPI_LIMB -
					   normalization_steps);

				/* Possible optimization:
				 * if (r == 0
				 * && divisor_limb > ((n1 << normalization_steps)
				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
				 * ...one division less...
				 */
				for (i = dividend_size - 2; i >= 0; i--) {
					n0 = dividend_ptr[i];
					udiv_qrnnd(dummy, r, r,
						   ((n1 << normalization_steps)
						    | (n0 >>
						       (BITS_PER_MPI_LIMB -
							normalization_steps))),
						   divisor_limb);
					n1 = n0;
				}
				udiv_qrnnd(dummy, r, r,
					   n1 << normalization_steps,
					   divisor_limb);
				return r >> normalization_steps;
			}
		}
		/* No normalization needed, either because udiv_qrnnd doesn't require
		 * it, or because DIVISOR_LIMB is already normalized.  */
		i = dividend_size - 1;
		r = dividend_ptr[i];

		if (r >= divisor_limb)
			r = 0;
		else
			i--;

		for (; i >= 0; i--) {
			n0 = dividend_ptr[i];
			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
		}
		return r;
	}
}

/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
 * the NSIZE-DSIZE least significant quotient limbs at QP
 * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is
 * non-zero, generate that many fraction bits and append them after the
 * other quotient limbs.
 * Return the most significant limb of the quotient, this is always 0 or 1.
 *
 * Preconditions:
 * 0. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
 * 1. The most significant bit of the divisor must be set.
 * 2. QP must either not overlap with the input operands at all, or
 *    QP + DSIZE >= NP must hold true.	(This means that it's
 *    possible to put the quotient in the high part of NUM, right after the
 *    remainder in NUM.)
 *
 * The work is dispatched on DSIZE: dedicated code for one and two limb
 * divisors, a general loop for anything larger.
 */

mpi_limb_t
mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
	       mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
{
	mpi_limb_t most_significant_q_limb = 0;

	switch (dsize) {
	case 0:
		/* We are asked to divide by zero, so go ahead and do it!  (To make
		   the compiler not remove this statement, return the value.)  */
		/*
		 * existing clients of this function have been modified
		 * not to call it with dsize == 0, so this should not happen
		 */
		return 1 / dsize;

	case 1:
		{
			mpi_size_t i;
			mpi_limb_t n1;
			mpi_limb_t d;

			d = dp[0];
			n1 = np[nsize - 1];

			if (n1 >= d) {
				n1 -= d;
				most_significant_q_limb = 1;
			}

			/* Integer quotient limbs go above the QEXTRA_LIMBS
			 * fraction limbs. */
			qp += qextra_limbs;
			for (i = nsize - 2; i >= 0; i--)
				udiv_qrnnd(qp[i], n1, n1, np[i], d);
			qp -= qextra_limbs;

			/* Fraction limbs: keep dividing with zero limbs
			 * shifted in. */
			for (i = qextra_limbs - 1; i >= 0; i--)
				udiv_qrnnd(qp[i], n1, n1, 0, d);

			np[0] = n1;
		}
		break;

	case 2:
		{
			mpi_size_t i;
			mpi_limb_t n1, n0, n2;
			mpi_limb_t d1, d0;

			np += nsize - 2;
			d1 = dp[1];
			d0 = dp[0];
			n1 = np[1];
			n0 = np[0];

			if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
				sub_ddmmss(n1, n0, n1, n0, d1, d0);
				most_significant_q_limb = 1;
			}

			for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
				mpi_limb_t q;
				mpi_limb_t r;

				if (i >= qextra_limbs)
					np--;
				else
					np[0] = 0;

				if (n1 == d1) {
					/* Q should be either 111..111 or 111..110.  Need special
					 * treatment of this rare case as normal division would
					 * give overflow.  */
					q = ~(mpi_limb_t) 0;

					r = n0 + d1;
					if (r < d1) {	/* Carry in the addition? */
						add_ssaaaa(n1, n0, r - d0,
							   np[0], 0, d0);
						qp[i] = q;
						continue;
					}
					n1 = d0 - (d0 != 0 ? 1 : 0);
					n0 = -d0;
				} else {
					udiv_qrnnd(q, r, n1, n0, d1);
					umul_ppmm(n1, n0, d0, q);
				}

				n2 = np[0];
q_test:
				if (n1 > r || (n1 == r && n0 > n2)) {
					/* The estimated Q was too large.  */
					q--;
					sub_ddmmss(n1, n0, n1, n0, 0, d0);
					r += d1;
					if (r >= d1)	/* If not carry, test Q again.  */
						goto q_test;
				}

				qp[i] = q;
				sub_ddmmss(n1, n0, r, n2, n1, n0);
			}
			np[1] = n1;
			np[0] = n0;
		}
		break;

	default:
		{
			mpi_size_t i;
			mpi_limb_t dX, d1, n0;

			np += nsize - dsize;
			dX = dp[dsize - 1];
			d1 = dp[dsize - 2];
			n0 = np[dsize - 1];

			if (n0 >= dX) {
				if (n0 > dX
				    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
					mpihelp_sub_n(np, np, dp, dsize);
					n0 = np[dsize - 1];
					most_significant_q_limb = 1;
				}
			}

			for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
				mpi_limb_t q;
				mpi_limb_t n1, n2;
				mpi_limb_t cy_limb;

				if (i >= qextra_limbs) {
					np--;
					n2 = np[dsize];
				} else {
					/* Fraction limb: shift the partial
					 * remainder up one limb and bring in
					 * a zero. */
					n2 = np[dsize - 1];
					MPN_COPY_DECR(np + 1, np, dsize - 1);
					np[0] = 0;
				}

				if (n0 == dX) {
					/* This might over-estimate q, but it's probably not worth
					 * the extra code here to find out.  */
					q = ~(mpi_limb_t) 0;
				} else {
					mpi_limb_t r;

					udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
					umul_ppmm(n1, n0, d1, q);

					while (n1 > r
					       || (n1 == r
						   && n0 > np[dsize - 2])) {
						q--;
						r += dX;
						if (r < dX)	/* I.e. "carry in previous addition?" */
							break;
						n1 -= n0 < d1;
						n0 -= d1;
					}
				}

				/* Possible optimization: We already have (q * n0) and (1 * n1)
				 * after the calculation of q.	Taking advantage of that, we
				 * could make this loop make two iterations less.  */
				cy_limb = mpihelp_submul_1(np, dp, dsize, q);

				/* A borrow that does not match the top limb
				 * means Q was one too large: add the divisor
				 * back and correct Q. */
				if (n2 != cy_limb) {
					mpihelp_add_n(np, np, dp, dsize);
					q--;
				}

				qp[i] = q;
				n0 = np[dsize - 1];
			}
		}
	}

	return most_significant_q_limb;
}

+/**************** + * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + * Return the single-limb remainder. + * There are no constraints on the value of the divisor. + * + * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */

/* Returns 0 without touching QUOT_PTR when DIVIDEND_SIZE is zero. */
mpi_limb_t
mpihelp_divmod_1(mpi_ptr_t quot_ptr,
		 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
		 mpi_limb_t divisor_limb)
{
	mpi_size_t i;
	mpi_limb_t n1, n0, r;
	int dummy;		/* scratch output of the divisor inversion */

	if (!dividend_size)
		return 0;

	/* If multiplication is much faster than division, and the
	 * dividend is large, pre-invert the divisor, and use
	 * only multiplications in the inner loop.
	 *
	 * This test should be read:
	 * Does it ever help to use udiv_qrnnd_preinv?
	 * && Does what we save compensate for the inversion overhead?
	 */
	if (UDIV_TIME > (2 * UMUL_TIME + 6)
	    && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
		int normalization_steps;

		count_leading_zeros(normalization_steps, divisor_limb);
		if (normalization_steps) {
			mpi_limb_t divisor_limb_inverted;

			divisor_limb <<= normalization_steps;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 */
			/* Special case for DIVISOR_LIMB == 100...000.  */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t) 0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			n1 = dividend_ptr[dividend_size - 1];
			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

			/* Possible optimization:
			 * if (r == 0
			 * && divisor_limb > ((n1 << normalization_steps)
			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
			 * ...one division less...
			 */
			/* The quotient limb of step I is stored at I + 1;
			 * the final step below fills QUOT_PTR[0]. */
			for (i = dividend_size - 2; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
						  ((n1 << normalization_steps)
						   | (n0 >>
						      (BITS_PER_MPI_LIMB -
						       normalization_steps))),
						  divisor_limb,
						  divisor_limb_inverted);
				n1 = n0;
			}
			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
					  n1 << normalization_steps,
					  divisor_limb, divisor_limb_inverted);
			/* Undo the normalization shift on the remainder. */
			return r >> normalization_steps;
		} else {
			mpi_limb_t divisor_limb_inverted;

			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
			 * most significant bit (with weight 2**N) implicit.
			 */
			/* Special case for DIVISOR_LIMB == 100...000.  */
			if (!(divisor_limb << 1))
				divisor_limb_inverted = ~(mpi_limb_t) 0;
			else
				udiv_qrnnd(divisor_limb_inverted, dummy,
					   -divisor_limb, 0, divisor_limb);

			i = dividend_size - 1;
			r = dividend_ptr[i];

			/* If the top limb is already below the divisor its
			 * quotient limb is 0 and it becomes the initial
			 * remainder. */
			if (r >= divisor_limb)
				r = 0;
			else
				quot_ptr[i--] = 0;

			for (; i >= 0; i--) {
				n0 = dividend_ptr[i];
				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
						  n0, divisor_limb,
						  divisor_limb_inverted);
			}
			return r;
		}
	} else {
		if (UDIV_NEEDS_NORMALIZATION) {
			int normalization_steps;

			count_leading_zeros(normalization_steps, divisor_limb);
			if (normalization_steps) {
				divisor_limb <<= normalization_steps;

				n1 = dividend_ptr[dividend_size - 1];
				r = n1 >> (BITS_PER_MPI_LIMB -
					   normalization_steps);

				/* Possible optimization:
				 * if (r == 0
				 * && divisor_limb > ((n1 << normalization_steps)
				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
				 * ...one division less...
				 */
				for (i = dividend_size - 2; i >= 0; i--) {
					n0 = dividend_ptr[i];
					udiv_qrnnd(quot_ptr[i + 1], r, r,
						   ((n1 << normalization_steps)
						    | (n0 >>
						       (BITS_PER_MPI_LIMB -
							normalization_steps))),
						   divisor_limb);
					n1 = n0;
				}
				udiv_qrnnd(quot_ptr[0], r, r,
					   n1 << normalization_steps,
					   divisor_limb);
				return r >> normalization_steps;
			}
		}
		/* No normalization needed, either because udiv_qrnnd doesn't require
		 * it, or because DIVISOR_LIMB is already normalized.  */
		i = dividend_size - 1;
		r = dividend_ptr[i];

		if (r >= divisor_limb)
			r = 0;
		else
			quot_ptr[i--] = 0;

		for (; i >= 0; i--) {
			n0 = dividend_ptr[i];
			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
		}
		return r;
	}
}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c new file mode 100644 index 000000000000..c69c5eef233b --- /dev/null +++ b/lib/mpi/mpih-mul.c @@ -0,0 +1,527 @@ +/* mpihelp-mul.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Pick the schoolbook or the Karatsuba multiply depending on SIZE.
+ * No trailing semicolon after "while (0)": the caller supplies it, so the
+ * macro expands to exactly one statement and stays safe in if/else bodies.
+ */
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mul_n_basecase(prodp, up, vp, size);	\
+		else						\
+			mul_n(prodp, up, vp, size, tspace);	\
+	} while (0)
+
+/* Same dispatch as MPN_MUL_N_RECURSE, for squaring. */
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mpih_sqr_n_basecase(prodp, up, size);	\
+		else						\
+			mpih_sqr_n(prodp, up, size, tspace);	\
+	} while (0)
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+ * always stored.  Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication.  All multiplies rely
+ * on this, both small and huge.  Small ones arrive here immediately.  Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+	mpi_size_t i;
+	mpi_limb_t cy;
+	mpi_limb_t v_limb;
+
+	/* Multiply by the first limb in V separately, as the result can be
+	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+	v_limb = vp[0];
+	if (v_limb <= 1) {
+		if (v_limb == 1)
+			MPN_COPY(prodp, up, size);
+		else
+			MPN_ZERO(prodp, size);
+		cy = 0;
+	} else
+		cy = mpihelp_mul_1(prodp, up, size, v_limb);
+
+	prodp[size] = cy;
+	prodp++;
+
+	/* For each iteration in the outer loop, multiply one limb from
+	 * U with one limb from V, and add it to PROD.  */
+	for (i = 1; i < size; i++) {
+		v_limb = vp[i];
+		if (v_limb <= 1) {
+			cy = 0;
+			if (v_limb == 1)
+				cy = mpihelp_add_n(prodp, prodp, up, size);
+		} else
+			cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+		prodp[size] = cy;
+		prodp++;
+	}
+
+	return cy;
+}
+
+/* Karatsuba multiply of UP and VP (SIZE limbs each) into PRODP
+ * (2 * SIZE limbs).  TSPACE is scratch memory; the callers in this file
+ * hand in a buffer of 2 * SIZE limbs.
+ */
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+		mpi_size_t size, mpi_ptr_t tspace)
+{
+	if (size & 1) {
+		/* The size is odd, and the code below doesn't handle that.
+		 * Multiply the least significant (size - 1) limbs with a recursive
+		 * call, and handle the most significant limb of S1 and S2
+		 * separately.
+		 * A slightly faster way to do this would be to make the Karatsuba
+		 * code below behave as if the size were even, and let it check for
+		 * odd size in the end.  I.e., in essence move this code to the end.
+		 * Doing so would save us a recursive call, and potentially make the
+		 * stack grow a lot less.
+		 */
+		mpi_size_t esize = size - 1;	/* even size */
+		mpi_limb_t cy_limb;
+
+		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+		prodp[esize + esize] = cy_limb;
+		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+		prodp[esize + size] = cy_limb;
+	} else {
+		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+		 *
+		 * Split U in two pieces, U1 and U0, such that
+		 * U = U0 + U1*(B**n),
+		 * and V in V1 and V0, such that
+		 * V = V0 + V1*(B**n).
+		 *
+		 * UV is then computed recursively using the identity
+		 *
+		 *	  2n   n	  n			n
+		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+		 *		  1 1	     1	0   0  1	      0 0
+		 *
+		 * Where B = 2**BITS_PER_MP_LIMB.
+		 */
+		mpi_size_t hsize = size >> 1;
+		mpi_limb_t cy;
+		int negflg;
+
+		/* Product H.	   ________________  ________________
+		 *		  |_____U1 x V1____||____U0 x V0_____|
+		 * Put result in upper part of PROD and pass low part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+				  tspace);
+
+		/* Product M.	   ________________
+		 *		  |_(U1-U0)(V0-V1)_|
+		 */
+		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+			mpihelp_sub_n(prodp, up + hsize, up, hsize);
+			negflg = 0;
+		} else {
+			mpihelp_sub_n(prodp, up, up + hsize, hsize);
+			negflg = 1;
+		}
+		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+			negflg ^= 1;
+		} else {
+			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+			/* No change of NEGFLG.  */
+		}
+		/* Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+				  tspace + size);
+
+		/* Add/copy product H. */
+		MPN_COPY(prodp + hsize, prodp + size, hsize);
+		cy = mpihelp_add_n(prodp + size, prodp + size,
+				   prodp + size + hsize, hsize);
+
+		/* Add product M (if NEGFLG M is a negative number) */
+		if (negflg)
+			cy -=
+			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+					  size);
+		else
+			cy +=
+			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+					  size);
+
+		/* Product L.	   ________________  ________________
+		 *		  |________________||____U0 x V0_____|
+		 * Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+
+		/* Add/copy Product L (twice) */
+
+		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+		if (cy)
+			mpihelp_add_1(prodp + hsize + size,
+				      prodp + hsize + size, hsize, cy);
+
+		MPN_COPY(prodp, tspace, hsize);
+		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+				   hsize);
+		if (cy)
+			mpihelp_add_1(prodp + size, prodp + size, size, 1);
+	}
+}
+
+/* Schoolbook squaring: store UP * UP (SIZE limbs in, 2 * SIZE limbs out)
+ * at PRODP.  Same row-by-row scheme as mul_n_basecase() with V == U, but
+ * nothing is returned.
+ */
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+	mpi_size_t i;
+	mpi_limb_t cy_limb;
+	mpi_limb_t v_limb;
+
+	/* Multiply by the first limb in V separately, as the result can be
+	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+	v_limb = up[0];
+	if (v_limb <= 1) {
+		if (v_limb == 1)
+			MPN_COPY(prodp, up, size);
+		else
+			MPN_ZERO(prodp, size);
+		cy_limb = 0;
+	} else
+		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+
+	prodp[size] = cy_limb;
+	prodp++;
+
+	/* For each iteration in the outer loop, multiply one limb from
+	 * U with one limb from V, and add it to PROD.  */
+	for (i = 1; i < size; i++) {
+		v_limb = up[i];
+		if (v_limb <= 1) {
+			cy_limb = 0;
+			if (v_limb == 1)
+				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+		} else
+			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+		prodp[size] = cy_limb;
+		prodp++;
+	}
+}
+
+/* Karatsuba squaring of UP (SIZE limbs) into PRODP (2 * SIZE limbs), using
+ * TSPACE as scratch memory; mpihelp_mul_n() passes 2 * SIZE limbs.
+ */
+void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+	if (size & 1) {
+		/* The size is odd, and the code below doesn't handle that.
+		 * Multiply the least significant (size - 1) limbs with a recursive
+		 * call, and handle the most significant limb of S1 and S2
+		 * separately.
+		 * A slightly faster way to do this would be to make the Karatsuba
+		 * code below behave as if the size were even, and let it check for
+		 * odd size in the end.  I.e., in essence move this code to the end.
+		 * Doing so would save us a recursive call, and potentially make the
+		 * stack grow a lot less.
+		 */
+		mpi_size_t esize = size - 1;	/* even size */
+		mpi_limb_t cy_limb;
+
+		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+		prodp[esize + esize] = cy_limb;
+		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+
+		prodp[esize + size] = cy_limb;
+	} else {
+		mpi_size_t hsize = size >> 1;
+		mpi_limb_t cy;
+
+		/* Product H.	   ________________  ________________
+		 *		  |_____U1 x U1____||____U0 x U0_____|
+		 * Put result in upper part of PROD and pass low part of TSPACE
+		 * as new TSPACE.
+		 */
+		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+
+		/* Product M.	   ________________
+		 *		  |_(U1-U0)(U0-U1)_|
+		 */
+		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+			mpihelp_sub_n(prodp, up + hsize, up, hsize);
+		else
+			mpihelp_sub_n(prodp, up, up + hsize, hsize);
+
+		/* Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.  */
+		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+
+		/* Add/copy product H  */
+		MPN_COPY(prodp + hsize, prodp + size, hsize);
+		cy = mpihelp_add_n(prodp + size, prodp + size,
+				   prodp + size + hsize, hsize);
+
+		/* Product M = (U1-U0)(U0-U1) is never positive, so the square
+		 * of |U1-U0| held in TSPACE is always subtracted; unlike
+		 * mul_n() there is no NEGFLG to test here.  */
+		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+
+		/* Product L.	   ________________  ________________
+		 *		  |________________||____U0 x U0_____|
+		 * Read temporary operands from low part of PROD.
+		 * Put result in low part of TSPACE using upper part of TSPACE
+		 * as new TSPACE.  */
+		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+
+		/* Add/copy Product L (twice).  */
+		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+		if (cy)
+			mpihelp_add_1(prodp + hsize + size,
+				      prodp + hsize + size, hsize, cy);
+
+		MPN_COPY(prodp, tspace, hsize);
+		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+				   hsize);
+		if (cy)
+			mpihelp_add_1(prodp + size, prodp + size, size, 1);
+	}
+}
+
+/* This should be made into an inline function in gmp.h.  */
+/* Store UP * VP (SIZE limbs each) at PRODP, squaring when UP == VP.
+ * Operands of KARATSUBA_THRESHOLD limbs or more need a temporary
+ * 2 * SIZE limb scratch buffer.  Returns 0, or -ENOMEM if that buffer
+ * cannot be allocated.
+ */
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+	if (up == vp) {
+		if (size < KARATSUBA_THRESHOLD)
+			mpih_sqr_n_basecase(prodp, up, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			if (!tspace)
+				return -ENOMEM;
+			mpih_sqr_n(prodp, up, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	} else {
+		if (size < KARATSUBA_THRESHOLD)
+			mul_n_basecase(prodp, up, vp, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			if (!tspace)
+				return -ENOMEM;
+			mul_n(prodp, up, vp, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	}
+
+	return 0;
+}
+
+/* Multiply U (USIZE limbs) by V (VSIZE limbs) into PRODP by walking U in
+ * chunks of VSIZE limbs.  Scratch buffers are kept in CTX (and in
+ * CTX->next for the recursive tail call) so they can be reused; the caller
+ * frees them with mpihelp_release_karatsuba_ctx().
+ * Returns 0 on success or -ENOMEM on allocation failure.
+ */
+int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+			   mpi_ptr_t up, mpi_size_t usize,
+			   mpi_ptr_t vp, mpi_size_t vsize,
+			   struct karatsuba_ctx *ctx)
+{
+	mpi_limb_t cy;
+
+	if (!ctx->tspace || ctx->tspace_size < vsize) {
+		if (ctx->tspace)
+			mpi_free_limb_space(ctx->tspace);
+		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+		if (!ctx->tspace)
+			return -ENOMEM;
+		ctx->tspace_size = vsize;
+	}
+
+	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+
+	prodp += vsize;
+	up += vsize;
+	usize -= vsize;
+	if (usize >= vsize) {
+		if (!ctx->tp || ctx->tp_size < vsize) {
+			if (ctx->tp)
+				mpi_free_limb_space(ctx->tp);
+			ctx->tp = mpi_alloc_limb_space(2 * vsize);
+			if (!ctx->tp) {
+				if (ctx->tspace)
+					mpi_free_limb_space(ctx->tspace);
+				ctx->tspace = NULL;
+				return -ENOMEM;
+			}
+			ctx->tp_size = vsize;
+		}
+
+		do {
+			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+				      cy);
+			prodp += vsize;
+			up += vsize;
+			usize -= vsize;
+		} while (usize >= vsize);
+	}
+
+	/* Fewer than VSIZE limbs of U are left: multiply them with the
+	 * operands swapped so the longer one (V) comes first. */
+	if (usize) {
+		if (usize < KARATSUBA_THRESHOLD) {
+			mpi_limb_t tmp;
+			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+			    < 0)
+				return -ENOMEM;
+		} else {
+			if (!ctx->next) {
+				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
+				if (!ctx->next)
+					return -ENOMEM;
+			}
+			if (mpihelp_mul_karatsuba_case(ctx->tspace,
+						       vp, vsize,
+						       up, usize,
+						       ctx->next) < 0)
+				return -ENOMEM;
+		}
+
+		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+	}
+
+	return 0;
+}
+
+/* Free the scratch buffers held by CTX and every context chained through
+ * ->next.  The chained contexts themselves are kfree()d; CTX itself is not.
+ */
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+	struct karatsuba_ctx *ctx2;
+
+	if (ctx->tp)
+		mpi_free_limb_space(ctx->tp);
+	if (ctx->tspace)
+		mpi_free_limb_space(ctx->tspace);
+	for (ctx = ctx->next; ctx; ctx = ctx2) {
+		ctx2 = ctx->next;
+		if (ctx->tp)
+			mpi_free_limb_space(ctx->tp);
+		if (ctx->tspace)
+			mpi_free_limb_space(ctx->tspace);
+		kfree(ctx);
+	}
+}
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+ * operands are normalized.  Return the most significant limb of the
+ * result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before finished
+ *	 with U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ */
+
+/* Returns 0 on success, with the most significant limb of the product
+ * stored in *_RESULT (0 when VSIZE is 0), or -ENOMEM when the Karatsuba
+ * scratch space cannot be allocated.
+ */
+int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+	mpi_limb_t cy;
+	struct karatsuba_ctx ctx;
+	int rc;
+
+	if (vsize < KARATSUBA_THRESHOLD) {
+		mpi_size_t i;
+		mpi_limb_t v_limb;
+
+		if (!vsize) {
+			*_result = 0;
+			return 0;
+		}
+
+		/* Multiply by the first limb in V separately, as the result can be
+		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+		v_limb = vp[0];
+		if (v_limb <= 1) {
+			if (v_limb == 1)
+				MPN_COPY(prodp, up, usize);
+			else
+				MPN_ZERO(prodp, usize);
+			cy = 0;
+		} else
+			cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+
+		prodp[usize] = cy;
+		prodp++;
+
+		/* For each iteration in the outer loop, multiply one limb from
+		 * U with one limb from V, and add it to PROD.  */
+		for (i = 1; i < vsize; i++) {
+			v_limb = vp[i];
+			if (v_limb <= 1) {
+				cy = 0;
+				if (v_limb == 1)
+					cy = mpihelp_add_n(prodp, prodp, up,
+							   usize);
+			} else
+				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+
+			prodp[usize] = cy;
+			prodp++;
+		}
+
+		*_result = cy;
+		return 0;
+	}
+
+	memset(&ctx, 0, sizeof ctx);
+	rc = mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx);
+	/* Release the context on failure as well: the Karatsuba code may
+	 * already have attached scratch buffers or chained contexts to it
+	 * before running out of memory, and nothing else frees them. */
+	mpihelp_release_karatsuba_ctx(&ctx);
+	if (rc < 0)
+		return -ENOMEM;
+	*_result = *prod_endp;
+	return 0;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
new file mode 100644
index 000000000000..26e4ed31e256
--- /dev/null
+++ b/lib/mpi/mpiutil.c
@@ -0,0 +1,211 @@
+/* mpiutil.ac  -  Utility functions for MPI
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Note:  It was a bad idea to use the number of limbs to allocate
+ *	  because on a alpha the limbs are large but we normally need
+ *	  integers of n bits - So we should change this to bits (or bytes).
+ *
+ *	  But mpi_alloc is used in a lot of places :-)
+ *
+ * Allocates an MPI with room for NLIMBS limbs (no limb array at all when
+ * NLIMBS is 0) and every other field zeroed.  Returns NULL on allocation
+ * failure.
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+	MPI a;
+
+	a = kmalloc(sizeof *a, GFP_KERNEL);
+	if (!a)
+		return a;
+
+	if (nlimbs) {
+		a->d = mpi_alloc_limb_space(nlimbs);
+		if (!a->d) {
+			kfree(a);
+			return NULL;
+		}
+	} else {
+		a->d = NULL;
+	}
+
+	a->alloced = nlimbs;
+	a->nlimbs = 0;
+	a->sign = 0;
+	a->flags = 0;
+	a->nbits = 0;
+	return a;
+}
+EXPORT_SYMBOL_GPL(mpi_alloc);
+
+/* Allocate an uninitialized array of NLIMBS limbs.
+ * Returns NULL when NLIMBS is 0 or the allocation fails.
+ */
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+	/* Also refuse a limb count whose byte size would wrap around size_t
+	 * (possible where size_t is no wider than unsigned) instead of
+	 * handing back a buffer that is shorter than requested. */
+	if (!nlimbs || nlimbs > (~(size_t) 0) / sizeof(mpi_limb_t))
+		return NULL;
+
+	return kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+}
+
+/* Free a limb array obtained from mpi_alloc_limb_space(); NULL is allowed
+ * because kfree() ignores it.
+ */
+void mpi_free_limb_space(mpi_ptr_t a)
+{
+	kfree(a);
+}
+
+/* Replace the limb array of A with AP (NLIMBS limbs allocated), freeing the
+ * array A held before.  nlimbs/sign/nbits of A are left untouched.
+ */
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+	mpi_free_limb_space(a->d);
+	a->d = ap;
+	a->alloced = nlimbs;
+}
+
+/****************
+ * Resize the array of A to NLIMBS.
+ * the additional space is cleared
+ * (set to 0).  A request that does not exceed the current allocation is
+ * a no-op.  Returns 0 on success or -ENOMEM if the larger array cannot
+ * be allocated, in which case A is left unchanged.
+ */
+int mpi_resize(MPI a, unsigned nlimbs)
+{
+	void *p;
+
+	if (nlimbs <= a->alloced)
+		return 0;	/* no need to do it */
+
+	/* kcalloc() hands back zeroed memory, so the limbs beyond the old
+	 * allocation really are cleared as promised above (a plain
+	 * kmalloc() + memcpy() left them uninitialized), and it fails
+	 * cleanly if nlimbs * sizeof(mpi_limb_t) would overflow. */
+	p = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+	if (a->d) {
+		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+		kfree(a->d);
+	}
+	a->d = p;
+	a->alloced = nlimbs;
+	return 0;
+}
+
+/* Reset A to an empty value; the limb array and the sign are kept. */
+void mpi_clear(MPI a)
+{
+	a->nlimbs = 0;
+	a->nbits = 0;
+	a->flags = 0;
+}
+
+/* Free A and its limb array.  NULL is accepted. */
+void mpi_free(MPI a)
+{
+	if (!a)
+		return;
+
+	if (a->flags & 4)
+		kfree(a->d);
+	else
+		mpi_free_limb_space(a->d);
+
+	if (a->flags & ~7)
+		pr_info("invalid flag value in mpi\n");
+	kfree(a);
+}
+EXPORT_SYMBOL_GPL(mpi_free);
+
+/****************
+ * Note: This copy function should not interpret the MPI
+ *	 but copy it transparently.
+ *
+ * *COPIED is set to a new MPI, or to NULL when A is NULL.
+ * Returns 0 on success or -ENOMEM if the copy cannot be allocated.
+ */
+int mpi_copy(MPI *copied, const MPI a)
+{
+	size_t i;
+	MPI b;
+
+	*copied = NULL;
+
+	if (a) {
+		b = mpi_alloc(a->nlimbs);
+		if (!b)
+			return -ENOMEM;
+
+		b->nlimbs = a->nlimbs;
+		b->sign = a->sign;
+		b->flags = a->flags;
+		b->nbits = a->nbits;
+
+		for (i = 0; i < b->nlimbs; i++)
+			b->d[i] = a->d[i];
+
+		*copied = b;
+	}
+
+	return 0;
+}
+
+/* Assign the value of U to W, growing W if needed.
+ * Returns 0 on success or -ENOMEM if W cannot be resized.
+ */
+int mpi_set(MPI w, const MPI u)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize = u->nlimbs;
+	int usign = u->sign;
+
+	if (RESIZE_IF_NEEDED(w, (size_t) usize) < 0)
+		return -ENOMEM;
+
+	wp = w->d;
+	up = u->d;
+	MPN_COPY(wp, up, usize);
+	w->nlimbs = usize;
+	w->nbits = u->nbits;
+	w->flags = u->flags;
+	w->sign = usign;
+	return 0;
+}
+
+/* Set W to the non-negative value U (zero is stored as nlimbs == 0).
+ * Returns 0 on success or -ENOMEM if W cannot be resized.
+ */
+int mpi_set_ui(MPI w, unsigned long u)
+{
+	if (RESIZE_IF_NEEDED(w, 1) < 0)
+		return -ENOMEM;
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	w->nbits = 0;
+	w->flags = 0;
+	return 0;
+}
+
+/* Allocate a one-limb MPI holding U; returns NULL on allocation failure. */
+MPI mpi_alloc_set_ui(unsigned long u)
+{
+	MPI w = mpi_alloc(1);
+	if (!w)
+		return w;
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	return w;
+}
+
+/* Exchange the complete contents (limb pointer included) of A and B. */
+void mpi_swap(MPI a, MPI b)
+{
+	struct gcry_mpi tmp;
+
+	tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index a8408b6cacdf..4226dfeb5178 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -5,7 +5,7 @@
  * 		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/jiffies.h>
diff --git a/lib/parser.c b/lib/parser.c
index dcbaaef6cf11..c43410084838 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -6,7 +6,8 @@
  */
 
 #include <linux/ctype.h>
-#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/export.h>
 #include <linux/parser.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c
new file mode 100644
index 000000000000..0d83ea8a9605
--- /dev/null
+++ b/lib/pci_iomap.c
@@ -0,0 +1,48 @@
+/*
+ * Implement the default iomap interfaces
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/pci.h>
+#include <linux/io.h>
+
+#include <linux/export.h>
+
+#ifdef CONFIG_PCI
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	const resource_size_t base = pci_resource_start(dev, bar);
+	const unsigned long res_flags = pci_resource_flags(dev, bar);
+	resource_size_t size = pci_resource_len(dev, bar);
+
+	/* A BAR without a start address or without a length is not mapped. */
+	if (!base || !size)
+		return NULL;
+
+	/* Clip the mapping when the caller asked for less than the BAR. */
+	if (maxlen && size > maxlen)
+		size = maxlen;
+
+	/* The port I/O flag is tested before the memory flag. */
+	if (res_flags & IORESOURCE_IO)
+		return __pci_ioport_map(dev, base, size);
+
+	/* What? */
+	if (!(res_flags & IORESOURCE_MEM))
+		return NULL;
+
+	return (res_flags & IORESOURCE_CACHEABLE) ?
+		ioremap(base, size) : ioremap_nocache(base, size);
+}
+
+EXPORT_SYMBOL(pci_iomap);
+#endif /* CONFIG_PCI */
diff --git a/lib/plist.c b/lib/plist.c
index a0a4da489c22..6ab0e521c48b 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -23,6 +23,7 @@
  * information.
  */
 
+#include <linux/bug.h>
 #include <linux/plist.h>
 #include <linux/spinlock.h>
 
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
index ccfd850b0dec..8d443af03b4c 100644
--- a/lib/prio_tree.c
+++ b/lib/prio_tree.c
@@ -85,6 +85,17 @@ static inline unsigned long prio_tree_maxindex(unsigned int bits)
 	return index_bits_to_maxindex[bits - 1];
 }
 
+static void prio_set_parent(struct prio_tree_node *parent,
+		struct prio_tree_node *child, bool left)
+{
+	if (left)
+		parent->left = child;
+	else
+		parent->right = child;
+
+	child->parent = parent;
+}
+
 /*
  * Extend a priority search tree so that it can store a node with heap_index
  * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
@@ -94,45 +105,32 @@ static inline unsigned long prio_tree_maxindex(unsigned int bits) static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root, struct prio_tree_node *node, unsigned long max_heap_index) { - struct prio_tree_node *first = NULL, *prev, *last = NULL; + struct prio_tree_node *prev; if (max_heap_index > prio_tree_maxindex(root->index_bits)) root->index_bits++; + prev = node; + INIT_PRIO_TREE_NODE(node); + while (max_heap_index > prio_tree_maxindex(root->index_bits)) { + struct prio_tree_node *tmp = root->prio_tree_node; + root->index_bits++; if (prio_tree_empty(root)) continue; - if (first == NULL) { - first = root->prio_tree_node; - prio_tree_remove(root, root->prio_tree_node); - INIT_PRIO_TREE_NODE(first); - last = first; - } else { - prev = last; - last = root->prio_tree_node; - prio_tree_remove(root, root->prio_tree_node); - INIT_PRIO_TREE_NODE(last); - prev->left = last; - last->parent = prev; - } - } - - INIT_PRIO_TREE_NODE(node); - - if (first) { - node->left = first; - first->parent = node; - } else - last = node; + prio_tree_remove(root, root->prio_tree_node); + INIT_PRIO_TREE_NODE(tmp); - if (!prio_tree_empty(root)) { - last->left = root->prio_tree_node; - last->left->parent = last; + prio_set_parent(prev, tmp, true); + prev = tmp; } + if (!prio_tree_empty(root)) + prio_set_parent(prev, root->prio_tree_node, true); + root->prio_tree_node = node; return node; } @@ -151,25 +149,15 @@ struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root, * We can reduce root->index_bits here. However, it is complex * and does not help much to improve performance (IMO). 
*/ - node->parent = node; root->prio_tree_node = node; - } else { - node->parent = old->parent; - if (old->parent->left == old) - old->parent->left = node; - else - old->parent->right = node; - } + } else + prio_set_parent(old->parent, node, old->parent->left == old); - if (!prio_tree_left_empty(old)) { - node->left = old->left; - old->left->parent = node; - } + if (!prio_tree_left_empty(old)) + prio_set_parent(node, old->left, true); - if (!prio_tree_right_empty(old)) { - node->right = old->right; - old->right->parent = node; - } + if (!prio_tree_right_empty(old)) + prio_set_parent(node, old->right, false); return old; } @@ -229,16 +217,14 @@ struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root, if (index & mask) { if (prio_tree_right_empty(cur)) { INIT_PRIO_TREE_NODE(node); - cur->right = node; - node->parent = cur; + prio_set_parent(cur, node, false); return res; } else cur = cur->right; } else { if (prio_tree_left_empty(cur)) { INIT_PRIO_TREE_NODE(node); - cur->left = node; - node->parent = cur; + prio_set_parent(cur, node, true); return res; } else cur = cur->left; @@ -305,6 +291,40 @@ void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node) cur = prio_tree_replace(root, cur->parent, cur); } +static void iter_walk_down(struct prio_tree_iter *iter) +{ + iter->mask >>= 1; + if (iter->mask) { + if (iter->size_level) + iter->size_level++; + return; + } + + if (iter->size_level) { + BUG_ON(!prio_tree_left_empty(iter->cur)); + BUG_ON(!prio_tree_right_empty(iter->cur)); + iter->size_level++; + iter->mask = ULONG_MAX; + } else { + iter->size_level = 1; + iter->mask = 1UL << (BITS_PER_LONG - 1); + } +} + +static void iter_walk_up(struct prio_tree_iter *iter) +{ + if (iter->mask == ULONG_MAX) + iter->mask = 1UL; + else if (iter->size_level == 1) + iter->mask = 1UL; + else + iter->mask <<= 1; + if (iter->size_level) + iter->size_level--; + if (!iter->size_level && (iter->value & iter->mask)) + iter->value ^= iter->mask; +} + /* * 
Following functions help to enumerate all prio_tree_nodes in the tree that * overlap with the input interval X [radix_index, heap_index]. The enumeration @@ -323,21 +343,7 @@ static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter, if (iter->r_index <= *h_index) { iter->cur = iter->cur->left; - iter->mask >>= 1; - if (iter->mask) { - if (iter->size_level) - iter->size_level++; - } else { - if (iter->size_level) { - BUG_ON(!prio_tree_left_empty(iter->cur)); - BUG_ON(!prio_tree_right_empty(iter->cur)); - iter->size_level++; - iter->mask = ULONG_MAX; - } else { - iter->size_level = 1; - iter->mask = 1UL << (BITS_PER_LONG - 1); - } - } + iter_walk_down(iter); return iter->cur; } @@ -364,22 +370,7 @@ static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter, if (iter->r_index <= *h_index) { iter->cur = iter->cur->right; - iter->mask >>= 1; - iter->value = value; - if (iter->mask) { - if (iter->size_level) - iter->size_level++; - } else { - if (iter->size_level) { - BUG_ON(!prio_tree_left_empty(iter->cur)); - BUG_ON(!prio_tree_right_empty(iter->cur)); - iter->size_level++; - iter->mask = ULONG_MAX; - } else { - iter->size_level = 1; - iter->mask = 1UL << (BITS_PER_LONG - 1); - } - } + iter_walk_down(iter); return iter->cur; } @@ -389,16 +380,7 @@ static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter, static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter) { iter->cur = iter->cur->parent; - if (iter->mask == ULONG_MAX) - iter->mask = 1UL; - else if (iter->size_level == 1) - iter->mask = 1UL; - else - iter->mask <<= 1; - if (iter->size_level) - iter->size_level--; - if (!iter->size_level && (iter->value & iter->mask)) - iter->value ^= iter->mask; + iter_walk_up(iter); return iter->cur; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d9df7454519c..86516f5588e3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -3,6 +3,7 @@ * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 
2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin + * Copyright (C) 2012 Konstantin Khlebnikov * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -22,7 +23,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/radix-tree.h> #include <linux/percpu.h> #include <linux/slab.h> @@ -48,16 +49,14 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; - struct rcu_head rcu_head; + union { + struct radix_tree_node *parent; /* Used when ascending tree */ + struct rcu_head rcu_head; /* Used when freeing node */ + }; void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -struct radix_tree_path { - struct radix_tree_node *node; - int offset; -}; - #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) @@ -148,6 +147,43 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) } return 0; } + +/** + * radix_tree_find_next_bit - find the next set bit in a memory region + * + * @addr: The address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Unrollable variant of find_next_bit() for constant size arrays. + * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. + * Returns next bit offset, or size if nothing found. 
+ */ +static __always_inline unsigned long +radix_tree_find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + if (!__builtin_constant_p(size)) + return find_next_bit(addr, size, offset); + + if (offset < size) { + unsigned long tmp; + + addr += offset / BITS_PER_LONG; + tmp = *addr >> (offset % BITS_PER_LONG); + if (tmp) + return __ffs(tmp) + offset; + offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1); + while (offset < size) { + tmp = *++addr; + if (tmp) + return __ffs(tmp) + offset; + offset += BITS_PER_LONG; + } + } + return size; +} + /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. @@ -256,6 +292,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) { struct radix_tree_node *node; + struct radix_tree_node *slot; unsigned int height; int tag; @@ -274,18 +311,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) if (!(node = radix_tree_node_alloc(root))) return -ENOMEM; - /* Increase the height. */ - node->slots[0] = indirect_to_ptr(root->rnode); - /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { if (root_tag_get(root, tag)) tag_set(node, tag, 0); } + /* Increase the height. 
*/ newheight = root->height+1; node->height = newheight; node->count = 1; + node->parent = NULL; + slot = root->rnode; + if (newheight > 1) { + slot = indirect_to_ptr(slot); + slot->parent = node; + } + node->slots[0] = slot; node = ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); root->height = newheight; @@ -331,6 +373,7 @@ int radix_tree_insert(struct radix_tree_root *root, if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; slot->height = height; + slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], slot); node->count++; @@ -504,47 +547,41 @@ EXPORT_SYMBOL(radix_tree_tag_set); void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag) { - /* - * The radix tree path needs to be one longer than the maximum path - * since the "list" is null terminated. - */ - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *node = NULL; struct radix_tree_node *slot = NULL; unsigned int height, shift; + int uninitialized_var(offset); height = root->height; if (index > radix_tree_maxindex(height)) goto out; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; + shift = height * RADIX_TREE_MAP_SHIFT; slot = indirect_to_ptr(root->rnode); - while (height > 0) { - int offset; - + while (shift) { if (slot == NULL) goto out; + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp[1].offset = offset; - pathp[1].node = slot; + node = slot; slot = slot->slots[offset]; - pathp++; - shift -= RADIX_TREE_MAP_SHIFT; - height--; } if (slot == NULL) goto out; - while (pathp->node) { - if (!tag_get(pathp->node, tag, pathp->offset)) + while (node) { + if (!tag_get(node, tag, offset)) goto out; - tag_clear(pathp->node, tag, pathp->offset); - if (any_tag_set(pathp->node, tag)) + tag_clear(node, tag, offset); + if (any_tag_set(node, tag)) goto out; - pathp--; + + index >>= RADIX_TREE_MAP_SHIFT; + offset = index & 
RADIX_TREE_MAP_MASK; + node = node->parent; } /* clear the root's tag bit */ @@ -614,6 +651,119 @@ int radix_tree_tag_get(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_tag_get); /** + * radix_tree_next_chunk - find next chunk of slots for iteration + * + * @root: radix tree root + * @iter: iterator state + * @flags: RADIX_TREE_ITER_* flags and tag index + * Returns: pointer to chunk first slot, or NULL if iteration is over + */ +void **radix_tree_next_chunk(struct radix_tree_root *root, + struct radix_tree_iter *iter, unsigned flags) +{ + unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; + struct radix_tree_node *rnode, *node; + unsigned long index, offset; + + if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) + return NULL; + + /* + * Catch next_index overflow after ~0UL. iter->index never overflows + * during iterating; it can be zero only at the beginning. + * And we cannot overflow iter->next_index in a single step, + * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. + */ + index = iter->next_index; + if (!index && iter->index) + return NULL; + + rnode = rcu_dereference_raw(root->rnode); + if (radix_tree_is_indirect_ptr(rnode)) { + rnode = indirect_to_ptr(rnode); + } else if (rnode && !index) { + /* Single-slot tree */ + iter->index = 0; + iter->next_index = 1; + iter->tags = 1; + return (void **)&root->rnode; + } else + return NULL; + +restart: + shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT; + offset = index >> shift; + + /* Index outside of the tree */ + if (offset >= RADIX_TREE_MAP_SIZE) + return NULL; + + node = rnode; + while (1) { + if ((flags & RADIX_TREE_ITER_TAGGED) ? 
+ !test_bit(offset, node->tags[tag]) : + !node->slots[offset]) { + /* Hole detected */ + if (flags & RADIX_TREE_ITER_CONTIG) + return NULL; + + if (flags & RADIX_TREE_ITER_TAGGED) + offset = radix_tree_find_next_bit( + node->tags[tag], + RADIX_TREE_MAP_SIZE, + offset + 1); + else + while (++offset < RADIX_TREE_MAP_SIZE) { + if (node->slots[offset]) + break; + } + index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index += offset << shift; + /* Overflow after ~0UL */ + if (!index) + return NULL; + if (offset == RADIX_TREE_MAP_SIZE) + goto restart; + } + + /* This is leaf-node */ + if (!shift) + break; + + node = rcu_dereference_raw(node->slots[offset]); + if (node == NULL) + goto restart; + shift -= RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + } + + /* Update the iterator state */ + iter->index = index; + iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1; + + /* Construct iter->tags bit-mask from node->tags[tag] array */ + if (flags & RADIX_TREE_ITER_TAGGED) { + unsigned tag_long, tag_bit; + + tag_long = offset / BITS_PER_LONG; + tag_bit = offset % BITS_PER_LONG; + iter->tags = node->tags[tag][tag_long] >> tag_bit; + /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ + if (tag_long < RADIX_TREE_TAG_LONGS - 1) { + /* Pick tags from next element */ + if (tag_bit) + iter->tags |= node->tags[tag][tag_long + 1] << + (BITS_PER_LONG - tag_bit); + /* Clip chunk size, here only BITS_PER_LONG tags */ + iter->next_index = index + BITS_PER_LONG; + } + } + + return node->slots + offset; +} +EXPORT_SYMBOL(radix_tree_next_chunk); + +/** * radix_tree_range_tag_if_tagged - for each item in given range set given * tag if item has another tag set * @root: radix tree root @@ -646,8 +796,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned int iftag, unsigned int settag) { unsigned int height = root->height; - struct radix_tree_path path[height]; - struct radix_tree_path *pathp = path; + struct radix_tree_node *node 
= NULL; struct radix_tree_node *slot; unsigned int shift; unsigned long tagged = 0; @@ -671,14 +820,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift = (height - 1) * RADIX_TREE_MAP_SHIFT; slot = indirect_to_ptr(root->rnode); - /* - * we fill the path from (root->height - 2) to 0, leaving the index at - * (root->height - 1) as a terminator. Zero the node in the terminator - * so that we can use this to end walk loops back up the path. - */ - path[height - 1].node = NULL; - for (;;) { + unsigned long upindex; int offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -686,12 +829,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; if (!tag_get(slot, iftag, offset)) goto next; - if (height > 1) { + if (shift) { /* Go down one level */ - height--; shift -= RADIX_TREE_MAP_SHIFT; - path[height - 1].node = slot; - path[height - 1].offset = offset; + node = slot; slot = slot->slots[offset]; continue; } @@ -701,15 +842,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ - pathp = &path[0]; - while (pathp->node) { + upindex = index; + while (node) { + upindex >>= RADIX_TREE_MAP_SHIFT; + offset = upindex & RADIX_TREE_MAP_MASK; + /* stop if we find a node with the tag already set */ - if (tag_get(pathp->node, settag, pathp->offset)) + if (tag_get(node, settag, offset)) break; - tag_set(pathp->node, settag, pathp->offset); - pathp++; + tag_set(node, settag, offset); + node = node->parent; } + /* + * Small optimization: now clear that node pointer. + * Since all of this slot's ancestors now have the tag set + * from setting it above, we have no further need to walk + * back up the tree setting tags, until we update slot to + * point to another radix_tree_node. 
+ */ + node = NULL; + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; @@ -724,8 +877,7 @@ next: * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. */ - slot = path[height - 1].node; - height++; + slot = slot->parent; shift += RADIX_TREE_MAP_SHIFT; } } @@ -816,57 +968,6 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_prev_hole); -static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, - unsigned long index, unsigned int max_items, unsigned long *next_index) -{ - unsigned int nr_found = 0; - unsigned int shift, height; - unsigned long i; - - height = slot->height; - if (height == 0) - goto out; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - for ( ; height > 1; height--) { - i = (index >> shift) & RADIX_TREE_MAP_MASK; - for (;;) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } - - shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - goto out; - } - - /* Bottom level: grab some items */ - for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i]) { - results[nr_found] = &(slot->slots[i]); - if (indices) - indices[nr_found] = index; - if (++nr_found == max_items) { - index++; - goto out; - } - } - index++; - } -out: - *next_index = index; - return nr_found; -} - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root @@ -890,48 +991,19 @@ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { - unsigned long max_index; - struct radix_tree_node *node; - unsigned long cur_index = first_index; - unsigned int ret; + struct radix_tree_iter 
iter; + void **slot; + unsigned int ret = 0; - node = rcu_dereference_raw(root->rnode); - if (!node) + if (unlikely(!max_items)) return 0; - if (!radix_tree_is_indirect_ptr(node)) { - if (first_index > 0) - return 0; - results[0] = node; - return 1; - } - node = indirect_to_ptr(node); - - max_index = radix_tree_maxindex(node->height); - - ret = 0; - while (ret < max_items) { - unsigned int nr_found, slots_found, i; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - slots_found = __lookup(node, (void ***)results + ret, NULL, - cur_index, max_items - ret, &next_index); - nr_found = 0; - for (i = 0; i < slots_found; i++) { - struct radix_tree_node *slot; - slot = *(((void ***)results)[ret + i]); - if (!slot) - continue; - results[ret + nr_found] = - indirect_to_ptr(rcu_dereference_raw(slot)); - nr_found++; - } - ret += nr_found; - if (next_index == 0) + radix_tree_for_each_slot(slot, root, &iter, first_index) { + results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + if (!results[ret]) + continue; + if (++ret == max_items) break; - cur_index = next_index; } return ret; @@ -961,112 +1033,25 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { - unsigned long max_index; - struct radix_tree_node *node; - unsigned long cur_index = first_index; - unsigned int ret; + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; - node = rcu_dereference_raw(root->rnode); - if (!node) + if (unlikely(!max_items)) return 0; - if (!radix_tree_is_indirect_ptr(node)) { - if (first_index > 0) - return 0; - results[0] = (void **)&root->rnode; + radix_tree_for_each_slot(slot, root, &iter, first_index) { + results[ret] = slot; if (indices) - indices[0] = 0; - return 1; - } - node = indirect_to_ptr(node); - - max_index = radix_tree_maxindex(node->height); - - ret = 0; - while (ret < max_items) { - unsigned int slots_found; - 
unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - slots_found = __lookup(node, results + ret, - indices ? indices + ret : NULL, - cur_index, max_items - ret, &next_index); - ret += slots_found; - if (next_index == 0) + indices[ret] = iter.index; + if (++ret == max_items) break; - cur_index = next_index; } return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_slot); -/* - * FIXME: the two tag_get()s here should use find_next_bit() instead of - * open-coding the search. - */ -static unsigned int -__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index, unsigned int tag) -{ - unsigned int nr_found = 0; - unsigned int shift, height; - - height = slot->height; - if (height == 0) - goto out; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - while (height > 0) { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; - - for (;;) { - if (tag_get(slot, tag, i)) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } - height--; - if (height == 0) { /* Bottom level: grab some items */ - unsigned long j = index & RADIX_TREE_MAP_MASK; - - for ( ; j < RADIX_TREE_MAP_SIZE; j++) { - index++; - if (!tag_get(slot, tag, j)) - continue; - /* - * Even though the tag was found set, we need to - * recheck that we have a non-NULL node, because - * if this lookup is lockless, it may have been - * subsequently deleted. - * - * Similar care must be taken in any place that - * lookup ->slots[x] without a lock (ie. can't - * rely on its value remaining the same). 
- */ - if (slot->slots[j]) { - results[nr_found++] = &(slot->slots[j]); - if (nr_found == max_items) - goto out; - } - } - } - shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - break; - } -out: - *next_index = index; - return nr_found; -} - /** * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree * based on a tag @@ -1085,52 +1070,19 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { - struct radix_tree_node *node; - unsigned long max_index; - unsigned long cur_index = first_index; - unsigned int ret; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; - node = rcu_dereference_raw(root->rnode); - if (!node) + if (unlikely(!max_items)) return 0; - if (!radix_tree_is_indirect_ptr(node)) { - if (first_index > 0) - return 0; - results[0] = node; - return 1; - } - node = indirect_to_ptr(node); - - max_index = radix_tree_maxindex(node->height); - - ret = 0; - while (ret < max_items) { - unsigned int nr_found, slots_found, i; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - slots_found = __lookup_tag(node, (void ***)results + ret, - cur_index, max_items - ret, &next_index, tag); - nr_found = 0; - for (i = 0; i < slots_found; i++) { - struct radix_tree_node *slot; - slot = *(((void ***)results)[ret + i]); - if (!slot) - continue; - results[ret + nr_found] = - indirect_to_ptr(rcu_dereference_raw(slot)); - nr_found++; - } - ret += nr_found; - if (next_index == 0) + radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { + results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + if (!results[ret]) + continue; + if (++ret == max_items) break; - cur_index = next_index; } return ret; @@ -1155,42 +1107,17 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root 
*root, void ***results, unsigned long first_index, unsigned int max_items, unsigned int tag) { - struct radix_tree_node *node; - unsigned long max_index; - unsigned long cur_index = first_index; - unsigned int ret; - - /* check the root's tag bit */ - if (!root_tag_get(root, tag)) - return 0; + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; - node = rcu_dereference_raw(root->rnode); - if (!node) + if (unlikely(!max_items)) return 0; - if (!radix_tree_is_indirect_ptr(node)) { - if (first_index > 0) - return 0; - results[0] = (void **)&root->rnode; - return 1; - } - node = indirect_to_ptr(node); - - max_index = radix_tree_maxindex(node->height); - - ret = 0; - while (ret < max_items) { - unsigned int slots_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) + radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { + results[ret] = slot; + if (++ret == max_items) break; - slots_found = __lookup_tag(node, results + ret, - cur_index, max_items - ret, &next_index, tag); - ret += slots_found; - if (next_index == 0) - break; - cur_index = next_index; } return ret; @@ -1299,7 +1226,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* try to shrink tree height */ while (root->height > 0) { struct radix_tree_node *to_free = root->rnode; - void *newptr; + struct radix_tree_node *slot; BUG_ON(!radix_tree_is_indirect_ptr(to_free)); to_free = indirect_to_ptr(to_free); @@ -1320,10 +1247,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. 
*/ - newptr = to_free->slots[0]; - if (root->height > 1) - newptr = ptr_to_indirect(newptr); - root->rnode = newptr; + slot = to_free->slots[0]; + if (root->height > 1) { + slot->parent = NULL; + slot = ptr_to_indirect(slot); + } + root->rnode = slot; root->height--; /* @@ -1363,16 +1292,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) */ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { - /* - * The radix tree path needs to be one longer than the maximum path - * since the "list" is null terminated. - */ - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *node = NULL; struct radix_tree_node *slot = NULL; struct radix_tree_node *to_free; unsigned int height, shift; int tag; - int offset; + int uninitialized_var(offset); height = root->height; if (index > radix_tree_maxindex(height)) @@ -1385,39 +1310,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) goto out; } slot = indirect_to_ptr(slot); - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; + shift = height * RADIX_TREE_MAP_SHIFT; do { if (slot == NULL) goto out; - pathp++; + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; + node = slot; slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); + } while (shift); if (slot == NULL) goto out; /* - * Clear all tags associated with the just-deleted item + * Clear all tags associated with the item to be deleted. + * This way of doing it would be inefficient, but seldom is any set. 
*/ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (tag_get(pathp->node, tag, pathp->offset)) + if (tag_get(node, tag, offset)) radix_tree_tag_clear(root, index, tag); } to_free = NULL; /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; + while (node) { + node->slots[offset] = NULL; + node->count--; /* * Queue the node for deferred freeing after the * last reference to it disappears (set NULL, above). @@ -1425,17 +1346,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) if (to_free) radix_tree_node_free(to_free); - if (pathp->node->count) { - if (pathp->node == indirect_to_ptr(root->rnode)) + if (node->count) { + if (node == indirect_to_ptr(root->rnode)) radix_tree_shrink(root); goto out; } /* Node with zero slots in use so free it */ - to_free = pathp->node; - pathp--; + to_free = node; + index >>= RADIX_TREE_MAP_SHIFT; + offset = index & RADIX_TREE_MAP_MASK; + node = node->parent; } + root_tag_clear_all(root); root->height = 0; root->rnode = NULL; diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 2654d5c854be..b71012b756f4 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -28,8 +28,8 @@ #include <altivec.h> #ifdef __KERNEL__ -# include <asm/system.h> # include <asm/cputable.h> +# include <asm/switch_to.h> #endif /* diff --git a/lib/random32.c b/lib/random32.c index fc3545a32771..938bde5876ac 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -35,7 +35,7 @@ #include <linux/types.h> #include <linux/percpu.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/jiffies.h> #include <linux/random.h> diff --git a/lib/ratelimit.c b/lib/ratelimit.c index c96d500577de..40e03ea2a967 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -11,7 +11,7 @@ #include <linux/ratelimit.h> #include <linux/jiffies.h> -#include <linux/module.h> +#include <linux/export.h> /* * __ratelimit - rate limiting diff --git 
a/lib/rational.c b/lib/rational.c index 3ed247b80662..d326da3976f5 100644 --- a/lib/rational.c +++ b/lib/rational.c @@ -7,7 +7,8 @@ */ #include <linux/rational.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> /* * calculate best rational approximation for a given fraction diff --git a/lib/rbtree.c b/lib/rbtree.c index a16be19a1305..d4175565dc2c 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -21,7 +21,7 @@ */ #include <linux/rbtree.h> -#include <linux/module.h> +#include <linux/export.h> static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) { diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index f2393c21fe85..7e0d6a58fc83 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -7,7 +7,7 @@ */ #include <linux/rwsem.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/export.h> struct rwsem_waiter { struct list_head list; diff --git a/lib/rwsem.c b/lib/rwsem.c index 410aa1189b13..8337e1b9bb8d 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -6,7 +6,7 @@ #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> /* * Initialize an rwsem: diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 4ceb05d772ae..6096e89bee55 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -6,7 +6,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/highmem.h> @@ -390,7 +390,7 @@ bool sg_miter_next(struct sg_mapping_iter *miter) miter->consumed = miter->length; if (miter->__flags & SG_MITER_ATOMIC) - miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off; + miter->addr = kmap_atomic(miter->page) + off; else miter->addr = kmap(miter->page) + off; @@ -424,7 +424,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) if (miter->__flags & SG_MITER_ATOMIC) { WARN_ON(!irqs_disabled()); - kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); + kunmap_atomic(miter->addr); } else kunmap(miter->page); diff --git a/lib/sha1.c b/lib/sha1.c index 1de509a159c8..1df191e04a24 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -6,7 +6,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/bitops.h> #include <linux/cryptohash.h> #include <asm/unaligned.h> diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 503f087382a4..4c0d0e51d49e 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -3,7 +3,7 @@ * * DEBUG_PREEMPT variant of smp_processor_id(). 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kallsyms.h> #include <linux/sched.h> diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 5f3eacdd6178..525d160d44f0 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -11,7 +11,7 @@ #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, struct lock_class_key *key) diff --git a/lib/string.c b/lib/string.c index dc4a86341f91..e5878de4f101 100644 --- a/lib/string.c +++ b/lib/string.c @@ -22,7 +22,10 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> -#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/bug.h> +#include <linux/errno.h> #ifndef __HAVE_ARCH_STRNICMP /** @@ -785,12 +788,24 @@ void *memchr_inv(const void *start, int c, size_t bytes) if (bytes <= 16) return check_bytes8(start, value, bytes); - value64 = value | value << 8 | value << 16 | value << 24; - value64 = (value64 & 0xffffffff) | value64 << 32; - prefix = 8 - ((unsigned long)start) % 8; + value64 = value; +#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 + value64 *= 0x0101010101010101; +#elif defined(ARCH_HAS_FAST_MULTIPLIER) + value64 *= 0x01010101; + value64 |= value64 << 32; +#else + value64 |= value64 << 8; + value64 |= value64 << 16; + value64 |= value64 << 32; +#endif + prefix = (unsigned long)start % 8; if (prefix) { - u8 *r = check_bytes8(start, value, prefix); + u8 *r; + + prefix = 8 - prefix; + r = check_bytes8(start, value, prefix); if (r) return r; start += prefix; diff --git a/lib/string_helpers.c b/lib/string_helpers.c index ab431d4cc970..dd4ece372699 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -5,7 +5,7 @@ */ #include <linux/kernel.h> #include <linux/math64.h> -#include <linux/module.h> +#include <linux/export.h> #include 
<linux/string_helpers.h> /** diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 99093b396145..414f46ed1dcd 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -20,7 +20,7 @@ #include <linux/cache.h> #include <linux/dma-mapping.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/swiotlb.h> @@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -unsigned long swioltb_nr_tbl(void) +unsigned long swiotlb_nr_tbl(void) { return io_tlb_nslabs; } - +EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -321,6 +321,7 @@ void __init swiotlb_free(void) free_bootmem_late(__pa(io_tlb_start), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } + io_tlb_nslabs = 0; } static int is_swiotlb_buffer(phys_addr_t paddr) @@ -348,13 +349,12 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn), - KM_BOUNCE_READ); + buffer = kmap_atomic(pfn_to_page(pfn)); if (dir == DMA_TO_DEVICE) memcpy(dma_addr, buffer + offset, sz); else memcpy(buffer + offset, dma_addr, sz); - kunmap_atomic(buffer, KM_BOUNCE_READ); + kunmap_atomic(buffer); local_irq_restore(flags); size -= sz; diff --git a/lib/syscall.c b/lib/syscall.c index a4f7067f72fa..58710eefeac8 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -1,6 +1,6 @@ #include <linux/ptrace.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/syscall.h> static int collect_syscall(struct task_struct *target, long *callno, diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 191176a43e9a..a382e4a32609 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -22,9 +22,10 @@ * Foundation, Inc., 59 Temple Place, 
Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/bug.h> #include <linux/timerqueue.h> #include <linux/rbtree.h> -#include <linux/module.h> +#include <linux/export.h> /** * timerqueue_add - Adds timer to timerqueue. diff --git a/lib/uuid.c b/lib/uuid.c index 8fadd7cef46c..52a6fe6387de 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -19,7 +19,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/uuid.h> #include <linux/random.h> diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8e75003d62f6..abbabec9720a 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,7 +17,7 @@ */ #include <stdarg.h> -#include <linux/module.h> +#include <linux/module.h> /* for KSYM_SYMBOL_LEN */ #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> @@ -212,6 +212,26 @@ char *put_dec(char *buf, unsigned long long num) } } +/* + * Convert passed number to decimal string. + * Returns the length of string. On buffer overflow, returns 0. + * + * If speed is not important, use snprintf(). It's easy to read the code. + */ +int num_to_str(char *buf, int size, unsigned long long num) +{ + char tmp[21]; /* Enough for 2^64 in decimal */ + int idx, len; + + len = put_dec(tmp, num) - tmp; + + if (len > size) + return 0; + for (idx = 0; idx < len; ++idx) + buf[idx] = tmp[len - idx - 1]; + return len; +} + #define ZEROPAD 1 /* pad with zero */ #define SIGN 2 /* unsigned/signed long */ #define PLUS 4 /* show plus */ @@ -891,9 +911,15 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - return buf + vsnprintf(buf, end > buf ? end - buf : 0, - ((struct va_format *)ptr)->fmt, - *(((struct va_format *)ptr)->va)); + { + va_list va; + + va_copy(va, *((struct va_format *)ptr)->va); + buf += vsnprintf(buf, end > buf ? 
end - buf : 0, + ((struct va_format *)ptr)->fmt, va); + va_end(va); + return buf; + } case 'K': /* * %pK cannot be used in IRQ context because its test |
