281 files changed, 6069 insertions, 3557 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2d8fe6499090..f6851d94c1af 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1418,11 +1418,20 @@ accept_ra_pinfo - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_rt_info_min_plen - INTEGER + Minimum prefix length of Route Information in RA. + + Route Information w/ prefix smaller than this variable shall + be ignored. + + Functional default: 0 if accept_ra_rtr_pref is enabled. + -1 if accept_ra_rtr_pref is disabled. + accept_ra_rt_info_max_plen - INTEGER Maximum prefix length of Route Information in RA. - Route Information w/ prefix larger than or equal to this - variable shall be ignored. + Route Information w/ prefix larger than this variable shall + be ignored. Functional default: 0 if accept_ra_rtr_pref is enabled. -1 if accept_ra_rtr_pref is disabled. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt deleted file mode 100644 index 54f10478e8e3..000000000000 --- a/Documentation/networking/netlink_mmap.txt +++ /dev/null @@ -1,332 +0,0 @@ -This file documents how to use memory mapped I/O with netlink. - -Author: Patrick McHardy <kaber@trash.net> - -Overview -------- - -Memory mapped netlink I/O can be used to increase throughput and decrease -overhead of unicast receive and transmit operations. Some netlink subsystems -require high throughput; these are mainly the netfilter subsystems -nfnetlink_queue and nfnetlink_log, but it can also help speed up large -dump operations of, for instance, the routing database. - -Memory mapped netlink I/O uses two circular ring buffers for RX and TX which -are mapped into the process's address space. - -The RX ring is used by the kernel to directly construct netlink messages into -user-space memory without copying them as done with regular socket I/O; -additionally, as long as the ring contains messages, no recvmsg() or poll() -syscalls have to be issued by user-space to get more messages. - -The TX ring is used to process messages directly from user-space memory; the -kernel processes all messages contained in the ring using a single sendmsg() -call. - -Usage overview -------------- - -In order to use memory mapped netlink I/O, user-space needs three main changes: - -- ring setup -- conversion of the RX path to get messages from the ring instead of recvmsg() -- conversion of the TX path to construct messages into the ring - -Ring setup is done using setsockopt() to provide the ring parameters to the -kernel, then a call to mmap() to map the ring into the process's address space: - -- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params)); -- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params)); -- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) - -Usage of either ring is optional, but even if only the RX ring is used the -mapping still needs to be writable in order to update the frame status after -processing. - -Conversion of the reception path involves calling poll() on the file -descriptor; once the socket is readable, the frames from the ring are -processed in order until no more messages are available, as indicated by -a status word in the frame header.
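To make the steps above concrete, the following is a minimal sketch combining both setsockopt() calls and the mmap() into one helper (a sketch only, assuming an already-created AF_NETLINK socket fd, a filled-in struct nl_mmap_req, and a kernel whose <linux/netlink.h> still provides NETLINK_RX_RING/NETLINK_TX_RING; the full walk-through appears in the Example section further below):

    #include <linux/netlink.h>
    #include <sys/mman.h>
    #include <sys/socket.h>

    /* Set up both rings and map them; the TX ring directly follows the
     * RX ring in the mapping. Returns the RX ring base, or NULL on error. */
    static void *netlink_setup_rings(int fd, struct nl_mmap_req *req)
    {
            unsigned int ring_size = req->nm_block_nr * req->nm_block_size;
            void *rx_ring;

            if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, req, sizeof(*req)) < 0)
                    return NULL;
            if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, req, sizeof(*req)) < 0)
                    return NULL;

            /* The mapping is writable even for RX-only use: the frame
             * status words are updated in place after processing. */
            rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
            return rx_ring == MAP_FAILED ? NULL : rx_ring;
    }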
- -On kernel side, in order to make use of memory mapped I/O on receive, the -originating netlink subsystem needs to support memory mapped I/O, otherwise -it will use an allocated socket buffer as usual and the contents will be - copied to the ring on transmission, nullifying most of the performance gains. -Dumps of kernel databases automatically support memory mapped I/O. - -Conversion of the transmit path involves changing message construction to -use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header appropriately. Optionally poll() can -be used to wait for free frames in the TX ring. - -Structures and definitions for using memory mapped I/O are contained in -<linux/netlink.h>. - -RX and TX rings ---------------- - -Each ring contains a number of contiguous memory blocks, containing frames of -fixed size dependent on the parameters used for ring setup. - -Ring: [ block 0 ] - [ frame 0 ] - [ frame 1 ] - [ block 1 ] - [ frame 2 ] - [ frame 3 ] - ... - [ block n ] - [ frame 2 * n ] - [ frame 2 * n + 1 ] - -The blocks are only visible to the kernel; from the point of view of user-space, -the ring just contains the frames in a contiguous memory zone. - -The ring parameters used for setting up the ring are defined as follows: - -struct nl_mmap_req { - unsigned int nm_block_size; - unsigned int nm_block_nr; - unsigned int nm_frame_size; - unsigned int nm_frame_nr; -}; - -Frames are grouped into blocks, where each block is a contiguous region of memory -and holds nm_block_size / nm_frame_size frames. The total number of frames in -the ring is nm_frame_nr. The following invariants hold: - -- frames_per_block = nm_block_size / nm_frame_size - -- nm_frame_nr = frames_per_block * nm_block_nr - -Some parameters are constrained, specifically: - -- nm_block_size must be a multiple of the architecture's memory page size. - The getpagesize() function can be used to get the page size. - -- nm_frame_size must be equal to or larger than NL_MMAP_HDRLEN; IOW, a frame must be - able to hold at least the frame header - -- nm_frame_size must be smaller than or equal to nm_block_size - -- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT - -- nm_frame_nr must equal the actual number of frames as specified above - (a helper that checks these constraints is sketched below). - -When the kernel can't allocate physically contiguous memory for a ring block, -it will fall back to using physically discontiguous memory. This might affect -performance negatively; in order to avoid this, the nm_block_size parameter -should be chosen to be as small as possible for the required frame size and -the number of blocks should be increased instead. - -Ring frames ------------- - -Each frame contains a frame header, consisting of a synchronization word and some -meta-data, and the message itself. - -Frame: [ header message ] - -The frame header is defined as follows: - -struct nl_mmap_hdr { - unsigned int nm_status; - unsigned int nm_len; - __u32 nm_group; - /* credentials */ - __u32 nm_pid; - __u32 nm_uid; - __u32 nm_gid; -}; - -- nm_status is used for synchronizing processing between the kernel and user- space and specifies ownership of the frame as well as the operation to perform - -- nm_len contains the length of the message contained in the data area - -- nm_group specifies the destination multicast group of the message - -- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending - process. These values correspond to the data available using SOCK_PASSCRED in - the SCM_CREDENTIALS cmsg.
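Before moving on to the status word, here is a small user-space helper that pre-validates a request against the geometry constraints listed above (a sketch under the same assumptions as the earlier helper; the kernel performs the authoritative checks when the rings are configured):

    #include <linux/netlink.h>
    #include <stdbool.h>
    #include <unistd.h>

    /* Check a ring request against the documented constraints. */
    static bool nl_mmap_req_valid(const struct nl_mmap_req *req)
    {
            unsigned int frames_per_block;

            if (req->nm_block_size % getpagesize() != 0)
                    return false;   /* blocks must be page-size multiples */
            if (req->nm_frame_size < NL_MMAP_HDRLEN)
                    return false;   /* a frame must hold the frame header */
            if (req->nm_frame_size > req->nm_block_size)
                    return false;   /* a frame must fit into one block */
            if (req->nm_frame_size % NL_MMAP_MSG_ALIGNMENT != 0)
                    return false;   /* frames must be properly aligned */

            frames_per_block = req->nm_block_size / req->nm_frame_size;
            return req->nm_frame_nr == frames_per_block * req->nm_block_nr;
    }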
- -The possible values in the status word are: - -- NL_MMAP_STATUS_UNUSED: - RX ring: frame belongs to the kernel and contains no message - for user-space. Appropriate action is to invoke poll() - to wait for new messages. - - TX ring: frame belongs to user-space and can be used for - message construction. - -- NL_MMAP_STATUS_RESERVED: - RX ring only: frame is currently used by the kernel for message - construction and contains no valid message yet. - Appropriate action is to invoke poll() to wait for - new messages. - -- NL_MMAP_STATUS_VALID: - RX ring: frame contains a valid message. Appropriate action is - to process the message and release the frame back to - the kernel by setting the status to - NL_MMAP_STATUS_UNUSED or queue the frame by setting the - status to NL_MMAP_STATUS_SKIP. - - TX ring: the frame contains a valid message from user-space to - be processed by the kernel. After completing processing - the kernel will release the frame back to user-space by - setting the status to NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_COPY: - RX ring only: a message is ready to be processed but could not be - stored in the ring, either because it exceeded the - frame size or because the originating subsystem does - not support memory mapped I/O. Appropriate action is - to invoke recvmsg() to receive the message and release - the frame back to the kernel by setting the status to - NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_SKIP: - RX ring only: user-space queued the message for later processing, but - processed some messages following it in the ring. The - kernel should skip this frame when looking for unused - frames. - -The data area of a frame begins at an offset of NL_MMAP_HDRLEN relative to the -frame header. - -TX limitations -------------- - -As of Jan 2015 the message is always copied from the ring frame to an -allocated buffer due to unresolved security concerns. -See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). - -Example ------- - -Ring setup: - - unsigned int block_size = 16 * getpagesize(); - struct nl_mmap_req req = { - .nm_block_size = block_size, - .nm_block_nr = 64, - .nm_frame_size = 16384, - .nm_frame_nr = 64 * block_size / 16384, - }; - unsigned int ring_size; - void *rx_ring, *tx_ring; - - /* Configure ring parameters */ - if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) - exit(1); - if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) - exit(1); - - /* Calculate size of each individual ring */ - ring_size = req.nm_block_nr * req.nm_block_size; - - /* Map RX/TX rings. The TX ring is located after the RX ring */ - rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - if ((long)rx_ring == -1L) - exit(1); - tx_ring = rx_ring + ring_size; - -Message reception: - -This example assumes some ring parameters of the ring setup are available. - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - unsigned char buf[16384]; - ssize_t len; - - while (1) { - struct pollfd pfds[1]; - - pfds[0].fd = fd; - pfds[0].events = POLLIN | POLLERR; - pfds[0].revents = 0; - - if (poll(pfds, 1, -1) < 0 && errno != EINTR) - exit(1); - - /* Check for errors.
Error handling omitted */ - if (pfds[0].revents & POLLERR) - <handle error> - - /* If no new messages, poll again */ - if (!(pfds[0].revents & POLLIN)) - continue; - - /* Process all frames */ - while (1) { - /* Get next frame header */ - hdr = rx_ring + frame_offset; - - if (hdr->nm_status == NL_MMAP_STATUS_VALID) { - /* Regular memory mapped frame */ - nlh = (void *)hdr + NL_MMAP_HDRLEN; - len = hdr->nm_len; - - /* Release empty message immediately. May happen - * on error during message construction. - */ - if (len == 0) - goto release; - } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { - /* Frame queued to socket receive queue */ - len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); - if (len <= 0) - break; - nlh = (struct nlmsghdr *)buf; - } else - /* No more messages to process, continue polling */ - break; - - process_msg(nlh); -release: - /* Release frame back to the kernel */ - hdr->nm_status = NL_MMAP_STATUS_UNUSED; - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; - } - } - -Message transmission: - -This example assumes some ring parameters of the ring setup are available. -A single message is constructed and transmitted; to send multiple messages -at once they would be constructed in consecutive frames before a final call -to sendto(). - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - }; - - hdr = tx_ring + frame_offset; - if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) - /* No frame available. Use poll() to avoid. */ - exit(1); - - nlh = (void *)hdr + NL_MMAP_HDRLEN; - - /* Build message */ - build_message(nlh); - - /* Fill frame header: length and status need to be set */ - hdr->nm_len = nlh->nlmsg_len; - hdr->nm_status = NL_MMAP_STATUS_VALID; - - if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) - exit(1); - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; diff --git a/Makefile b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 55 +SUBLEVEL = 59 EXTRAVERSION = NAME = Blurry Fish Butt @@ -146,7 +146,7 @@ PHONY += $(MAKECMDGOALS) sub-make $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make @: -sub-make: FORCE +sub-make: $(Q)$(MAKE) -C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR) \ -f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS)) @@ -1005,7 +1005,7 @@ prepare1: prepare2 $(version_h) include/generated/utsrelease.h \ archprepare: archheaders archscripts prepare1 scripts_basic -prepare0: archprepare FORCE +prepare0: archprepare $(Q)$(MAKE) $(build)=. # All the preparing..
@@ -1050,7 +1050,7 @@ INSTALL_FW_PATH=$(INSTALL_MOD_PATH)/lib/firmware export INSTALL_FW_PATH PHONY += firmware_install -firmware_install: FORCE +firmware_install: @mkdir -p $(objtree)/firmware $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_install @@ -1070,7 +1070,7 @@ PHONY += archscripts archscripts: PHONY += __headers -__headers: $(version_h) scripts_basic asm-generic archheaders archscripts FORCE +__headers: $(version_h) scripts_basic asm-generic archheaders archscripts $(Q)$(MAKE) $(build)=scripts build_unifdef PHONY += headers_install_all diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 7f672485f9c2..67842e7007a8 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -1,10 +1,12 @@ # KEEP ALPHABETICALLY SORTED # CONFIG_DEVKMEM is not set # CONFIG_DEVMEM is not set +# CONFIG_FHANDLE is not set # CONFIG_INET_LRO is not set # CONFIG_MODULES is not set # CONFIG_OABI_COMPAT is not set # CONFIG_SYSVIPC is not set +# CONFIG_USELIB is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y CONFIG_ANDROID_BINDER_DEVICES=binder,hwbinder,vndbinder @@ -12,7 +14,6 @@ CONFIG_ANDROID_LOW_MEMORY_KILLER=y CONFIG_ARMV8_DEPRECATED=y CONFIG_ASHMEM=y CONFIG_AUDIT=y -CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_INITRD=y CONFIG_CGROUPS=y CONFIG_CGROUP_CPUACCT=y @@ -20,14 +21,15 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_CP15_BARRIER_EMULATION=y -CONFIG_DM_CRYPT=y -CONFIG_DM_VERITY=y -CONFIG_DM_VERITY_FEC=y +CONFIG_DEFAULT_SECURITY_SELINUX=y CONFIG_EMBEDDED=y CONFIG_FB=y CONFIG_HARDENED_USERCOPY=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y CONFIG_INET6_AH=y +CONFIG_INET6_DIAG_DESTROY=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_INET=y @@ -43,7 +45,6 @@ CONFIG_IPV6=y CONFIG_IPV6_MIP6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IP_ADVANCED_ROUTER=y @@ -65,6 +66,9 @@ CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y CONFIG_NET=y CONFIG_NETDEVICES=y CONFIG_NETFILTER=y @@ -158,16 +162,16 @@ CONFIG_STAGING=y CONFIG_SWP_EMULATION=y CONFIG_SYNC=y CONFIG_TUN=y -CONFIG_UID_CPUTIME=y +CONFIG_UID_SYS_STATS=y CONFIG_UNIX=y -CONFIG_USB_GADGET=y CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_USB_CONFIGFS_F_MTP=y CONFIG_USB_CONFIGFS_F_PTP=y -CONFIG_USB_CONFIGFS_F_ACC=y -CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y CONFIG_USB_CONFIGFS_UEVENT=y -CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_USB_GADGET=y CONFIG_USB_OTG_WAKELOCK=y CONFIG_XFRM_USER=y diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg index 70aaae17ad29..11df0892301b 100644 --- a/android/configs/android-recommended.cfg +++ b/android/configs/android-recommended.cfg @@ -7,16 +7,22 @@ # CONFIG_PM_WAKELOCKS_GC is not set # CONFIG_VT is not set CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_ARM_KERNMEM_PERMS=y CONFIG_ARM64_SW_TTBR0_PAN=y CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_DM=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_COMPACTION=y +CONFIG_CPU_SW_DOMAIN_PAN=y CONFIG_DEBUG_RODATA=y +CONFIG_DM_CRYPT=y CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y 
CONFIG_DRAGONRISE_FF=y CONFIG_ENABLE_DEFAULT_TRACERS=y CONFIG_EXT4_FS=y @@ -92,6 +98,7 @@ CONFIG_LOGIRUMBLEPAD2_FF=y CONFIG_LOGITECH_FF=y CONFIG_MD=y CONFIG_MEDIA_SUPPORT=y +CONFIG_MEMORY_STATE_TIME=y CONFIG_MSDOS_FS=y CONFIG_PANIC_TIMEOUT=5 CONFIG_PANTHERLORD_FF=y @@ -121,7 +128,6 @@ CONFIG_TIMER_STATS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_UHID=y -CONFIG_MEMORY_STATE_TIME=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_HIDDEV=y diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 4dfca8fc49b3..1bc61ece2589 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -856,6 +856,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xf801c000 0x100>; interrupts = <24 IRQ_TYPE_LEVEL_HIGH 7>; + dmas = <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(35))>, + <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(36))>; + dma-names = "tx", "rx"; clocks = <&uart0_clk>; clock-names = "usart"; status = "disabled"; @@ -865,6 +872,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xf8020000 0x100>; interrupts = <25 IRQ_TYPE_LEVEL_HIGH 7>; + dmas = <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(37))>, + <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(38))>; + dma-names = "tx", "rx"; clocks = <&uart1_clk>; clock-names = "usart"; status = "disabled"; @@ -874,6 +888,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xf8024000 0x100>; interrupts = <26 IRQ_TYPE_LEVEL_HIGH 7>; + dmas = <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(39))>, + <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(40))>; + dma-names = "tx", "rx"; clocks = <&uart2_clk>; clock-names = "usart"; status = "disabled"; @@ -985,6 +1006,13 @@ compatible = "atmel,at91sam9260-usart"; reg = <0xfc008000 0x100>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 7>; + dmas = <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(41))>, + <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(42))>; + dma-names = "tx", "rx"; clocks = <&uart3_clk>; clock-names = "usart"; status = "disabled"; @@ -993,6 +1021,13 @@ uart4: serial@fc00c000 { compatible = "atmel,at91sam9260-usart"; reg = <0xfc00c000 0x100>; + dmas = <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(43))>, + <&dma0 + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | + AT91_XDMAC_DT_PERID(44))>; + dma-names = "tx", "rx"; interrupts = <28 IRQ_TYPE_LEVEL_HIGH 7>; clocks = <&uart4_clk>; clock-names = "usart"; diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 679c589c4828..1f7b98e1a00d 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -369,7 +369,7 @@ static struct crypto_alg aes_algs[] = { { .cra_blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ce_aes_setkey, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, @@ -446,7 +446,7 @@ static struct crypto_alg aes_algs[] = { { .cra_ablkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 54a5aeab988d..bbbffe946122 100644 --- 
a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cache.h> #include <linux/elf.h> #include <linux/err.h> #include <linux/kernel.h> @@ -39,7 +40,7 @@ static struct page **vdso_text_pagelist; /* Total number of pages needed for the data and text portions of the VDSO. */ -unsigned int vdso_total_pages __read_mostly; +unsigned int vdso_total_pages __ro_after_init; /* * The VDSO data page. @@ -47,13 +48,13 @@ unsigned int vdso_total_pages __read_mostly; static union vdso_data_store vdso_data_store __page_aligned_data; static struct vdso_data *vdso_data = &vdso_data_store.data; -static struct page *vdso_data_page; -static struct vm_special_mapping vdso_data_mapping = { +static struct page *vdso_data_page __ro_after_init; +static const struct vm_special_mapping vdso_data_mapping = { .name = "[vvar]", .pages = &vdso_data_page, }; -static struct vm_special_mapping vdso_text_mapping = { +static struct vm_special_mapping vdso_text_mapping __ro_after_init = { .name = "[vdso]", }; @@ -67,7 +68,7 @@ struct elfinfo { /* Cached result of boot-time check for whether the arch timer exists, * and if so, whether the virtual counter is useable. */ -static bool cntvct_ok __read_mostly; +static bool cntvct_ok __ro_after_init; static bool __init cntvct_functional(void) { @@ -224,7 +225,7 @@ static int install_vvar(struct mm_struct *mm, unsigned long addr) VM_READ | VM_MAYREAD, &vdso_data_mapping); - return IS_ERR(vma) ? PTR_ERR(vma) : 0; + return PTR_ERR_OR_ZERO(vma); } /* assumes mmap_sem is write-locked */ diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 23726fb31741..d687f860a2da 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -286,6 +286,22 @@ static void at91_ddr_standby(void) at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1); } +static void sama5d3_ddr_standby(void) +{ + u32 lpr0; + u32 saved_lpr0; + + saved_lpr0 = at91_ramc_read(0, AT91_DDRSDRC_LPR); + lpr0 = saved_lpr0 & ~AT91_DDRSDRC_LPCB; + lpr0 |= AT91_DDRSDRC_LPCB_POWER_DOWN; + + at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0); + + cpu_do_idle(); + + at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0); +} + /* We manage both DDRAM/SDRAM controllers, we need more than one value to * remember. */ @@ -320,7 +336,7 @@ static const struct of_device_id const ramc_ids[] __initconst = { { .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby }, { .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby }, { .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby }, - { .compatible = "atmel,sama5d3-ddramc", .data = at91_ddr_standby }, + { .compatible = "atmel,sama5d3-ddramc", .data = sama5d3_ddr_standby }, { /*sentinel*/ } }; diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 1160434eece0..59a8fa7b8a3b 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -74,5 +74,5 @@ $(MODLIB)/vdso: FORCE @mkdir -p $(MODLIB)/vdso PHONY += vdso_install -vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso FORCE +vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso $(call cmd,vdso_install) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3c89828b9af2..69f08519301d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1023,6 +1023,26 @@ config BUILD_ARM64_APPENDED_DTB_IMAGE DTBs to be built by default (instead of a standalone Image.gz.) 
The image will be built in arch/arm64/boot/Image.gz-dtb +choice + prompt "Appended DTB Kernel Image name" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Enabling this option will cause a specific kernel image, Image or + Image.gz, to be used for final image creation. + The image will be built in arch/arm64/boot/IMAGE-NAME-dtb + + config IMG_GZ_DTB + bool "Image.gz-dtb" + config IMG_DTB + bool "Image-dtb" +endchoice + +config BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME + string + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + default "Image.gz-dtb" if IMG_GZ_DTB + default "Image-dtb" if IMG_DTB + config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES string "Default dtb names" depends on BUILD_ARM64_APPENDED_DTB_IMAGE diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index fcfa3c7dedc1..f656c52d7e10 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -85,7 +85,7 @@ core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) -KBUILD_IMAGE := Image.gz-dtb +KBUILD_IMAGE := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_KERNEL_IMAGE_NAME)) else KBUILD_IMAGE := Image.gz endif @@ -132,6 +132,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts index 93bc3d7d51c0..29315af22147 100644 --- a/arch/arm64/boot/dts/arm/juno-r1.dts +++ b/arch/arm64/boot/dts/arm/juno-r1.dts @@ -60,6 +60,28 @@ }; }; + idle-states { + entry-method = "arm,psci"; + + CPU_SLEEP_0: cpu-sleep-0 { + compatible = "arm,idle-state"; + arm,psci-suspend-param = <0x0010000>; + local-timer-stop; + entry-latency-us = <300>; + exit-latency-us = <1200>; + min-residency-us = <2000>; + }; + + CLUSTER_SLEEP_0: cluster-sleep-0 { + compatible = "arm,idle-state"; + arm,psci-suspend-param = <0x1010000>; + local-timer-stop; + entry-latency-us = <400>; + exit-latency-us = <1200>; + min-residency-us = <2500>; + }; + }; + A57_0: cpu@0 { compatible = "arm,cortex-a57","arm,armv8"; reg = <0x0 0x0>; @@ -67,6 +89,7 @@ enable-method = "psci"; next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A57_1: cpu@1 { @@ -76,6 +99,7 @@ enable-method = "psci"; next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A53_0: cpu@100 { @@ -85,6 +109,7 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A53_1: cpu@101 { @@ -94,6 +119,7 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A53_2: cpu@102 { @@ -103,6 +129,7 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A53_3: cpu@103 { @@ -112,6 +139,7 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; }; A57_L2: l2-cache0 { diff --git a/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi b/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi new file mode 100644 index 000000000000..38207e4391ab --- /dev/null +++ b/arch/arm64/boot/dts/arm/juno-sched-energy.dtsi @@ -0,0 +1,147 @@ +/* + * ARM JUNO specific energy cost model data. There are no unit requirements for + * the data. Data can be normalized to any reference point, but the + * normalization must be consistent. That is, one bogo-joule/watt must be the + * same quantity for all data, but we don't care what it is. + */ + +/* static struct idle_state idle_states_cluster_a53[] = { */ +/* { .power = 56 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */ +/* { .power = 56 }, /\* WFI *\/ */ +/* { .power = 56 }, /\* cpu-sleep-0 *\/ */ +/* { .power = 17 }, /\* cluster-sleep-0 *\/ */ +/* }; */ + +/* static struct idle_state idle_states_cluster_a57[] = { */ +/* { .power = 65 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */ +/* { .power = 65 }, /\* WFI *\/ */ +/* { .power = 65 }, /\* cpu-sleep-0 *\/ */ +/* { .power = 24 }, /\* cluster-sleep-0 *\/ */ +/* }; */ + +/* static struct capacity_state cap_states_cluster_a53[] = { */ +/* /\* Power per cluster *\/ */ +/* { .cap = 235, .power = 26, }, /\* 450 MHz *\/ */ +/* { .cap = 303, .power = 30, }, /\* 575 MHz *\/ */ +/* { .cap = 368, .power = 39, }, /\* 700 MHz *\/ */ +/* { .cap = 406, .power = 47, }, /\* 775 MHz *\/ */ +/* { .cap = 447, .power = 57, }, /\* 850 Mhz *\/ */ +/* }; */ + +/* static struct capacity_state cap_states_cluster_a57[] = { */ +/* /\* Power per cluster *\/ */ +/* { .cap = 417, .power = 24, }, /\* 450 MHz *\/ */ +/* { .cap = 579, .power = 32, }, /\* 625 MHz *\/ */ +/* { .cap = 744, .power = 43, }, /\* 800 MHz *\/ */ +/* { .cap = 883, .power = 49, }, /\* 950 MHz *\/ */ +/* { .cap = 1024, .power = 64, }, /\* 1100 MHz *\/ */ +/* }; */ + +/* static struct sched_group_energy energy_cluster_a53 = { */ +/* .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a53), */ +/* .idle_states = idle_states_cluster_a53, */ +/* .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a53), */ +/* .cap_states = cap_states_cluster_a53, */ +/* }; */ + +/* static struct sched_group_energy energy_cluster_a57 = { */ +/* .nr_idle_states = ARRAY_SIZE(idle_states_cluster_a57), */ +/* .idle_states = idle_states_cluster_a57, */ +/* .nr_cap_states = ARRAY_SIZE(cap_states_cluster_a57), */ +/* .cap_states = cap_states_cluster_a57, */ +/* }; */ + +/* static struct idle_state idle_states_core_a53[] = { */ +/* { .power = 6 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */ +/* { .power = 6 }, /\* WFI *\/ */ +/* { .power = 0 }, /\* cpu-sleep-0 *\/ */ +/* { .power = 0 }, /\* cluster-sleep-0 *\/ */ +/* }; */ + +/* static struct idle_state idle_states_core_a57[] = { */ +/* { .power = 15 }, /\* arch_cpu_idle() (active idle) = WFI *\/ */ +/* { .power = 15 }, /\* WFI *\/ */ +/* { .power = 0 }, /\* cpu-sleep-0 *\/ */ +/* { .power = 0 }, /\* cluster-sleep-0 *\/ */ +/* }; */ + +/* static struct capacity_state cap_states_core_a53[] = { */ +/* /\* Power per cpu *\/ */ +/* { .cap = 235, .power = 33, }, /\* 450 MHz *\/ */ +/* { .cap = 302, .power = 46, }, /\* 575 MHz *\/ */ +/* { .cap = 368, .power = 61, }, /\* 700 MHz *\/ */ +/* { .cap = 406, .power = 76, }, /\* 775 MHz *\/ */ +/* { .cap = 447, .power = 93, }, /\* 850 Mhz *\/ */ 
+/* }; */ + +/* static struct capacity_state cap_states_core_a57[] = { */ +/* /\* Power per cpu *\/ */ +/* { .cap = 417, .power = 168, }, /\* 450 MHz *\/ */ +/* { .cap = 579, .power = 251, }, /\* 625 MHz *\/ */ +/* { .cap = 744, .power = 359, }, /\* 800 MHz *\/ */ +/* { .cap = 883, .power = 479, }, /\* 950 MHz *\/ */ +/* { .cap = 1024, .power = 616, }, /\* 1100 MHz *\/ */ +/* }; */ + +energy-costs { + CPU_COST_A57: core-cost0 { + busy-cost-data = < + 417 168 + 579 251 + 744 359 + 883 479 + 1023 616 + >; + idle-cost-data = < + 15 + 15 + 0 + 0 + >; + }; + CPU_COST_A53: core-cost1 { + busy-cost-data = < + 235 33 + 302 46 + 368 61 + 406 76 + 447 93 + >; + idle-cost-data = < + 6 + 6 + 0 + 0 + >; + }; + CLUSTER_COST_A57: cluster-cost0 { + busy-cost-data = < + 417 24 + 579 32 + 744 43 + 883 49 + 1024 64 + >; + idle-cost-data = < + 65 + 65 + 65 + 24 + >; + }; + CLUSTER_COST_A53: cluster-cost1 { + busy-cost-data = < + 235 26 + 303 30 + 368 39 + 406 47 + 447 57 + >; + idle-cost-data = < + 56 + 56 + 56 + 17 + >; + }; +}; diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index 3e1a84b01b50..68816f71fa51 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts @@ -60,6 +60,28 @@ }; }; + idle-states { + entry-method = "arm,psci"; + + CPU_SLEEP_0: cpu-sleep-0 { + compatible = "arm,idle-state"; + arm,psci-suspend-param = <0x0010000>; + local-timer-stop; + entry-latency-us = <300>; + exit-latency-us = <1200>; + min-residency-us = <2000>; + }; + + CLUSTER_SLEEP_0: cluster-sleep-0 { + compatible = "arm,idle-state"; + arm,psci-suspend-param = <0x1010000>; + local-timer-stop; + entry-latency-us = <400>; + exit-latency-us = <1200>; + min-residency-us = <2500>; + }; + }; + A57_0: cpu@0 { compatible = "arm,cortex-a57","arm,armv8"; reg = <0x0 0x0>; @@ -67,6 +89,8 @@ enable-method = "psci"; next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A57 &CLUSTER_COST_A57>; }; A57_1: cpu@1 { @@ -76,6 +100,8 @@ enable-method = "psci"; next-level-cache = <&A57_L2>; clocks = <&scpi_dvfs 0>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A57 &CLUSTER_COST_A57>; }; A53_0: cpu@100 { @@ -85,6 +111,8 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; }; A53_1: cpu@101 { @@ -94,6 +122,8 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; }; A53_2: cpu@102 { @@ -103,6 +133,8 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; }; A53_3: cpu@103 { @@ -112,6 +144,8 @@ enable-method = "psci"; next-level-cache = <&A53_L2>; clocks = <&scpi_dvfs 1>; + cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>; + sched-energy-costs = <&CPU_COST_A53 &CLUSTER_COST_A53>; }; A57_L2: l2-cache0 { @@ -121,6 +155,8 @@ A53_L2: l2-cache1 { compatible = "cache"; }; + + /include/ "juno-sched-energy.dtsi" }; pmu_a57 { diff --git a/arch/arm64/configs/msm-perf_defconfig b/arch/arm64/configs/msm-perf_defconfig index 0031c55360c7..97d60740fb48 100644 --- a/arch/arm64/configs/msm-perf_defconfig +++ b/arch/arm64/configs/msm-perf_defconfig @@ -5,6 +5,9 @@ 
CONFIG_AUDIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y @@ -236,7 +239,7 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_QSEECOM=y CONFIG_HDCP_QSEECOM=y -CONFIG_UID_CPUTIME=y +CONFIG_UID_SYS_STATS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y diff --git a/arch/arm64/configs/msm_defconfig b/arch/arm64/configs/msm_defconfig index 261a49d7944a..907cf33d6234 100644 --- a/arch/arm64/configs/msm_defconfig +++ b/arch/arm64/configs/msm_defconfig @@ -4,6 +4,9 @@ CONFIG_AUDIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y CONFIG_RCU_EXPERT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -228,7 +231,7 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_QSEECOM=y CONFIG_HDCP_QSEECOM=y -CONFIG_UID_CPUTIME=y +CONFIG_UID_SYS_STATS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y diff --git a/arch/arm64/configs/msmcortex-perf_defconfig b/arch/arm64/configs/msmcortex-perf_defconfig index 4f55414454fc..5729110b112c 100644 --- a/arch/arm64/configs/msmcortex-perf_defconfig +++ b/arch/arm64/configs/msmcortex-perf_defconfig @@ -5,6 +5,9 @@ CONFIG_AUDIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y @@ -240,7 +243,7 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_QSEECOM=y CONFIG_HDCP_QSEECOM=y -CONFIG_UID_CPUTIME=y +CONFIG_UID_SYS_STATS=y CONFIG_QPNP_MISC=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig index 9814d1826d93..24303c3b7b71 100644 --- a/arch/arm64/configs/msmcortex_defconfig +++ b/arch/arm64/configs/msmcortex_defconfig @@ -4,6 +4,9 @@ CONFIG_AUDIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y @@ -243,7 +246,7 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_QSEECOM=y CONFIG_HDCP_QSEECOM=y -CONFIG_UID_CPUTIME=y +CONFIG_UID_SYS_STATS=y CONFIG_QPNP_MISC=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9887e1..de1aab4b5da8 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -23,6 +23,11 @@ config CRYPTO_GHASH_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_POLY_HASH_ARM64_CE + tristate "poly_hash (for HEH encryption mode) using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index abb79b3cfcfe..f0a8f2475ea3 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -17,6 +17,9 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o +obj-$(CONFIG_CRYPTO_POLY_HASH_ARM64_CE) += poly-hash-ce.o +poly-hash-ce-y := poly-hash-ce-glue.o poly-hash-ce-core.o + obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto diff --git 
a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 6a51dfccfe71..448b874a4826 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -294,7 +294,7 @@ static struct crypto_alg aes_algs[] = { { .cra_blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = aes_setkey, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, @@ -371,7 +371,7 @@ static struct crypto_alg aes_algs[] = { { .cra_ablkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, + .ivsize = 0, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, diff --git a/arch/arm64/crypto/poly-hash-ce-core.S b/arch/arm64/crypto/poly-hash-ce-core.S new file mode 100644 index 000000000000..8ccb544c5526 --- /dev/null +++ b/arch/arm64/crypto/poly-hash-ce-core.S @@ -0,0 +1,163 @@ +/* + * Accelerated poly_hash implementation with ARMv8 PMULL instructions. + * + * Based on ghash-ce-core.S. + * + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2017 Google, Inc. <ebiggers@google.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + KEY .req v0 + KEY2 .req v1 + T1 .req v2 + T2 .req v3 + GSTAR .req v4 + XL .req v5 + XM .req v6 + XH .req v7 + + .text + .arch armv8-a+crypto + + /* 16-byte aligned (2**4 = 16); not required, but might as well */ + .align 4 +.Lgstar: + .quad 0x87, 0x87 + +/* + * void pmull_poly_hash_update(le128 *digest, const le128 *key, + * const u8 *src, unsigned int blocks, + * unsigned int partial); + */ +ENTRY(pmull_poly_hash_update) + + /* Load digest into XL */ + ld1 {XL.16b}, [x0] + + /* Load key into KEY */ + ld1 {KEY.16b}, [x1] + + /* Load g*(x) = g(x) + x^128 = x^7 + x^2 + x + 1 into both halves of + * GSTAR */ + adr x1, .Lgstar + ld1 {GSTAR.2d}, [x1] + + /* Set KEY2 to (KEY[1]+KEY[0]):(KEY[1]+KEY[0]). This is needed for + * Karatsuba multiplication. */ + ext KEY2.16b, KEY.16b, KEY.16b, #8 + eor KEY2.16b, KEY2.16b, KEY.16b + + /* If 'partial' is nonzero, then we're finishing a pending block and + * should go right to the multiplication. */ + cbnz w4, 1f + +0: + /* Add the next block from 'src' to the digest */ + ld1 {T1.16b}, [x2], #16 + eor XL.16b, XL.16b, T1.16b + sub w3, w3, #1 + +1: + /* + * Multiply the current 128-bit digest (a1:a0, in XL) by the 128-bit key + * (b1:b0, in KEY) using Karatsuba multiplication. + */ + + /* T1 = (a1+a0):(a1+a0) */ + ext T1.16b, XL.16b, XL.16b, #8 + eor T1.16b, T1.16b, XL.16b + + /* XH = a1 * b1 */ + pmull2 XH.1q, XL.2d, KEY.2d + + /* XL = a0 * b0 */ + pmull XL.1q, XL.1d, KEY.1d + + /* XM = (a1+a0) * (b1+b0) */ + pmull XM.1q, T1.1d, KEY2.1d + + /* XM += (XH[0]:XL[1]) + XL + XH */ + ext T1.16b, XL.16b, XH.16b, #8 + eor T2.16b, XL.16b, XH.16b + eor XM.16b, XM.16b, T1.16b + eor XM.16b, XM.16b, T2.16b + + /* + * Now the 256-bit product is in XH[1]:XM:XL[0]. It represents a + * polynomial over GF(2) with degree as large as 255. We need to + * compute its remainder modulo g(x) = x^128+x^7+x^2+x+1. For this it + * is sufficient to compute the remainder of the high half 'c(x)x^128' + * add it to the low half. To reduce the high half we use the Barrett + * reduction method. 
The basic idea is that we can express the + * remainder p(x) as g(x)q(x) mod x^128, where q(x) = (c(x)x^128)/g(x). + * As detailed in [1], to avoid having to divide by g(x) at runtime the + * following equivalent expression can be derived: + * + * p(x) = [ g*(x)((c(x)q+(x))/x^128) ] mod x^128 + * + * where g*(x) = x^128+g(x) = x^7+x^2+x+1, and q+(x) = x^256/g(x) = g(x) + * in this case. This is also equivalent to: + * + * p(x) = [ g*(x)((c(x)(x^128 + g*(x)))/x^128) ] mod x^128 + * = [ g*(x)(c(x) + (c(x)g*(x))/x^128) ] mod x^128 + * + * Since deg g*(x) < 64: + * + * p(x) = [ g*(x)(c(x) + ((c(x)/x^64)g*(x))/x^64) ] mod x^128 + * = [ g*(x)((c(x)/x^64)x^64 + (c(x) mod x^64) + + * ((c(x)/x^64)g*(x))/x^64) ] mod x^128 + * + * Letting t(x) = g*(x)(c(x)/x^64): + * + * p(x) = [ t(x)x^64 + g*(x)((c(x) mod x^64) + t(x)/x^64) ] mod x^128 + * + * Therefore, to do the reduction we only need to issue two 64-bit => + * 128-bit carryless multiplications: g*(x) times c(x)/x^64, and g*(x) + * times ((c(x) mod x^64) + t(x)/x^64). (Multiplication by x^64 doesn't + * count since it is simply a shift or move.) + * + * An alternate reduction method, also based on Barrett reduction and + * described in [1], uses only shifts and XORs --- no multiplications. + * However, the method with multiplications requires fewer instructions + * and is faster on processors with fast carryless multiplication. + * + * [1] "Intel Carry-Less Multiplication Instruction and its Usage for + * Computing the GCM Mode", + * https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf + */ + + /* 256-bit product is XH[1]:XM:XL[0], so c(x) is XH[1]:XM[1] */ + + /* T1 = t(x) = g*(x)(c(x)/x^64) */ + pmull2 T1.1q, GSTAR.2d, XH.2d + + /* T2 = g*(x)((c(x) mod x^64) + t(x)/x^64) */ + eor T2.16b, XM.16b, T1.16b + pmull2 T2.1q, GSTAR.2d, T2.2d + + /* Make XL[0] be the low half of the 128-bit result by adding the low 64 + * bits of the T2 term to what was already there. The 't(x)x^64' term + * makes no difference, so skip it. */ + eor XL.16b, XL.16b, T2.16b + + /* Make XL[1] be the high half of the 128-bit result by adding the high + * 64 bits of the 't(x)x^64' and T2 terms to what was already in XM[0], + * then moving XM[0] to XL[1]. */ + eor XM.16b, XM.16b, T1.16b + ext T2.16b, T2.16b, T2.16b, #8 + eor XM.16b, XM.16b, T2.16b + mov XL.d[1], XM.d[0] + + /* If more blocks remain, then loop back to process the next block; + * else, store the digest and return. */ + cbnz w3, 0b + st1 {XL.16b}, [x0] + ret +ENDPROC(pmull_poly_hash_update) diff --git a/arch/arm64/crypto/poly-hash-ce-glue.c b/arch/arm64/crypto/poly-hash-ce-glue.c new file mode 100644 index 000000000000..e195740c9ecf --- /dev/null +++ b/arch/arm64/crypto/poly-hash-ce-glue.c @@ -0,0 +1,166 @@ +/* + * Accelerated poly_hash implementation with ARMv8 PMULL instructions. + * + * Based on ghash-ce-glue.c. + * + * poly_hash is part of the HEH (Hash-Encrypt-Hash) encryption mode, proposed in + * Internet Draft https://tools.ietf.org/html/draft-cope-heh-01. + * + * poly_hash is very similar to GHASH: both algorithms are keyed hashes which + * interpret their input data as coefficients of a polynomial over GF(2^128), + * then calculate a hash value by evaluating that polynomial at the point given + * by the key, e.g. using Horner's rule. The difference is that poly_hash uses + * the more natural "ble" convention to represent GF(2^128) elements, whereas + * GHASH uses the less natural "lle" convention (see include/crypto/gf128mul.h). 
+ * The ble convention makes it simpler to implement GF(2^128) multiplication. + * + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2017 Google Inc. <ebiggers@google.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/b128ops.h> +#include <crypto/internal/hash.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +/* + * Note: in this algorithm we currently use 'le128' to represent GF(2^128) + * elements, even though poly_hash-generic uses 'be128'. Both types are + * actually "wrong" because the elements are actually in 'ble' format, and there + * should be a ble type to represent this --- as well as lle, bbe, and lbe types + * for the other conventions for representing GF(2^128) elements. But + * practically it doesn't matter which type we choose here, so we just use le128 + * since it's arguably more accurate, while poly_hash-generic still has to use + * be128 because the generic GF(2^128) multiplication functions all take be128. + */ + +struct poly_hash_desc_ctx { + le128 digest; + unsigned int count; +}; + +asmlinkage void pmull_poly_hash_update(le128 *digest, const le128 *key, + const u8 *src, unsigned int blocks, + unsigned int partial); + +static int poly_hash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + if (keylen != sizeof(le128)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(crypto_shash_ctx(tfm), key, sizeof(le128)); + return 0; +} + +static int poly_hash_init(struct shash_desc *desc) +{ + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->digest = (le128) { 0 }; + ctx->count = 0; + return 0; +} + +static int poly_hash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % sizeof(le128); + u8 *dst = (u8 *)&ctx->digest + partial; + + ctx->count += len; + + /* Finishing at least one block? */ + if (partial + len >= sizeof(le128)) { + const le128 *key = crypto_shash_ctx(desc->tfm); + + if (partial) { + /* Finish the pending block. */ + unsigned int n = sizeof(le128) - partial; + + len -= n; + do { + *dst++ ^= *src++; + } while (--n); + } + + /* + * Do the real work. If 'partial' is nonzero, this starts by + * multiplying 'digest' by 'key'. Then for each additional full + * block it adds the block to 'digest' and multiplies by 'key'. + */ + kernel_neon_begin_partial(8); + pmull_poly_hash_update(&ctx->digest, key, src, + len / sizeof(le128), partial); + kernel_neon_end(); + + src += len - (len % sizeof(le128)); + len %= sizeof(le128); + dst = (u8 *)&ctx->digest; + } + + /* Continue adding the next block to 'digest'. */ + while (len--) + *dst++ ^= *src++; + return 0; +} + +static int poly_hash_final(struct shash_desc *desc, u8 *out) +{ + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % sizeof(le128); + + /* Finish the last block if needed. 
*/ + if (partial) { + const le128 *key = crypto_shash_ctx(desc->tfm); + + kernel_neon_begin_partial(8); + pmull_poly_hash_update(&ctx->digest, key, NULL, 0, partial); + kernel_neon_end(); + } + + memcpy(out, &ctx->digest, sizeof(le128)); + return 0; +} + +static struct shash_alg poly_hash_alg = { + .digestsize = sizeof(le128), + .init = poly_hash_init, + .update = poly_hash_update, + .final = poly_hash_final, + .setkey = poly_hash_setkey, + .descsize = sizeof(struct poly_hash_desc_ctx), + .base = { + .cra_name = "poly_hash", + .cra_driver_name = "poly_hash-ce", + .cra_priority = 300, + .cra_ctxsize = sizeof(le128), + .cra_module = THIS_MODULE, + }, +}; + +static int __init poly_hash_ce_mod_init(void) +{ + return crypto_register_shash(&poly_hash_alg); +} + +static void __exit poly_hash_ce_mod_exit(void) +{ + crypto_unregister_shash(&poly_hash_alg); +} + +MODULE_DESCRIPTION("Polynomial evaluation hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); +MODULE_LICENSE("GPL v2"); + +module_cpu_feature_match(PMULL, poly_hash_ce_mod_init); +module_exit(poly_hash_ce_mod_exit); diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 9732908bfc8a..c72b8e201ab4 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -68,7 +68,11 @@ static inline void decode_ctrl_reg(u32 reg, /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 #define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_3 0x7 #define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_5 0x1f +#define ARM_BREAKPOINT_LEN_6 0x3f +#define ARM_BREAKPOINT_LEN_7 0x7f #define ARM_BREAKPOINT_LEN_8 0xff /* Kernel stepping */ @@ -110,7 +114,7 @@ struct perf_event; struct pmu; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type); + int *gen_len, int *gen_type, int *offset); extern int arch_check_bp_in_kernelspace(struct perf_event *bp); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199673d7..2b9a63771eda 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d3cfa681654f..69c203d4d63f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -52,7 +52,3 @@ obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol 
offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c9ea87198789..350c0e99fc6b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -92,6 +92,7 @@ int main(void) BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -99,6 +100,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -106,7 +109,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 367a954f9937..f4dfd8c41e06 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -317,9 +317,21 @@ static int get_hbp_len(u8 hbp_len) case ARM_BREAKPOINT_LEN_2: len_in_bytes = 2; break; + case ARM_BREAKPOINT_LEN_3: + len_in_bytes = 3; + break; case ARM_BREAKPOINT_LEN_4: len_in_bytes = 4; break; + case ARM_BREAKPOINT_LEN_5: + len_in_bytes = 5; + break; + case ARM_BREAKPOINT_LEN_6: + len_in_bytes = 6; + break; + case ARM_BREAKPOINT_LEN_7: + len_in_bytes = 7; + break; case ARM_BREAKPOINT_LEN_8: len_in_bytes = 8; break; @@ -349,7 +361,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) * to generic breakpoint descriptions. 
*/ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type) + int *gen_len, int *gen_type, int *offset) { /* Type */ switch (ctrl.type) { @@ -369,17 +381,33 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } + if (!ctrl.len) + return -EINVAL; + *offset = __ffs(ctrl.len); + /* Len */ - switch (ctrl.len) { + switch (ctrl.len >> *offset) { case ARM_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; case ARM_BREAKPOINT_LEN_2: *gen_len = HW_BREAKPOINT_LEN_2; break; + case ARM_BREAKPOINT_LEN_3: + *gen_len = HW_BREAKPOINT_LEN_3; + break; case ARM_BREAKPOINT_LEN_4: *gen_len = HW_BREAKPOINT_LEN_4; break; + case ARM_BREAKPOINT_LEN_5: + *gen_len = HW_BREAKPOINT_LEN_5; + break; + case ARM_BREAKPOINT_LEN_6: + *gen_len = HW_BREAKPOINT_LEN_6; + break; + case ARM_BREAKPOINT_LEN_7: + *gen_len = HW_BREAKPOINT_LEN_7; + break; case ARM_BREAKPOINT_LEN_8: *gen_len = HW_BREAKPOINT_LEN_8; break; @@ -423,9 +451,21 @@ static int arch_build_bp_info(struct perf_event *bp) case HW_BREAKPOINT_LEN_2: info->ctrl.len = ARM_BREAKPOINT_LEN_2; break; + case HW_BREAKPOINT_LEN_3: + info->ctrl.len = ARM_BREAKPOINT_LEN_3; + break; case HW_BREAKPOINT_LEN_4: info->ctrl.len = ARM_BREAKPOINT_LEN_4; break; + case HW_BREAKPOINT_LEN_5: + info->ctrl.len = ARM_BREAKPOINT_LEN_5; + break; + case HW_BREAKPOINT_LEN_6: + info->ctrl.len = ARM_BREAKPOINT_LEN_6; + break; + case HW_BREAKPOINT_LEN_7: + info->ctrl.len = ARM_BREAKPOINT_LEN_7; + break; case HW_BREAKPOINT_LEN_8: info->ctrl.len = ARM_BREAKPOINT_LEN_8; break; @@ -517,18 +557,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) default: return -EINVAL; } - - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; } else { if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - if (info->address & alignment_mask) - return -EINVAL; + offset = info->address & alignment_mask; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. @@ -661,12 +700,47 @@ unlock: } NOKPROBE_SYMBOL(breakpoint_handler); +/* + * Arm64 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction accesses both watched and unwatched + * addresses. There is no straightforward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelihood that a given access triggered the watchpoint. + * + * See Section D2.10.5 "Determining the memory location that caused a Watchpoint + * exception" of the ARMv8 Architecture Reference Manual for details. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0.
+ */ +static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u64 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - int i, step = 0, *kernel_step, access; + int i, step = 0, *kernel_step, access, closest_match = 0; + u64 min_dist = -1, dist; u32 ctrl_reg; - u64 val, alignment_mask; + u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; struct arch_hw_breakpoint *info; @@ -675,35 +749,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, slots = this_cpu_ptr(wp_on_reg); debug_info = &current->thread.debug; + /* + * Find all watchpoints that match the reported address. If no exact + * match is found, attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; - - info = counter_arch_bp(wp); - /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ - if (is_compat_task()) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - } else { - alignment_mask = 0x7; - } - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(AARCH64_DBG_REG_WVR, i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select to confirm. */ - ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; + continue; /* * Check that the access type matches. @@ -712,18 +766,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; + /* Check if the watchpoint value and byte select match. */ + val = read_wb_reg(AARCH64_DBG_REG_WVR, i); + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + + info = counter_arch_bp(wp); info->trigger = addr; perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ if (!wp->overflow_handler) step = 1; + } + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + perf_bp_event(wp, regs); -unlock: - rcu_read_unlock(); + /* Do we need to handle the stepping?
*/ + if (!wp->overflow_handler) + step = 1; } + rcu_read_unlock(); if (!step) return 0; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c5ef05959813..6204b7600d1b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -327,13 +327,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, disabled = !ctrl.enabled; + int err, len, type, offset, disabled = !ctrl.enabled; attr->disabled = disabled; if (disabled) return 0; - err = arch_bp_generic_fields(ctrl, &len, &type); + err = arch_bp_generic_fields(ctrl, &len, &type, &offset); if (err) return err; @@ -352,6 +352,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, attr->bp_len = len; attr->bp_type = type; + attr->bp_addr += offset; return 0; } @@ -404,7 +405,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type, if (IS_ERR(bp)) return PTR_ERR(bp); - *addr = bp ? bp->attr.bp_addr : 0; + *addr = bp ? counter_arch_bp(bp)->address : 0; return 0; } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 97bc68f4c689..3b8acfae7797 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -55,7 +55,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static struct page *vectors_page[1]; -static int alloc_vectors_page(void) +static int __init alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; @@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { + static const struct vm_special_mapping spec = { .name = "[vectors]", .pages = vectors_page, @@ -212,10 +212,16 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..62c84f7cb01b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 
9f8eeccae67c..e00b4671bd7c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,109 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. + */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. */ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +136,25 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. 
*/ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. */ mov x13, #1000 @@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. */ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f + cmp w0, #JUMPSLOT_MAX + b.hi syscall + adr vdso_data, _vdso_data + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - mov x2, x30 - .cfi_register x30, x2 + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get kernel timespec. */ - adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - bl __do_get_tspec - seqcnt_check w9, 1b + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - mov x30, x2 + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. 
*/ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get coarse timespec. */ - adr vdso_data, _vdso_data -3: seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b + ALIGN +realtime_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres) .cfi_startproc cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f @@ -203,55 +327,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). - * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read the virtual counter. */ - isb -#if IS_ENABLED(CONFIG_MSM_TIMER_LEAP) -#define LEAST_32BITS 0x00000000FFFFFFFF -reread: - mrs x15, cntvct_el0 - and x13, x15, #LEAST_32BITS - eor x13, x13, #LEAST_32BITS - cbz x13, reread -#else - mrs x15, cntvct_el0 -#endif - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. 
*/ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a13b9a65322f..1804aea44faa 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -708,5 +708,6 @@ int cpu_enable_pan(void *__unused) int cpu_enable_uao(void *__unused) { asm(SET_PSTATE_UAO(1)); + return 0; } #endif /* CONFIG_ARM64_UAO */ diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d..a511ac16a8e3 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -69,46 +69,6 @@ static int gpr_get(struct task_struct *target, 0, sizeof(*regs)); } -static int gpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - struct pt_regs *regs = task_pt_regs(target); - - /* Don't copyin TSR or CSR */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &regs, - 0, PT_TSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_TSR * sizeof(long), - (PT_TSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &regs, - (PT_TSR + 1) * sizeof(long), - PT_CSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_CSR * sizeof(long), - (PT_CSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &regs, - (PT_CSR + 1) * sizeof(long), -1); - return ret; -} - enum c6x_regset { REGSET_GPR, }; @@ -120,7 +80,6 @@ static const struct user_regset c6x_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .get = gpr_get, - .set = gpr_set }, }; diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 92075544a19a..0dc1c8f622bc 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -95,7 +95,8 @@ static int regs_get(struct task_struct *target, long *reg = (long *)&regs; /* build user regs in buffer */ - for (r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -113,7 +114,8 @@ static int regs_set(struct task_struct *target, long *reg; /* build user regs in buffer */ - for (reg = (long *)&regs, r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (reg = (long *)&regs, r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -122,7 +124,7 @@ static int regs_set(struct task_struct *target, return ret; /* write back to pt_regs */ - for (reg = (long *)&regs, r = 0; r < ARRAY_SIZE(register_offset); r++) + for (reg = (long *)&regs, r = 0; r < sizeof(regs) / sizeof(long); r++) h8300_put_reg(target, r, *reg++); return 0; } diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 970d0bd99621..648f1cef33fa 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -95,8 +95,8 @@ define archhelp echo '* unwcheck - Check vmlinux for invalid unwind info' endef -archprepare: make_nr_irqs_h 
FORCE +archprepare: make_nr_irqs_h PHONY += make_nr_irqs_h FORCE -make_nr_irqs_h: FORCE +make_nr_irqs_h: $(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bd..5e2dc7defd2c 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -24,6 +24,16 @@ * user_regset definitions. */ +static unsigned long user_txstatus(const struct pt_regs *regs) +{ + unsigned long data = (unsigned long)regs->ctx.Flags; + + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + + return data; +} + int metag_gp_regs_copyout(const struct pt_regs *regs, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -62,9 +72,7 @@ int metag_gp_regs_copyout(const struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ - data = (unsigned long)regs->ctx.Flags; - if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) - data |= USER_GP_REGS_STATUS_CATCH_BIT; + data = user_txstatus(regs); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) @@ -119,6 +127,7 @@ int metag_gp_regs_copyin(struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ + data = user_txstatus(regs); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) @@ -244,6 +253,8 @@ int metag_rp_state_copyin(struct pt_regs *regs, unsigned long long *ptr; int ret, i; + if (count < 4*13) + return -EINVAL; /* Read the entire pipeline before making any changes */ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &rp, 0, 4*13); @@ -303,7 +314,7 @@ static int metag_tls_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - void __user *tls; + void __user *tls = target->thread.tls_ptr; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 74d581569778..c95bf18260f8 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -485,7 +485,8 @@ static int fpr_set(struct task_struct *target, &target->thread.fpu, 0, sizeof(elf_fpregset_t)); - for (i = 0; i < NUM_FPU_REGS; i++) { + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2..f080abfc2f83 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . 
= ALIGN(256); .got : { __toc_start = .; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 5cc2e7af3a7b..b379146de55b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -302,7 +302,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) advance = 0; printk(KERN_ERR "Couldn't emulate instruction 0x%08x " "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); - kvmppc_core_queue_program(vcpu, 0); } } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 3a40f718baef..4004e03267cd 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -455,7 +455,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { rc = -ENOMEM; - goto out_clean; + goto out; } /* @@ -475,18 +475,22 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; - goto out_reg; + goto free_dma_table; } rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) - goto out_reg; - return 0; + goto free_bitmap; -out_reg: + return 0; +free_bitmap: + vfree(zdev->iommu_bitmap); + zdev->iommu_bitmap = NULL; +free_dma_table: dma_free_cpu_table(zdev->dma_table); -out_clean: + zdev->dma_table = NULL; +out: return rc; } diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 9ddc4928a089..c1566170964f 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -311,7 +311,7 @@ static int genregs64_set(struct task_struct *target, } if (!ret) { - unsigned long y; + unsigned long y = regs->y; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &y, diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 440df0c7a2ee..a69321a77783 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -219,6 +219,29 @@ static int ghash_async_final(struct ahash_request *req) } } +static int ghash_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + ghash_async_init(req); + memcpy(dctx, in, sizeof(*dctx)); + return 0; + +} + +static int ghash_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memcpy(out, dctx, sizeof(*dctx)); + return 0; + +} + static int ghash_async_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -288,8 +311,11 @@ static struct ahash_alg ghash_async_alg = { .final = ghash_async_final, .setkey = ghash_async_setkey, .digest = ghash_async_digest, + .export = ghash_async_export, + .import = ghash_async_import, .halg = { .digestsize = GHASH_DIGEST_SIZE, + .statesize = sizeof(struct ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 265c0ed68118..7af017a8958f 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -187,10 +187,10 @@ vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) $(MODLIB)/vdso: FORCE @mkdir -p $(MODLIB)/vdso -$(vdso_img_insttargets): install_%: $(obj)/%.dbg 
$(MODLIB)/vdso FORCE +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso $(call cmd,vdso_install) PHONY += vdso_install $(vdso_img_insttargets) -vdso_install: $(vdso_img_insttargets) FORCE +vdso_install: $(vdso_img_insttargets) clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so* diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index cfc4a966e2b9..83b5f7a323a9 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -30,6 +30,7 @@ #include <asm/apic.h> #include <asm/timer.h> #include <asm/reboot.h> +#include <asm/nmi.h> struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -157,6 +158,26 @@ static unsigned char hv_get_nmi_reason(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Prior to WS2016, Debug-VM sends NMIs to all CPUs, which makes + * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle + * unknown NMI on the first CPU which gets it. + */ +static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) +{ + static atomic_t nmi_cpu = ATOMIC_INIT(-1); + + if (!unknown_nmi_panic) + return NMI_DONE; + + if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + return NMI_HANDLED; + + return NMI_DONE; +} +#endif + static void __init ms_hyperv_init_platform(void) { /* @@ -182,6 +203,9 @@ static void __init ms_hyperv_init_platform(void) printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); } + + register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, + "hv_nmi_unknown"); #endif if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1a8256dd6729..5b2f2306fbcc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1996,8 +1996,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f129a9af6357..b6b0077da1af 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE */ +#define DISABLE_BRANCH_PROFILING #include <linux/init.h> #include <linux/linkage.h> #include <linux/types.h> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index d470cf219a2d..4e5ac46adc9d 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include <linux/bootmem.h> #include <linux/kasan.h> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c6d6efed392a..7575f0798194 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -231,23 +231,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - 
dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 9e2ba5c6e1dd..f42e78de1e10 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -27,6 +27,12 @@ static bool xen_pvspin = true; static void xen_qlock_kick(int cpu) { + int irq = per_cpu(lock_kicker_irq, cpu); + + /* Don't kick if the target's kicker interrupt is not initialized. */ + if (irq == -1) + return; + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 0774799942e0..c6fee7437be4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -182,6 +182,9 @@ static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(WRITE_16, filter->write_ok); __set_bit(WRITE_LONG, filter->write_ok); __set_bit(WRITE_LONG_2, filter->write_ok); + __set_bit(WRITE_SAME, filter->write_ok); + __set_bit(WRITE_SAME_16, filter->write_ok); + __set_bit(WRITE_SAME_32, filter->write_ok); __set_bit(ERASE, filter->write_ok); __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok); __set_bit(MODE_SELECT, filter->write_ok); diff --git a/build.config.goldfish.arm b/build.config.goldfish.arm new file mode 100644 index 000000000000..866da9361b71 --- /dev/null +++ b/build.config.goldfish.arm @@ -0,0 +1,12 @@ +ARCH=arm +BRANCH=android-4.4 +CROSS_COMPILE=arm-linux-androidkernel- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/arm/arm-linux-androideabi-4.9/bin +FILES=" +arch/arm/boot/zImage +vmlinux +System.map +" diff --git a/build.config.goldfish.arm64 b/build.config.goldfish.arm64 new file mode 100644 index 000000000000..9c963cf4a3d8 --- /dev/null +++ b/build.config.goldfish.arm64 @@ -0,0 +1,12 @@ +ARCH=arm64 +BRANCH=android-4.4 +CROSS_COMPILE=aarch64-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin +FILES=" +arch/arm64/boot/Image +vmlinux +System.map +" diff --git a/build.config.goldfish.mips b/build.config.goldfish.mips new file mode 100644 index 000000000000..8af53d2c2940 --- /dev/null +++ b/build.config.goldfish.mips @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.mips64 b/build.config.goldfish.mips64 new file mode 100644 index 000000000000..2a33d36dc4c8 --- /dev/null +++ b/build.config.goldfish.mips64 @@ -0,0 +1,11 @@ +ARCH=mips +BRANCH=android-4.4 +CROSS_COMPILE=mips64el-linux-android- +DEFCONFIG=ranchu64_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/mips/mips64el-linux-android-4.9/bin +FILES=" +vmlinux +System.map +" diff --git a/build.config.goldfish.x86 b/build.config.goldfish.x86 new file mode 100644 index 000000000000..f86253f58d4d --- /dev/null +++ b/build.config.goldfish.x86 @@ -0,0 +1,12 @@ +ARCH=x86 
+BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=i386_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/build.config.goldfish.x86_64 b/build.config.goldfish.x86_64 new file mode 100644 index 000000000000..e1738861ec5c --- /dev/null +++ b/build.config.goldfish.x86_64 @@ -0,0 +1,12 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CROSS_COMPILE=x86_64-linux-android- +DEFCONFIG=x86_64_ranchu_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/crypto/Kconfig b/crypto/Kconfig index 7240821137fd..3240d394426c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -289,6 +289,24 @@ config CRYPTO_CBC CBC: Cipher Block Chaining mode This block cipher algorithm is required for IPSec. +config CRYPTO_HEH + tristate "HEH support" + select CRYPTO_CMAC + select CRYPTO_ECB + select CRYPTO_GF128MUL + select CRYPTO_MANAGER + select CRYPTO_POLY_HASH_ARM64_CE if ARM64 && KERNEL_MODE_NEON + help + HEH: Hash-Encrypt-Hash mode + HEH is a proposed block cipher mode of operation which extends the + strong pseudo-random permutation (SPRP) property of block ciphers to + arbitrary-length input strings. This provides a stronger notion of + security than existing block cipher modes of operation (e.g. CBC, CTR, + XTS), though it is usually less performant. Applications include disk + encryption and encryption of file names and contents. Currently, this + implementation only provides a symmetric cipher interface, so it can't + yet be used as an AEAD. + config CRYPTO_CTR tristate "CTR support" select CRYPTO_BLKCIPHER diff --git a/crypto/Makefile b/crypto/Makefile index 03e66097eb0c..8507d1fab3ac 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o +obj-$(CONFIG_CRYPTO_HEH) += heh.o obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o obj-$(CONFIG_CRYPTO_CTS) += cts.o obj-$(CONFIG_CRYPTO_LRW) += lrw.o diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 68a5ceaa04c8..8d8b3eeba725 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -184,7 +184,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index dca7bc87dad9..7bbfadc195a6 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -373,6 +373,27 @@ int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, } EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block); +/* + * This function allows ablkcipher algorithms to use the blkcipher_walk API to + * walk over their data. The specified crypto_ablkcipher tfm is used to + * initialize the struct blkcipher_walk, and the crypto_blkcipher specified in + * desc->tfm is never used so it can be left NULL. (Yes, this design is ugly, + * but it parallels blkcipher_aead_walk_virt_block() above. In the 4.10 kernel + * this is starting to be cleaned up...) 
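+ *
+ * A sketch of the intended calling pattern (mirroring the heh_tfm_blocks()
+ * caller added later in this patch; identifier names are illustrative):
+ *
+ *	struct blkcipher_desc desc = { .flags = req->base.flags };
+ *	struct blkcipher_walk walk;
+ *	unsigned int nbytes;
+ *
+ *	blkcipher_walk_init(&walk, dst_sgl, src_sgl, len);
+ *	err = blkcipher_ablkcipher_walk_virt(&desc, &walk, tfm);
+ *	while ((nbytes = walk.nbytes)) {
+ *		... process walk.src.virt.addr into walk.dst.virt.addr ...
+ *		err = blkcipher_walk_done(&desc, &walk, bytes_left);
+ *	}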
+ */ +int blkcipher_ablkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_ablkcipher *tfm) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + walk->walk_blocksize = crypto_ablkcipher_blocksize(tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_ablkcipher_ivsize(tfm); + walk->alignmask = crypto_ablkcipher_alignmask(tfm); + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_ablkcipher_walk_virt); + static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { diff --git a/crypto/cryptd.c b/crypto/cryptd.c index e7aa904cb20b..26a504db3f53 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -642,6 +642,7 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.halg.base.cra_flags = type; inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.statesize = salg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); inst->alg.halg.base.cra_init = cryptd_hash_init_tfm; diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 5276607c72d0..f3d9f6da0767 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -44,7 +44,7 @@ --------------------------------------------------------------------------- Issue 31/01/2006 - This file provides fast multiplication in GF(128) as required by several + This file provides fast multiplication in GF(2^128) as required by several cryptographic authentication modes */ @@ -88,37 +88,52 @@ q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ } -/* Given the value i in 0..255 as the byte overflow when a field element - in GHASH is multiplied by x^8, this function will return the values that - are generated in the lo 16-bit word of the field value by applying the - modular polynomial. The values lo_byte and hi_byte are returned via the - macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into - memory as required by a suitable definition of this macro operating on - the table above -*/ - -#define xx(p, q) 0x##p##q +/* + * Given a value i in 0..255 as the byte overflow when a field element + * in GF(2^128) is multiplied by x^8, the following macro returns the + * 16-bit value that must be XOR-ed into the low-degree end of the + * product to reduce it modulo the irreducible polynomial x^128 + x^7 + + * x^2 + x + 1. + * + * There are two versions of the macro, and hence two tables: one for + * the "be" convention where the highest-order bit is the coefficient of + * the highest-degree polynomial term, and one for the "le" convention + * where the highest-order bit is the coefficient of the lowest-degree + * polynomial term. In both cases the values are stored in CPU byte + * endianness such that the coefficients are ordered consistently across + * bytes, i.e. in the "be" table bits 15..0 of the stored value + * correspond to the coefficients of x^15..x^0, and in the "le" table + * bits 15..0 correspond to the coefficients of x^0..x^15. + * + * Therefore, provided that the appropriate byte endianness conversions + * are done by the multiplication functions (and these must be in place + * anyway to support both little endian and big endian CPUs), the "be" + * table can be used for multiplications of both "bbe" and "ble" + * elements, and the "le" table can be used for multiplications of both + * "lle" and "lbe" elements. + */ -#define xda_bbe(i) ( \ - (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ - (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? 
xx(08, 70) : 0) ^ \ - (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ - (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ +#define xda_be(i) ( \ + (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \ + (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \ + (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \ + (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \ ) -#define xda_lle(i) ( \ - (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ - (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ - (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ - (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ +#define xda_le(i) ( \ + (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \ + (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \ + (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \ + (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \ ) -static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); -static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); +static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le); +static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be); -/* These functions multiply a field element by x, by x^4 and by x^8 - * in the polynomial field representation. It uses 32-bit word operations - * to gain speed but compensates for machine endianess and hence works +/* + * The following functions multiply a field element by x or by x^8 in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work * correctly on both styles of machine. */ @@ -126,7 +141,7 @@ static void gf128mul_x_lle(be128 *r, const be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; + u64 _tt = gf128mul_table_le[(b << 7) & 0xff]; r->b = cpu_to_be64((b >> 1) | (a << 63)); r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); @@ -136,7 +151,7 @@ static void gf128mul_x_bbe(be128 *r, const be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[a >> 63]; + u64 _tt = gf128mul_table_be[a >> 63]; r->a = cpu_to_be64((a << 1) | (b >> 63)); r->b = cpu_to_be64((b << 1) ^ _tt); @@ -146,7 +161,7 @@ void gf128mul_x_ble(be128 *r, const be128 *x) { u64 a = le64_to_cpu(x->a); u64 b = le64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[b >> 63]; + u64 _tt = gf128mul_table_be[b >> 63]; r->a = cpu_to_le64((a << 1) ^ _tt); r->b = cpu_to_le64((b << 1) | (a >> 63)); @@ -157,7 +172,7 @@ static void gf128mul_x8_lle(be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_lle[b & 0xff]; + u64 _tt = gf128mul_table_le[b & 0xff]; x->b = cpu_to_be64((b >> 8) | (a << 56)); x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); @@ -167,12 +182,22 @@ static void gf128mul_x8_bbe(be128 *x) { u64 a = be64_to_cpu(x->a); u64 b = be64_to_cpu(x->b); - u64 _tt = gf128mul_table_bbe[a >> 56]; + u64 _tt = gf128mul_table_be[a >> 56]; x->a = cpu_to_be64((a << 8) | (b >> 56)); x->b = cpu_to_be64((b << 8) ^ _tt); } +static void gf128mul_x8_ble(be128 *x) +{ + u64 a = le64_to_cpu(x->b); + u64 b = le64_to_cpu(x->a); + u64 _tt = gf128mul_table_be[a >> 56]; + + x->b = cpu_to_le64((a << 8) | (b >> 56)); + x->a = cpu_to_le64((b << 8) ^ _tt); +} + void gf128mul_lle(be128 *r, const be128 *b) { be128 p[8]; @@ -249,9 +274,48 @@ void gf128mul_bbe(be128 *r, const be128 *b) } EXPORT_SYMBOL(gf128mul_bbe); +void gf128mul_ble(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; 
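+
+	/*
+	 * Computes r = r * b in the "ble" convention: p[i] = x^i * r is
+	 * precomputed below, then b is consumed one byte at a time from
+	 * byte 15 (highest degree) down to byte 0, multiplying the
+	 * accumulator by x^8 between bytes (Horner's rule over GF(2^128)).
+	 */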
+ + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_ble((be128 *)&p[i + 1], (be128 *)&p[i]); + + memset(r, 0, sizeof(*r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[15 - i]; + + if (ch & 0x80) + be128_xor(r, r, &p[7]); + if (ch & 0x40) + be128_xor(r, r, &p[6]); + if (ch & 0x20) + be128_xor(r, r, &p[5]); + if (ch & 0x10) + be128_xor(r, r, &p[4]); + if (ch & 0x08) + be128_xor(r, r, &p[3]); + if (ch & 0x04) + be128_xor(r, r, &p[2]); + if (ch & 0x02) + be128_xor(r, r, &p[1]); + if (ch & 0x01) + be128_xor(r, r, &p[0]); + + if (++i >= 16) + break; + + gf128mul_x8_ble(r); + } +} +EXPORT_SYMBOL(gf128mul_ble); + + /* This version uses 64k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key - value in GF(128). If we consider a GF(128) value in + value in GF(2^128). If we consider a GF(2^128) value in the buffer's lowest byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. But we also @@ -352,8 +416,8 @@ void gf128mul_free_64k(struct gf128mul_64k *t) int i; for (i = 0; i < 16; i++) - kfree(t->t[i]); - kfree(t); + kzfree(t->t[i]); + kzfree(t); } EXPORT_SYMBOL(gf128mul_free_64k); @@ -385,7 +449,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe); /* This version uses 4k bytes of table space. A 16 byte buffer has to be multiplied by a 16 byte key - value in GF(128). If we consider a GF(128) value in a + value in GF(2^128). If we consider a GF(2^128) value in a single byte, we can construct a table of the 256 16 byte values that result from the 256 values of this byte. This requires 4096 bytes. If we take the highest byte in @@ -443,6 +507,28 @@ out: } EXPORT_SYMBOL(gf128mul_init_4k_bbe); +struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_ble(&t->t[j + j], &t->t[j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_ble); + void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) { u8 *ap = (u8 *)a; @@ -473,5 +559,20 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) } EXPORT_SYMBOL(gf128mul_4k_bbe); +void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 15; + + *r = t->t[ap[15]]; + while (i--) { + gf128mul_x8_ble(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_ble); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); diff --git a/crypto/heh.c b/crypto/heh.c new file mode 100644 index 000000000000..10c00aaf797e --- /dev/null +++ b/crypto/heh.c @@ -0,0 +1,1033 @@ +/* + * HEH: Hash-Encrypt-Hash mode + * + * Copyright (c) 2016 Google Inc. + * + * Authors: + * Alex Cope <alexcope@google.com> + * Eric Biggers <ebiggers@google.com> + */ + +/* + * Hash-Encrypt-Hash (HEH) is a proposed block cipher mode of operation which + * extends the strong pseudo-random permutation (SPRP) property of block ciphers + * (e.g. AES) to arbitrary length input strings. It uses two keyed invertible + * hash functions with a layer of ECB encryption applied in-between. The + * algorithm is specified by the following Internet Draft: + * + * https://tools.ietf.org/html/draft-cope-heh-01 + * + * Although HEH can be used as either a regular symmetric cipher or as an AEAD, + * currently this module only provides it as a symmetric cipher. 
Additionally, + * only 16-byte nonces are supported. + */ + +#include <crypto/gf128mul.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> +#include "internal.h" + +/* + * The block size is the size of GF(2^128) elements and also the required block + * size of the underlying block cipher. + */ +#define HEH_BLOCK_SIZE 16 + +struct heh_instance_ctx { + struct crypto_shash_spawn cmac; + struct crypto_shash_spawn poly_hash; + struct crypto_skcipher_spawn ecb; +}; + +struct heh_tfm_ctx { + struct crypto_shash *cmac; + struct crypto_shash *poly_hash; /* keyed with tau_key */ + struct crypto_ablkcipher *ecb; +}; + +struct heh_cmac_data { + u8 nonce[HEH_BLOCK_SIZE]; + __le32 nonce_length; + __le32 aad_length; + __le32 message_length; + __le32 padding; +}; + +struct heh_req_ctx { /* aligned to alignmask */ + be128 beta1_key; + be128 beta2_key; + union { + struct { + struct heh_cmac_data data; + struct shash_desc desc; + /* + crypto_shash_descsize(cmac) */ + } cmac; + struct { + struct shash_desc desc; + /* + crypto_shash_descsize(poly_hash) */ + } poly_hash; + struct { + u8 keystream[HEH_BLOCK_SIZE]; + u8 tmp[HEH_BLOCK_SIZE]; + struct scatterlist tmp_sgl[2]; + struct ablkcipher_request req; + /* + crypto_ablkcipher_reqsize(ecb) */ + } ecb; + } u; +}; + +/* + * Get the offset in bytes to the last full block, or equivalently the length of + * all full blocks excluding the last + */ +static inline unsigned int get_tail_offset(unsigned int len) +{ + len -= len % HEH_BLOCK_SIZE; + return len - HEH_BLOCK_SIZE; +} + +static inline struct heh_req_ctx *heh_req_ctx(struct ablkcipher_request *req) +{ + unsigned int alignmask = crypto_ablkcipher_alignmask( + crypto_ablkcipher_reqtfm(req)); + + return (void *)PTR_ALIGN((u8 *)ablkcipher_request_ctx(req), + alignmask + 1); +} + +static inline void async_done(struct crypto_async_request *areq, int err, + int (*next_step)(struct ablkcipher_request *, + u32)) +{ + struct ablkcipher_request *req = areq->data; + + if (err) + goto out; + + err = next_step(req, req->base.flags & ~CRYPTO_TFM_REQ_MAY_SLEEP); + if (err == -EINPROGRESS || + (err == -EBUSY && (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) + return; +out: + ablkcipher_request_complete(req, err); +} + +/* + * Generate the per-message "beta" keys used by the hashing layers of HEH. The + * first beta key is the CMAC of the nonce, the additional authenticated data + * (AAD), and the lengths in bytes of the nonce, AAD, and message. The nonce + * and AAD are each zero-padded to the next 16-byte block boundary, and the + * lengths are serialized as 4-byte little endian integers and zero-padded to + * the next 16-byte block boundary. + * The second beta key is the first one interpreted as an element in GF(2^128) + * and multiplied by x. + * + * Note that because the nonce and AAD may, in general, be variable-length, the + * key generation must be done by a pseudo-random function (PRF) on + * variable-length inputs. CBC-MAC does not satisfy this, as it is only a PRF + * on fixed-length inputs. CMAC remedies this flaw. Including the lengths of + * the nonce, AAD, and message is also critical to avoid collisions. + * + * That being said, this implementation does not yet operate as an AEAD and + * therefore there is never any AAD, nor are variable-length nonces supported. 
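+ *
+ * Concretely, for the only case this module currently generates (a 16-byte
+ * nonce, no AAD), the CMAC input is the 32-byte buffer laid out by
+ * struct heh_cmac_data above:
+ *
+ *	nonce[0..15] || le32(16) || le32(0) || le32(message_length) || le32(0)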
+ */ +static int generate_betas(struct ablkcipher_request *req, + be128 *beta1_key, be128 *beta2_key) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct heh_tfm_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct heh_req_ctx *rctx = heh_req_ctx(req); + struct heh_cmac_data *data = &rctx->u.cmac.data; + struct shash_desc *desc = &rctx->u.cmac.desc; + int err; + + BUILD_BUG_ON(sizeof(*data) != 2 * HEH_BLOCK_SIZE); + memcpy(data->nonce, req->info, HEH_BLOCK_SIZE); + data->nonce_length = cpu_to_le32(HEH_BLOCK_SIZE); + data->aad_length = cpu_to_le32(0); + data->message_length = cpu_to_le32(req->nbytes); + data->padding = cpu_to_le32(0); + + desc->tfm = ctx->cmac; + desc->flags = req->base.flags; + + err = crypto_shash_digest(desc, (const u8 *)data, sizeof(*data), + (u8 *)beta1_key); + if (err) + return err; + + gf128mul_x_ble(beta2_key, beta1_key); + return 0; +} + +/*****************************************************************************/ + +/* + * This is the generic version of poly_hash. It does the GF(2^128) + * multiplication by 'tau_key' using a precomputed table, without using any + * special CPU instructions. On some platforms, an accelerated version (with + * higher cra_priority) may be used instead. + */ + +struct poly_hash_tfm_ctx { + struct gf128mul_4k *tau_key; +}; + +struct poly_hash_desc_ctx { + be128 digest; + unsigned int count; +}; + +static int poly_hash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct poly_hash_tfm_ctx *tctx = crypto_shash_ctx(tfm); + be128 key128; + + if (keylen != HEH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + if (tctx->tau_key) + gf128mul_free_4k(tctx->tau_key); + memcpy(&key128, key, HEH_BLOCK_SIZE); + tctx->tau_key = gf128mul_init_4k_ble(&key128); + if (!tctx->tau_key) + return -ENOMEM; + return 0; +} + +static int poly_hash_init(struct shash_desc *desc) +{ + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->digest = (be128) { 0 }; + ctx->count = 0; + return 0; +} + +static int poly_hash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct poly_hash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % HEH_BLOCK_SIZE; + u8 *dst = (u8 *)&ctx->digest + partial; + + ctx->count += len; + + /* Finishing at least one block? */ + if (partial + len >= HEH_BLOCK_SIZE) { + + if (partial) { + /* Finish the pending block. */ + unsigned int n = HEH_BLOCK_SIZE - partial; + + len -= n; + do { + *dst++ ^= *src++; + } while (--n); + + gf128mul_4k_ble(&ctx->digest, tctx->tau_key); + } + + /* Process zero or more full blocks. */ + while (len >= HEH_BLOCK_SIZE) { + be128 coeff; + + memcpy(&coeff, src, HEH_BLOCK_SIZE); + be128_xor(&ctx->digest, &ctx->digest, &coeff); + src += HEH_BLOCK_SIZE; + len -= HEH_BLOCK_SIZE; + gf128mul_4k_ble(&ctx->digest, tctx->tau_key); + } + dst = (u8 *)&ctx->digest; + } + + /* Continue adding the next block to 'digest'. */ + while (len--) + *dst++ ^= *src++; + return 0; +} + +static int poly_hash_final(struct shash_desc *desc, u8 *out) +{ + struct poly_hash_desc_ctx *ctx = shash_desc_ctx(desc); + + /* Finish the last block if needed. 
*/ + if (ctx->count % HEH_BLOCK_SIZE) { + struct poly_hash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + + gf128mul_4k_ble(&ctx->digest, tctx->tau_key); + } + + memcpy(out, &ctx->digest, HEH_BLOCK_SIZE); + return 0; +} + +static void poly_hash_exit(struct crypto_tfm *tfm) +{ + struct poly_hash_tfm_ctx *tctx = crypto_tfm_ctx(tfm); + + gf128mul_free_4k(tctx->tau_key); +} + +static struct shash_alg poly_hash_alg = { + .digestsize = HEH_BLOCK_SIZE, + .init = poly_hash_init, + .update = poly_hash_update, + .final = poly_hash_final, + .setkey = poly_hash_setkey, + .descsize = sizeof(struct poly_hash_desc_ctx), + .base = { + .cra_name = "poly_hash", + .cra_driver_name = "poly_hash-generic", + .cra_priority = 100, + .cra_ctxsize = sizeof(struct poly_hash_tfm_ctx), + .cra_exit = poly_hash_exit, + .cra_module = THIS_MODULE, + }, +}; + +/*****************************************************************************/ + +/* + * Split the message into 16-byte blocks, padding out the last block, and use + * the blocks as coefficients in the evaluation of a polynomial over GF(2^128) + * at the secret point 'tau_key'. For ease of implementing the higher-level + * heh_hash_inv() function, the constant and degree-1 coefficients are swapped + * if there is a partial block. + * + * Mathematically, compute: + * if (no partial block) + * k^{N-1} * m_0 + ... + k * m_{N-2} + m_{N-1} + * else if (partial block) + * k^N * m_0 + ... + k^2 * m_{N-2} + k * m_N + m_{N-1} + * + * where: + * k is tau_key + * N is the number of full blocks in the message + * m_i is the i-th full block in the message for i = 0 to N-1 inclusive + * m_N is the partial block of the message zero-padded up to 16 bytes + * + * Note that most of this is now separated out into its own keyed hash + * algorithm, to allow optimized implementations. However, we still handle the + * swapping of the last two coefficients here in the HEH template because this + * simplifies the poly_hash algorithms: they don't have to buffer an extra + * block, don't have to duplicate as much code, and are more similar to GHASH. 
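+ *
+ * A small worked case: for two full blocks m_0, m_1 plus a zero-padded
+ * partial block m_2 (so N = 2), the result is
+ *
+ *	k^2 * m_0 + k * m_2 + m_1
+ *
+ * i.e. the constant and degree-1 coefficients carry the swapped m_1 and m_2.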
+ */ +static int poly_hash(struct ablkcipher_request *req, struct scatterlist *sgl, + be128 *hash) +{ + struct heh_req_ctx *rctx = heh_req_ctx(req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct heh_tfm_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct shash_desc *desc = &rctx->u.poly_hash.desc; + unsigned int tail_offset = get_tail_offset(req->nbytes); + unsigned int tail_len = req->nbytes - tail_offset; + be128 tail[2]; + unsigned int i, n; + struct sg_mapping_iter miter; + int err; + + desc->tfm = ctx->poly_hash; + desc->flags = req->base.flags; + + /* Handle all full blocks except the last */ + err = crypto_shash_init(desc); + sg_miter_start(&miter, sgl, sg_nents(sgl), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + for (i = 0; i < tail_offset && !err; i += n) { + sg_miter_next(&miter); + n = min_t(unsigned int, miter.length, tail_offset - i); + err = crypto_shash_update(desc, miter.addr, n); + } + sg_miter_stop(&miter); + if (err) + return err; + + /* Handle the last full block and the partial block */ + scatterwalk_map_and_copy(tail, sgl, tail_offset, tail_len, 0); + + if (tail_len != HEH_BLOCK_SIZE) { + /* handle the partial block */ + memset((u8 *)tail + tail_len, 0, sizeof(tail) - tail_len); + err = crypto_shash_update(desc, (u8 *)&tail[1], HEH_BLOCK_SIZE); + if (err) + return err; + } + err = crypto_shash_final(desc, (u8 *)hash); + if (err) + return err; + be128_xor(hash, hash, &tail[0]); + return 0; +} + +/* + * Transform all full blocks except the last. + * This is used by both the hash and inverse hash phases. + */ +static int heh_tfm_blocks(struct ablkcipher_request *req, + struct scatterlist *src_sgl, + struct scatterlist *dst_sgl, unsigned int len, + const be128 *hash, const be128 *beta_key) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct blkcipher_desc desc = { .flags = req->base.flags }; + struct blkcipher_walk walk; + be128 e = *beta_key; + int err; + unsigned int nbytes; + + blkcipher_walk_init(&walk, dst_sgl, src_sgl, len); + + err = blkcipher_ablkcipher_walk_virt(&desc, &walk, tfm); + + while ((nbytes = walk.nbytes)) { + const be128 *src = (be128 *)walk.src.virt.addr; + be128 *dst = (be128 *)walk.dst.virt.addr; + + do { + gf128mul_x_ble(&e, &e); + be128_xor(dst, src, hash); + be128_xor(dst, dst, &e); + src++; + dst++; + } while ((nbytes -= HEH_BLOCK_SIZE) >= HEH_BLOCK_SIZE); + err = blkcipher_walk_done(&desc, &walk, nbytes); + } + return err; +} + +/* + * The hash phase of HEH. Given a message, compute: + * + * (m_0 + H, ..., m_{N-2} + H, H, m_N) + (xb, x^2b, ..., x^{N-1}b, b, 0) + * + * where: + * N is the number of full blocks in the message + * m_i is the i-th full block in the message for i = 0 to N-1 inclusive + * m_N is the unpadded partial block, possibly empty + * H is the poly_hash() of the message, keyed by tau_key + * b is beta_key + * x is the element x in our representation of GF(2^128) + * + * Note that the partial block remains unchanged, but it does affect the result + * of poly_hash() and therefore the transformation of all the full blocks. 
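+ *
+ * Block by block, heh_tfm_blocks() above computes this as
+ *
+ *	dst_i = m_i XOR H XOR x^{i+1}*b
+ *
+ * with the running power x^{i+1}*b kept in the local accumulator 'e'.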
+ */ +static int heh_hash(struct ablkcipher_request *req, const be128 *beta_key) +{ + be128 hash; + unsigned int tail_offset = get_tail_offset(req->nbytes); + unsigned int partial_len = req->nbytes % HEH_BLOCK_SIZE; + int err; + + /* poly_hash() the full message including the partial block */ + err = poly_hash(req, req->src, &hash); + if (err) + return err; + + /* Transform all full blocks except the last */ + err = heh_tfm_blocks(req, req->src, req->dst, tail_offset, &hash, + beta_key); + if (err) + return err; + + /* Set the last full block to hash XOR beta_key */ + be128_xor(&hash, &hash, beta_key); + scatterwalk_map_and_copy(&hash, req->dst, tail_offset, HEH_BLOCK_SIZE, + 1); + + /* Copy the partial block if needed */ + if (partial_len != 0 && req->src != req->dst) { + unsigned int offs = tail_offset + HEH_BLOCK_SIZE; + + scatterwalk_map_and_copy(&hash, req->src, offs, partial_len, 0); + scatterwalk_map_and_copy(&hash, req->dst, offs, partial_len, 1); + } + return 0; +} + +/* + * The inverse hash phase of HEH. This undoes the result of heh_hash(). + */ +static int heh_hash_inv(struct ablkcipher_request *req, const be128 *beta_key) +{ + be128 hash; + be128 tmp; + struct scatterlist tmp_sgl[2]; + struct scatterlist *tail_sgl; + unsigned int tail_offset = get_tail_offset(req->nbytes); + struct scatterlist *sgl = req->dst; + int err; + + /* + * The last full block was computed as hash XOR beta_key, so XOR it with + * beta_key to recover hash. + */ + tail_sgl = scatterwalk_ffwd(tmp_sgl, sgl, tail_offset); + scatterwalk_map_and_copy(&hash, tail_sgl, 0, HEH_BLOCK_SIZE, 0); + be128_xor(&hash, &hash, beta_key); + + /* Transform all full blocks except the last */ + err = heh_tfm_blocks(req, sgl, sgl, tail_offset, &hash, beta_key); + if (err) + return err; + + /* + * Recover the last full block. We know 'hash', i.e. the poly_hash() of + * the original message. The last full block was the constant term + * of the polynomial. To recover the last full block, temporarily zero + * it, compute the poly_hash(), and take the difference from 'hash'. 
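+	 * Since poly_hash() is linear in each coefficient and the last full
+	 * block enters with coefficient 1, hashing the message with that
+	 * block zeroed yields hash XOR m_last, so one more XOR below
+	 * recovers m_last.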
+         */
+        memset(&tmp, 0, sizeof(tmp));
+        scatterwalk_map_and_copy(&tmp, tail_sgl, 0, HEH_BLOCK_SIZE, 1);
+        err = poly_hash(req, sgl, &tmp);
+        if (err)
+                return err;
+        be128_xor(&tmp, &tmp, &hash);
+        scatterwalk_map_and_copy(&tmp, tail_sgl, 0, HEH_BLOCK_SIZE, 1);
+        return 0;
+}
+
+static int heh_hash_inv_step(struct ablkcipher_request *req, u32 flags)
+{
+        struct heh_req_ctx *rctx = heh_req_ctx(req);
+
+        return heh_hash_inv(req, &rctx->beta2_key);
+}
+
+static int heh_ecb_step_3(struct ablkcipher_request *req, u32 flags)
+{
+        struct heh_req_ctx *rctx = heh_req_ctx(req);
+        u8 partial_block[HEH_BLOCK_SIZE] __aligned(__alignof__(u32));
+        unsigned int tail_offset = get_tail_offset(req->nbytes);
+        unsigned int partial_offset = tail_offset + HEH_BLOCK_SIZE;
+        unsigned int partial_len = req->nbytes - partial_offset;
+
+        /*
+         * Extract the pad in req->dst at tail_offset, and XOR the partial
+         * block with it to create the encrypted partial block.
+         */
+        scatterwalk_map_and_copy(rctx->u.ecb.keystream, req->dst, tail_offset,
+                                 HEH_BLOCK_SIZE, 0);
+        scatterwalk_map_and_copy(partial_block, req->dst, partial_offset,
+                                 partial_len, 0);
+        crypto_xor(partial_block, rctx->u.ecb.keystream, partial_len);
+
+        /*
+         * Store the encrypted final block and partial block back in dst_sg.
+         */
+        scatterwalk_map_and_copy(&rctx->u.ecb.tmp, req->dst, tail_offset,
+                                 HEH_BLOCK_SIZE, 1);
+        scatterwalk_map_and_copy(partial_block, req->dst, partial_offset,
+                                 partial_len, 1);
+
+        return heh_hash_inv_step(req, flags);
+}
+
+static void heh_ecb_step_2_done(struct crypto_async_request *areq, int err)
+{
+        return async_done(areq, err, heh_ecb_step_3);
+}
+
+static int heh_ecb_step_2(struct ablkcipher_request *req, u32 flags)
+{
+        struct heh_req_ctx *rctx = heh_req_ctx(req);
+        unsigned int partial_len = req->nbytes % HEH_BLOCK_SIZE;
+        struct scatterlist *tmp_sgl;
+        int err;
+        unsigned int tail_offset = get_tail_offset(req->nbytes);
+
+        if (partial_len == 0)
+                return heh_hash_inv_step(req, flags);
+
+        /*
+         * Extract the final full block, store it in tmp, and then XOR that
+         * with the value saved in u.ecb.keystream.
+         */
+        scatterwalk_map_and_copy(rctx->u.ecb.tmp, req->dst, tail_offset,
+                                 HEH_BLOCK_SIZE, 0);
+        crypto_xor(rctx->u.ecb.keystream, rctx->u.ecb.tmp, HEH_BLOCK_SIZE);
+
+        /*
+         * Encrypt the value in rctx->u.ecb.keystream to create the pad for
+         * the partial block.
+         * We cannot encrypt stack buffers, so re-use the dst_sg to do this
+         * encryption to avoid a malloc. The value at tail_offset is stored in
+         * tmp, and will be restored later.
+         */
+        scatterwalk_map_and_copy(rctx->u.ecb.keystream, req->dst, tail_offset,
+                                 HEH_BLOCK_SIZE, 1);
+        tmp_sgl = scatterwalk_ffwd(rctx->u.ecb.tmp_sgl, req->dst, tail_offset);
+        ablkcipher_request_set_callback(&rctx->u.ecb.req, flags,
+                                        heh_ecb_step_2_done, req);
+        ablkcipher_request_set_crypt(&rctx->u.ecb.req, tmp_sgl, tmp_sgl,
+                                     HEH_BLOCK_SIZE, NULL);
+        err = crypto_ablkcipher_encrypt(&rctx->u.ecb.req);
+        if (err)
+                return err;
+        return heh_ecb_step_3(req, flags);
+}
+
+static void heh_ecb_full_done(struct crypto_async_request *areq, int err)
+{
+        return async_done(areq, err, heh_ecb_step_2);
+}
+
+/*
+ * The encrypt phase of HEH. This uses ECB encryption, with special handling
+ * for the partial block at the end if any. The source data is already in
+ * req->dst, so the encryption happens in-place.
+ *
+ * After the encrypt phase we continue on to the inverse hash phase. The
+ * function calls are chained to support asynchronous ECB algorithms.
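+ *
+ * A sketch of that chaining convention (next_step stands for whichever step
+ * follows; this illustrates the code below rather than adding to it):
+ *
+ *   err = crypto_ablkcipher_encrypt(ecb_req);
+ *   if (err)
+ *           return err;  /* includes -EINPROGRESS; callback resumes chain */
+ *   return next_step(req, flags);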
+ */ +static int heh_ecb(struct ablkcipher_request *req, bool decrypt) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct heh_tfm_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct heh_req_ctx *rctx = heh_req_ctx(req); + struct ablkcipher_request *ecb_req = &rctx->u.ecb.req; + unsigned int tail_offset = get_tail_offset(req->nbytes); + unsigned int full_len = tail_offset + HEH_BLOCK_SIZE; + int err; + + /* + * Save the last full block before it is encrypted/decrypted. This will + * be used later to encrypt/decrypt the partial block + */ + scatterwalk_map_and_copy(rctx->u.ecb.keystream, req->dst, tail_offset, + HEH_BLOCK_SIZE, 0); + + /* Encrypt/decrypt all full blocks */ + ablkcipher_request_set_tfm(ecb_req, ctx->ecb); + ablkcipher_request_set_callback(ecb_req, req->base.flags, + heh_ecb_full_done, req); + ablkcipher_request_set_crypt(ecb_req, req->dst, req->dst, full_len, + NULL); + if (decrypt) + err = crypto_ablkcipher_decrypt(ecb_req); + else + err = crypto_ablkcipher_encrypt(ecb_req); + if (err) + return err; + + return heh_ecb_step_2(req, req->base.flags); +} + +static int heh_crypt(struct ablkcipher_request *req, bool decrypt) +{ + struct heh_req_ctx *rctx = heh_req_ctx(req); + int err; + + /* Inputs must be at least one full block */ + if (req->nbytes < HEH_BLOCK_SIZE) + return -EINVAL; + + err = generate_betas(req, &rctx->beta1_key, &rctx->beta2_key); + if (err) + return err; + + if (decrypt) + swap(rctx->beta1_key, rctx->beta2_key); + + err = heh_hash(req, &rctx->beta1_key); + if (err) + return err; + + return heh_ecb(req, decrypt); +} + +static int heh_encrypt(struct ablkcipher_request *req) +{ + return heh_crypt(req, false); +} + +static int heh_decrypt(struct ablkcipher_request *req) +{ + return heh_crypt(req, true); +} + +static int heh_setkey(struct crypto_ablkcipher *parent, const u8 *key, + unsigned int keylen) +{ + struct heh_tfm_ctx *ctx = crypto_ablkcipher_ctx(parent); + struct crypto_shash *cmac = ctx->cmac; + struct crypto_ablkcipher *ecb = ctx->ecb; + SHASH_DESC_ON_STACK(desc, cmac); + u8 *derived_keys; + u8 digest[HEH_BLOCK_SIZE]; + unsigned int i; + int err; + + /* set prf_key = key */ + crypto_shash_clear_flags(cmac, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(cmac, crypto_ablkcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(cmac, key, keylen); + crypto_ablkcipher_set_flags(parent, crypto_shash_get_flags(cmac) & + CRYPTO_TFM_RES_MASK); + if (err) + return err; + + /* + * Generate tau_key and ecb_key as follows: + * tau_key = cmac(prf_key, 0x00...01) + * ecb_key = cmac(prf_key, 0x00...02) || cmac(prf_key, 0x00...03) || ... 
+ * truncated to keylen bytes
+ */
+        derived_keys = kzalloc(round_up(HEH_BLOCK_SIZE + keylen,
+                                        HEH_BLOCK_SIZE), GFP_KERNEL);
+        if (!derived_keys)
+                return -ENOMEM;
+        desc->tfm = cmac;
+        desc->flags = (crypto_shash_get_flags(cmac) & CRYPTO_TFM_REQ_MASK);
+        for (i = 0; i < keylen + HEH_BLOCK_SIZE; i += HEH_BLOCK_SIZE) {
+                derived_keys[i + HEH_BLOCK_SIZE - 1] =
+                        0x01 + i / HEH_BLOCK_SIZE;
+                err = crypto_shash_digest(desc, derived_keys + i,
+                                          HEH_BLOCK_SIZE, digest);
+                if (err)
+                        goto out;
+                memcpy(derived_keys + i, digest, HEH_BLOCK_SIZE);
+        }
+
+        err = crypto_shash_setkey(ctx->poly_hash, derived_keys, HEH_BLOCK_SIZE);
+        if (err)
+                goto out;
+
+        crypto_ablkcipher_clear_flags(ecb, CRYPTO_TFM_REQ_MASK);
+        crypto_ablkcipher_set_flags(ecb, crypto_ablkcipher_get_flags(parent) &
+                                    CRYPTO_TFM_REQ_MASK);
+        err = crypto_ablkcipher_setkey(ecb, derived_keys + HEH_BLOCK_SIZE,
+                                       keylen);
+        crypto_ablkcipher_set_flags(parent, crypto_ablkcipher_get_flags(ecb) &
+                                    CRYPTO_TFM_RES_MASK);
+out:
+        kzfree(derived_keys);
+        return err;
+}
+
+static int heh_init_tfm(struct crypto_tfm *tfm)
+{
+        struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+        struct heh_instance_ctx *ictx = crypto_instance_ctx(inst);
+        struct heh_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+        struct crypto_shash *cmac;
+        struct crypto_shash *poly_hash;
+        struct crypto_ablkcipher *ecb;
+        unsigned int reqsize;
+        int err;
+
+        cmac = crypto_spawn_shash(&ictx->cmac);
+        if (IS_ERR(cmac))
+                return PTR_ERR(cmac);
+
+        poly_hash = crypto_spawn_shash(&ictx->poly_hash);
+        err = PTR_ERR(poly_hash);
+        if (IS_ERR(poly_hash))
+                goto err_free_cmac;
+
+        ecb = crypto_spawn_skcipher(&ictx->ecb);
+        err = PTR_ERR(ecb);
+        if (IS_ERR(ecb))
+                goto err_free_poly_hash;
+
+        ctx->cmac = cmac;
+        ctx->poly_hash = poly_hash;
+        ctx->ecb = ecb;
+
+        reqsize = crypto_tfm_alg_alignmask(tfm) &
+                  ~(crypto_tfm_ctx_alignment() - 1);
+        reqsize += max3(offsetof(struct heh_req_ctx, u.cmac.desc) +
+                        sizeof(struct shash_desc) +
+                        crypto_shash_descsize(cmac),
+                        offsetof(struct heh_req_ctx, u.poly_hash.desc) +
+                        sizeof(struct shash_desc) +
+                        crypto_shash_descsize(poly_hash),
+                        offsetof(struct heh_req_ctx, u.ecb.req) +
+                        sizeof(struct ablkcipher_request) +
+                        crypto_ablkcipher_reqsize(ecb));
+        tfm->crt_ablkcipher.reqsize = reqsize;
+
+        return 0;
+
+err_free_poly_hash:
+        crypto_free_shash(poly_hash);
+err_free_cmac:
+        crypto_free_shash(cmac);
+        return err;
+}
+
+static void heh_exit_tfm(struct crypto_tfm *tfm)
+{
+        struct heh_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+        crypto_free_shash(ctx->cmac);
+        crypto_free_shash(ctx->poly_hash);
+        crypto_free_ablkcipher(ctx->ecb);
+}
+
+static void heh_free_instance(struct crypto_instance *inst)
+{
+        struct heh_instance_ctx *ctx = crypto_instance_ctx(inst);
+
+        crypto_drop_shash(&ctx->cmac);
+        crypto_drop_shash(&ctx->poly_hash);
+        crypto_drop_skcipher(&ctx->ecb);
+        kfree(inst);
+}
+
+/*
+ * Create an instance of HEH as an ablkcipher.
+ *
+ * This relies on underlying CMAC and ECB algorithms, usually cmac(aes) and
+ * ecb(aes). For performance reasons we support asynchronous ECB algorithms.
+ * However, we do not yet support asynchronous CMAC algorithms because CMAC is
+ * only used on a small fixed amount of data per request, independent of the
+ * request length. This would change if AEAD or variable-length nonce support
+ * were to be exposed.
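+ *
+ * For example, instantiating "heh(aes)" resolves the components to
+ * "cmac(aes)", "poly_hash", and "ecb(aes)"; see heh_create() below.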
+ */ +static int heh_create_common(struct crypto_template *tmpl, struct rtattr **tb, + const char *full_name, const char *cmac_name, + const char *poly_hash_name, const char *ecb_name) +{ + struct crypto_attr_type *algt; + struct crypto_instance *inst; + struct heh_instance_ctx *ctx; + struct shash_alg *cmac; + struct shash_alg *poly_hash; + struct crypto_alg *ecb; + int err; + + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + /* User must be asking for something compatible with ablkcipher */ + if ((algt->type ^ CRYPTO_ALG_TYPE_ABLKCIPHER) & algt->mask) + return -EINVAL; + + /* Allocate the ablkcipher instance */ + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = crypto_instance_ctx(inst); + + /* Set up the cmac spawn */ + ctx->cmac.base.inst = inst; + err = crypto_grab_shash(&ctx->cmac, cmac_name, 0, 0); + if (err) + goto err_free_inst; + cmac = crypto_spawn_shash_alg(&ctx->cmac); + err = -EINVAL; + if (cmac->digestsize != HEH_BLOCK_SIZE) + goto err_drop_cmac; + + /* Set up the poly_hash spawn */ + ctx->poly_hash.base.inst = inst; + err = crypto_grab_shash(&ctx->poly_hash, poly_hash_name, 0, 0); + if (err) + goto err_drop_cmac; + poly_hash = crypto_spawn_shash_alg(&ctx->poly_hash); + err = -EINVAL; + if (poly_hash->digestsize != HEH_BLOCK_SIZE) + goto err_drop_poly_hash; + + /* Set up the ecb spawn */ + ctx->ecb.base.inst = inst; + err = crypto_grab_skcipher(&ctx->ecb, ecb_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + if (err) + goto err_drop_poly_hash; + ecb = crypto_skcipher_spawn_alg(&ctx->ecb); + + /* HEH only supports block ciphers with 16 byte block size */ + err = -EINVAL; + if (ecb->cra_blocksize != HEH_BLOCK_SIZE) + goto err_drop_ecb; + + /* The underlying "ECB" algorithm must not require an IV */ + err = -EINVAL; + if ((ecb->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_BLKCIPHER) { + if (ecb->cra_blkcipher.ivsize != 0) + goto err_drop_ecb; + } else { + if (ecb->cra_ablkcipher.ivsize != 0) + goto err_drop_ecb; + } + + /* Set the instance names */ + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "heh_base(%s,%s,%s)", cmac->base.cra_driver_name, + poly_hash->base.cra_driver_name, + ecb->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto err_drop_ecb; + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, + "%s", full_name) >= CRYPTO_MAX_ALG_NAME) + goto err_drop_ecb; + + /* Finish initializing the instance */ + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + (ecb->cra_flags & CRYPTO_ALG_ASYNC); + inst->alg.cra_blocksize = HEH_BLOCK_SIZE; + inst->alg.cra_ctxsize = sizeof(struct heh_tfm_ctx); + inst->alg.cra_alignmask = ecb->cra_alignmask | (__alignof__(be128) - 1); + inst->alg.cra_priority = ecb->cra_priority; + inst->alg.cra_type = &crypto_ablkcipher_type; + inst->alg.cra_init = heh_init_tfm; + inst->alg.cra_exit = heh_exit_tfm; + + inst->alg.cra_ablkcipher.setkey = heh_setkey; + inst->alg.cra_ablkcipher.encrypt = heh_encrypt; + inst->alg.cra_ablkcipher.decrypt = heh_decrypt; + if ((ecb->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_BLKCIPHER) { + inst->alg.cra_ablkcipher.min_keysize = ecb->cra_blkcipher.min_keysize; + inst->alg.cra_ablkcipher.max_keysize = ecb->cra_blkcipher.max_keysize; + } else { + inst->alg.cra_ablkcipher.min_keysize = ecb->cra_ablkcipher.min_keysize; + inst->alg.cra_ablkcipher.max_keysize = ecb->cra_ablkcipher.max_keysize; + } + inst->alg.cra_ablkcipher.ivsize = HEH_BLOCK_SIZE; + 
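+        /*
+         * Note: the alignmask set above is intended to guarantee that
+         * buffers walked in heh_tfm_blocks() can be cast to be128.
+         */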
+ /* Register the instance */ + err = crypto_register_instance(tmpl, inst); + if (err) + goto err_drop_ecb; + return 0; + +err_drop_ecb: + crypto_drop_skcipher(&ctx->ecb); +err_drop_poly_hash: + crypto_drop_shash(&ctx->poly_hash); +err_drop_cmac: + crypto_drop_shash(&ctx->cmac); +err_free_inst: + kfree(inst); + return err; +} + +static int heh_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + const char *cipher_name; + char full_name[CRYPTO_MAX_ALG_NAME]; + char cmac_name[CRYPTO_MAX_ALG_NAME]; + char ecb_name[CRYPTO_MAX_ALG_NAME]; + + /* Get the name of the requested block cipher (e.g. aes) */ + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "heh(%s)", cipher_name) >= + CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + if (snprintf(cmac_name, CRYPTO_MAX_ALG_NAME, "cmac(%s)", cipher_name) >= + CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= + CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + return heh_create_common(tmpl, tb, full_name, cmac_name, "poly_hash", + ecb_name); +} + +static struct crypto_template heh_tmpl = { + .name = "heh", + .create = heh_create, + .free = heh_free_instance, + .module = THIS_MODULE, +}; + +static int heh_base_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + char full_name[CRYPTO_MAX_ALG_NAME]; + const char *cmac_name; + const char *poly_hash_name; + const char *ecb_name; + + cmac_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cmac_name)) + return PTR_ERR(cmac_name); + + poly_hash_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(poly_hash_name)) + return PTR_ERR(poly_hash_name); + + ecb_name = crypto_attr_alg_name(tb[3]); + if (IS_ERR(ecb_name)) + return PTR_ERR(ecb_name); + + if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "heh_base(%s,%s,%s)", + cmac_name, poly_hash_name, ecb_name) >= + CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + return heh_create_common(tmpl, tb, full_name, cmac_name, poly_hash_name, + ecb_name); +} + +/* + * If HEH is instantiated as "heh_base" instead of "heh", then specific + * implementations of cmac, poly_hash, and ecb can be specified instead of just + * the cipher. 
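+ *
+ * For example (the ECB driver name below is hypothetical), requesting
+ * "heh_base(cmac(aes),poly_hash,ecb-aes-foo)" binds HEH to that exact
+ * ECB implementation instead of the generic "ecb(aes)".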
+ */ +static struct crypto_template heh_base_tmpl = { + .name = "heh_base", + .create = heh_base_create, + .free = heh_free_instance, + .module = THIS_MODULE, +}; + +static int __init heh_module_init(void) +{ + int err; + + err = crypto_register_template(&heh_tmpl); + if (err) + return err; + + err = crypto_register_template(&heh_base_tmpl); + if (err) + goto out_undo_heh; + + err = crypto_register_shash(&poly_hash_alg); + if (err) + goto out_undo_heh_base; + + return 0; + +out_undo_heh_base: + crypto_unregister_template(&heh_base_tmpl); +out_undo_heh: + crypto_unregister_template(&heh_tmpl); + return err; +} + +static void __exit heh_module_exit(void) +{ + crypto_unregister_template(&heh_tmpl); + crypto_unregister_template(&heh_base_tmpl); + crypto_unregister_shash(&poly_hash_alg); +} + +module_init(heh_module_init); +module_exit(heh_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hash-Encrypt-Hash block cipher mode"); +MODULE_ALIAS_CRYPTO("heh"); +MODULE_ALIAS_CRYPTO("heh_base"); diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index a0ceb41d5ccc..b4f3930266b1 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -531,6 +531,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.halg.base.cra_flags = type; inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.statesize = salg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; diff --git a/crypto/shash.c b/crypto/shash.c index 359754591653..9ae1e891308d 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -683,6 +683,14 @@ void shash_free_instance(struct crypto_instance *inst) } EXPORT_SYMBOL_GPL(shash_free_instance); +int crypto_grab_shash(struct crypto_shash_spawn *spawn, + const char *name, u32 type, u32 mask) +{ + spawn->base.frontend = &crypto_shash_type; + return crypto_grab_spawn(&spawn->base, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_shash); + int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, struct shash_alg *alg, struct crypto_instance *inst) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d4944318ca1f..8374ca8b6579 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3214,6 +3214,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "heh(aes)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = { + .vecs = aes_heh_enc_tv_template, + .count = AES_HEH_ENC_TEST_VECTORS + }, + .dec = { + .vecs = aes_heh_dec_tv_template, + .count = AES_HEH_DEC_TEST_VECTORS + } + } + } + }, { .alg = "hmac(crc32)", .test = alg_test_hash, .suite = { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 0e02c60a57b6..ba6530d8ba58 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14139,6 +14139,8 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { #define AES_DEC_TEST_VECTORS 4 #define AES_CBC_ENC_TEST_VECTORS 5 #define AES_CBC_DEC_TEST_VECTORS 5 +#define AES_HEH_ENC_TEST_VECTORS 4 +#define AES_HEH_DEC_TEST_VECTORS 4 #define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 #define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 #define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VEC 2 @@ -14511,6 +14513,198 @@ static struct cipher_testvec aes_dec_tv_template[] = { }, }; +static struct cipher_testvec aes_heh_enc_tv_template[] = { + { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = 
"\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .ilen = 16, + .result = "\xd8\xbd\x40\xbf\xca\xe5\xee\x81" + "\x0f\x3d\x1f\x1f\xae\x89\x07\x55", + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 8, 8 }, + }, { + .key = "\xa8\xda\x24\x9b\x5e\xfa\x13\xc2" + "\xc1\x94\xbf\x32\xba\x38\xa3\x77", + .klen = 16, + .iv = "\x4d\x47\x61\x37\x2b\x47\x86\xf0" + "\xd6\x47\xb5\xc2\xe8\xcf\x85\x27", + .input = "\xb8\xee\x29\xe4\xa5\xd1\xe7\x55" + "\xd0\xfd\xe7\x22\x63\x76\x36\xe2" + "\xf8\x0c\xf8\xfe\x65\x76\xe7\xca" + "\xc1\x42\xf5\xca\x5a\xa8\xac\x2a", + .ilen = 32, + .result = "\x59\xf2\x78\x4e\x10\x94\xf9\x5c" + "\x22\x23\x78\x2a\x30\x48\x11\x97" + "\xb1\xfe\x70\xc4\xef\xdf\x04\xef" + "\x16\x39\x04\xcf\xc0\x95\x9a\x98", + .rlen = 32, + .also_non_np = 1, + .np = 3, + .tap = { 16, 13, 3 }, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00", + .ilen = 63, + .result = "\xe0\x40\xeb\xe9\x52\xbe\x65\x60" + "\xe4\x68\x68\xa3\x73\x75\xb8\x52" + "\xef\x38\x6a\x87\x25\x25\xf6\x04" + "\xe5\x8e\xbe\x14\x8b\x02\x14\x1f" + "\xa9\x73\xb7\xad\x15\xbe\x9c\xa0" + "\xd2\x8a\x2c\xdc\xd4\xe3\x05\x55" + "\x0a\xf5\xf8\x51\xee\xe5\x62\xa5" + "\x71\xa7\x7c\x15\x5d\x7a\x9e", + .rlen = 63, + .also_non_np = 1, + .np = 8, + .tap = { 20, 20, 10, 8, 2, 1, 1, 1 }, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00", + .ilen = 63, + .result = "\x4b\x1a\x15\xa0\xaf\x08\x6d\x70" + "\xf0\xa7\x97\xb5\x31\x4b\x8c\xc3" + "\x4d\xf2\x7a\x9d\xdd\xd4\x15\x99" + "\x57\xad\xc6\xb1\x35\x69\xf5\x6a" + "\x2d\x70\xe4\x97\x49\xb2\x9f\x71" + "\xde\x22\xb5\x70\x8c\x69\x24\xd3" + "\xad\x80\x58\x48\x90\xe4\xed\xba" + "\x76\x3d\x71\x7c\x57\x25\x87", + .rlen = 63, + .also_non_np = 1, + .np = 8, + .tap = { 20, 20, 10, 8, 2, 1, 1, 1 }, + } +}; + +static struct cipher_testvec aes_heh_dec_tv_template[] = { + { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\xd8\xbd\x40\xbf\xca\xe5\xee\x81" + "\x0f\x3d\x1f\x1f\xae\x89\x07\x55", + .ilen = 16, + .result = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .rlen = 16, + .also_non_np = 1, + .np = 2, + .tap = { 8, 8 }, + }, { + .key = "\xa8\xda\x24\x9b\x5e\xfa\x13\xc2" + "\xc1\x94\xbf\x32\xba\x38\xa3\x77", + .klen = 16, + .iv = "\x4d\x47\x61\x37\x2b\x47\x86\xf0" + "\xd6\x47\xb5\xc2\xe8\xcf\x85\x27", + .input = "\x59\xf2\x78\x4e\x10\x94\xf9\x5c" + "\x22\x23\x78\x2a\x30\x48\x11\x97" + "\xb1\xfe\x70\xc4\xef\xdf\x04\xef" + 
"\x16\x39\x04\xcf\xc0\x95\x9a\x98", + .ilen = 32, + .result = "\xb8\xee\x29\xe4\xa5\xd1\xe7\x55" + "\xd0\xfd\xe7\x22\x63\x76\x36\xe2" + "\xf8\x0c\xf8\xfe\x65\x76\xe7\xca" + "\xc1\x42\xf5\xca\x5a\xa8\xac\x2a", + .rlen = 32, + .also_non_np = 1, + .np = 3, + .tap = { 16, 13, 3 }, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\xe0\x40\xeb\xe9\x52\xbe\x65\x60" + "\xe4\x68\x68\xa3\x73\x75\xb8\x52" + "\xef\x38\x6a\x87\x25\x25\xf6\x04" + "\xe5\x8e\xbe\x14\x8b\x02\x14\x1f" + "\xa9\x73\xb7\xad\x15\xbe\x9c\xa0" + "\xd2\x8a\x2c\xdc\xd4\xe3\x05\x55" + "\x0a\xf5\xf8\x51\xee\xe5\x62\xa5" + "\x71\xa7\x7c\x15\x5d\x7a\x9e", + .ilen = 63, + .result = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00", + .rlen = 63, + .also_non_np = 1, + .np = 8, + .tap = { 20, 20, 10, 8, 2, 1, 1, 1 }, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", + .klen = 16, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .input = "\x4b\x1a\x15\xa0\xaf\x08\x6d\x70" + "\xf0\xa7\x97\xb5\x31\x4b\x8c\xc3" + "\x4d\xf2\x7a\x9d\xdd\xd4\x15\x99" + "\x57\xad\xc6\xb1\x35\x69\xf5\x6a" + "\x2d\x70\xe4\x97\x49\xb2\x9f\x71" + "\xde\x22\xb5\x70\x8c\x69\x24\xd3" + "\xad\x80\x58\x48\x90\xe4\xed\xba" + "\x76\x3d\x71\x7c\x57\x25\x87", + .ilen = 63, + .result = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00", + .rlen = 63, + .also_non_np = 1, + .np = 8, + .tap = { 20, 20, 10, 8, 2, 1, 1, 1 }, + } +}; + static struct cipher_testvec aes_cbc_enc_tv_template[] = { { /* From RFC 3602 */ .key = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b" diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 5fdac394207a..549cdbed7b0e 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1211,6 +1211,9 @@ static int acpi_video_device_enumerate(struct acpi_video_bus *video) union acpi_object *dod = NULL; union acpi_object *obj; + if (!video->cap._DOD) + return AE_NOT_EXIST; + status = acpi_evaluate_object(video->device->handle, "_DOD", NULL, &buffer); if (!ACPI_SUCCESS(status)) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _DOD")); diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 96809cd99ace..2f24b578bcaf 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -346,6 +346,34 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"), }, }, + { + .callback = dmi_enable_rev_override, + .ident = "DELL Precision 5520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 5520"), + }, + }, + { + .callback = dmi_enable_rev_override, + .ident = "DELL Precision 3520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3520"), + }, + }, + /* 
+ * Resolves a quirk with the Dell Latitude 3350 that + * causes the ethernet adapter to not function. + */ + { + .callback = dmi_enable_rev_override, + .ident = "DELL Latitude 3350", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 3350"), + }, + }, #endif {} }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 09c07f519952..0e494108c20c 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -1042,7 +1042,7 @@ static int print_wakeup_source_stats(struct seq_file *m, active_time = ktime_set(0, 0); } - seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", + seq_printf(m, "%-32s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), @@ -1062,7 +1062,7 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; - seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + seq_puts(m, "name\t\t\t\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 65f7eecc45b0..f10a107614b4 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -401,7 +401,7 @@ static void disable_interrupts(struct tpm_chip *chip) iowrite32(intmask, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); - free_irq(chip->vendor.irq, chip); + devm_free_irq(chip->pdev, chip->vendor.irq, chip); chip->vendor.irq = 0; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e4d9aef1dda4..a0dba9beac05 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -698,9 +698,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { unsigned int cur_freq = __cpufreq_get(policy); - if (!cur_freq) - return sprintf(buf, "<unknown>"); - return sprintf(buf, "%u\n", cur_freq); + + if (cur_freq) + return sprintf(buf, "%u\n", cur_freq); + + return sprintf(buf, "<unknown>\n"); } /** diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 24ac49019b29..b91e115462ae 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -419,13 +419,13 @@ static u64 update_load(int cpu) ppol->policy->governor_data; u64 now; u64 now_idle; - unsigned int delta_idle; - unsigned int delta_time; + u64 delta_idle; + u64 delta_time; u64 active_time; now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy); - delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle); - delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp); + delta_idle = (now_idle - pcpu->time_in_idle); + delta_time = (now - pcpu->time_in_idle_timestamp); if (delta_time <= delta_idle) active_time = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 7c42ff670080..a0924330d125 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -25,6 +25,7 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/irq.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include <drm/amdgpu_drm.h> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 57c191798699..ddbf7e7e0d98 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -274,7 +274,7 @@ cleanup: * * 
This routine is called normally during driver unloading or exiting. */ -void hv_cleanup(void) +void hv_cleanup(bool crash) { union hv_x64_msr_hypercall_contents hypercall_msr; @@ -284,7 +284,8 @@ void hv_cleanup(void) if (hv_context.hypercall_page) { hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - vfree(hv_context.hypercall_page); + if (!crash) + vfree(hv_context.hypercall_page); hv_context.hypercall_page = NULL; } @@ -304,7 +305,8 @@ void hv_cleanup(void) hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); - vfree(hv_context.tsc_page); + if (!crash) + vfree(hv_context.tsc_page); hv_context.tsc_page = NULL; } #endif diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b853b4b083bd..43af91362be5 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -714,7 +714,7 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) * If the pfn range we are dealing with is not in the current * "hot add block", move on. */ - if ((start_pfn >= has->end_pfn)) + if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn) continue; /* * If the current hot add-request extends beyond @@ -768,7 +768,7 @@ static unsigned long handle_pg_range(unsigned long pg_start, * If the pfn range we are dealing with is not in the current * "hot add block", move on. */ - if ((start_pfn >= has->end_pfn)) + if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn) continue; old_covered_state = has->covered_end_pfn; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 12156db2e88e..75e383e6d03d 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -581,7 +581,7 @@ struct hv_ring_buffer_debug_info { extern int hv_init(void); -extern void hv_cleanup(void); +extern void hv_cleanup(bool crash); extern int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 509ed9731630..802dcb409030 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -889,7 +889,7 @@ err_alloc: bus_unregister(&hv_bus); err_cleanup: - hv_cleanup(); + hv_cleanup(false); return ret; } @@ -1254,7 +1254,7 @@ static void hv_kexec_handler(void) vmbus_initiate_unload(); for_each_online_cpu(cpu) smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); - hv_cleanup(); + hv_cleanup(false); }; static void hv_crash_handler(struct pt_regs *regs) @@ -1266,7 +1266,7 @@ static void hv_crash_handler(struct pt_regs *regs) * for kdump. 
*/ hv_synic_cleanup(NULL); - hv_cleanup(); + hv_cleanup(true); }; static int __init hv_acpi_init(void) @@ -1330,7 +1330,7 @@ static void __exit vmbus_exit(void) &hyperv_panic_block); } bus_unregister(&hv_bus); - hv_cleanup(); + hv_cleanup(false); for_each_online_cpu(cpu) { tasklet_kill(hv_context.event_dpc[cpu]); smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 0470fc843d4e..9b6854607d73 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -151,7 +151,9 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) { struct iio_dev *indio_dev = private; struct tiadc_device *adc_dev = iio_priv(indio_dev); - unsigned int status, config; + unsigned int status, config, adc_fsm; + unsigned short count = 0; + status = tiadc_readl(adc_dev, REG_IRQSTATUS); /* @@ -165,6 +167,15 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) tiadc_writel(adc_dev, REG_CTRL, config); tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES); + + /* wait for idle state. + * ADC needs to finish the current conversion + * before disabling the module + */ + do { + adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM); + } while (adc_fsm != 0x10 && count++ < 100); + tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB)); return IRQ_HANDLED; } else if (status & IRQENB_FIFO1THRES) { diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 595511022795..0a86ef43e781 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -51,8 +51,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) st->report_state.report_id, st->report_state.index, HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM); - - poll_value = hid_sensor_read_poll_value(st); } else { int val; @@ -89,7 +87,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); - if (state && poll_value) + if (state) + poll_value = hid_sensor_read_poll_value(st); + if (poll_value > 0) msleep_interruptible(poll_value * 2); return 0; diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index d96aa27dfcdc..db64adfbe1af 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -141,6 +141,9 @@ static int iforce_usb_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 2) + return -ENODEV; + epirq = &interface->endpoint[0].desc; epout = &interface->endpoint[1].desc; diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 9365535ba7f1..50a7faa504f7 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -675,6 +675,10 @@ static int cm109_usb_probe(struct usb_interface *intf, int error = -ENOMEM; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 9c0ea36913b4..f4e8fbec6a94 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1667,6 +1667,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, 
struct ims_pcu *pc return -EINVAL; alt = pcu->ctrl_intf->cur_altsetting; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + pcu->ep_ctrl = &alt->endpoint[0].desc; pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl); diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c index 79c964c075f1..6e7ff9561d92 100644 --- a/drivers/input/misc/yealink.c +++ b/drivers/input/misc/yealink.c @@ -875,6 +875,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) int ret, pipe, i; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index ed1935f300a7..da5458dfb1e3 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -218,17 +218,19 @@ static int elan_query_product(struct elan_tp_data *data) static int elan_check_ASUS_special_fw(struct elan_tp_data *data) { - if (data->ic_type != 0x0E) - return false; - - switch (data->product_id) { - case 0x05 ... 0x07: - case 0x09: - case 0x13: + if (data->ic_type == 0x0E) { + switch (data->product_id) { + case 0x05 ... 0x07: + case 0x09: + case 0x13: + return true; + } + } else if (data->ic_type == 0x08 && data->product_id == 0x26) { + /* ASUS EeeBook X205TA */ return true; - default: - return false; } + + return false; } static int __elan_initialize(struct elan_tp_data *data) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 0cdd95801a25..25eab453f2b2 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -120,6 +120,13 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { }, }, { + /* Dell Embedded Box PC 3000 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"), + }, + }, + { /* OQO Model 01 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "OQO"), diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c index cd852059b99e..df4bea96d7ed 100644 --- a/drivers/input/tablet/hanwang.c +++ b/drivers/input/tablet/hanwang.c @@ -340,6 +340,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id int error; int i; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL); input_dev = input_allocate_device(); if (!hanwang || !input_dev) { diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index d2ac7c2b5b82..2812f9236b7d 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -122,6 +122,9 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i struct input_dev *input_dev; int error = -ENOMEM; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL); input_dev = input_allocate_device(); if (!kbtab || !input_dev) diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index 45b466e3bbe8..0146e2c74649 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -500,6 +500,9 @@ static int sur40_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 5) + return -ENODEV; + /* Use endpoint #4 (0x86). 
*/ endpoint = &iface_desc->endpoint[4].desc; if (endpoint->bEndpointAddress != TOUCH_ENDPOINT) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f0fc6f7b5d98..0628372f3591 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -908,7 +908,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf * which we used for the IOMMU lookup. Strictly speaking * we could do this for all PCI devices; we only need to * get the BDF# from the scope table for ACPI matches. */ - if (pdev->is_virtfn) + if (pdev && pdev->is_virtfn) goto got_pdev; *bus = drhd->devices[i].bus; diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index aecec6d32463..7f1c625b08ec 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 1) + return -ENODEV; + dev_info(&udev->dev, "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", __func__, le16_to_cpu(udev->descriptor.idVendor), diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36ca4e4cbfb7..6fd8483cc668 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -533,15 +533,15 @@ config DM_LOG_WRITES If unsure, say N. config DM_ANDROID_VERITY - tristate "Android verity target support" - depends on DM_VERITY + bool "Android verity target support" + depends on DM_VERITY=y depends on X509_CERTIFICATE_PARSER depends on SYSTEM_TRUSTED_KEYRING depends on PUBLIC_KEY_ALGO_RSA depends on KEYS depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE - depends on MD_LINEAR + depends on MD_LINEAR=y select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c index 13c60bee8af5..7cef735a01a7 100644 --- a/drivers/md/dm-android-verity.c +++ b/drivers/md/dm-android-verity.c @@ -115,6 +115,12 @@ static inline bool is_userdebug(void) return !strncmp(buildvariant, typeuserdebug, sizeof(typeuserdebug)); } +static inline bool is_unlocked(void) +{ + static const char unlocked[] = "orange"; + + return !strncmp(verifiedbootstate, unlocked, sizeof(unlocked)); +} static int table_extract_mpi_array(struct public_key_signature *pks, const void *data, size_t len) @@ -585,6 +591,8 @@ static int verify_verity_signature(char *key_id, if (IS_ERR(pks)) { DMERR("hashing failed"); + retval = PTR_ERR(pks); + pks = NULL; goto error; } @@ -648,6 +656,28 @@ static int add_as_linear_device(struct dm_target *ti, char *dev) return err; } +static int create_linear_device(struct dm_target *ti, dev_t dev, + char *target_device) +{ + u64 device_size = 0; + int err = find_size(dev, &device_size); + + if (err) { + DMERR("error finding bdev size"); + handle_error(); + return err; + } + + ti->len = device_size; + err = add_as_linear_device(ti, target_device); + if (err) { + handle_error(); + return err; + } + verity_enabled = false; + return 0; +} + /* * Target parameters: * <key id> Key id of the public key in the system keyring. 
@@ -671,7 +701,6 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
         struct fec_ecc_metadata uninitialized_var(ecc);
         char buf[FEC_ARG_LENGTH], *buf_ptr;
         unsigned long long tmpll;
-        u64 uninitialized_var(device_size);
 
         if (argc == 1) {
                 /* Use the default keyid */
@@ -699,23 +728,8 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 return -EINVAL;
         }
 
-        if (is_eng()) {
-                err = find_size(dev, &device_size);
-                if (err) {
-                        DMERR("error finding bdev size");
-                        handle_error();
-                        return err;
-                }
-
-                ti->len = device_size;
-                err = add_as_linear_device(ti, target_device);
-                if (err) {
-                        handle_error();
-                        return err;
-                }
-                verity_enabled = false;
-                return 0;
-        }
+        if (is_eng())
+                return create_linear_device(ti, dev, target_device);
 
         strreplace(key_id, '#', ' ');
 
@@ -730,6 +744,11 @@ static int android_verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
         err = extract_metadata(dev, &fec, &metadata, &verity_enabled);
         if (err) {
+                /* Allow invalid metadata when the device is unlocked */
+                if (is_unlocked()) {
+                        DMWARN("Allow invalid metadata when unlocked");
+                        return create_linear_device(ti, dev, target_device);
+                }
                 DMERR("Error while extracting metadata");
                 handle_error();
                 goto free_metadata;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ebb0dd612ebd..a92979e704e3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1072,6 +1072,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
         int max_sectors;
         int sectors;
 
+        md_write_start(mddev, bio);
+
         /*
          * Register the new request and wait if the reconstruction
          * thread has put up a bar for new requests.
@@ -1455,8 +1457,6 @@ static void make_request(struct mddev *mddev, struct bio *bio)
                 return;
         }
 
-        md_write_start(mddev, bio);
-
         do {
 
                 /*
@@ -1477,7 +1477,25 @@ static void make_request(struct mddev *mddev, struct bio *bio)
                         split = bio;
                 }
 
+                /*
+                 * If a bio is split, the first part of the bio will pass the
+                 * barrier, but the bio is queued in current->bio_list (see
+                 * generic_make_request). If raise_barrier() is called here, the
+                 * second part of the bio can't pass the barrier. But since the
+                 * first part of the bio isn't dispatched to the underlying disks
+                 * yet, the barrier is never released, so raise_barrier() will
+                 * always wait. We have a deadlock.
+                 * Note, this only happens in the read path. For the write
+                 * path, the first part of the bio is dispatched in a schedule()
+                 * call (because of blk plug) or offloaded to raid10d.
+                 * Quitting from the function immediately can change the bio
+                 * order queued in bio_list and avoid the deadlock.
+                 */
                 __make_request(mddev, split);
+                if (split != bio && bio_data_dir(bio) == READ) {
+                        generic_make_request(bio);
+                        break;
+                }
         } while (split != bio);
 
         /* In case raid10d snuck in to freeze_array */
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index 5cefca95734e..885f689ac870 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -1595,6 +1595,114 @@ static const char *uvc_print_chain(struct uvc_video_chain *chain)
         return buffer;
 }
 
+static struct uvc_video_chain *uvc_alloc_chain(struct uvc_device *dev)
+{
+        struct uvc_video_chain *chain;
+
+        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+        if (chain == NULL)
+                return NULL;
+
+        INIT_LIST_HEAD(&chain->entities);
+        mutex_init(&chain->ctrl_mutex);
+        chain->dev = dev;
+        v4l2_prio_init(&chain->prio);
+
+        return chain;
+}
+
+/*
+ * Fallback heuristic for devices that don't connect units and terminals in a
+ * valid chain.
+ * + * Some devices have invalid baSourceID references, causing uvc_scan_chain() + * to fail, but if we just take the entities we can find and put them together + * in the most sensible chain we can think of, turns out they do work anyway. + * Note: This heuristic assumes there is a single chain. + * + * At the time of writing, devices known to have such a broken chain are + * - Acer Integrated Camera (5986:055a) + * - Realtek rtl157a7 (0bda:57a7) + */ +static int uvc_scan_fallback(struct uvc_device *dev) +{ + struct uvc_video_chain *chain; + struct uvc_entity *iterm = NULL; + struct uvc_entity *oterm = NULL; + struct uvc_entity *entity; + struct uvc_entity *prev; + + /* + * Start by locating the input and output terminals. We only support + * devices with exactly one of each for now. + */ + list_for_each_entry(entity, &dev->entities, list) { + if (UVC_ENTITY_IS_ITERM(entity)) { + if (iterm) + return -EINVAL; + iterm = entity; + } + + if (UVC_ENTITY_IS_OTERM(entity)) { + if (oterm) + return -EINVAL; + oterm = entity; + } + } + + if (iterm == NULL || oterm == NULL) + return -EINVAL; + + /* Allocate the chain and fill it. */ + chain = uvc_alloc_chain(dev); + if (chain == NULL) + return -ENOMEM; + + if (uvc_scan_chain_entity(chain, oterm) < 0) + goto error; + + prev = oterm; + + /* + * Add all Processing and Extension Units with two pads. The order + * doesn't matter much, use reverse list traversal to connect units in + * UVC descriptor order as we build the chain from output to input. This + * leads to units appearing in the order meant by the manufacturer for + * the cameras known to require this heuristic. + */ + list_for_each_entry_reverse(entity, &dev->entities, list) { + if (entity->type != UVC_VC_PROCESSING_UNIT && + entity->type != UVC_VC_EXTENSION_UNIT) + continue; + + if (entity->num_pads != 2) + continue; + + if (uvc_scan_chain_entity(chain, entity) < 0) + goto error; + + prev->baSourceID[0] = entity->id; + prev = entity; + } + + if (uvc_scan_chain_entity(chain, iterm) < 0) + goto error; + + prev->baSourceID[0] = iterm->id; + + list_add_tail(&chain->list, &dev->chains); + + uvc_trace(UVC_TRACE_PROBE, + "Found a video chain by fallback heuristic (%s).\n", + uvc_print_chain(chain)); + + return 0; + +error: + kfree(chain); + return -EINVAL; +} + /* * Scan the device for video chains and register video devices. * @@ -1617,15 +1725,10 @@ static int uvc_scan_device(struct uvc_device *dev) if (term->chain.next || term->chain.prev) continue; - chain = kzalloc(sizeof(*chain), GFP_KERNEL); + chain = uvc_alloc_chain(dev); if (chain == NULL) return -ENOMEM; - INIT_LIST_HEAD(&chain->entities); - mutex_init(&chain->ctrl_mutex); - chain->dev = dev; - v4l2_prio_init(&chain->prio); - term->flags |= UVC_ENTITY_FLAG_DEFAULT; if (uvc_scan_chain(chain, term) < 0) { @@ -1639,6 +1742,9 @@ static int uvc_scan_device(struct uvc_device *dev) list_add_tail(&chain->list, &dev->chains); } + if (list_empty(&dev->chains)) + uvc_scan_fallback(dev); + if (list_empty(&dev->chains)) { uvc_printk(KERN_INFO, "No valid video chain found.\n"); return -1; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 12ffa4192605..2a4abf736d89 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -549,11 +549,13 @@ config VEXPRESS_SYSCFG bus. System Configuration interface is one of the possible means of generating transactions on this bus. 
-config UID_CPUTIME - bool "Per-UID cpu time statistics" +config UID_SYS_STATS + bool "Per-UID statistics" depends on PROFILING help Per UID based cpu time statistics exported to /proc/uid_cputime + Per UID based io statistics exported to /proc/uid_io + Per UID based procstat control in /proc/uid_procstat config QPNP_MISC tristate "QPNP Misc Peripheral" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index c2941afc961e..b0718228d2d9 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -62,7 +62,7 @@ endif obj-$(CONFIG_ECHO) += echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ -obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o +obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o obj-y += qcom/ obj-$(CONFIG_QPNP_MISC) += qpnp-misc.o obj-$(CONFIG_MEMORY_STATE_TIME) += memory_state_time.o diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c deleted file mode 100644 index c1ad5246f564..000000000000 --- a/drivers/misc/uid_cputime.c +++ /dev/null @@ -1,240 +0,0 @@ -/* drivers/misc/uid_cputime.c - * - * Copyright (C) 2014 - 2015 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <linux/atomic.h> -#include <linux/err.h> -#include <linux/hashtable.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/proc_fs.h> -#include <linux/profile.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/uaccess.h> - -#define UID_HASH_BITS 10 -DECLARE_HASHTABLE(hash_table, UID_HASH_BITS); - -static DEFINE_MUTEX(uid_lock); -static struct proc_dir_entry *parent; - -struct uid_entry { - uid_t uid; - cputime_t utime; - cputime_t stime; - cputime_t active_utime; - cputime_t active_stime; - struct hlist_node hash; -}; - -static struct uid_entry *find_uid_entry(uid_t uid) -{ - struct uid_entry *uid_entry; - hash_for_each_possible(hash_table, uid_entry, hash, uid) { - if (uid_entry->uid == uid) - return uid_entry; - } - return NULL; -} - -static struct uid_entry *find_or_register_uid(uid_t uid) -{ - struct uid_entry *uid_entry; - - uid_entry = find_uid_entry(uid); - if (uid_entry) - return uid_entry; - - uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC); - if (!uid_entry) - return NULL; - - uid_entry->uid = uid; - - hash_add(hash_table, &uid_entry->hash, uid); - - return uid_entry; -} - -static int uid_stat_show(struct seq_file *m, void *v) -{ - struct uid_entry *uid_entry; - struct task_struct *task, *temp; - cputime_t utime; - cputime_t stime; - unsigned long bkt; - - mutex_lock(&uid_lock); - - hash_for_each(hash_table, bkt, uid_entry, hash) { - uid_entry->active_stime = 0; - uid_entry->active_utime = 0; - } - - read_lock(&tasklist_lock); - do_each_thread(temp, task) { - uid_entry = find_or_register_uid(from_kuid_munged( - current_user_ns(), task_uid(task))); - if (!uid_entry) { - read_unlock(&tasklist_lock); - mutex_unlock(&uid_lock); - pr_err("%s: failed to find the uid_entry for uid %d\n", - __func__, from_kuid_munged(current_user_ns(), - task_uid(task))); - return -ENOMEM; - } - task_cputime_adjusted(task, &utime, 
&stime); - uid_entry->active_utime += utime; - uid_entry->active_stime += stime; - } while_each_thread(temp, task); - read_unlock(&tasklist_lock); - - hash_for_each(hash_table, bkt, uid_entry, hash) { - cputime_t total_utime = uid_entry->utime + - uid_entry->active_utime; - cputime_t total_stime = uid_entry->stime + - uid_entry->active_stime; - seq_printf(m, "%d: %llu %llu\n", uid_entry->uid, - (unsigned long long)jiffies_to_msecs( - cputime_to_jiffies(total_utime)) * USEC_PER_MSEC, - (unsigned long long)jiffies_to_msecs( - cputime_to_jiffies(total_stime)) * USEC_PER_MSEC); - } - - mutex_unlock(&uid_lock); - return 0; -} - -static int uid_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, uid_stat_show, PDE_DATA(inode)); -} - -static const struct file_operations uid_stat_fops = { - .open = uid_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int uid_remove_open(struct inode *inode, struct file *file) -{ - return single_open(file, NULL, NULL); -} - -static ssize_t uid_remove_write(struct file *file, - const char __user *buffer, size_t count, loff_t *ppos) -{ - struct uid_entry *uid_entry; - struct hlist_node *tmp; - char uids[128]; - char *start_uid, *end_uid = NULL; - long int uid_start = 0, uid_end = 0; - - if (count >= sizeof(uids)) - count = sizeof(uids) - 1; - - if (copy_from_user(uids, buffer, count)) - return -EFAULT; - - uids[count] = '\0'; - end_uid = uids; - start_uid = strsep(&end_uid, "-"); - - if (!start_uid || !end_uid) - return -EINVAL; - - if (kstrtol(start_uid, 10, &uid_start) != 0 || - kstrtol(end_uid, 10, &uid_end) != 0) { - return -EINVAL; - } - mutex_lock(&uid_lock); - - for (; uid_start <= uid_end; uid_start++) { - hash_for_each_possible_safe(hash_table, uid_entry, tmp, - hash, (uid_t)uid_start) { - if (uid_start == uid_entry->uid) { - hash_del(&uid_entry->hash); - kfree(uid_entry); - } - } - } - - mutex_unlock(&uid_lock); - return count; -} - -static const struct file_operations uid_remove_fops = { - .open = uid_remove_open, - .release = single_release, - .write = uid_remove_write, -}; - -static int process_notifier(struct notifier_block *self, - unsigned long cmd, void *v) -{ - struct task_struct *task = v; - struct uid_entry *uid_entry; - cputime_t utime, stime; - uid_t uid; - - if (!task) - return NOTIFY_OK; - - mutex_lock(&uid_lock); - uid = from_kuid_munged(current_user_ns(), task_uid(task)); - uid_entry = find_or_register_uid(uid); - if (!uid_entry) { - pr_err("%s: failed to find uid %d\n", __func__, uid); - goto exit; - } - - task_cputime_adjusted(task, &utime, &stime); - uid_entry->utime += utime; - uid_entry->stime += stime; - -exit: - mutex_unlock(&uid_lock); - return NOTIFY_OK; -} - -static struct notifier_block process_notifier_block = { - .notifier_call = process_notifier, -}; - -static int __init proc_uid_cputime_init(void) -{ - hash_init(hash_table); - - parent = proc_mkdir("uid_cputime", NULL); - if (!parent) { - pr_err("%s: failed to create proc entry\n", __func__); - return -ENOMEM; - } - - proc_create_data("remove_uid_range", S_IWUGO, parent, &uid_remove_fops, - NULL); - - proc_create_data("show_uid_stat", S_IRUGO, parent, &uid_stat_fops, - NULL); - - profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); - - return 0; -} - -early_initcall(proc_uid_cputime_init); diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c new file mode 100644 index 000000000000..204b23484266 --- /dev/null +++ b/drivers/misc/uid_sys_stats.c @@ -0,0 +1,463 @@ +/* 
drivers/misc/uid_sys_stats.c
+ *
+ * Copyright (C) 2014 - 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#define UID_HASH_BITS 10
+DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
+
+static DEFINE_RT_MUTEX(uid_lock);
+static struct proc_dir_entry *cpu_parent;
+static struct proc_dir_entry *io_parent;
+static struct proc_dir_entry *proc_parent;
+
+struct io_stats {
+        u64 read_bytes;
+        u64 write_bytes;
+        u64 rchar;
+        u64 wchar;
+        u64 fsync;
+};
+
+#define UID_STATE_FOREGROUND 0
+#define UID_STATE_BACKGROUND 1
+#define UID_STATE_BUCKET_SIZE 2
+
+#define UID_STATE_TOTAL_CURR 2
+#define UID_STATE_TOTAL_LAST 3
+#define UID_STATE_SIZE 4
+
+struct uid_entry {
+        uid_t uid;
+        cputime_t utime;
+        cputime_t stime;
+        cputime_t active_utime;
+        cputime_t active_stime;
+        int state;
+        struct io_stats io[UID_STATE_SIZE];
+        struct hlist_node hash;
+};
+
+static struct uid_entry *find_uid_entry(uid_t uid)
+{
+        struct uid_entry *uid_entry;
+        hash_for_each_possible(hash_table, uid_entry, hash, uid) {
+                if (uid_entry->uid == uid)
+                        return uid_entry;
+        }
+        return NULL;
+}
+
+static struct uid_entry *find_or_register_uid(uid_t uid)
+{
+        struct uid_entry *uid_entry;
+
+        uid_entry = find_uid_entry(uid);
+        if (uid_entry)
+                return uid_entry;
+
+        uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
+        if (!uid_entry)
+                return NULL;
+
+        uid_entry->uid = uid;
+
+        hash_add(hash_table, &uid_entry->hash, uid);
+
+        return uid_entry;
+}
+
+static int uid_cputime_show(struct seq_file *m, void *v)
+{
+        struct uid_entry *uid_entry;
+        struct task_struct *task, *temp;
+        cputime_t utime;
+        cputime_t stime;
+        unsigned long bkt;
+
+        rt_mutex_lock(&uid_lock);
+
+        hash_for_each(hash_table, bkt, uid_entry, hash) {
+                uid_entry->active_stime = 0;
+                uid_entry->active_utime = 0;
+        }
+
+        read_lock(&tasklist_lock);
+        do_each_thread(temp, task) {
+                uid_entry = find_or_register_uid(from_kuid_munged(
+                        current_user_ns(), task_uid(task)));
+                if (!uid_entry) {
+                        read_unlock(&tasklist_lock);
+                        rt_mutex_unlock(&uid_lock);
+                        pr_err("%s: failed to find the uid_entry for uid %d\n",
+                                __func__, from_kuid_munged(current_user_ns(),
+                                task_uid(task)));
+                        return -ENOMEM;
+                }
+                task_cputime_adjusted(task, &utime, &stime);
+                uid_entry->active_utime += utime;
+                uid_entry->active_stime += stime;
+        } while_each_thread(temp, task);
+        read_unlock(&tasklist_lock);
+
+        hash_for_each(hash_table, bkt, uid_entry, hash) {
+                cputime_t total_utime = uid_entry->utime +
+                        uid_entry->active_utime;
+                cputime_t total_stime = uid_entry->stime +
+                        uid_entry->active_stime;
+                seq_printf(m, "%d: %llu %llu\n", uid_entry->uid,
+                        (unsigned long long)jiffies_to_msecs(
+                                cputime_to_jiffies(total_utime)) * USEC_PER_MSEC,
+                        (unsigned long long)jiffies_to_msecs(
+                                cputime_to_jiffies(total_stime)) * 
USEC_PER_MSEC); + } + + rt_mutex_unlock(&uid_lock); + return 0; +} + +static int uid_cputime_open(struct inode *inode, struct file *file) +{ + return single_open(file, uid_cputime_show, PDE_DATA(inode)); +} + +static const struct file_operations uid_cputime_fops = { + .open = uid_cputime_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uid_remove_open(struct inode *inode, struct file *file) +{ + return single_open(file, NULL, NULL); +} + +static ssize_t uid_remove_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + struct uid_entry *uid_entry; + struct hlist_node *tmp; + char uids[128]; + char *start_uid, *end_uid = NULL; + long int uid_start = 0, uid_end = 0; + + if (count >= sizeof(uids)) + count = sizeof(uids) - 1; + + if (copy_from_user(uids, buffer, count)) + return -EFAULT; + + uids[count] = '\0'; + end_uid = uids; + start_uid = strsep(&end_uid, "-"); + + if (!start_uid || !end_uid) + return -EINVAL; + + if (kstrtol(start_uid, 10, &uid_start) != 0 || + kstrtol(end_uid, 10, &uid_end) != 0) { + return -EINVAL; + } + rt_mutex_lock(&uid_lock); + + for (; uid_start <= uid_end; uid_start++) { + hash_for_each_possible_safe(hash_table, uid_entry, tmp, + hash, (uid_t)uid_start) { + if (uid_start == uid_entry->uid) { + hash_del(&uid_entry->hash); + kfree(uid_entry); + } + } + } + + rt_mutex_unlock(&uid_lock); + return count; +} + +static const struct file_operations uid_remove_fops = { + .open = uid_remove_open, + .release = single_release, + .write = uid_remove_write, +}; + +static u64 compute_write_bytes(struct task_struct *task) +{ + if (task->ioac.write_bytes <= task->ioac.cancelled_write_bytes) + return 0; + + return task->ioac.write_bytes - task->ioac.cancelled_write_bytes; +} + +static void add_uid_io_curr_stats(struct uid_entry *uid_entry, + struct task_struct *task) +{ + struct io_stats *io_curr = &uid_entry->io[UID_STATE_TOTAL_CURR]; + + io_curr->read_bytes += task->ioac.read_bytes; + io_curr->write_bytes += compute_write_bytes(task); + io_curr->rchar += task->ioac.rchar; + io_curr->wchar += task->ioac.wchar; + io_curr->fsync += task->ioac.syscfs; +} + +static void clean_uid_io_last_stats(struct uid_entry *uid_entry, + struct task_struct *task) +{ + struct io_stats *io_last = &uid_entry->io[UID_STATE_TOTAL_LAST]; + + io_last->read_bytes -= task->ioac.read_bytes; + io_last->write_bytes -= compute_write_bytes(task); + io_last->rchar -= task->ioac.rchar; + io_last->wchar -= task->ioac.wchar; + io_last->fsync -= task->ioac.syscfs; +} + +static void update_io_stats_locked(void) +{ + struct uid_entry *uid_entry; + struct task_struct *task, *temp; + struct io_stats *io_bucket, *io_curr, *io_last; + unsigned long bkt; + + BUG_ON(!rt_mutex_is_locked(&uid_lock)); + + hash_for_each(hash_table, bkt, uid_entry, hash) + memset(&uid_entry->io[UID_STATE_TOTAL_CURR], 0, + sizeof(struct io_stats)); + + read_lock(&tasklist_lock); + do_each_thread(temp, task) { + uid_entry = find_or_register_uid(from_kuid_munged( + current_user_ns(), task_uid(task))); + if (!uid_entry) + continue; + add_uid_io_curr_stats(uid_entry, task); + } while_each_thread(temp, task); + read_unlock(&tasklist_lock); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + io_bucket = &uid_entry->io[uid_entry->state]; + io_curr = &uid_entry->io[UID_STATE_TOTAL_CURR]; + io_last = &uid_entry->io[UID_STATE_TOTAL_LAST]; + + io_bucket->read_bytes += + io_curr->read_bytes - io_last->read_bytes; + io_bucket->write_bytes += + io_curr->write_bytes - 
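/*
 * Editorial sketch: the UID_STATE_TOTAL_CURR/UID_STATE_TOTAL_LAST
 * pair above implements delta accounting. Each pass recomputes the
 * current totals from live tasks, credits the difference since the
 * previous pass to whichever state bucket the uid is currently in,
 * then advances the baseline; clean_uid_io_last_stats() keeps the
 * baseline honest when a task exits. Minimal standalone form:
 */
struct io_sample { unsigned long long bytes; };

static void account_delta(struct io_sample *bucket,
			  struct io_sample *curr,
			  struct io_sample *last)
{
	/* credit only what happened since the last snapshot ... */
	bucket->bytes += curr->bytes - last->bytes;
	/* ... then make the current snapshot the new baseline */
	last->bytes = curr->bytes;
}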
io_last->write_bytes; + io_bucket->rchar += io_curr->rchar - io_last->rchar; + io_bucket->wchar += io_curr->wchar - io_last->wchar; + io_bucket->fsync += io_curr->fsync - io_last->fsync; + + io_last->read_bytes = io_curr->read_bytes; + io_last->write_bytes = io_curr->write_bytes; + io_last->rchar = io_curr->rchar; + io_last->wchar = io_curr->wchar; + io_last->fsync = io_curr->fsync; + } +} + +static int uid_io_show(struct seq_file *m, void *v) +{ + struct uid_entry *uid_entry; + unsigned long bkt; + + rt_mutex_lock(&uid_lock); + + update_io_stats_locked(); + + hash_for_each(hash_table, bkt, uid_entry, hash) { + seq_printf(m, "%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + uid_entry->uid, + uid_entry->io[UID_STATE_FOREGROUND].rchar, + uid_entry->io[UID_STATE_FOREGROUND].wchar, + uid_entry->io[UID_STATE_FOREGROUND].read_bytes, + uid_entry->io[UID_STATE_FOREGROUND].write_bytes, + uid_entry->io[UID_STATE_BACKGROUND].rchar, + uid_entry->io[UID_STATE_BACKGROUND].wchar, + uid_entry->io[UID_STATE_BACKGROUND].read_bytes, + uid_entry->io[UID_STATE_BACKGROUND].write_bytes, + uid_entry->io[UID_STATE_FOREGROUND].fsync, + uid_entry->io[UID_STATE_BACKGROUND].fsync); + } + + rt_mutex_unlock(&uid_lock); + + return 0; +} + +static int uid_io_open(struct inode *inode, struct file *file) +{ + return single_open(file, uid_io_show, PDE_DATA(inode)); +} + +static const struct file_operations uid_io_fops = { + .open = uid_io_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uid_procstat_open(struct inode *inode, struct file *file) +{ + return single_open(file, NULL, NULL); +} + +static ssize_t uid_procstat_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + struct uid_entry *uid_entry; + uid_t uid; + int argc, state; + char input[128]; + + if (count >= sizeof(input)) + return -EINVAL; + + if (copy_from_user(input, buffer, count)) + return -EFAULT; + + input[count] = '\0'; + + argc = sscanf(input, "%u %d", &uid, &state); + if (argc != 2) + return -EINVAL; + + if (state != UID_STATE_BACKGROUND && state != UID_STATE_FOREGROUND) + return -EINVAL; + + rt_mutex_lock(&uid_lock); + + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + rt_mutex_unlock(&uid_lock); + return -EINVAL; + } + + if (uid_entry->state == state) { + rt_mutex_unlock(&uid_lock); + return count; + } + + update_io_stats_locked(); + + uid_entry->state = state; + + rt_mutex_unlock(&uid_lock); + + return count; +} + +static const struct file_operations uid_procstat_fops = { + .open = uid_procstat_open, + .release = single_release, + .write = uid_procstat_write, +}; + +static int process_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + struct task_struct *task = v; + struct uid_entry *uid_entry; + cputime_t utime, stime; + uid_t uid; + + if (!task) + return NOTIFY_OK; + + rt_mutex_lock(&uid_lock); + uid = from_kuid_munged(current_user_ns(), task_uid(task)); + uid_entry = find_or_register_uid(uid); + if (!uid_entry) { + pr_err("%s: failed to find uid %d\n", __func__, uid); + goto exit; + } + + task_cputime_adjusted(task, &utime, &stime); + uid_entry->utime += utime; + uid_entry->stime += stime; + + update_io_stats_locked(); + clean_uid_io_last_stats(uid_entry, task); + +exit: + rt_mutex_unlock(&uid_lock); + return NOTIFY_OK; +} + +static struct notifier_block process_notifier_block = { + .notifier_call = process_notifier, +}; + +static int __init proc_uid_sys_stats_init(void) +{ + hash_init(hash_table); + + cpu_parent = 
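/*
 * Editorial sketch: uid_procstat_write() above expects "<uid> <state>"
 * with state 0 (UID_STATE_FOREGROUND) or 1 (UID_STATE_BACKGROUND);
 * a state switch first flushes pending I/O deltas into the old bucket
 * via update_io_stats_locked(). Hypothetical userspace writer:
 */
#include <stdio.h>

static int set_uid_state(unsigned int uid, int state)
{
	FILE *f = fopen("/proc/uid_procstat/set", "w");

	if (!f)
		return -1;
	/* kernel side parses this with sscanf(input, "%u %d", ...) */
	fprintf(f, "%u %d", uid, state);
	return fclose(f);
}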
proc_mkdir("uid_cputime", NULL); + if (!cpu_parent) { + pr_err("%s: failed to create uid_cputime proc entry\n", + __func__); + goto err; + } + + proc_create_data("remove_uid_range", 0222, cpu_parent, + &uid_remove_fops, NULL); + proc_create_data("show_uid_stat", 0444, cpu_parent, + &uid_cputime_fops, NULL); + + io_parent = proc_mkdir("uid_io", NULL); + if (!io_parent) { + pr_err("%s: failed to create uid_io proc entry\n", + __func__); + goto err; + } + + proc_create_data("stats", 0444, io_parent, + &uid_io_fops, NULL); + + proc_parent = proc_mkdir("uid_procstat", NULL); + if (!proc_parent) { + pr_err("%s: failed to create uid_procstat proc entry\n", + __func__); + goto err; + } + + proc_create_data("set", 0222, proc_parent, + &uid_procstat_fops, NULL); + + profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block); + + return 0; + +err: + remove_proc_subtree("uid_cputime", NULL); + remove_proc_subtree("uid_io", NULL); + remove_proc_subtree("uid_procstat", NULL); + return -ENOMEM; +} + +early_initcall(proc_uid_sys_stats_init); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3b79f514350e..afdf70000651 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -680,6 +680,12 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->ext_csd.ffu_capable = (ext_csd[EXT_CSD_SUPPORTED_MODE] & 0x1) && !(ext_csd[EXT_CSD_FW_CONFIG] & 0x1); + + card->ext_csd.pre_eol_info = ext_csd[EXT_CSD_PRE_EOL_INFO]; + card->ext_csd.device_life_time_est_typ_a = + ext_csd[EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A]; + card->ext_csd.device_life_time_est_typ_b = + ext_csd[EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B]; } out: return err; @@ -813,6 +819,11 @@ MMC_DEV_ATTR(manfid, "0x%06x\n", card->cid.manfid); MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name); MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid); MMC_DEV_ATTR(prv, "0x%x\n", card->cid.prv); +MMC_DEV_ATTR(rev, "0x%x\n", card->ext_csd.rev); +MMC_DEV_ATTR(pre_eol_info, "%02x\n", card->ext_csd.pre_eol_info); +MMC_DEV_ATTR(life_time, "0x%02x 0x%02x\n", + card->ext_csd.device_life_time_est_typ_a, + card->ext_csd.device_life_time_est_typ_b); MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial); MMC_DEV_ATTR(enhanced_area_offset, "%llu\n", card->ext_csd.enhanced_area_offset); @@ -851,6 +862,9 @@ static struct attribute *mmc_std_attrs[] = { &dev_attr_name.attr, &dev_attr_oemid.attr, &dev_attr_prv.attr, + &dev_attr_rev.attr, + &dev_attr_pre_eol_info.attr, + &dev_attr_life_time.attr, &dev_attr_serial.attr, &dev_attr_enhanced_area_offset.attr, &dev_attr_enhanced_area_size.attr, diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index d2c386f09d69..1d843357422e 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct ushc_data *ushc; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b6fa89102526..66ba1e0ff37e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -913,8 +913,8 @@ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1 +#define 
RX_PACKET_ATTRIBUTES_LAST_INDEX 2 +#define RX_PACKET_ATTRIBUTES_LAST_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX 3 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX 4 @@ -923,6 +923,8 @@ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH 1 #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX 6 #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_FIRST_INDEX 7 +#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH 1 #define RX_NORMAL_DESC0_OVT_INDEX 0 #define RX_NORMAL_DESC0_OVT_WIDTH 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f6a7161e3b85..5e6238e0b2bd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1658,10 +1658,15 @@ static int xgbe_dev_read(struct xgbe_channel *channel) /* Get the header length */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 1); rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, HL); if (rdata->rx.hdr_len) pdata->ext_stats.rx_split_header_packets++; + } else { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 0); } /* Get the RSS hash */ @@ -1684,19 +1689,16 @@ static int xgbe_dev_read(struct xgbe_channel *channel) } } - /* Get the packet length */ - rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); - - if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) { - /* Not all the data has been transferred for this packet */ - XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 1); + /* Not all the data has been transferred for this packet */ + if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) return 0; - } /* This is the last of the data for this packet */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 0); + LAST, 1); + + /* Get the packet length */ + rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); /* Set checksum done indicator as appropriate */ if (netdev->features & NETIF_F_RXCSUM) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 53ce1222b11d..865b7e0b133b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1760,13 +1760,12 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, { struct sk_buff *skb; u8 *packet; - unsigned int copy_len; skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len); if (!skb) return NULL; - /* Start with the header buffer which may contain just the header + /* Pull in the header buffer which may contain just the header * or the header plus data */ dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base, @@ -1775,30 +1774,49 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; - copy_len = (rdata->rx.hdr_len) ? 
rdata->rx.hdr_len : len; - copy_len = min(rdata->rx.hdr.dma_len, copy_len); - skb_copy_to_linear_data(skb, packet, copy_len); - skb_put(skb, copy_len); - - len -= copy_len; - if (len) { - /* Add the remaining data as a frag */ - dma_sync_single_range_for_cpu(pdata->dev, - rdata->rx.buf.dma_base, - rdata->rx.buf.dma_off, - rdata->rx.buf.dma_len, - DMA_FROM_DEVICE); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - rdata->rx.buf.pa.pages, - rdata->rx.buf.pa.pages_offset, - len, rdata->rx.buf.dma_len); - rdata->rx.buf.pa.pages = NULL; - } + skb_copy_to_linear_data(skb, packet, len); + skb_put(skb, len); return skb; } +static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet) +{ + /* Always zero if not the first descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) + return 0; + + /* First descriptor with split header, return header length */ + if (rdata->rx.hdr_len) + return rdata->rx.hdr_len; + + /* First descriptor but not the last descriptor and no split header, + * so the full buffer was used + */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.hdr.dma_len; + + /* First descriptor and last descriptor and no split header, so + * calculate how much of the buffer was used + */ + return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len); +} + +static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet, + unsigned int len) +{ + /* Always the full buffer if not the last descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.buf.dma_len; + + /* Last descriptor so calculate how much of the buffer was used + * for the last bit of data + */ + return rdata->rx.len - len; +} + static int xgbe_tx_poll(struct xgbe_channel *channel) { struct xgbe_prv_data *pdata = channel->pdata; @@ -1881,8 +1899,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct napi_struct *napi; struct sk_buff *skb; struct skb_shared_hwtstamps *hwtstamps; - unsigned int incomplete, error, context_next, context; - unsigned int len, rdesc_len, max_len; + unsigned int last, error, context_next, context; + unsigned int len, buf1_len, buf2_len, max_len; unsigned int received = 0; int packet_count = 0; @@ -1892,7 +1910,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!ring) return 0; - incomplete = 0; + last = 0; context_next = 0; napi = (pdata->per_channel_irq) ? 
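/*
 * Editorial worked cases for xgbe_rx_buf1_len()/xgbe_rx_buf2_len()
 * above, assuming rx.hdr.dma_len = 256 and rx.buf.dma_len = 2048
 * (values are illustrative):
 *
 *   - middle descriptor (neither FIRST nor LAST):
 *       buf1 = 0, buf2 = 2048 (the full data buffer was used)
 *   - FIRST, split header with hdr_len = 54, not LAST:
 *       buf1 = 54, buf2 = 2048
 *   - FIRST and LAST, no split header, total rx.len = 60:
 *       buf1 = min(256, 60) = 60, buf2 = 60 - 60 = 0
 *
 * buf2 on the LAST descriptor is always rx.len minus the bytes
 * already accounted for, so the two helpers never over-count.
 */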
&channel->napi : &pdata->napi; @@ -1926,9 +1944,8 @@ read_again: received++; ring->cur++; - incomplete = XGMAC_GET_BITS(packet->attributes, - RX_PACKET_ATTRIBUTES, - INCOMPLETE); + last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + LAST); context_next = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT_NEXT); @@ -1937,7 +1954,7 @@ read_again: CONTEXT); /* Earlier error, just drain the remaining data */ - if ((incomplete || context_next) && error) + if ((!last || context_next) && error) goto read_again; if (error || packet->errors) { @@ -1949,16 +1966,22 @@ read_again: } if (!context) { - /* Length is cumulative, get this descriptor's length */ - rdesc_len = rdata->rx.len - len; - len += rdesc_len; + /* Get the data length in the descriptor buffers */ + buf1_len = xgbe_rx_buf1_len(rdata, packet); + len += buf1_len; + buf2_len = xgbe_rx_buf2_len(rdata, packet, len); + len += buf2_len; - if (rdesc_len && !skb) { + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, - rdesc_len); - if (!skb) + buf1_len); + if (!skb) { error = 1; - } else if (rdesc_len) { + goto skip_data; + } + } + + if (buf2_len) { dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.buf.dma_base, rdata->rx.buf.dma_off, @@ -1968,13 +1991,14 @@ read_again: skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, rdata->rx.buf.pa.pages_offset, - rdesc_len, + buf2_len, rdata->rx.buf.dma_len); rdata->rx.buf.pa.pages = NULL; } } - if (incomplete || context_next) +skip_data: + if (!last || context_next) goto read_again; if (!skb) @@ -2033,7 +2057,7 @@ next_packet: } /* Check if we need to save state before leaving */ - if (received && (incomplete || context_next)) { + if (received && (!last || context_next)) { rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdata->state_saved = 1; rdata->state.skb = skb; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 91627561c58d..f971d92f7b41 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3495,7 +3495,8 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(priv->phydev); + if (!device_may_wakeup(d)) + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3592,7 +3593,8 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(priv->phydev); + if (!device_may_wakeup(d)) + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 8bdfe53754ba..e96d1f95bb47 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable) udelay(60); } -static void bcmgenet_internal_phy_setup(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 reg; - - /* Power up PHY */ - bcmgenet_phy_power_set(dev, true); - /* enable APD */ - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg |= EXT_PWR_DN_EN_LD; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - bcmgenet_mii_reset(dev); -} - static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { u32 reg; @@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_device *dev) if (priv->internal_phy) { phy_name = "internal PHY"; - bcmgenet_internal_phy_setup(dev); } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { 
phy_name = "MoCA"; bcmgenet_moca_phy_setup(priv); diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 23ec28f43f6d..afaa98d1d4e4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -77,6 +77,10 @@ s32 igb_get_phy_id(struct e1000_hw *hw) s32 ret_val = 0; u16 phy_id; + /* ensure PHY page selection to fix misconfigured i210 */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) + phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0); + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); if (ret_val) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index cf0098596e85..e9408f5e2a1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -197,6 +197,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + rq->stats.packets += lro_num_seg - 1; rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ba115ec7aa92..1e611980cf99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -85,7 +85,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 17, + .log_max_qp = 18, .mr_cache[0] = { .size = 500, .limit = 250 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e8a09ff9e724..c8a7802d2953 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -197,65 +197,6 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, return ppi; } -union sub_key { - u64 k; - struct { - u8 pad[3]; - u8 kb; - u32 ka; - }; -}; - -/* Toeplitz hash function - * data: network byte order - * return: host byte order - */ -static u32 comp_hash(u8 *key, int klen, void *data, int dlen) -{ - union sub_key subk; - int k_next = 4; - u8 dt; - int i, j; - u32 ret = 0; - - subk.k = 0; - subk.ka = ntohl(*(u32 *)key); - - for (i = 0; i < dlen; i++) { - subk.kb = key[k_next]; - k_next = (k_next + 1) % klen; - dt = ((u8 *)data)[i]; - for (j = 0; j < 8; j++) { - if (dt & 0x80) - ret ^= subk.ka; - dt <<= 1; - subk.k <<= 1; - } - } - - return ret; -} - -static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) -{ - struct flow_keys flow; - int data_len; - - if (!skb_flow_dissect_flow_keys(skb, &flow, 0) || - !(flow.basic.n_proto == htons(ETH_P_IP) || - flow.basic.n_proto == htons(ETH_P_IPV6))) - return false; - - if (flow.basic.ip_proto == IPPROTO_TCP) - data_len = 12; - else - data_len = 8; - - *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, &flow, data_len); - - return true; -} - static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -268,11 +209,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) return 0; - if (netvsc_set_hash(&hash, skb)) { - q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % - ndev->real_num_tx_queues; - skb_set_hash(skb, hash, 
PKT_HASH_TYPE_L3); - } + hash = skb_get_hash(skb); + q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % + ndev->real_num_tx_queues; return q_idx; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d6b619667f1a..349aecbc210a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -345,6 +345,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { @@ -352,7 +353,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; - dstats->tx_bytes += skb->len; + dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6fa8e165878e..590750ab6564 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2600,7 +2600,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VXLAN_ID]) { __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); - if (id >= VXLAN_VID_MASK) + if (id >= VXLAN_N_VID) return -ERANGE; } diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 5ce5ef211bdb..754f21fd9768 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -936,8 +936,10 @@ parport_register_dev_model(struct parport *port, const char *name, * pardevice fields. -arca */ port->ops->init_state(par_dev, par_dev->state); - port->proc_device = par_dev; - parport_device_proc_register(par_dev); + if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { + port->proc_device = par_dev; + parport_device_proc_register(par_dev); + } return par_dev; diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 31f31d460fc9..357527712539 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -303,13 +303,6 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return rc; } - pci_iov_set_numvfs(dev, nr_virtfn); - iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; - pci_cfg_access_lock(dev); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - msleep(100); - pci_cfg_access_unlock(dev); - iov->initial_VFs = initial; if (nr_virtfn < initial) initial = nr_virtfn; @@ -320,6 +313,13 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) goto err_pcibios; } + pci_iov_set_numvfs(dev, nr_virtfn); + iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; + pci_cfg_access_lock(dev); + pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); + msleep(100); + pci_cfg_access_unlock(dev); + for (i = 0; i < initial; i++) { rc = virtfn_add(dev, i, 0); if (rc) @@ -555,21 +555,61 @@ void pci_iov_release(struct pci_dev *dev) } /** - * pci_iov_resource_bar - get position of the SR-IOV BAR + * pci_iov_update_resource - update a VF BAR * @dev: the PCI device * @resno: the resource number * - * Returns position of the BAR encapsulated in the SR-IOV capability. + * Update a VF BAR in the SR-IOV capability of a PF. */ -int pci_iov_resource_bar(struct pci_dev *dev, int resno) +void pci_iov_update_resource(struct pci_dev *dev, int resno) { - if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) - return 0; + struct pci_sriov *iov = dev->is_physfn ? 
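/*
 * Editorial note on the vrf_xmit() change above: once the skb has
 * been handed to the transmit path it may already have been freed,
 * so skb->len must be sampled before the call and the saved copy
 * used for statistics. Standalone shape of the pattern (transmit()
 * is a hypothetical consumer that frees its argument):
 */
#include <stddef.h>

struct pkt { size_t len; };

extern int transmit(struct pkt *p);	/* consumes and frees p */

static size_t sent_bytes;

static int xmit_and_count(struct pkt *p)
{
	size_t len = p->len;	/* sample before ownership is lost */
	int ret = transmit(p);	/* p must not be touched after this */

	if (ret == 0)
		sent_bytes += len;	/* use the copy, never p->len */
	return ret;
}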
dev->sriov : NULL; + struct resource *res = dev->resource + resno; + int vf_bar = resno - PCI_IOV_RESOURCES; + struct pci_bus_region region; + u16 cmd; + u32 new; + int reg; + + /* + * The generic pci_restore_bars() path calls this for all devices, + * including VFs and non-SR-IOV devices. If this is not a PF, we + * have nothing to do. + */ + if (!iov) + return; + + pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd); + if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) { + dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n", + vf_bar, res); + return; + } + + /* + * Ignore unimplemented BARs, unused resource slots for 64-bit + * BARs, and non-movable resources, e.g., those described via + * Enhanced Allocation. + */ + if (!res->flags) + return; + + if (res->flags & IORESOURCE_UNSET) + return; + + if (res->flags & IORESOURCE_PCI_FIXED) + return; - BUG_ON(!dev->is_physfn); + pcibios_resource_to_bus(dev->bus, ®ion, res); + new = region.start; + new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; - return dev->sriov->pos + PCI_SRIOV_BAR + - 4 * (resno - PCI_IOV_RESOURCES); + reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar; + pci_write_config_dword(dev, reg, new); + if (res->flags & IORESOURCE_MEM_64) { + new = region.start >> 16 >> 16; + pci_write_config_dword(dev, reg + 4, new); + } } resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e311a9bf2c90..0e53488f8ec1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -519,10 +519,6 @@ static void pci_restore_bars(struct pci_dev *dev) { int i; - /* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */ - if (dev->is_virtfn) - return; - for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) pci_update_resource(dev, i); } @@ -4472,36 +4468,6 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) } EXPORT_SYMBOL(pci_select_bars); -/** - * pci_resource_bar - get position of the BAR associated with a resource - * @dev: the PCI device - * @resno: the resource number - * @type: the BAR type to be filled in - * - * Returns BAR position in config space, or 0 if the BAR is invalid. 
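/*
 * Editorial note on pci_iov_update_resource() above: a 64-bit BAR
 * cannot be written atomically, so the low dword goes into the BAR
 * register and the high dword into the following one. The usual
 * reading of the "region.start >> 16 >> 16" idiom: when
 * resource_size_t is only 32 bits wide, a single ">> 32" would be
 * undefined behavior (shift count equal to the type width), while
 * two 16-bit shifts are well defined and simply yield zero.
 */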
- */ -int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) -{ - int reg; - - if (resno < PCI_ROM_RESOURCE) { - *type = pci_bar_unknown; - return PCI_BASE_ADDRESS_0 + 4 * resno; - } else if (resno == PCI_ROM_RESOURCE) { - *type = pci_bar_mem32; - return dev->rom_base_reg; - } else if (resno < PCI_BRIDGE_RESOURCES) { - /* device specific resource */ - *type = pci_bar_unknown; - reg = pci_iov_resource_bar(dev, resno); - if (reg) - return reg; - } - - dev_err(&dev->dev, "BAR %d: invalid resource\n", resno); - return 0; -} - /* Some architectures require additional programming to enable VGA */ static arch_set_vga_state_t arch_set_vga_state; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d390fc1475ec..c43e448873ca 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -232,7 +232,6 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int pci_setup_device(struct pci_dev *dev); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); -int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); @@ -276,7 +275,7 @@ static inline void pci_restore_ats_state(struct pci_dev *dev) #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); -int pci_iov_resource_bar(struct pci_dev *dev, int resno); +void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); @@ -290,10 +289,6 @@ static inline void pci_iov_release(struct pci_dev *dev) { } -static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno) -{ - return 0; -} static inline void pci_restore_iov_state(struct pci_dev *dev) { } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 71d9a6d1bd56..b83df942794f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -226,7 +226,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; } } else { - res->flags |= (l & IORESOURCE_ROM_ENABLE); + if (l & PCI_ROM_ADDRESS_ENABLE) + res->flags |= IORESOURCE_ROM_ENABLE; l64 = l & PCI_ROM_ADDRESS_MASK; sz64 = sz & PCI_ROM_ADDRESS_MASK; mask64 = (u32)PCI_ROM_ADDRESS_MASK; diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index eb0ad530dc43..3eea7fc5e1a2 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -31,6 +31,11 @@ int pci_enable_rom(struct pci_dev *pdev) if (!res->flags) return -1; + /* + * Ideally pci_update_resource() would update the ROM BAR address, + * and we would only set the enable bit here. But apparently some + * devices have buggy ROM BARs that read as zero when disabled. 
+ */ pcibios_resource_to_bus(pdev->bus, ®ion, res); pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr); rom_addr &= ~PCI_ROM_ADDRESS_MASK; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 604011e047d6..25062966cbfa 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -25,21 +25,18 @@ #include <linux/slab.h> #include "pci.h" - -void pci_update_resource(struct pci_dev *dev, int resno) +static void pci_std_update_resource(struct pci_dev *dev, int resno) { struct pci_bus_region region; bool disable; u16 cmd; u32 new, check, mask; int reg; - enum pci_bar_type type; struct resource *res = dev->resource + resno; - if (dev->is_virtfn) { - dev_warn(&dev->dev, "can't update VF BAR%d\n", resno); + /* Per SR-IOV spec 3.4.1.11, VF BARs are RO zero */ + if (dev->is_virtfn) return; - } /* * Ignore resources for unimplemented BARs and unused resource slots @@ -60,21 +57,34 @@ void pci_update_resource(struct pci_dev *dev, int resno) return; pcibios_resource_to_bus(dev->bus, ®ion, res); + new = region.start; - new = region.start | (res->flags & PCI_REGION_FLAG_MASK); - if (res->flags & IORESOURCE_IO) + if (res->flags & IORESOURCE_IO) { mask = (u32)PCI_BASE_ADDRESS_IO_MASK; - else + new |= res->flags & ~PCI_BASE_ADDRESS_IO_MASK; + } else if (resno == PCI_ROM_RESOURCE) { + mask = (u32)PCI_ROM_ADDRESS_MASK; + } else { mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; + new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK; + } - reg = pci_resource_bar(dev, resno, &type); - if (!reg) - return; - if (type != pci_bar_unknown) { + if (resno < PCI_ROM_RESOURCE) { + reg = PCI_BASE_ADDRESS_0 + 4 * resno; + } else if (resno == PCI_ROM_RESOURCE) { + + /* + * Apparently some Matrox devices have ROM BARs that read + * as zero when disabled, so don't update ROM BARs unless + * they're enabled. See https://lkml.org/lkml/2005/8/30/138. 
+ */ if (!(res->flags & IORESOURCE_ROM_ENABLE)) return; + + reg = dev->rom_base_reg; new |= PCI_ROM_ADDRESS_ENABLE; - } + } else + return; /* * We can't update a 64-bit BAR atomically, so when possible, @@ -110,6 +120,16 @@ void pci_update_resource(struct pci_dev *dev, int resno) pci_write_config_word(dev, PCI_COMMAND, cmd); } +void pci_update_resource(struct pci_dev *dev, int resno) +{ + if (resno <= PCI_ROM_RESOURCE) + pci_std_update_resource(dev, resno); +#ifdef CONFIG_PCI_IOV + else if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) + pci_iov_update_resource(dev, resno); +#endif +} + int pci_claim_resource(struct pci_dev *dev, int resource) { struct resource *res = &dev->resource[resource]; diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index a009ae34c5ef..930f0f25c1ce 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1466,12 +1466,11 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) offset += range->npins; } - /* Mask and clear all interrupts */ - chv_writel(0, pctrl->regs + CHV_INTMASK); + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); goto fail; diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index c1448955d3ed..c5a351e7bb4e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -602,10 +602,6 @@ static void msm_gpio_irq_unmask(struct irq_data *d) spin_lock_irqsave(&pctrl->lock, flags); - val = readl(pctrl->regs + g->intr_status_reg); - val &= ~BIT(g->intr_status_bit); - writel(val, pctrl->regs + g->intr_status_reg); - val = readl(pctrl->regs + g->intr_cfg_reg); val |= BIT(g->intr_enable_bit); writel(val, pctrl->regs + g->intr_cfg_reg); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 24ec282e15d8..7c3b8d3516e3 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1651,6 +1651,9 @@ static void ap_scan_bus(struct work_struct *unused) ap_dev->queue_depth = queue_depth; ap_dev->raw_hwtype = device_type; ap_dev->device_type = device_type; + /* CEX6 toleration: map to CEX5 */ + if (device_type == AP_DEVICE_TYPE_CEX6) + ap_dev->device_type = AP_DEVICE_TYPE_CEX5; ap_dev->functions = device_functions; spin_lock_init(&ap_dev->lock); INIT_LIST_HEAD(&ap_dev->pendingq); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6adcbdf225d1..cc741e948170 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -105,6 +105,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX3C 9 #define AP_DEVICE_TYPE_CEX4 10 #define AP_DEVICE_TYPE_CEX5 11 +#define AP_DEVICE_TYPE_CEX6 12 /* * Known function facilities diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h index 5ada9268a450..a8ac4c0a1493 100644 --- a/drivers/scsi/cxlflash/common.h +++ b/drivers/scsi/cxlflash/common.h @@ -34,7 +34,6 @@ extern const struct file_operations cxlflash_cxl_fops; sectors */ -#define NUM_RRQ_ENTRY 16 /* for master issued cmds */ #define MAX_RHT_PER_CONTEXT (PAGE_SIZE / sizeof(struct sisl_rht_entry)) /* AFU command retry limit */ @@ -48,9 +47,12 @@ extern const struct file_operations cxlflash_cxl_fops; index derivation */ -#define 
CXLFLASH_MAX_CMDS 16 +#define CXLFLASH_MAX_CMDS 256 #define CXLFLASH_MAX_CMDS_PER_LUN CXLFLASH_MAX_CMDS +/* RRQ for master issued cmds */ +#define NUM_RRQ_ENTRY CXLFLASH_MAX_CMDS + static inline void check_sizes(void) { @@ -149,7 +151,7 @@ struct afu_cmd { struct afu { /* Stuff requiring alignment go first. */ - u64 rrq_entry[NUM_RRQ_ENTRY]; /* 128B RRQ */ + u64 rrq_entry[NUM_RRQ_ENTRY]; /* 2K RRQ */ /* * Command & data for AFU commands. */ diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index c86847c68448..2882bcac918a 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -2305,7 +2305,7 @@ static struct scsi_host_template driver_template = { .eh_device_reset_handler = cxlflash_eh_device_reset_handler, .eh_host_reset_handler = cxlflash_eh_host_reset_handler, .change_queue_depth = cxlflash_change_queue_depth, - .cmd_per_lun = 16, + .cmd_per_lun = CXLFLASH_MAX_CMDS_PER_LUN, .can_queue = CXLFLASH_MAX_CMDS, .this_id = -1, .sg_tablesize = SG_NONE, /* No scatter gather support */ diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 6bffd91b973a..c1ccf1ee99ea 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ check_mgmt: conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) 
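/*
 * Editorial sketch: the taskqueuelock additions to libiscsi above
 * follow one shape - the queues are only ever touched under the
 * spinlock, but the lock is dropped around the actual transmit,
 * which can block and may re-queue work. Generic form, assuming a
 * kernel context; pop_entry() and submit() are hypothetical
 * stand-ins for the list_entry()/xmit calls:
 */
struct task_queue;			/* protected by q_lock */
extern spinlock_t q_lock;
extern void *pop_entry(struct task_queue *q);
extern int submit(void *task);		/* may sleep or re-queue */

static int drain(struct task_queue *q)
{
	void *task;

	spin_lock_bh(&q_lock);
	while ((task = pop_entry(q))) {
		spin_unlock_bh(&q_lock);	/* never transmit under the lock */
		if (submit(task))
			return -1;
		spin_lock_bh(&q_lock);	/* re-take before touching the queue */
	}
	spin_unlock_bh(&q_lock);
	return 0;
}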
{ if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ check_mgmt: * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ check_mgmt: conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2900,6 +2923,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c14ab6c3ae40..60c21093f865 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11387,6 +11387,7 @@ static struct pci_driver lpfc_driver = { .id_table = lpfc_id_table, .probe = lpfc_pci_probe_one, .remove = lpfc_pci_remove_one, + .shutdown = lpfc_pci_remove_one, .suspend = lpfc_pci_suspend_one, .resume = lpfc_pci_resume_one, .err_handler = &lpfc_err_handler, diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 3f1133230a1a..e4530ac6d5d4 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -392,6 +392,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) ret = PTR_ERR(vmfile); goto out; } + vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; } get_file(asma->file); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index de18790eb21c..d72a4058fd08 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, buf = kzalloc(12, GFP_KERNEL); if (!buf) - return; + goto out_free; memset(cdb, 0, MAX_COMMAND_SIZE); cdb[0] = MODE_SENSE; @@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, * If MODE_SENSE still returns zero, set the default value to 1024. 
*/ sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]); +out_free: if (!sdev->sector_size) sdev->sector_size = 1024; -out_free: + kfree(buf); } @@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, sd->lun, sd->queue_depth); } - dev->dev_attrib.hw_block_size = sd->sector_size; + dev->dev_attrib.hw_block_size = + min_not_zero((int)sd->sector_size, 512); dev->dev_attrib.hw_max_sectors = - min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q)); + min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q)); dev->dev_attrib.hw_queue_depth = sd->queue_depth; /* @@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, /* * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE. */ - if (sd->type == TYPE_TAPE) + if (sd->type == TYPE_TAPE) { pscsi_tape_read_blocksize(dev, sd); + dev->dev_attrib.hw_block_size = sd->sector_size; + } return 0; } @@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) /* * Called with struct Scsi_Host->host_lock called. */ -static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) +static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd) __releases(sh->host_lock) { struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; @@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) return 0; } -/* - * Called with struct Scsi_Host->host_lock called. - */ -static int pscsi_create_type_other(struct se_device *dev, - struct scsi_device *sd) - __releases(sh->host_lock) -{ - struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; - struct Scsi_Host *sh = sd->host; - int ret; - - spin_unlock_irq(sh->host_lock); - ret = pscsi_add_device_to_list(dev, sd); - if (ret) - return ret; - - pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n", - phv->phv_host_id, scsi_device_type(sd->type), sh->host_no, - sd->channel, sd->id, sd->lun); - return 0; -} - static int pscsi_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; @@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev) case TYPE_DISK: ret = pscsi_create_type_disk(dev, sd); break; - case TYPE_ROM: - ret = pscsi_create_type_rom(dev, sd); - break; default: - ret = pscsi_create_type_other(dev, sd); + ret = pscsi_create_type_nondisk(dev, sd); break; } @@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev) else if (pdv->pdv_lld_host) scsi_host_put(pdv->pdv_lld_host); - if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) - scsi_device_put(sd); + scsi_device_put(sd); pdv->pdv_sd = NULL; } @@ -1088,7 +1066,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev) if (pdv->pdv_bd && pdv->pdv_bd->bd_part) return pdv->pdv_bd->bd_part->nr_sects; - dump_stack(); return 0; } diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 2e27b1034ede..90c5dffc9fa4 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1096,9 +1096,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; break; case VERIFY: + case VERIFY_16: size = 0; - sectors = transport_get_sectors_10(cdb); - cmd->t_task_lba = transport_lba_32(cdb); + if (cdb[0] == VERIFY) { + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); + } else { + sectors = transport_get_sectors_16(cdb); + cmd->t_task_lba = transport_lba_64(cdb); + } cmd->execute_cmd = sbc_emulate_noop; goto check_lba; case REZERO_UNIT: diff 
--git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 5b24ffd93649..83ff1724ec79 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -57,6 +57,7 @@ struct serial_private { unsigned int nr; void __iomem *remapped_bar[PCI_NUM_BAR_RESOURCES]; struct pci_serial_quirk *quirk; + const struct pciserial_board *board; int line[0]; }; @@ -4058,6 +4059,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) } } priv->nr = i; + priv->board = board; return priv; err_deinit: @@ -4068,7 +4070,7 @@ err_out: } EXPORT_SYMBOL_GPL(pciserial_init_ports); -void pciserial_remove_ports(struct serial_private *priv) +void pciserial_detach_ports(struct serial_private *priv) { struct pci_serial_quirk *quirk; int i; @@ -4088,7 +4090,11 @@ void pciserial_remove_ports(struct serial_private *priv) quirk = find_quirk(priv->dev); if (quirk->exit) quirk->exit(priv->dev); +} +void pciserial_remove_ports(struct serial_private *priv) +{ + pciserial_detach_ports(priv); kfree(priv); } EXPORT_SYMBOL_GPL(pciserial_remove_ports); @@ -5819,7 +5825,7 @@ static pci_ers_result_t serial8250_io_error_detected(struct pci_dev *dev, return PCI_ERS_RESULT_DISCONNECT; if (priv) - pciserial_suspend_ports(priv); + pciserial_detach_ports(priv); pci_disable_device(dev); @@ -5844,9 +5850,18 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev) static void serial8250_io_resume(struct pci_dev *dev) { struct serial_private *priv = pci_get_drvdata(dev); + const struct pciserial_board *board; - if (priv) - pciserial_resume_ports(priv); + if (!priv) + return; + + board = priv->board; + kfree(priv); + priv = pciserial_init_ports(dev, board); + + if (!IS_ERR(priv)) { + pci_set_drvdata(dev, priv); + } } static const struct pci_error_handlers serial8250_err_handler = { diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index deaddb950c20..24337ac3323f 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1105,7 +1105,7 @@ static int usbtmc_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "%s called\n", __func__); - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -1163,6 +1163,12 @@ static int usbtmc_probe(struct usb_interface *intf, } } + if (!data->bulk_out || !data->bulk_in) { + dev_err(&intf->dev, "bulk endpoints not found\n"); + retcode = -ENODEV; + goto err_put; + } + retcode = get_capabilities(data); if (retcode) dev_err(&intf->dev, "can't read capabilities\n"); @@ -1186,6 +1192,7 @@ static int usbtmc_probe(struct usb_interface *intf, error_register: sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp); sysfs_remove_group(&intf->dev.kobj, &data_attr_grp); +err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index ac30a051ad71..325cbc9c35d8 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -246,6 +246,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* * Adjust bInterval for quirked devices. + */ + /* + * This quirk fixes bIntervals reported in ms. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval) + 3, i, j); + i = j = n; + } + /* * This quirk fixes bIntervals reported in * linear microframes. 
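/*
 * Editorial worked example for the new
 * USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL handling above: high-speed
 * interrupt endpoints encode their period as 2^(bInterval-1)
 * microframes, but the quirky devices report plain milliseconds.
 * Since 1 ms = 8 = 2^3 microframes, a v ms period maps to the
 * exponent fls(v) + 3:
 *
 *   v = 1 ms -> fls(1) + 3 = 4 -> 2^(4-1) = 8  microframes = 1 ms
 *   v = 4 ms -> fls(4) + 3 = 6 -> 2^(6-1) = 32 microframes = 4 ms
 *
 * clamp(..., i, j) then pins the result to the legal bInterval range
 * for the endpoint's speed.
 */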
*/ diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 9a3bf5e2977f..755dbca78646 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -966,7 +966,7 @@ static void usb_bus_init (struct usb_bus *bus) bus->bandwidth_allocated = 0; bus->bandwidth_int_reqs = 0; bus->bandwidth_isoc_reqs = 0; - mutex_init(&bus->usb_address0_mutex); + mutex_init(&bus->devnum_next_mutex); INIT_LIST_HEAD (&bus->bus_list); } @@ -2555,6 +2555,14 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, return NULL; } if (primary_hcd == NULL) { + hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex), + GFP_KERNEL); + if (!hcd->address0_mutex) { + kfree(hcd); + dev_dbg(dev, "hcd address0 mutex alloc failed\n"); + return NULL; + } + mutex_init(hcd->address0_mutex); hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex), GFP_KERNEL); if (!hcd->bandwidth_mutex) { @@ -2566,6 +2574,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, dev_set_drvdata(dev, hcd); } else { mutex_lock(&usb_port_peer_mutex); + hcd->address0_mutex = primary_hcd->address0_mutex; hcd->bandwidth_mutex = primary_hcd->bandwidth_mutex; hcd->primary_hcd = primary_hcd; primary_hcd->primary_hcd = primary_hcd; @@ -2622,24 +2631,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd); * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is * deallocated. * - * Make sure to only deallocate the bandwidth_mutex when the primary HCD is - * freed. When hcd_release() is called for either hcd in a peer set - * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to - * block new peering attempts + * Make sure to deallocate the bandwidth_mutex only when the last HCD is + * freed. When hcd_release() is called for either hcd in a peer set, + * invalidate the peer's ->shared_hcd and ->primary_hcd pointers. 
*/ static void hcd_release(struct kref *kref) { struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); mutex_lock(&usb_port_peer_mutex); - if (hcd->primary_hcd == hcd) - kfree(hcd->bandwidth_mutex); if (hcd->shared_hcd) { struct usb_hcd *peer = hcd->shared_hcd; peer->shared_hcd = NULL; - if (peer->primary_hcd == hcd) - peer->primary_hcd = NULL; + peer->primary_hcd = NULL; + } else { + kfree(hcd->address0_mutex); + kfree(hcd->bandwidth_mutex); } mutex_unlock(&usb_port_peer_mutex); kfree(hcd); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f84ef04284f5..41b1ef2135d7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1991,7 +1991,7 @@ static void choose_devnum(struct usb_device *udev) struct usb_bus *bus = udev->bus; /* be safe when more hub events are proceed in parallel */ - mutex_lock(&bus->usb_address0_mutex); + mutex_lock(&bus->devnum_next_mutex); if (udev->wusb) { devnum = udev->portnum + 1; BUG_ON(test_bit(devnum, bus->devmap.devicemap)); @@ -2009,7 +2009,7 @@ static void choose_devnum(struct usb_device *udev) set_bit(devnum, bus->devmap.devicemap); udev->devnum = devnum; } - mutex_unlock(&bus->usb_address0_mutex); + mutex_unlock(&bus->devnum_next_mutex); } static void release_devnum(struct usb_device *udev) @@ -4212,7 +4212,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; - if (!udev->usb2_hw_lpm_capable) + if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) @@ -4275,7 +4275,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; - mutex_lock(&hdev->bus->usb_address0_mutex); + mutex_lock(hcd->address0_mutex); /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ @@ -4561,7 +4561,7 @@ fail: hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } - mutex_unlock(&hdev->bus->usb_address0_mutex); + mutex_unlock(hcd->address0_mutex); return retval; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 24f9f98968a5..96b21b0dac1e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = { /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Baum Vario Ultra */ + { USB_DEVICE(0x0904, 0x6101), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6102), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index cd096fb9078f..1ef3442cf618 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -77,9 +77,13 @@ struct acc_dev { struct usb_ep *ep_in; struct usb_ep *ep_out; - /* set to 1 when we connect */ + /* online indicates state of function_set_alt & function_unbind + * set to 1 when we connect + */ int online:1; - /* Set to 1 when we disconnect. + + /* disconnected indicates state of open & release + * Set to 1 when we disconnect. * Not cleared until our file is closed. 
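/*
 * Editorial note on the address0_mutex changes above: enumeration at
 * the default address (address 0) must be serialized across both
 * buses of a shared-HCD pair (the USB2 and USB3 peer roothubs of one
 * controller), so the mutex moves from the per-bus usb_address0_mutex
 * to a kmalloc'ed object shared through primary_hcd and freed only
 * when the last peer is released in hcd_release(). The remaining
 * per-bus devnum_next_mutex only guards devnum allocation.
 */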
*/ int disconnected:1; @@ -307,7 +311,6 @@ static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head) static void acc_set_disconnected(struct acc_dev *dev) { - dev->online = 0; dev->disconnected = 1; } @@ -721,9 +724,10 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, req->zero = 0; } else { xfer = count; - /* If the data length is a multple of the + /* + * If the data length is a multple of the * maxpacket size then send a zero length packet(ZLP). - */ + */ req->zero = ((xfer % dev->ep_in->maxpacket) == 0); } if (copy_from_user(req->buf, buf, xfer)) { @@ -808,7 +812,10 @@ static int acc_release(struct inode *ip, struct file *fp) printk(KERN_INFO "acc_release\n"); WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0)); - _acc_dev->disconnected = 0; + /* indicate that we are disconnected + * still could be online so don't touch online flag + */ + _acc_dev->disconnected = 1; return 0; } @@ -868,11 +875,11 @@ int acc_ctrlrequest(struct usb_composite_dev *cdev, unsigned long flags; /* - printk(KERN_INFO "acc_ctrlrequest " - "%02x.%02x v%04x i%04x l%u\n", - b_requestType, b_request, - w_value, w_index, w_length); -*/ + * printk(KERN_INFO "acc_ctrlrequest " + * "%02x.%02x v%04x i%04x l%u\n", + * b_requestType, b_request, + * w_value, w_index, w_length); + */ if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) { if (b_request == ACCESSORY_START) { @@ -1069,6 +1076,10 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) struct usb_request *req; int i; + dev->online = 0; /* clear online flag */ + wake_up(&dev->read_wq); /* unblock reads on closure */ + wake_up(&dev->write_wq); /* likewise for writes */ + while ((req = req_get(dev, &dev->tx_idle))) acc_request_free(req, dev->ep_in); for (i = 0; i < RX_REQ_MAX; i++) @@ -1200,6 +1211,7 @@ static int acc_function_set_alt(struct usb_function *f, } dev->online = 1; + dev->disconnected = 0; /* if online then not disconnected */ /* readers may be blocked waiting for us to go online */ wake_up(&dev->read_wq); @@ -1212,7 +1224,8 @@ static void acc_function_disable(struct usb_function *f) struct usb_composite_dev *cdev = dev->cdev; DBG(cdev, "acc_function_disable\n"); - acc_set_disconnected(dev); + acc_set_disconnected(dev); /* this now only sets disconnected */ + dev->online = 0; /* so now need to clear online flag here too */ usb_ep_disable(dev->ep_in); usb_ep_disable(dev->ep_out); diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index 651e4afe0520..1bcfe819fad3 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm) { struct usb_composite_dev *cdev = acm->port.func.config->cdev; int status; + __le16 serial_state; spin_lock(&acm->lock); if (acm->notify_req) { dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n", acm->port_num, acm->serial_state); + serial_state = cpu_to_le16(acm->serial_state); status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE, - 0, &acm->serial_state, sizeof(acm->serial_state)); + 0, &serial_state, sizeof(acm->serial_state)); } else { acm->pending = true; status = 0; diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 29b41b5dee04..c7689d05356c 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -625,7 +625,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_ss_streaming_comp.bMaxBurst = 
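/*
 * Editorial note on the f_acm change above: CDC notification payloads
 * are little-endian on the wire, so the host-order serial_state must
 * be converted before being copied into the request; on big-endian
 * CPUs the old code sent the two bytes swapped. Byte-order-independent
 * illustration of what cpu_to_le16() guarantees here:
 */
#include <stdint.h>

static void put_le16(uint8_t *dst, uint16_t v)
{
	/* always emit the low byte first, whatever the host order */
	dst[0] = v & 0xff;
	dst[1] = v >> 8;
}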
opts->streaming_maxburst; uvc_ss_streaming_comp.wBytesPerInterval = cpu_to_le16(max_packet_size * max_packet_mult * - opts->streaming_maxburst); + (opts->streaming_maxburst + 1)); /* Allocate endpoints. */ ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep); diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 4e38683c653c..6d4e75785710 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -346,6 +346,9 @@ static int idmouse_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index 86b4e4b2ab9a..383fa007348f 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -370,6 +370,10 @@ static int lvs_rh_probe(struct usb_interface *intf, hdev = interface_to_usbdev(intf); desc = intf->cur_altsetting; + + if (desc->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &desc->endpoint[0].desc; /* valid only for SS root hub */ diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index bbd029c9c725..442b6631162e 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -711,6 +711,11 @@ static int uss720_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 3) { + usb_put_dev(usbdev); + return -ENODEV; + } + /* * Allocate parport interface */ diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index e499b862a946..88f26ac2a185 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -250,8 +250,27 @@ static void cppi41_dma_callback(void *private_data) transferred < cppi41_channel->packet_sz) cppi41_channel->prog_len = 0; - if (cppi41_channel->is_tx) - empty = musb_is_tx_fifo_empty(hw_ep); + if (cppi41_channel->is_tx) { + u8 type; + + if (is_host_active(musb)) + type = hw_ep->out_qh->type; + else + type = hw_ep->ep_in.type; + + if (type == USB_ENDPOINT_XFER_ISOC) + /* + * Don't use the early-TX-interrupt workaround below + * for Isoch transfter. Since Isoch are periodic + * transfer, by the time the next transfer is + * scheduled, the current one should be done already. + * + * This avoids audio playback underrun issue. 
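The uvc hunk here fixes an off-by-one: bMaxBurst is zero-based, so the bytes-per-interval budget must scale with (maxburst + 1). The surrounding probe hunks (idmouse and lvstest above, uss720 next, wa_create() and hwarc_probe() further on) all add the same kind of guard: endpoint descriptors come from the device and are untrusted, so indexing desc->endpoint[0] without confirming bNumEndpoints is an out-of-bounds read a malicious device can trigger. The guard pattern in miniature, with illustrative types:

#include <stdio.h>

struct ep_desc { unsigned char bEndpointAddress; };

struct altsetting {
	unsigned char bNumEndpoints;    /* device-supplied, untrusted */
	struct ep_desc endpoint[16];
};

/* illustrative probe: validate the count before indexing endpoint[] */
static int probe(const struct altsetting *alt)
{
	if (alt->bNumEndpoints < 1)
		return -1;              /* the drivers return -ENODEV */
	printf("ep0 address %u\n", alt->endpoint[0].bEndpointAddress);
	return 0;
}

int main(void)
{
	struct altsetting bogus = { .bNumEndpoints = 0 };

	return probe(&bogus) ? 0 : 1;   /* must refuse the bogus device */
}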
+ */ + empty = true; + else + empty = musb_is_tx_fifo_empty(hw_ep); + } if (!cppi41_channel->is_tx || empty) { cppi41_trans_done(cppi41_channel); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42cc72e54c05..af67a0de6b5d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,14 @@ static void option_instat_callback(struct urb *urb); #define BANDRICH_PRODUCT_1012 0x1012 #define QUALCOMM_VENDOR_ID 0x05C6 +/* These Quectel products use Qualcomm's vendor ID */ +#define QUECTEL_PRODUCT_UC20 0x9003 +#define QUECTEL_PRODUCT_UC15 0x9090 + +#define QUECTEL_VENDOR_ID 0x2c7c +/* These Quectel products use Quectel's vendor ID */ +#define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_EC25 0x0125 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1161,7 +1169,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + /* Quectel products using Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Quectel products using Quectel vendor ID */ + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 696458db7e3c..38b3f0d8cd58 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -169,6 +169,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c index 252c7bd9218a..d01496fd27fe 100644 --- a/drivers/usb/wusbcore/wa-hc.c +++ b/drivers/usb/wusbcore/wa-hc.c @@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface, int result; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + result = wa_rpipes_create(wa); if (result < 0) goto error_rpipes_create; diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 0257f35cfb9d..e75bbe5a10cd 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -825,6 +825,9 @@ static int hwarc_probe(struct usb_interface *iface, struct hwarc *hwarc; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; uwb_rc = uwb_rc_alloc(); if (uwb_rc == NULL) { diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 2bfc846ac071..6345e85822a4 100644 --- 
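The option.c and qcserial.c hunks in this range follow the stock serial-driver pattern: a table of VID/PID matches, with per-device behaviour (here, which interface to blacklist from the serial driver because it carries the network function) hung off driver_info. A self-contained userspace model of that table-plus-lookup shape; the flag value and helper names are invented for illustration.

#include <stddef.h>
#include <stdio.h>

struct dev_id {
	unsigned short vid, pid;
	unsigned long info;             /* stands in for driver_info */
};

#define NET_INTF4_BLACKLIST 0x10UL      /* invented flag value */

static const struct dev_id id_table[] = {
	{ 0x05c6, 0x9090, 0 },                   /* Quectel UC15 */
	{ 0x05c6, 0x9003, NET_INTF4_BLACKLIST }, /* Quectel UC20 */
	{ 0x2c7c, 0x0121, NET_INTF4_BLACKLIST }, /* Quectel EC21 */
	{ 0x2c7c, 0x0125, NET_INTF4_BLACKLIST }, /* Quectel EC25 */
};

static const struct dev_id *match(unsigned short vid, unsigned short pid)
{
	size_t i;

	for (i = 0; i < sizeof(id_table) / sizeof(id_table[0]); i++)
		if (id_table[i].vid == vid && id_table[i].pid == pid)
			return &id_table[i];
	return NULL;                    /* no match: decline the device */
}

int main(void)
{
	const struct dev_id *id = match(0x2c7c, 0x0125);

	printf("EC25 %s, info %#lx\n", id ? "matched" : "missed",
	       id ? id->info : 0UL);
	return 0;
}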
a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) result); } + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL); if (i1480_usb == NULL) { diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 0582b72ef377..1a9f18b40be6 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -511,6 +511,12 @@ static long tce_iommu_build_v2(struct tce_container *container, unsigned long hpa; enum dma_data_direction dirtmp; + if (!tbl->it_userspace) { + ret = tce_iommu_userspace_view_alloc(tbl); + if (ret) + return ret; + } + for (i = 0; i < pages; ++i) { struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, @@ -584,15 +590,6 @@ static long tce_iommu_create_table(struct tce_container *container, WARN_ON(!ret && !(*ptbl)->it_ops->free); WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); - if (!ret && container->v2) { - ret = tce_iommu_userspace_view_alloc(*ptbl); - if (ret) - (*ptbl)->it_ops->free(*ptbl); - } - - if (ret) - decrement_locked_vm(table_size >> PAGE_SHIFT); - return ret; } @@ -1064,10 +1061,7 @@ static int tce_iommu_take_ownership(struct tce_container *container, if (!tbl || !tbl->it_map) continue; - rc = tce_iommu_userspace_view_alloc(tbl); - if (!rc) - rc = iommu_take_ownership(tbl); - + rc = iommu_take_ownership(tbl); if (rc) { for (j = 0; j < i; ++j) iommu_release_ownership( diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 6e92917ba77a..4e3c78d88832 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1168,6 +1168,8 @@ static void fbcon_free_font(struct display *p, bool freefont) p->userfont = 0; } +static void set_vc_hi_font(struct vc_data *vc, bool set); + static void fbcon_deinit(struct vc_data *vc) { struct display *p = &fb_display[vc->vc_num]; @@ -1203,6 +1205,9 @@ finished: if (free_font) vc->vc_font.data = NULL; + if (vc->vc_hi_font_mask) + set_vc_hi_font(vc, false); + if (!con_is_bound(&fb_con)) fbcon_exit(); @@ -2439,32 +2444,10 @@ static int fbcon_get_font(struct vc_data *vc, struct console_font *font) return 0; } -static int fbcon_do_set_font(struct vc_data *vc, int w, int h, - const u8 * data, int userfont) +/* set/clear vc_hi_font_mask and update vc attrs accordingly */ +static void set_vc_hi_font(struct vc_data *vc, bool set) { - struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; - struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; - int resize; - int cnt; - char *old_data = NULL; - - if (CON_IS_VISIBLE(vc) && softback_lines) - fbcon_set_origin(vc); - - resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; - if (userfont) - cnt = FNTCHARCNT(data); - else - cnt = 256; - vc->vc_font.data = (void *)(p->fontdata = data); - if ((p->userfont = userfont)) - REFCOUNT(data)++; - vc->vc_font.width = w; - vc->vc_font.height = h; - if (vc->vc_hi_font_mask && cnt == 256) { + if (!set) { vc->vc_hi_font_mask = 0; if (vc->vc_can_do_color) { vc->vc_complement_mask >>= 1; @@ -2487,7 +2470,7 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, ((c & 0xfe00) >> 1) | (c & 0xff); vc->vc_attr >>= 1; } - } else if (!vc->vc_hi_font_mask && cnt == 512) { + } else { vc->vc_hi_font_mask = 
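The vfio change above is a lazy-allocation move: rather than building the userspace view for every table at creation time (and unwinding it on failure), tce_iommu_build_v2() now allocates it on first use, so only containers that actually map v2 windows pay the cost. The allocate-on-first-use guard in miniature, with invented names:

#include <errno.h>
#include <stdlib.h>

struct table {
	unsigned long *userspace_view;  /* NULL until first v2 mapping */
	unsigned long entries;
};

/* allocate on first use instead of at table creation */
static int build_v2(struct table *tbl)
{
	if (!tbl->userspace_view) {
		tbl->userspace_view = calloc(tbl->entries,
					     sizeof(*tbl->userspace_view));
		if (!tbl->userspace_view)
			return -ENOMEM;
	}
	/* ... fill the view while mapping pages ... */
	return 0;
}

int main(void)
{
	struct table t = { .entries = 64 };
	int rc = build_v2(&t);

	free(t.userspace_view);
	return rc ? 1 : 0;
}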
0x100; if (vc->vc_can_do_color) { vc->vc_complement_mask <<= 1; @@ -2519,8 +2502,38 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, } else vc->vc_video_erase_char = c & ~0x100; } - } +} + +static int fbcon_do_set_font(struct vc_data *vc, int w, int h, + const u8 * data, int userfont) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct display *p = &fb_display[vc->vc_num]; + int resize; + int cnt; + char *old_data = NULL; + + if (CON_IS_VISIBLE(vc) && softback_lines) + fbcon_set_origin(vc); + + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); + if (p->userfont) + old_data = vc->vc_font.data; + if (userfont) + cnt = FNTCHARCNT(data); + else + cnt = 256; + vc->vc_font.data = (void *)(p->fontdata = data); + if ((p->userfont = userfont)) + REFCOUNT(data)++; + vc->vc_font.width = w; + vc->vc_font.height = h; + if (vc->vc_hi_font_mask && cnt == 256) + set_vc_hi_font(vc, false); + else if (!vc->vc_hi_font_mask && cnt == 512) + set_vc_hi_font(vc, true); if (resize) { int cols, rows; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8959b320d472..84c6add93f1f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -421,6 +421,8 @@ static int init_vqs(struct virtio_balloon *vb) * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ + update_balloon_stats(vb); + sg_init_one(&sg, vb->stats, sizeof vb->stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 611f9c11da85..2e319d0c395d 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -27,10 +27,10 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/syscore_ops.h> #include <linux/acpi.h> #include <acpi/processor.h> #include <xen/xen.h> -#include <xen/xen-ops.h> #include <xen/interface/platform.h> #include <asm/xen/hypercall.h> @@ -466,15 +466,33 @@ static int xen_upload_processor_pm_data(void) return rc; } -static int xen_acpi_processor_resume(struct notifier_block *nb, - unsigned long action, void *data) +static void xen_acpi_processor_resume_worker(struct work_struct *dummy) { + int rc; + bitmap_zero(acpi_ids_done, nr_acpi_bits); - return xen_upload_processor_pm_data(); + + rc = xen_upload_processor_pm_data(); + if (rc != 0) + pr_info("ACPI data upload failed, error = %d\n", rc); +} + +static void xen_acpi_processor_resume(void) +{ + static DECLARE_WORK(wq, xen_acpi_processor_resume_worker); + + /* + * xen_upload_processor_pm_data() calls non-atomic code. + * However, the context for xen_acpi_processor_resume is syscore + * with only the boot CPU online and in an atomic context. + * + * So defer the upload for some point safer. 
+ */ + schedule_work(&wq); } -struct notifier_block xen_acpi_processor_resume_nb = { - .notifier_call = xen_acpi_processor_resume, +static struct syscore_ops xap_syscore_ops = { + .resume = xen_acpi_processor_resume, }; static int __init xen_acpi_processor_init(void) @@ -527,7 +545,7 @@ static int __init xen_acpi_processor_init(void) if (rc) goto err_unregister; - xen_resume_notifier_register(&xen_acpi_processor_resume_nb); + register_syscore_ops(&xap_syscore_ops); return 0; err_unregister: @@ -544,7 +562,7 @@ static void __exit xen_acpi_processor_exit(void) { int i; - xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb); + unregister_syscore_ops(&xap_syscore_ops); kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 929b618da43b..c30c6ceac2c4 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -283,6 +283,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr; + struct posix_acl *old_acl = acl; retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); if (retval) @@ -293,6 +294,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, * by the mode bits. So don't * update ACL. */ + posix_acl_release(old_acl); value = NULL; size = 0; } diff --git a/fs/attr.c b/fs/attr.c index 11be2265a2d5..c86b37c38fb7 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -211,7 +211,7 @@ int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * return -EPERM; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) return error; } diff --git a/fs/exec.c b/fs/exec.c index 8c58183eccb7..b1f5ddbf7d56 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1132,8 +1132,10 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); + if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; /* Ensure mm->user_ns contains the executable */ diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 95a49ef2781a..ebaff5ab93da 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -106,6 +106,7 @@ config EXT4_ENCRYPTION select CRYPTO_ECB select CRYPTO_XTS select CRYPTO_CTS + select CRYPTO_HEH select CRYPTO_CTR select CRYPTO_SHA256 select KEYS diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index 2fbef8a14760..e2645ca9b95e 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -44,7 +44,8 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) bool ext4_valid_filenames_enc_mode(uint32_t mode) { - return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); + return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || + mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); } static unsigned max_name_len(struct inode *inode) diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 15342bfff70d..363e4c6bf37f 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -30,16 +30,16 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc) } /** - * ext4_derive_key_aes() - Derive a key using AES-128-ECB + * ext4_derive_key_v1() - Derive a key using AES-128-ECB * @deriving_key: Encryption key used for derivation. * @source_key: Source key to which to apply derivation. * @derived_key: Derived key. * - * Return: Zero on success; non-zero otherwise. 
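The xen-acpi-processor rework above is the standard answer to "my callback runs in atomic context but the work can sleep": the syscore ->resume hook runs with only the boot CPU online and interrupts off, so it merely queues the upload, and a workqueue item performs it later in process context. Because syscore resume returns void, any error has to be reported from the worker, which is why the pr_info() moved there. Distilled to its minimal kernel-style shape (function names here are illustrative, not from the patch):

#include <linux/workqueue.h>

static void resume_worker(struct work_struct *work)
{
	/* process context: may sleep, allocate, issue hypercalls */
}

static DECLARE_WORK(resume_work, resume_worker);

/* syscore ->resume: boot CPU only, interrupts off, must not sleep */
static void my_syscore_resume(void)
{
	schedule_work(&resume_work);    /* safe from atomic context */
}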
+ * Return: 0 on success, -errno on failure */ -static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], - char source_key[EXT4_AES_256_XTS_KEY_SIZE], - char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) +static int ext4_derive_key_v1(const char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], + const char source_key[EXT4_AES_256_XTS_KEY_SIZE], + char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) { int res = 0; struct ablkcipher_request *req = NULL; @@ -84,6 +84,91 @@ out: return res; } +/** + * ext4_derive_key_v2() - Derive a key non-reversibly + * @nonce: the nonce associated with the file + * @master_key: the master key referenced by the file + * @derived_key: (output) the resulting derived key + * + * This function computes the following: + * derived_key[0:127] = AES-256-ENCRYPT(master_key[0:255], nonce) + * derived_key[128:255] = AES-256-ENCRYPT(master_key[0:255], nonce ^ 0x01) + * derived_key[256:383] = AES-256-ENCRYPT(master_key[256:511], nonce) + * derived_key[384:511] = AES-256-ENCRYPT(master_key[256:511], nonce ^ 0x01) + * + * 'nonce ^ 0x01' denotes flipping the low order bit of the last byte. + * + * Unlike the v1 algorithm, the v2 algorithm is "non-reversible", meaning that + * compromising a derived key does not also compromise the master key. + * + * Return: 0 on success, -errno on failure + */ +static int ext4_derive_key_v2(const char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE], + const char master_key[EXT4_MAX_KEY_SIZE], + char derived_key[EXT4_MAX_KEY_SIZE]) +{ + const int noncelen = EXT4_KEY_DERIVATION_NONCE_SIZE; + struct crypto_cipher *tfm; + int err; + int i; + + /* + * Since we only use each transform for a small number of encryptions, + * requesting just "aes" turns out to be significantly faster than + * "ecb(aes)", by about a factor of two. + */ + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + BUILD_BUG_ON(4 * EXT4_KEY_DERIVATION_NONCE_SIZE != EXT4_MAX_KEY_SIZE); + BUILD_BUG_ON(2 * EXT4_AES_256_ECB_KEY_SIZE != EXT4_MAX_KEY_SIZE); + for (i = 0; i < 2; i++) { + memcpy(derived_key, nonce, noncelen); + memcpy(derived_key + noncelen, nonce, noncelen); + derived_key[2 * noncelen - 1] ^= 0x01; + err = crypto_cipher_setkey(tfm, master_key, + EXT4_AES_256_ECB_KEY_SIZE); + if (err) + break; + crypto_cipher_encrypt_one(tfm, derived_key, derived_key); + crypto_cipher_encrypt_one(tfm, derived_key + noncelen, + derived_key + noncelen); + master_key += EXT4_AES_256_ECB_KEY_SIZE; + derived_key += 2 * noncelen; + } + crypto_free_cipher(tfm); + return err; +} + +/** + * ext4_derive_key() - Derive a per-file key from a nonce and master key + * @ctx: the encryption context associated with the file + * @master_key: the master key referenced by the file + * @derived_key: (output) the resulting derived key + * + * Return: 0 on success, -errno on failure + */ +static int ext4_derive_key(const struct ext4_encryption_context *ctx, + const char master_key[EXT4_MAX_KEY_SIZE], + char derived_key[EXT4_MAX_KEY_SIZE]) +{ + BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE); + BUILD_BUG_ON(EXT4_AES_256_XTS_KEY_SIZE != EXT4_MAX_KEY_SIZE); + + /* + * Although the key derivation algorithm is logically independent of the + * choice of encryption modes, in this kernel it is bundled with HEH + * encryption of filenames, which is another crypto improvement that + * requires an on-disk format change and requires userspace to specify + * different encryption policies. 
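For reference, the v2 derivation above expands a 16-byte nonce into a 64-byte key with four AES-256-ECB block encryptions: each 256-bit half of the master key encrypts both the nonce and the nonce with the low bit of its last byte flipped. Below is a runnable userspace model of that arithmetic using OpenSSL's legacy AES_set_encrypt_key()/AES_encrypt() (build with -lcrypto); it is an illustration only, not the kernel code path, which uses crypto_alloc_cipher("aes") as shown.

#include <openssl/aes.h>
#include <stdio.h>
#include <string.h>

#define NONCE_SIZE 16

static void derive_v2(const unsigned char nonce[NONCE_SIZE],
		      const unsigned char master_key[64],
		      unsigned char derived_key[64])
{
	AES_KEY aeskey;
	int i;

	for (i = 0; i < 2; i++) {
		unsigned char *out = derived_key + 32 * i;

		memcpy(out, nonce, NONCE_SIZE);
		memcpy(out + NONCE_SIZE, nonce, NONCE_SIZE);
		out[2 * NONCE_SIZE - 1] ^= 0x01;   /* the 'nonce ^ 0x01' block */
		AES_set_encrypt_key(master_key + 32 * i, 256, &aeskey);
		AES_encrypt(out, out, &aeskey);
		AES_encrypt(out + NONCE_SIZE, out + NONCE_SIZE, &aeskey);
	}
}

int main(void)
{
	unsigned char nonce[NONCE_SIZE] = { 0 };
	unsigned char master[64] = { 0 }, derived[64];
	int i;

	derive_v2(nonce, master, derived);
	for (i = 0; i < 64; i++)
		printf("%02x%s", derived[i], (i % 16 == 15) ? "\n" : "");
	return 0;
}

Since recovering the master key from the derived key would require inverting AES without its key, a compromised per-file key no longer exposes the master key, which is the "non-reversible" property the comment describes.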
+ */ + if (ctx->filenames_encryption_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH) + return ext4_derive_key_v2(ctx->nonce, master_key, derived_key); + else + return ext4_derive_key_v1(ctx->nonce, master_key, derived_key); +} + void ext4_free_crypt_info(struct ext4_crypt_info *ci) { if (!ci) @@ -192,6 +277,8 @@ retry: break; case EXT4_ENCRYPTION_MODE_PRIVATE: cipher_str = "bugon"; + case EXT4_ENCRYPTION_MODE_AES_256_HEH: + cipher_str = "heh(aes)"; break; default: printk_once(KERN_WARNING @@ -242,7 +329,7 @@ retry: up_read(&keyring_key->sem); goto out; } - res = ext4_derive_key_aes(ctx.nonce, master_key->raw, + res = ext4_derive_key(&ctx, master_key->raw, crypt_info->ci_raw_key); up_read(&keyring_key->sem); if (res) diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index 8a9feb341f31..dd561f916f0b 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -156,6 +156,12 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, WARN_ON(1); /* Should never happen */ return 0; } + + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!ext4_encrypted_inode(parent)) return 1; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 14d7f72f8092..482d1caee849 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -590,6 +590,7 @@ enum { #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 #define EXT4_ENCRYPTION_MODE_PRIVATE 127 +#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126 #include "ext4_crypto.h" @@ -2339,19 +2340,6 @@ int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); -static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) -{ - return EXT4_I(inode)->i_crypt_info; -} - -static inline int ext4_using_hardware_encryption(struct inode *inode) -{ - struct ext4_crypt_info *ci = ext4_encryption_info(inode); - - return S_ISREG(inode->i_mode) && ci && - ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; -} - static inline int ext4_get_encryption_info(struct inode *inode) { struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; @@ -2365,6 +2353,19 @@ static inline int ext4_get_encryption_info(struct inode *inode) return 0; } +static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) +{ + return EXT4_I(inode)->i_crypt_info; +} + +static inline int ext4_using_hardware_encryption(struct inode *inode) +{ + struct ext4_crypt_info *ci = ext4_encryption_info(inode); + + return S_ISREG(inode->i_mode) && ci && + ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; +} + #else static inline int ext4_has_encryption_key(struct inode *inode) { diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index 95cbc9bc1995..e28cc5aab04a 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -59,8 +59,10 @@ struct ext4_encryption_context { #define EXT4_XTS_TWEAK_SIZE 16 #define EXT4_AES_128_ECB_KEY_SIZE 16 #define EXT4_AES_256_GCM_KEY_SIZE 32 +#define EXT4_AES_256_ECB_KEY_SIZE 32 #define EXT4_AES_256_CBC_KEY_SIZE 32 #define EXT4_AES_256_CTS_KEY_SIZE 32 +#define EXT4_AES_256_HEH_KEY_SIZE 32 #define EXT4_AES_256_XTS_KEY_SIZE 64 #define EXT4_PRIVATE_KEY_SIZE 64 #define EXT4_MAX_KEY_SIZE 64 @@ -127,6 +129,8 @@ static inline int ext4_encryption_key_size(int mode) return EXT4_AES_256_CBC_KEY_SIZE; case EXT4_ENCRYPTION_MODE_AES_256_CTS: return 
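The ext4 crypto_policy hunk above (its f2fs twin appears below) adds the same early return in the consistency check: only regular files, directories, and symlinks ever carry an encryption context, so other file types such as device nodes, FIFOs, and sockets must not fail the parent/child check. The shape of the guard as a self-contained userspace predicate:

#include <stdbool.h>
#include <sys/stat.h>

/* types that can never be encrypted always pass the consistency check */
static bool context_consistent(mode_t child_mode, bool parent_encrypted)
{
	if (!S_ISREG(child_mode) && !S_ISDIR(child_mode) &&
	    !S_ISLNK(child_mode))
		return true;
	if (!parent_encrypted)
		return true;
	/* ... the real check compares the two encryption contexts ... */
	return false;
}

int main(void)
{
	/* a FIFO under an encrypted directory is always acceptable */
	return context_consistent(S_IFIFO | 0600, true) ? 0 : 1;
}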
EXT4_AES_256_CTS_KEY_SIZE; + case EXT4_ENCRYPTION_MODE_AES_256_HEH: + return EXT4_AES_256_HEH_KEY_SIZE; default: BUG(); } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index db81acb686c0..280d67fe33a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1171,10 +1171,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) - goto out; + return err; set_buffer_verified(dir_block); -out: - return err; + return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7e974878d9a9..3a2594665b44 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -637,8 +637,12 @@ resizefs_out: if (err) goto encryption_policy_out; + mutex_lock(&inode->i_mutex); + err = ext4_process_policy(&policy, inode); + mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); encryption_policy_out: return err; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6fe8e30eeb99..68345a9e59b8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3666,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " "(group descriptor block count %u)", diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index e504f548b64e..5bbd1989d5e6 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -149,6 +149,11 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent, BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!f2fs_encrypted_inode(parent)) return 1; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a197215ad52b..4b449d263333 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1535,12 +1535,19 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) #ifdef CONFIG_F2FS_FS_ENCRYPTION struct f2fs_encryption_policy policy; struct inode *inode = file_inode(filp); + int err; if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, sizeof(policy))) return -EFAULT; - return f2fs_process_policy(&policy, inode); + mutex_lock(&inode->i_mutex); + + err = f2fs_process_policy(&policy, inode); + + mutex_unlock(&inode->i_mutex); + + return err; #else return -EOPNOTSUPP; #endif diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de7b4f97ac75..be519416c112 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 971928ab6c21..8e31d1a80f0c 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -46,7 +46,8 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) spin_unlock(&dentry->d_lock); /* check uninitialized obb_dentry and - * whether the base obbpath has been changed or not */ + * whether the base obbpath has been 
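Both filesystems in this range also serialize SET_ENCRYPTION_POLICY against other users of the inode by wrapping *_process_policy() in i_mutex (this tree predates the inode_lock() wrappers). A kernel-style sketch of just the locking shape; apply_policy() is a stand-in for ext4_process_policy()/f2fs_process_policy():

#include <linux/fs.h>
#include <linux/mutex.h>

static int apply_policy(struct inode *inode, const void *policy);

static int set_policy_locked(struct inode *inode, const void *policy)
{
	int err;

	mutex_lock(&inode->i_mutex);    /* serialize with other inode users */
	err = apply_policy(inode, policy);
	mutex_unlock(&inode->i_mutex);
	return err;
}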
changed or not + */ if (is_obbpath_invalid(dentry)) { d_drop(dentry); return 0; @@ -76,17 +77,13 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) if (dentry < lower_dentry) { spin_lock(&dentry->d_lock); - spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&lower_dentry->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } - if (dentry->d_name.len != lower_dentry->d_name.len) { - __d_drop(dentry); - err = 0; - } else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name, - dentry->d_name.len) != 0) { + if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) { __d_drop(dentry); err = 0; } @@ -110,12 +107,10 @@ out: static void sdcardfs_d_release(struct dentry *dentry) { /* release and reset the lower paths */ - if(has_graft_path(dentry)) { + if (has_graft_path(dentry)) sdcardfs_put_reset_orig_path(dentry); - } sdcardfs_put_reset_lower_path(dentry); free_dentry_private_data(dentry); - return; } static int sdcardfs_hash_ci(const struct dentry *dentry, @@ -132,12 +127,10 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, unsigned long hash; name = qstr->name; - //len = vfat_striptail_len(qstr); len = qstr->len; hash = init_name_hash(); while (len--) - //hash = partial_name_hash(nls_tolower(t, *name++), hash); hash = partial_name_hash(tolower(*name++), hash); qstr->hash = end_name_hash(hash); @@ -151,35 +144,25 @@ static int sdcardfs_cmp_ci(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - /* This function is copy of vfat_cmpi */ - // FIXME Should we support national language? - //struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; - //unsigned int alen, blen; + /* FIXME Should we support national language? */ - /* A filename cannot end in '.' 
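The d_revalidate hunk below fixes the classic two-lock problem: when two d_locks of the same class must be held together, they are taken in a global order (lowest pointer first), and the second acquisition uses spin_lock_nested() so lockdep accepts the deliberate same-class nesting. The ordering rule alone, as a runnable pthread model (build with -lpthread); pthread mutexes stand in for d_lock:

#include <pthread.h>
#include <stdio.h>

/*
 * Take two locks of the same class in a global order (lowest address
 * first) so two threads locking the same pair from opposite ends can
 * never deadlock on each other.
 */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if ((void *)a < (void *)b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&m1, &m2);
	puts("locked in address order");
	pthread_mutex_unlock(&m2);
	pthread_mutex_unlock(&m1);
	return 0;
}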
or we treat it like it has none */ - /* - alen = vfat_striptail_len(name); - blen = __vfat_striptail_len(len, str); - if (alen == blen) { - if (nls_strnicmp(t, name->name, str, alen) == 0) - return 0; - } - */ if (name->len == len) { - if (strncasecmp(name->name, str, len) == 0) + if (str_n_case_eq(name->name, str, len)) return 0; } return 1; } -static void sdcardfs_canonical_path(const struct path *path, struct path *actual_path) { +static void sdcardfs_canonical_path(const struct path *path, + struct path *actual_path) +{ sdcardfs_get_real_lower(path->dentry, actual_path); } const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, .d_release = sdcardfs_d_release, - .d_hash = sdcardfs_hash_ci, + .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, .d_canonical_path = sdcardfs_canonical_path, }; diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 0bb442338a85..f82fedd29007 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -37,7 +37,8 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, struct inode *top) + uid_t uid, bool under_android, + struct inode *top) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); @@ -50,12 +51,17 @@ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, set_top(info, top); } -/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ -void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name) +/* While renaming, there is a point where we want the path from dentry, + * but the name from newdentry + */ +void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, + const struct qstr *name) { struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); appid_t appid; + unsigned long user_num; + int err; struct qstr q_Android = QSTR_LITERAL("Android"); struct qstr q_data = QSTR_LITERAL("data"); struct qstr q_obb = QSTR_LITERAL("obb"); @@ -77,58 +83,61 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, co return; /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { - case PERM_INHERIT: - case PERM_ANDROID_PACKAGE_CACHE: - /* Already inherited above */ - break; - case PERM_PRE_ROOT: - /* Legacy internal layout places users at top level */ - info->perm = PERM_ROOT; - info->userid = simple_strtoul(name->name, NULL, 10); + case PERM_INHERIT: + case PERM_ANDROID_PACKAGE_CACHE: + /* Already inherited above */ + break; + case PERM_PRE_ROOT: + /* Legacy internal layout places users at top level */ + info->perm = PERM_ROOT; + err = kstrtoul(name->name, 10, &user_num); + if (err) + info->userid = 0; + else + info->userid = user_num; + set_top(info, &info->vfs_inode); + break; + case PERM_ROOT: + /* Assume masked off by default. */ + if (qstr_case_eq(name, &q_Android)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID; + info->under_android = true; set_top(info, &info->vfs_inode); - break; - case PERM_ROOT: - /* Assume masked off by default. 
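The name-comparison cleanups throughout these sdcardfs hunks route everything through qstr_case_eq() and str_n_case_eq(), helpers defined in sdcardfs.h rather than in the hunks shown here. A plausible shape for them, assuming they are thin wrappers over strncasecmp() with a length pre-check (the qstr type is simplified):

#include <stdbool.h>
#include <stddef.h>
#include <strings.h>

struct qstr_lite { unsigned int len; const char *name; }; /* stand-in */

static bool str_n_case_eq(const char *a, const char *b, size_t n)
{
	return strncasecmp(a, b, n) == 0;
}

static bool qstr_case_eq(const struct qstr_lite *a,
			 const struct qstr_lite *b)
{
	return a->len == b->len && str_n_case_eq(a->name, b->name, a->len);
}

Comparing lengths first is what makes the old "len mismatch, then strncasecmp" two-step collapsible into one call.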
*/ - if (qstr_case_eq(name, &q_Android)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID; - info->under_android = true; - set_top(info, &info->vfs_inode); - } - break; - case PERM_ANDROID: - if (qstr_case_eq(name, &q_data)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_DATA; - set_top(info, &info->vfs_inode); - } else if (qstr_case_eq(name, &q_obb)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_OBB; - info->under_obb = true; - set_top(info, &info->vfs_inode); - /* Single OBB directory is always shared */ - } else if (qstr_case_eq(name, &q_media)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_MEDIA; - set_top(info, &info->vfs_inode); - } - break; - case PERM_ANDROID_OBB: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - info->perm = PERM_ANDROID_PACKAGE; - appid = get_appid(name->name); - if (appid != 0 && !is_excluded(name->name, parent_info->userid)) { - info->d_uid = multiuser_get_uid(parent_info->userid, appid); - } + } + break; + case PERM_ANDROID: + if (qstr_case_eq(name, &q_data)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_DATA; set_top(info, &info->vfs_inode); - break; - case PERM_ANDROID_PACKAGE: - if (qstr_case_eq(name, &q_cache)) { - info->perm = PERM_ANDROID_PACKAGE_CACHE; - info->under_cache = true; - } - break; + } else if (qstr_case_eq(name, &q_obb)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_OBB; + info->under_obb = true; + set_top(info, &info->vfs_inode); + /* Single OBB directory is always shared */ + } else if (qstr_case_eq(name, &q_media)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_MEDIA; + set_top(info, &info->vfs_inode); + } + break; + case PERM_ANDROID_OBB: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + info->perm = PERM_ANDROID_PACKAGE; + appid = get_appid(name->name); + if (appid != 0 && !is_excluded(name->name, parent_info->userid)) + info->d_uid = multiuser_get_uid(parent_info->userid, appid); + set_top(info, &info->vfs_inode); + break; + case PERM_ANDROID_PACKAGE: + if (qstr_case_eq(name, &q_cache)) { + info->perm = PERM_ANDROID_PACKAGE_CACHE; + info->under_cache = true; + } + break; } } @@ -137,7 +146,8 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) get_derived_permission_new(parent, dentry, &dentry->d_name); } -static appid_t get_type(const char *name) { +static appid_t get_type(const char *name) +{ const char *ext = strrchr(name, '.'); appid_t id; @@ -149,7 +159,8 @@ static appid_t get_type(const char *name) { return AID_MEDIA_RW; } -void fixup_lower_ownership(struct dentry* dentry, const char *name) { +void fixup_lower_ownership(struct dentry *dentry, const char *name) +{ struct path path; struct inode *inode; struct inode *delegated_inode = NULL; @@ -175,49 +186,49 @@ void fixup_lower_ownership(struct dentry* dentry, const char *name) { } switch (perm) { - case PERM_ROOT: - case PERM_ANDROID: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - case PERM_ANDROID_PACKAGE: - case PERM_ANDROID_PACKAGE_CACHE: - uid = multiuser_get_uid(info->userid, uid); - break; - case PERM_ANDROID_OBB: - uid = AID_MEDIA_OBB; - break; - case PERM_PRE_ROOT: - default: - break; + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + case PERM_ANDROID_PACKAGE: + case 
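The PERM_PRE_ROOT hunk above swaps simple_strtoul() for kstrtoul(): the latter rejects trailing garbage and reports overflow, and the patch falls back to userid 0 on any parse failure instead of silently using whatever prefix happened to parse. A userspace approximation of that stricter parse:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* approximates kstrtoul(): the whole string must be a valid number */
static int parse_userid(const char *s, unsigned long *out)
{
	char *end;

	errno = 0;
	*out = strtoul(s, &end, 10);
	if (errno == ERANGE)
		return -ERANGE;
	if (end == s || *end != '\0')
		return -EINVAL;         /* trailing garbage rejected */
	return 0;
}

int main(void)
{
	unsigned long id;

	/* the patch falls back to userid 0 when parsing fails */
	if (parse_userid("123abc", &id))
		id = 0;
	printf("userid %lu\n", id);
	return 0;
}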
PERM_ANDROID_PACKAGE_CACHE: + uid = multiuser_get_uid(info->userid, uid); + break; + case PERM_ANDROID_OBB: + uid = AID_MEDIA_OBB; + break; + case PERM_PRE_ROOT: + default: + break; } switch (perm) { - case PERM_ROOT: - case PERM_ANDROID: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - if (S_ISDIR(d_inode(dentry)->i_mode)) - gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); - else - gid = multiuser_get_uid(info->userid, get_type(name)); - break; - case PERM_ANDROID_OBB: - gid = AID_MEDIA_OBB; - break; - case PERM_ANDROID_PACKAGE: - if (info->d_uid != 0) - gid = multiuser_get_ext_gid(info->userid, info->d_uid); - else - gid = multiuser_get_uid(info->userid, uid); - break; - case PERM_ANDROID_PACKAGE_CACHE: - if (info->d_uid != 0) - gid = multiuser_get_cache_gid(info->userid, info->d_uid); - else - gid = multiuser_get_uid(info->userid, uid); - break; - case PERM_PRE_ROOT: - default: - break; + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + if (S_ISDIR(d_inode(dentry)->i_mode)) + gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); + else + gid = multiuser_get_uid(info->userid, get_type(name)); + break; + case PERM_ANDROID_OBB: + gid = AID_MEDIA_OBB; + break; + case PERM_ANDROID_PACKAGE: + if (info->d_uid != 0) + gid = multiuser_get_ext_gid(info->d_uid); + else + gid = multiuser_get_uid(info->userid, uid); + break; + case PERM_ANDROID_PACKAGE_CACHE: + if (info->d_uid != 0) + gid = multiuser_get_cache_gid(info->d_uid); + else + gid = multiuser_get_uid(info->userid, uid); + break; + case PERM_PRE_ROOT: + default: + break; } sdcardfs_get_lower_path(dentry, &path); @@ -243,9 +254,11 @@ retry_deleg: if (error) pr_err("sdcardfs: Failed to touch up lower fs gid/uid.\n"); } + sdcardfs_put_lower_path(dentry, &path); } -static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit) { +static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit) +{ if (info->perm == PERM_ROOT) return (limit->flags & BY_USERID)?info->userid == limit->userid:1; if (info->perm == PERM_PRE_ROOT || info->perm == PERM_ANDROID) @@ -253,85 +266,56 @@ static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct li return 0; } -static int needs_fixup(perm_t perm) { +static int needs_fixup(perm_t perm) +{ if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB || perm == PERM_ANDROID_MEDIA) return 1; return 0; } -void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit) { +static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth) +{ struct dentry *child; struct sdcardfs_inode_info *info; - if (!dget(dentry)) - return; + + /* + * All paths will terminate their recursion on hitting PERM_ANDROID_OBB, + * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of + * at most 3. 
+ */ + WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__); + spin_lock_nested(&dentry->d_lock, depth); if (!d_inode(dentry)) { - dput(dentry); + spin_unlock(&dentry->d_lock); return; } info = SDCARDFS_I(d_inode(dentry)); if (needs_fixup(info->perm)) { - spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_child) { - dget(child); - if (!(limit->flags & BY_NAME) || !strncasecmp(child->d_name.name, limit->name, limit->length)) { + spin_lock_nested(&child->d_lock, depth + 1); + if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) { if (d_inode(child)) { get_derived_permission(dentry, child); fixup_tmp_permissions(d_inode(child)); - dput(child); + spin_unlock(&child->d_lock); break; } } - dput(child); + spin_unlock(&child->d_lock); } - spin_unlock(&dentry->d_lock); - } else if (descendant_may_need_fixup(info, limit)) { - spin_lock(&dentry->d_lock); + } else if (descendant_may_need_fixup(info, limit)) { list_for_each_entry(child, &dentry->d_subdirs, d_child) { - fixup_perms_recursive(child, limit); - } - spin_unlock(&dentry->d_lock); - } - dput(dentry); -} - -void drop_recursive(struct dentry *parent) { - struct dentry *dentry; - struct sdcardfs_inode_info *info; - if (!d_inode(parent)) - return; - info = SDCARDFS_I(d_inode(parent)); - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (d_inode(dentry)) { - if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { - drop_recursive(dentry); - d_drop(dentry); - } + __fixup_perms_recursive(child, limit, depth + 1); } } - spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); } -void fixup_top_recursive(struct dentry *parent) { - struct dentry *dentry; - struct sdcardfs_inode_info *info; - - if (!d_inode(parent)) - return; - info = SDCARDFS_I(d_inode(parent)); - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (d_inode(dentry)) { - if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { - get_derived_permission(parent, dentry); - fixup_tmp_permissions(d_inode(dentry)); - fixup_top_recursive(dentry); - } - } - } - spin_unlock(&parent->d_lock); +void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit) +{ + __fixup_perms_recursive(dentry, limit, 0); } /* main function for updating derived permission */ @@ -339,19 +323,17 @@ inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; - if(!dentry || !d_inode(dentry)) { - printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); + if (!dentry || !d_inode(dentry)) { + pr_err("sdcardfs: %s: invalid dentry\n", __func__); return; } /* FIXME: * 1. need to check whether the dentry is updated or not * 2. 
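__fixup_perms_recursive() above replaces the old dget/dput walk with traversal under d_lock, and because the permission tree is known to bottom out at PERM_ANDROID_{DATA,OBB,MEDIA} within three levels, each recursion level can pass its depth to spin_lock_nested() as the lockdep subclass and WARN if that invariant ever breaks. The generic shape as a kernel-style sketch, with an invented node type:

#include <linux/bug.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct my_node {                        /* invented stand-in type */
	spinlock_t lock;
	struct list_head children;
	struct list_head siblings;
};

/* hold each ancestor's lock while descending; subclass = depth */
static void walk(struct my_node *node, int depth)
{
	struct my_node *child;

	WARN(depth > 3, "%s: max expected depth exceeded\n", __func__);
	spin_lock_nested(&node->lock, depth);
	list_for_each_entry(child, &node->children, siblings)
		walk(child, depth + 1);
	spin_unlock(&node->lock);
}

The bound matters: lockdep only supports a small, fixed number of subclasses, so this only works when the recursion depth is provably small.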
remove the root dentry update */ - if(IS_ROOT(dentry)) { - //setup_default_pre_root_state(d_inode(dentry)); - } else { + if (!IS_ROOT(dentry)) { parent = dget_parent(dentry); - if(parent) { + if (parent) { get_derived_permission(parent, dentry); dput(parent); } @@ -363,15 +345,15 @@ int need_graft_path(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr obb = QSTR_LITERAL("obb"); - if(parent_info->perm == PERM_ANDROID && + if (parent_info->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &obb)) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ - if(!(sbi->options.multiuser == false + if (!(sbi->options.multiuser == false && parent_info->userid == 0)) { ret = 1; } @@ -386,22 +368,24 @@ int is_obbpath_invalid(struct dentry *dent) struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); char *path_buf, *obbpath_s; + int need_put = 0; + struct path lower_path; /* check the base obbpath has been changed. * this routine can check an uninitialized obb dentry as well. - * regarding the uninitialized obb, refer to the sdcardfs_mkdir() */ + * regarding the uninitialized obb, refer to the sdcardfs_mkdir() + */ spin_lock(&di->lock); - if(di->orig_path.dentry) { - if(!di->lower_path.dentry) { + if (di->orig_path.dentry) { + if (!di->lower_path.dentry) { ret = 1; } else { path_get(&di->lower_path); - //lower_parent = lock_parent(lower_path->dentry); path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); - if(!path_buf) { + if (!path_buf) { ret = 1; - printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__); + pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__); } else { obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); if (d_unhashed(di->lower_path.dentry) || @@ -411,11 +395,13 @@ int is_obbpath_invalid(struct dentry *dent) kfree(path_buf); } - //unlock_dir(lower_parent); - path_put(&di->lower_path); + pathcpy(&lower_path, &di->lower_path); + need_put = 1; } } spin_unlock(&di->lock); + if (need_put) + path_put(&lower_path); return ret; } @@ -423,13 +409,13 @@ int is_base_obbpath(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr q_obb = QSTR_LITERAL("obb"); spin_lock(&SDCARDFS_D(dentry)->lock); if (sbi->options.multiuser) { - if(parent_info->perm == PERM_PRE_ROOT && + if (parent_info->perm == PERM_PRE_ROOT && qstr_case_eq(&dentry->d_name, &q_obb)) { ret = 1; } @@ -444,7 +430,8 @@ int is_base_obbpath(struct dentry *dentry) /* The lower_path will be stored to the dentry's orig_path * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path - * returns: -ERRNO if error (0: no error) */ + * returns: -ERRNO if error (0: no error) + */ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) { int err = 0; @@ -453,23 +440,24 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) /* A local obb dentry must have its own orig_path to support rmdir * and mkdir of itself. Usually, we expect that the sbi->obbpath - * is avaiable on this stage. 
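The is_obbpath_invalid() rework above is a lock-context fix: path_put() can drop the last reference and trigger work that must not run under a spinlock, so the path is copied aside with pathcpy() while di->lock is held and the put happens only after the unlock. The pattern in isolation as a kernel-style sketch, with invented types and put_ref() standing in for path_put():

#include <linux/spinlock.h>
#include <linux/types.h>

struct resource_ref { void *obj; };    /* invented stand-in types */

struct holder {
	spinlock_t lock;
	bool ref_valid;
	struct resource_ref ref;
};

static void put_ref(struct resource_ref *ref);  /* like path_put() */

static void check_and_put(struct holder *h)
{
	struct resource_ref copy;
	bool need_put = false;

	spin_lock(&h->lock);
	if (h->ref_valid) {
		copy = h->ref;          /* take the reference over */
		need_put = true;
	}
	spin_unlock(&h->lock);

	if (need_put)
		put_ref(&copy);         /* may sleep; safe only here */
}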
*/ + * is avaiable on this stage. + */ sdcardfs_set_orig_path(dentry, lower_path); err = kern_path(sbi->obbpath_s, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); - if(!err) { + if (!err) { /* the obbpath base has been found */ - printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n"); pathcpy(lower_path, &obbpath); } else { /* if the sbi->obbpath is not available, we can optionally * setup the lower_path with its orig_path. * but, the current implementation just returns an error * because the sdcard daemon also regards this case as - * a lookup fail. */ - printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n"); + * a lookup fail. + */ + pr_info("sdcardfs: the sbi->obbpath is not available\n"); } return err; } diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 23f8cd7f8877..c0146e03fa2e 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -65,7 +65,7 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf, /* check disk space */ if (!check_min_free_space(dentry, count, 0)) { - printk(KERN_INFO "No minimum free space.\n"); + pr_err("No minimum free space.\n"); return -ENOSPC; } @@ -113,6 +113,10 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, if (lower_file->f_op->unlocked_ioctl) err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + /* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */ + if (!err) + sdcardfs_copy_and_fix_attrs(file_inode(file), + file_inode(lower_file)); out: return err; } @@ -160,8 +164,7 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) lower_file = sdcardfs_lower_file(file); if (willwrite && !lower_file->f_mapping->a_ops->writepage) { err = -EINVAL; - printk(KERN_ERR "sdcardfs: lower file system does not " - "support writeable mmap\n"); + pr_err("sdcardfs: lower file system does not support writeable mmap\n"); goto out; } @@ -173,16 +176,10 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) if (!SDCARDFS_F(file)->lower_vm_ops) { err = lower_file->f_op->mmap(lower_file, vma); if (err) { - printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err); + pr_err("sdcardfs: lower mmap failed %d\n", err); goto out; } saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ - err = do_munmap(current->mm, vma->vm_start, - vma->vm_end - vma->vm_start); - if (err) { - printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err); - goto out; - } } /* @@ -216,10 +213,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { err = -EACCES; goto out_err; } @@ -251,9 +245,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file) if (err) kfree(SDCARDFS_F(file)); - else { + else sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); - } out_revert_cred: REVERT_CRED(saved_cred); @@ -323,6 +316,75 @@ static int sdcardfs_fasync(int fd, struct file *file, int flag) return err; } +/* + * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would + * only set the offset of the upper file. So we have to implement our + * own method to set both the upper and lower file offsets + * consistently. 
+ */ +static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence) +{ + int err; + struct file *lower_file; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + + lower_file = sdcardfs_lower_file(file); + err = generic_file_llseek(lower_file, offset, whence); + +out: + return err; +} + +/* + * Sdcardfs read_iter, redirect modified iocb to lower read_iter + */ +ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->read_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->read_iter(iocb, iter); + /* ? wait IO finish to update atime as ecryptfs ? */ + iocb->ki_filp = file; + fput(lower_file); +out: + return err; +} + +/* + * Sdcardfs write_iter, redirect modified iocb to lower write_iter + */ +ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->write_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->write_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); +out: + return err; +} + const struct file_operations sdcardfs_main_fops = { .llseek = generic_file_llseek, .read = sdcardfs_read, @@ -337,11 +399,13 @@ const struct file_operations sdcardfs_main_fops = { .release = sdcardfs_file_release, .fsync = sdcardfs_fsync, .fasync = sdcardfs_fasync, + .read_iter = sdcardfs_read_iter, + .write_iter = sdcardfs_write_iter, }; /* trimmed directory options */ const struct file_operations sdcardfs_dir_fops = { - .llseek = generic_file_llseek, + .llseek = sdcardfs_file_llseek, .read = generic_read_dir, .iterate = sdcardfs_readdir, .unlocked_ioctl = sdcardfs_unlocked_ioctl, diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 68e615045616..f15cb11ca8fd 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -20,12 +20,13 @@ #include "sdcardfs.h" #include <linux/fs_struct.h> +#include <linux/ratelimit.h> /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs_inode_info *info) +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info) { - struct cred * cred; - const struct cred * old_cred; + struct cred *cred; + const struct cred *old_cred; uid_t uid; cred = prepare_creds(); @@ -45,9 +46,9 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs } /* Do not directly use this function, use REVERT_CRED() instead. 
*/ -void revert_fsids(const struct cred * old_cred) +void revert_fsids(const struct cred *old_cred) { - const struct cred * cur_cred; + const struct cred *cur_cred; cur_cred = current->cred; revert_creds(old_cred); @@ -66,10 +67,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, struct fs_struct *saved_fs; struct fs_struct *copied_fs; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -168,10 +166,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) struct path lower_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -245,14 +240,15 @@ out: } #endif -static int touch(char *abs_path, mode_t mode) { +static int touch(char *abs_path, mode_t mode) +{ struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); + if (IS_ERR(filp)) { if (PTR_ERR(filp) == -EEXIST) { return 0; - } - else { - printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n", + } else { + pr_err("sdcardfs: failed to open(%s): %ld\n", abs_path, PTR_ERR(filp)); return PTR_ERR(filp); } @@ -278,10 +274,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct qstr q_obb = QSTR_LITERAL("obb"); struct qstr q_data = QSTR_LITERAL("data"); - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -291,7 +284,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* check disk space */ if (!check_min_free_space(dentry, 0, 1)) { - printk(KERN_INFO "sdcardfs: No minimum free space.\n"); + pr_err("sdcardfs: No minimum free space.\n"); err = -ENOSPC; goto out_revert; } @@ -323,19 +316,21 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } /* if it is a local obb dentry, setup it with the base obbpath */ - if(need_graft_path(dentry)) { + if (need_graft_path(dentry)) { err = setup_obb_dentry(dentry, &lower_path); - if(err) { + if (err) { /* if the sbi->obbpath is not available, the lower_path won't be * changed by setup_obb_dentry() but the lower path is saved to * its orig_path. this dentry will be revalidated later. - * but now, the lower_path should be NULL */ + * but now, the lower_path should be NULL + */ sdcardfs_put_reset_lower_path(dentry); /* the newly created lower path which saved to its orig_path or * the lower_path is the base obbpath. 
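override_fsids()/revert_fsids() above follow the kernel's canonical credential-override dance: prepare a private copy of the current creds, adjust fsuid/fsgid, install the copy with override_creds() (which hands back the old creds for later), then on revert restore the old creds and drop the private copy. A kernel-style sketch of that shape, trimmed to essentials (the sdcardfs-specific uid derivation and the struct embedding are omitted):

#include <linux/cred.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

static const struct cred *override_fs_ids(uid_t fsuid, gid_t fsgid)
{
	struct cred *cred = prepare_creds();

	if (!cred)
		return NULL;
	cred->fsuid = make_kuid(&init_user_ns, fsuid);
	cred->fsgid = make_kgid(&init_user_ns, fsgid);
	return override_creds(cred);    /* returns the creds to restore */
}

static void revert_fs_ids(const struct cred *old_cred)
{
	const struct cred *mine = current_cred();

	revert_creds(old_cred);
	put_cred(mine);                 /* drop our private copy */
}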
- * therefore, an additional path_get is required */ + * therefore, an additional path_get is required + */ path_get(&lower_path); } else make_nomedia_in_obb = 1; @@ -365,7 +360,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode set_fs_pwd(current->fs, &lower_path); touch_err = touch(".nomedia", 0664); if (touch_err) { - printk(KERN_ERR "sdcardfs: failed to create .nomedia in %s: %d\n", + pr_err("sdcardfs: failed to create .nomedia in %s: %d\n", lower_path.dentry->d_name.name, touch_err); goto out; } @@ -390,10 +385,7 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) struct path lower_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -402,7 +394,8 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry - * the dentry on the original path should be deleted. */ + * the dentry on the original path should be deleted. + */ sdcardfs_get_real_lower(dentry, &lower_path); lower_dentry = lower_path.dentry; @@ -478,11 +471,8 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct path lower_old_path, lower_new_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(old_dir, &old_dentry->d_name) || + if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) || !check_caller_access_to_name(new_dir, &new_dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " new_dentry: %s, task:%s\n", - __func__, new_dentry->d_name.name, current->comm); err = -EACCES; goto out_eacces; } @@ -528,7 +518,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name); fixup_tmp_permissions(d_inode(old_dentry)); fixup_lower_ownership(old_dentry, new_dentry->d_name.name); - drop_recursive(old_dentry); /* Can't fixup ownership recursively :( */ + d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */ out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -599,7 +589,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) static int sdcardfs_permission_wrn(struct inode *inode, int mask) { - WARN(1, "sdcardfs does not support permission. Use permission2.\n"); + WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n"); return -EINVAL; } @@ -652,7 +642,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma release_top(SDCARDFS_I(inode)); tmp.i_sb = inode->i_sb; if (IS_POSIXACL(inode)) - printk(KERN_WARNING "%s: This may be undefined behavior... \n", __func__); + pr_warn("%s: This may be undefined behavior...\n", __func__); err = generic_permission(&tmp, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. 
) @@ -670,6 +660,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma * we check it with AID_MEDIA_RW permission */ struct inode *lower_inode; + OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); lower_inode = sdcardfs_lower_inode(inode); @@ -684,7 +675,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) { - WARN(1, "sdcardfs does not support setattr. User setattr2.\n"); + WARN_RATELIMIT(1, "sdcardfs does not support setattr. Use setattr2.\n"); return -EINVAL; } @@ -738,19 +729,16 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct /* prepare our own lower struct iattr (with the lower file) */ memcpy(&lower_ia, ia, sizeof(lower_ia)); /* Allow touch updating timestamps. A previous permission check ensures - * we have write access. Changes to mode, owner, and group are ignored*/ + * we have write access. Changes to mode, owner, and group are ignored + */ ia->ia_valid |= ATTR_FORCE; err = inode_change_ok(&tmp, ia); if (!err) { /* check the Android group ID */ parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) err = -EACCES; - } dput(parent); } @@ -828,10 +816,12 @@ out_err: return err; } -static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct kstat *stat) +static int sdcardfs_fillattr(struct vfsmount *mnt, + struct inode *inode, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct inode *top = grab_top(info); + if (!top) return -EINVAL; @@ -855,33 +845,27 @@ static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct dentry *lower_dentry; - struct inode *inode; - struct inode *lower_inode; + struct kstat lower_stat; struct path lower_path; struct dentry *parent; int err; parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { dput(parent); return -EACCES; } dput(parent); - inode = d_inode(dentry); - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_inode = sdcardfs_lower_inode(inode); - - sdcardfs_copy_and_fix_attrs(inode, lower_inode); - fsstack_copy_inode_size(inode, lower_inode); - - err = sdcardfs_fillattr(mnt, inode, stat); + err = vfs_getattr(&lower_path, &lower_stat); + if (err) + goto out; + sdcardfs_copy_and_fix_attrs(d_inode(dentry), + d_inode(lower_path.dentry)); + err = sdcardfs_fillattr(mnt, d_inode(dentry), stat); + stat->blocks = lower_stat.blocks; +out: sdcardfs_put_lower_path(dentry, &lower_path); return err; } diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 9135866b7766..f10f7752f514 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -36,8 +36,7 @@ int sdcardfs_init_dentry_cache(void) void sdcardfs_destroy_dentry_cache(void) { - if (sdcardfs_dentry_cachep) - kmem_cache_destroy(sdcardfs_dentry_cachep); + 
kmem_cache_destroy(sdcardfs_dentry_cachep); } void free_dentry_private_data(struct dentry *dentry) @@ -73,6 +72,7 @@ static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void * { struct inode *current_lower_inode = sdcardfs_lower_inode(inode); userid_t current_userid = SDCARDFS_I(inode)->userid; + if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && current_userid == ((struct inode_data *)candidate_data)->id) return 1; /* found a match */ @@ -102,7 +102,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u * instead. */ lower_inode->i_ino, /* hashval */ - sdcardfs_inode_test, /* inode comparison function */ + sdcardfs_inode_test, /* inode comparison function */ sdcardfs_inode_set, /* inode init function */ &data); /* data passed to test+set fxns */ if (!inode) { @@ -164,27 +164,25 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u } /* - * Connect a sdcardfs inode dentry/inode with several lower ones. This is - * the classic stackable file system "vnode interposition" action. - * - * @dentry: sdcardfs's dentry which interposes on lower one - * @sb: sdcardfs's super_block - * @lower_path: the lower path (caller does path_get/put) + * Helper interpose routine, called directly by ->lookup to handle + * spliced dentries. */ -int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path, userid_t id) +static struct dentry *__sdcardfs_interpose(struct dentry *dentry, + struct super_block *sb, + struct path *lower_path, + userid_t id) { - int err = 0; struct inode *inode; struct inode *lower_inode; struct super_block *lower_sb; + struct dentry *ret_dentry; lower_inode = d_inode(lower_path->dentry); lower_sb = sdcardfs_lower_super(sb); /* check that the lower file system didn't cross a mount point */ if (lower_inode->i_sb != lower_sb) { - err = -EXDEV; + ret_dentry = ERR_PTR(-EXDEV); goto out; } @@ -196,14 +194,54 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, /* inherit lower inode number for sdcardfs's inode */ inode = sdcardfs_iget(sb, lower_inode, id); if (IS_ERR(inode)) { - err = PTR_ERR(inode); + ret_dentry = ERR_CAST(inode); goto out; } - d_add(dentry, inode); + ret_dentry = d_splice_alias(inode, dentry); + dentry = ret_dentry ?: dentry; update_derived_permission_lock(dentry); out: - return err; + return ret_dentry; +} + +/* + * Connect an sdcardfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. 
+ * + * @dentry: sdcardfs's dentry which interposes on lower one + * @sb: sdcardfs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path, userid_t id) +{ + struct dentry *ret_dentry; + + ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id); + return PTR_ERR(ret_dentry); +} + +struct sdcardfs_name_data { + struct dir_context ctx; + const struct qstr *to_find; + char *name; + bool found; +}; + +static int sdcardfs_name_match(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, unsigned int d_type) +{ + struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx); + struct qstr candidate = QSTR_INIT(name, namelen); + + if (qstr_case_eq(buf->to_find, &candidate)) { + memcpy(buf->name, name, namelen); + buf->name[namelen] = 0; + buf->found = true; + return 1; + } + return 0; } /* @@ -221,6 +259,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; + struct qstr dname; + struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; sbi = SDCARDFS_SB(dentry->d_sb); @@ -241,60 +281,79 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, &lower_path); /* check for other cases */ if (err == -ENOENT) { - struct dentry *child; - struct dentry *match = NULL; - mutex_lock(&d_inode(lower_dir_dentry)->i_mutex); - spin_lock(&lower_dir_dentry->d_lock); - list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { - if (child && d_inode(child)) { - if (qstr_case_eq(&child->d_name, name)) { - match = dget(child); - break; - } - } + struct file *file; + const struct cred *cred = current_cred(); + + struct sdcardfs_name_data buffer = { + .ctx.actor = sdcardfs_name_match, + .to_find = name, + .name = __getname(), + .found = false, + }; + + if (!buffer.name) { + err = -ENOMEM; + goto out; + } + file = dentry_open(lower_parent_path, O_RDONLY, cred); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_name; } - spin_unlock(&lower_dir_dentry->d_lock); - mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex); - if (match) { + err = iterate_dir(file, &buffer.ctx); + fput(file); + if (err) + goto put_name; + + if (buffer.found) err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, - match->d_name.name, 0, + buffer.name, 0, &lower_path); - dput(match); - } + else + err = -ENOENT; +put_name: + __putname(buffer.name); } /* no error: handle positive dentries */ if (!err) { /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with - * the inode of the graft path */ + * the inode of the graft path + */ - if(need_graft_path(dentry)) { + if (need_graft_path(dentry)) { /* setup_obb_dentry() - * The lower_path will be stored to the dentry's orig_path + * The lower_path will be stored to the dentry's orig_path * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path - * returns: -ERRNO if error (0: no error) */ + * returns: -ERRNO if error (0: no error) + */ err = setup_obb_dentry(dentry, &lower_path); - if(err) { + if (err) { /* if the sbi->obbpath is not available, we can optionally * setup the lower_path with its orig_path. * but, the current implementation just returns an error * because the sdcard daemon also regards this case as - * a lookup fail. */ - printk(KERN_INFO "sdcardfs: base obbpath is not available\n"); + * a lookup fail. 
+ */ + pr_info("sdcardfs: base obbpath is not available\n"); sdcardfs_put_reset_orig_path(dentry); goto out; } } sdcardfs_set_lower_path(dentry, &lower_path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); - if (err) /* path_put underlying path on error */ + ret_dentry = + __sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); + } goto out; } @@ -306,11 +365,14 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, goto out; /* instatiate a new negative dentry */ - lower_dentry = d_lookup(lower_dir_dentry, name); + dname.name = name->name; + dname.len = name->len; + dname.hash = full_name_hash(dname.name, dname.len); + lower_dentry = d_lookup(lower_dir_dentry, &dname); if (lower_dentry) goto setup_lower; - lower_dentry = d_alloc(lower_dir_dentry, name); + lower_dentry = d_alloc(lower_dir_dentry, &dname); if (!lower_dentry) { err = -ENOMEM; goto out; @@ -331,14 +393,16 @@ setup_lower: err = 0; out: - return ERR_PTR(err); + if (err) + return ERR_PTR(err); + return ret_dentry; } /* * On success: - * fills dentry object appropriate values and returns NULL. + * fills dentry object appropriate values and returns NULL. * On fail (== error) - * returns error ptr + * returns error ptr * * @dir : Parent inode. It is locked (dir->i_mutex) * @dentry : Target dentry to lookup. we should set each of fields. @@ -355,13 +419,10 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { ret = ERR_PTR(-EACCES); - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); goto out_err; - } + } /* save current_cred and override it */ OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); @@ -377,9 +438,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); if (IS_ERR(ret)) - { goto out; - } if (ret) dentry = ret; if (d_inode(dentry)) { diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 7a8eae29e44d..953d2156d2e9 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -29,7 +29,7 @@ enum { Opt_gid, Opt_debug, Opt_mask, - Opt_multiuser, // May need? 
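When the exact-name lookup in __sdcardfs_lookup() above fails with -ENOENT, the rewritten fallback no longer walks d_subdirs under d_lock; it opens the lower directory and drives iterate_dir() with a dir_context actor (sdcardfs_name_match) until a case-insensitive match is seen. A minimal sketch of that callback pattern follows, under the same assumptions; struct ci_ctx, ci_actor and find_ci_name are illustrative names, not part of the patch:

#include <linux/fs.h>
#include <linux/string.h>

struct ci_ctx {
	struct dir_context ctx;		/* embedded callback state */
	const struct qstr *want;
	bool found;
};

static int ci_actor(struct dir_context *ctx, const char *name, int namelen,
		    loff_t offset, u64 ino, unsigned int d_type)
{
	struct ci_ctx *c = container_of(ctx, struct ci_ctx, ctx);

	if (c->want->len == namelen &&
	    !strncasecmp(c->want->name, name, namelen)) {
		c->found = true;
		return 1;	/* non-zero return stops the iteration */
	}
	return 0;
}

static bool find_ci_name(struct file *lower_dir, const struct qstr *want)
{
	struct ci_ctx c = { .ctx.actor = ci_actor, .want = want };

	if (iterate_dir(lower_dir, &c.ctx))
		return false;	/* I/O or permission error */
	return c.found;
}

Because iterate_dir() asks the lower filesystem for every name, this sees entries that have no dentry in the cache yet, which the old d_subdirs scan could silently miss.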
+ Opt_multiuser, Opt_userid, Opt_reserved_mb, Opt_err, @@ -72,6 +72,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -116,19 +117,17 @@ static int parse_options(struct super_block *sb, char *options, int silent, break; /* unknown option */ default: - if (!silent) { - printk( KERN_ERR "Unrecognized mount option \"%s\" " - "or missing value", p); - } + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } if (*debug) { - printk( KERN_INFO "sdcardfs : options - debug:%d\n", *debug); - printk( KERN_INFO "sdcardfs : options - uid:%d\n", + pr_info("sdcardfs : options - debug:%d\n", *debug); + pr_info("sdcardfs : options - uid:%d\n", opts->fs_low_uid); - printk( KERN_INFO "sdcardfs : options - gid:%d\n", + pr_info("sdcardfs : options - gid:%d\n", opts->fs_low_gid); } @@ -148,6 +147,7 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -173,22 +173,20 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, case Opt_fsuid: case Opt_fsgid: case Opt_reserved_mb: - printk( KERN_WARNING "Option \"%s\" can't be changed during remount\n", p); + pr_warn("Option \"%s\" can't be changed during remount\n", p); break; /* unknown option */ default: - if (!silent) { - printk( KERN_ERR "Unrecognized mount option \"%s\" " - "or missing value", p); - } + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } if (debug) { - printk( KERN_INFO "sdcardfs : options - debug:%d\n", debug); - printk( KERN_INFO "sdcardfs : options - gid:%d\n", vfsopts->gid); - printk( KERN_INFO "sdcardfs : options - mask:%d\n", vfsopts->mask); + pr_info("sdcardfs : options - debug:%d\n", debug); + pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid); + pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask); } return 0; @@ -223,8 +221,8 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) #endif DEFINE_MUTEX(sdcardfs_super_list_lock); -LIST_HEAD(sdcardfs_super_list); EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); +LIST_HEAD(sdcardfs_super_list); EXPORT_SYMBOL_GPL(sdcardfs_super_list); /* @@ -242,31 +240,30 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, struct sdcardfs_vfsmount_options *mnt_opt = mnt->data; struct inode *inode; - printk(KERN_INFO "sdcardfs version 2.0\n"); + pr_info("sdcardfs version 2.0\n"); if (!dev_name) { - printk(KERN_ERR - "sdcardfs: read_super: missing dev_name argument\n"); + pr_err("sdcardfs: read_super: missing dev_name argument\n"); err = -EINVAL; goto out; } - printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); - printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); - printk(KERN_INFO "sdcardfs: mnt -> %p\n", mnt); + pr_info("sdcardfs: dev_name -> %s\n", dev_name); + pr_info("sdcardfs: options -> %s\n", (char *)raw_data); + pr_info("sdcardfs: mnt -> %p\n", mnt); /* parse lower path */ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &lower_path); if (err) { - printk(KERN_ERR "sdcardfs: error accessing lower directory '%s'\n", dev_name); + pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name); goto out; } /* allocate superblock private data */ sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); if (!SDCARDFS_SB(sb)) { - printk(KERN_CRIT "sdcardfs: read_super: out of 
memory\n"); + pr_crit("sdcardfs: read_super: out of memory\n"); err = -ENOMEM; goto out_free; } @@ -275,7 +272,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, /* parse options */ err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options); if (err) { - printk(KERN_ERR "sdcardfs: invalid options\n"); + pr_err("sdcardfs: invalid options\n"); goto out_freesbi; } @@ -328,14 +325,15 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, /* setup permission policy */ sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); mutex_lock(&sdcardfs_super_list_lock); - if(sb_info->options.multiuser) { - setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); + if (sb_info->options.multiuser) { + setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, + sb_info->options.fs_user_id, AID_ROOT, + false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - /*err = prepare_dir(sb_info->obbpath_s, - sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(d_inode(sb->s_root), PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); + setup_derived_state(d_inode(sb->s_root), PERM_ROOT, + sb_info->options.fs_user_id, AID_ROOT, + false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fixup_tmp_permissions(d_inode(sb->s_root)); @@ -344,7 +342,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, mutex_unlock(&sdcardfs_super_list_lock); if (!silent) - printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + pr_info("sdcardfs: mounted on top of %s type %s\n", dev_name, lower_sb->s_type->name); goto out; /* all is well */ @@ -368,8 +366,10 @@ out: /* A feature which supports mount_nodev() with options */ static struct dentry *mount_nodev_with_options(struct vfsmount *mnt, - struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct vfsmount *, struct super_block *, const char *, void *, int)) + struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + int (*fill_super)(struct vfsmount *, struct super_block *, + const char *, void *, int)) { int error; @@ -401,19 +401,22 @@ static struct dentry *sdcardfs_mount(struct vfsmount *mnt, raw_data, sdcardfs_read_super); } -static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) { WARN(1, "sdcardfs does not support mount. 
Use mount2.\n"); return ERR_PTR(-EINVAL); } -void *sdcardfs_alloc_mnt_data(void) { +void *sdcardfs_alloc_mnt_data(void) +{ return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); } -void sdcardfs_kill_sb(struct super_block *sb) { +void sdcardfs_kill_sb(struct super_block *sb) +{ struct sdcardfs_sb_info *sbi; + if (sb->s_magic == SDCARDFS_SUPER_MAGIC) { sbi = SDCARDFS_SB(sb); mutex_lock(&sdcardfs_super_list_lock); @@ -432,6 +435,7 @@ static struct file_system_type sdcardfs_fs_type = { .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; +MODULE_ALIAS_FS(SDCARDFS_NAME); static int __init init_sdcardfs_fs(void) { @@ -467,10 +471,15 @@ static void __exit exit_sdcardfs_fs(void) pr_info("Completed sdcardfs module unload\n"); } -MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" - " (http://www.fsl.cs.sunysb.edu/)"); -MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION - " (http://wrapfs.filesystems.org/)"); +/* Original wrapfs authors */ +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)"); + +/* Original sdcardfs authors */ +MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics"); + +/* Current maintainer */ +MODULE_AUTHOR("Daniel Rosenberg, Google"); +MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION); MODULE_LICENSE("GPL"); module_init(init_sdcardfs_fs); diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index e21f64675a80..0d4089c62c3a 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -48,34 +48,55 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } +static int sdcardfs_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int err = 0; + struct file *file, *lower_file; + const struct vm_operations_struct *lower_vm_ops; + struct vm_area_struct lower_vma; + + memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); + file = lower_vma.vm_file; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + if (!lower_vm_ops->page_mkwrite) + goto out; + + lower_file = sdcardfs_lower_file(file); + /* + * XXX: vm_ops->page_mkwrite may be called in parallel. + * Because we have to resort to temporarily changing the + * vma->vm_file to point to the lower file, a concurrent + * invocation of sdcardfs_page_mkwrite could see a different + * value. In this workaround, we keep a different copy of the + * vma structure in our stack, so we never expose a different + * value of the vma->vm_file called to us, even temporarily. + * A better fix would be to change the calling semantics of + * ->page_mkwrite to take an explicit file pointer. + */ + lower_vma.vm_file = lower_file; + err = lower_vm_ops->page_mkwrite(&lower_vma, vmf); +out: + return err; +} + static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* - * This function returns zero on purpose in order to support direct IO. - * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null. - * - * However, this function won't be called by certain file operations - * including generic fs functions. * reads and writes are delivered to - * the lower file systems and the direct IOs will be handled by them. - * - * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x), - * swap_writepage invokes this function directly. + * This function should never be called directly. We need it + * to exist, to get past a check in open_check_o_direct(), + * which is called from do_last(). 
*/ - printk(KERN_INFO "%s, operation is not supported\n", __func__); - return 0; + return -EINVAL; } -/* - * XXX: the default address_space_ops for sdcardfs is empty. We cannot set - * our inode->i_mapping->a_ops to NULL because too many code paths expect - * the a_ops vector to be non-NULL. - */ const struct address_space_operations sdcardfs_aops = { - /* empty on purpose */ .direct_IO = sdcardfs_direct_IO, }; const struct vm_operations_struct sdcardfs_vm_ops = { .fault = sdcardfs_fault, + .page_mkwrite = sdcardfs_page_mkwrite, }; diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h index 52bc20080904..2e89b5872314 100644 --- a/fs/sdcardfs/multiuser.h +++ b/fs/sdcardfs/multiuser.h @@ -28,22 +28,17 @@ typedef uid_t userid_t; typedef uid_t appid_t; -static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) { - return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); +static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) +{ + return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); } -static inline gid_t multiuser_get_cache_gid(userid_t user_id, appid_t app_id) { - if (app_id >= AID_APP_START && app_id <= AID_APP_END) { - return multiuser_get_uid(user_id, (app_id - AID_APP_START) + AID_CACHE_GID_START); - } else { - return -1; - } +static inline gid_t multiuser_get_cache_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_CACHE_GID_START; } -static inline gid_t multiuser_get_ext_gid(userid_t user_id, appid_t app_id) { - if (app_id >= AID_APP_START && app_id <= AID_APP_END) { - return multiuser_get_uid(user_id, (app_id - AID_APP_START) + AID_EXT_GID_START); - } else { - return -1; - } +static inline gid_t multiuser_get_ext_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_EXT_GID_START; } diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index d96fcde041cc..89196e31073e 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -20,6 +20,7 @@ #include "sdcardfs.h" #include <linux/hashtable.h> +#include <linux/ctype.h> #include <linux/delay.h> #include <linux/radix-tree.h> #include <linux/dcache.h> @@ -44,13 +45,24 @@ static DEFINE_HASHTABLE(ext_to_groupid, 8); static struct kmem_cache *hashtable_entry_cachep; -static void inline qstr_init(struct qstr *q, const char *name) { +static unsigned int full_name_case_hash(const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(); + + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + return end_name_hash(hash); +} + +static inline void qstr_init(struct qstr *q, const char *name) +{ q->name = name; q->len = strlen(q->name); - q->hash = full_name_hash(q->name, q->len); + q->hash = full_name_case_hash(q->name, q->len); } -static inline int qstr_copy(const struct qstr *src, struct qstr *dest) { +static inline int qstr_copy(const struct qstr *src, struct qstr *dest) +{ dest->name = kstrdup(src->name, GFP_KERNEL); dest->hash_len = src->hash_len; return !!dest->name; @@ -78,6 +90,7 @@ static appid_t __get_appid(const struct qstr *key) appid_t get_appid(const char *key) { struct qstr q; + qstr_init(&q, key); return __get_appid(&q); } @@ -103,6 +116,7 @@ static appid_t __get_ext_gid(const struct qstr *key) appid_t get_ext_gid(const char *key) { struct qstr q; + qstr_init(&q, key); return __get_ext_gid(&q); } @@ -133,8 +147,10 @@ appid_t is_excluded(const char *key, userid_t user) /* Kernel has already enforced everything we returned through * derive_permissions_locked(), so this is used to lock down access - * even 
further, such as enforcing that apps hold sdcard_rw. */ -int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name) { + * even further, such as enforcing that apps hold sdcard_rw. + */ +int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name) +{ struct qstr q_autorun = QSTR_LITERAL("autorun.inf"); struct qstr q__android_secure = QSTR_LITERAL(".android_secure"); struct qstr q_android_secure = QSTR_LITERAL("android_secure"); @@ -149,26 +165,26 @@ int check_caller_access_to_name(struct inode *parent_node, const struct qstr *na } /* Root always has access; access for any other UIDs should always - * be controlled through packages.list. */ - if (from_kuid(&init_user_ns, current_fsuid()) == 0) { + * be controlled through packages.list. + */ + if (from_kuid(&init_user_ns, current_fsuid()) == 0) return 1; - } /* No extra permissions to enforce */ return 1; } /* This function is used when file opening. The open flags must be - * checked before calling check_caller_access_to_name() */ -int open_flags_to_access_mode(int open_flags) { - if((open_flags & O_ACCMODE) == O_RDONLY) { + * checked before calling check_caller_access_to_name() + */ +int open_flags_to_access_mode(int open_flags) +{ + if ((open_flags & O_ACCMODE) == O_RDONLY) return 0; /* R_OK */ - } else if ((open_flags & O_ACCMODE) == O_WRONLY) { + if ((open_flags & O_ACCMODE) == O_WRONLY) return 1; /* W_OK */ - } else { - /* Probably O_RDRW, but treat as default to be safe */ + /* Probably O_RDWR, but treat as default to be safe */ return 1; /* R_OK | W_OK */ - } } static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, @@ -178,6 +194,8 @@ static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, GFP_KERNEL); if (!ret) return NULL; + INIT_HLIST_NODE(&ret->dlist); + INIT_HLIST_NODE(&ret->hlist); if (!qstr_copy(key, &ret->key)) { kmem_cache_free(hashtable_entry_cachep, ret); @@ -249,8 +267,7 @@ static void fixup_all_perms_name(const struct qstr *key) struct sdcardfs_sb_info *sbinfo; struct limit_search limit = { .flags = BY_NAME, - .name = key->name, - .length = key->len, + .name = QSTR_INIT(key->name, key->len), }; list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { if (sbinfo_has_sdcard_magic(sbinfo)) @@ -263,8 +280,7 @@ static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid) struct sdcardfs_sb_info *sbinfo; struct limit_search limit = { .flags = BY_NAME | BY_USERID, - .name = key->name, - .length = key->len, + .name = QSTR_INIT(key->name, key->len), .userid = userid, }; list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { @@ -326,7 +342,6 @@ static int insert_userid_exclude_entry(const struct qstr *key, userid_t value) static void free_hashtable_entry(struct hashtable_entry *entry) { kfree(entry->key.name); - hash_del_rcu(&entry->dlist); kmem_cache_free(hashtable_entry_cachep, entry); } @@ -361,7 +376,6 @@ static void remove_packagelist_entry(const struct qstr *key) remove_packagelist_entry_locked(key); fixup_all_perms_name(key); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group) @@ -384,7 +398,6 @@ static void remove_ext_gid_entry(const struct qstr *key, gid_t group) mutex_lock(&sdcardfs_super_list_lock); remove_ext_gid_entry_locked(key, group); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_userid_all_entry_locked(userid_t userid) @@ -412,7 +425,6 @@ static void remove_userid_all_entry(userid_t userid) 
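The case-insensitive hashing introduced above only works because the hash function and the comparison function agree: full_name_case_hash() folds every byte with tolower() before mixing it in, and lookups then confirm candidates with qstr_case_eq(). A minimal illustration of that invariant, reusing qstr_init() and qstr_case_eq() from this file (check_ci_invariant is a hypothetical name):

#include <linux/ctype.h>
#include <linux/dcache.h>

static bool check_ci_invariant(void)
{
	struct qstr a, b;

	/* Every byte is folded with tolower() before being mixed in,
	 * so differently-cased spellings land in the same bucket. */
	qstr_init(&a, "com.Example.App");
	qstr_init(&b, "com.example.app");

	return a.hash == b.hash && qstr_case_eq(&a, &b);	/* true */
}

Had the table kept the stock, case-sensitive full_name_hash(), the two spellings could land in different buckets and the case-insensitive lookup in __get_appid() would miss entries that are actually present.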
remove_userid_all_entry_locked(userid); fixup_all_perms_userid(userid); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid) @@ -437,7 +449,6 @@ static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid) remove_userid_exclude_entry_locked(key, userid); fixup_all_perms_name_userid(key, userid); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void packagelist_destroy(void) @@ -446,6 +457,7 @@ static void packagelist_destroy(void) struct hlist_node *h_t; HLIST_HEAD(free_list); int i; + mutex_lock(&sdcardfs_super_list_lock); hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { hash_del_rcu(&hash_cur->hlist); @@ -459,7 +471,7 @@ static void packagelist_destroy(void) hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) free_hashtable_entry(hash_cur); mutex_unlock(&sdcardfs_super_list_lock); - printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); + pr_info("sdcardfs: destroyed packagelist pkgld\n"); } #define SDCARDFS_CONFIGFS_ATTR(_pfx, _name) \ @@ -575,7 +587,8 @@ static ssize_t package_details_clear_userid_store(struct config_item *item, static void package_details_release(struct config_item *item) { struct package_details *package_details = to_package_details(item); - printk(KERN_INFO "sdcardfs: removing %s\n", package_details->name.name); + + pr_info("sdcardfs: removing %s\n", package_details->name.name); remove_packagelist_entry(&package_details->name); kfree(package_details->name.name); kfree(package_details); @@ -593,7 +606,7 @@ static struct configfs_attribute *package_details_attrs[] = { }; static struct configfs_item_operations package_details_item_ops = { - .release = package_details_release, + .release = package_details_release, }; static struct config_item_type package_appid_type = { @@ -627,7 +640,7 @@ static void extension_details_release(struct config_item *item) { struct extension_details *extension_details = to_extension_details(item); - printk(KERN_INFO "sdcardfs: No longer mapping %s files to gid %d\n", + pr_info("sdcardfs: No longer mapping %s files to gid %d\n", extension_details->name.name, extension_details->num); remove_ext_gid_entry(&extension_details->name, extension_details->num); kfree(extension_details->name.name); @@ -649,6 +662,7 @@ static struct config_item *extension_details_make_item(struct config_group *grou struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL); const char *tmp; int ret; + if (!extension_details) return ERR_PTR(-ENOMEM); @@ -703,7 +717,8 @@ static struct config_group *extensions_make_group(struct config_group *group, co static void extensions_drop_group(struct config_group *group, struct config_item *item) { struct extensions_value *value = to_extensions_value(item); - printk(KERN_INFO "sdcardfs: No longer mapping any files to gid %d\n", value->num); + + pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num); kfree(value); } @@ -837,14 +852,14 @@ static int configfs_sdcardfs_init(void) { int ret, i; struct configfs_subsystem *subsys = &sdcardfs_packages; - for (i = 0; sd_default_groups[i]; i++) { + + for (i = 0; sd_default_groups[i]; i++) config_group_init(sd_default_groups[i]); - } config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); ret = configfs_register_subsystem(subsys); if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", + pr_err("Error %d while registering subsystem %s\n", ret, 
subsys->su_group.cg_item.ci_namebuf); } @@ -862,18 +877,17 @@ int packagelist_init(void) kmem_cache_create("packagelist_hashtable_entry", sizeof(struct hashtable_entry), 0, 0, NULL); if (!hashtable_entry_cachep) { - printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); + pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); return -ENOMEM; } configfs_sdcardfs_init(); - return 0; + return 0; } void packagelist_exit(void) { configfs_sdcardfs_exit(); packagelist_destroy(); - if (hashtable_entry_cachep) - kmem_cache_destroy(hashtable_entry_cachep); + kmem_cache_destroy(hashtable_entry_cachep); } diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index f3cced313108..2b67b9a8ef9f 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -29,6 +29,7 @@ #include <linux/dcache.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/aio.h> #include <linux/mm.h> #include <linux/mount.h> #include <linux/namei.h> @@ -52,7 +53,7 @@ #define SDCARDFS_ROOT_INO 1 /* useful for tracking code reachability */ -#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) +#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) #define SDCARDFS_DIRENT_SIZE 256 @@ -86,54 +87,56 @@ } while (0) /* OVERRIDE_CRED() and REVERT_CRED() - * OVERRID_CRED() - * backup original task->cred - * and modifies task->cred->fsuid/fsgid to specified value. + * OVERRIDE_CRED() + * backup original task->cred + * and modifies task->cred->fsuid/fsgid to specified value. * REVERT_CRED() - * restore original task->cred->fsuid/fsgid. + * restore original task->cred->fsuid/fsgid. * These two macro should be used in pair, and OVERRIDE_CRED() should be * placed at the beginning of a function, right after variable declaration. */ #define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \ + do { \ saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) { return -ENOMEM; } + if (!saved_cred) \ + return -ENOMEM; \ + } while (0) #define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \ + do { \ saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) { return ERR_PTR(-ENOMEM); } + if (!saved_cred) \ + return ERR_PTR(-ENOMEM); \ + } while (0) #define REVERT_CRED(saved_cred) revert_fsids(saved_cred) -#define DEBUG_CRED() \ - printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", \ - __FUNCTION__, __LINE__, \ - (int)current->cred->fsuid, \ - (int)current->cred->fsgid); - /* Android 5.0 support */ /* Permission mode for a specific node. Controls how file permissions - * are derived for children nodes. */ + * are derived for children nodes. + */ typedef enum { - /* Nothing special; this node should just inherit from its parent. */ - PERM_INHERIT, - /* This node is one level above a normal root; used for legacy layouts - * which use the first level to represent user_id. */ - PERM_PRE_ROOT, - /* This node is "/" */ - PERM_ROOT, - /* This node is "/Android" */ - PERM_ANDROID, - /* This node is "/Android/data" */ - PERM_ANDROID_DATA, - /* This node is "/Android/obb" */ - PERM_ANDROID_OBB, - /* This node is "/Android/media" */ - PERM_ANDROID_MEDIA, - /* This node is "/Android/[data|media|obb]/[package]" */ - PERM_ANDROID_PACKAGE, - /* This node is "/Android/[data|media|obb]/[package]/cache" */ - PERM_ANDROID_PACKAGE_CACHE, + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. 
+ */ + PERM_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/media" */ + PERM_ANDROID_MEDIA, + /* This node is "/Android/[data|media|obb]/[package]" */ + PERM_ANDROID_PACKAGE, + /* This node is "/Android/[data|media|obb]/[package]/cache" */ + PERM_ANDROID_PACKAGE_CACHE, } perm_t; struct sdcardfs_sb_info; @@ -141,9 +144,9 @@ struct sdcardfs_mount_options; struct sdcardfs_inode_info; /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs_inode_info *info); +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info); /* Do not directly use this function, use REVERT_CRED() instead. */ -void revert_fsids(const struct cred * old_cred); +void revert_fsids(const struct cred *old_cred); /* operations vectors defined in specific files */ extern const struct file_operations sdcardfs_main_fops; @@ -220,7 +223,8 @@ struct sdcardfs_sb_info { struct super_block *sb; struct super_block *lower_sb; /* derived perm policy : some of options have been added - * to sdcardfs_mount_options (Android 4.4 support) */ + * to sdcardfs_mount_options (Android 4.4 support) + */ struct sdcardfs_mount_options options; spinlock_t lock; /* protects obbpath */ char *obbpath_s; @@ -331,7 +335,7 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ { \ struct path pname; \ spin_lock(&SDCARDFS_D(dent)->lock); \ - if(SDCARDFS_D(dent)->pname.dentry) { \ + if (SDCARDFS_D(dent)->pname.dentry) { \ pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ SDCARDFS_D(dent)->pname.dentry = NULL; \ SDCARDFS_D(dent)->pname.mnt = NULL; \ @@ -347,17 +351,17 @@ SDCARDFS_DENT_FUNC(orig_path) static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo) { - return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; + return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; } /* grab a refererence if we aren't linking to ourself */ static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) { struct inode *old_top = NULL; + BUG_ON(IS_ERR_OR_NULL(top)); - if (info->top && info->top != &info->vfs_inode) { + if (info->top && info->top != &info->vfs_inode) old_top = info->top; - } if (top != &info->vfs_inode) igrab(top); info->top = top; @@ -367,11 +371,11 @@ static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) static inline struct inode *grab_top(struct sdcardfs_inode_info *info) { struct inode *top = info->top; - if (top) { + + if (top) return igrab(top); - } else { + else return NULL; - } } static inline void release_top(struct sdcardfs_inode_info *info) @@ -379,21 +383,24 @@ static inline void release_top(struct sdcardfs_inode_info *info) iput(info->top); } -static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { +static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +{ struct sdcardfs_vfsmount_options *opts = mnt->data; - if (opts->gid == AID_SDCARD_RW) { + if (opts->gid == AID_SDCARD_RW) /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing * the user boundary enforcement for the default view. 
The UIDs - * assigned to app directories are still multiuser aware. */ + * assigned to app directories are still multiuser aware. + */ return AID_SDCARD_RW; - } else { + else return multiuser_get_uid(info->userid, opts->gid); - } } -static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { + +static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +{ int owner_mode; int filtered_mode; struct sdcardfs_vfsmount_options *opts = mnt->data; @@ -402,17 +409,18 @@ static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *inf if (info->perm == PERM_PRE_ROOT) { /* Top of multi-user view should always be visible to ensure - * secondary users can traverse inside. */ + * secondary users can traverse inside. + */ visible_mode = 0711; } else if (info->under_android) { /* Block "other" access to Android directories, since only apps * belonging to a specific user should be in there; we still - * leave +x open for the default view. */ - if (opts->gid == AID_SDCARD_RW) { + * leave +x open for the default view. + */ + if (opts->gid == AID_SDCARD_RW) visible_mode = visible_mode & ~0006; - } else { + else visible_mode = visible_mode & ~0007; - } } owner_mode = info->lower_inode->i_mode & 0700; filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); @@ -437,7 +445,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent, /* in case of a local obb dentry * the orig_path should be returned */ - if(has_graft_path(dent)) + if (has_graft_path(dent)) sdcardfs_get_orig_path(dent, real_lower); else sdcardfs_get_lower_path(dent, real_lower); @@ -446,7 +454,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent, static inline void sdcardfs_put_real_lower(const struct dentry *dent, struct path *real_lower) { - if(has_graft_path(dent)) + if (has_graft_path(dent)) sdcardfs_put_orig_path(dent, real_lower); else sdcardfs_put_lower_path(dent, real_lower); @@ -459,7 +467,7 @@ extern struct list_head sdcardfs_super_list; extern appid_t get_appid(const char *app_name); extern appid_t get_ext_gid(const char *app_name); extern appid_t is_excluded(const char *app_name, userid_t userid); -extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr* name); +extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name); extern int open_flags_to_access_mode(int open_flags); extern int packagelist_init(void); extern void packagelist_exit(void); @@ -469,8 +477,7 @@ extern void packagelist_exit(void); #define BY_USERID (1 << 1) struct limit_search { unsigned int flags; - const char *name; - size_t length; + struct qstr name; userid_t userid; }; @@ -478,12 +485,10 @@ extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t useri uid_t uid, bool under_android, struct inode *top); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); -extern void drop_recursive(struct dentry *parent); -extern void fixup_top_recursive(struct dentry *parent); extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); extern void update_derived_permission_lock(struct dentry *dentry); -void fixup_lower_ownership(struct dentry* dentry, const char *name); +void fixup_lower_ownership(struct dentry *dentry, const char *name); extern int need_graft_path(struct dentry *dentry); extern int 
is_base_obbpath(struct dentry *dentry); extern int is_obbpath_invalid(struct dentry *dentry); @@ -493,6 +498,7 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir = dget_parent(dentry); + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); return dir; } @@ -611,6 +617,11 @@ static inline bool str_case_eq(const char *s1, const char *s2) return !strcasecmp(s1, s2); } +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) { return q1->len == q2->len && str_case_eq(q1->name, q2->name); diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index edda32b68dc0..a3393e959c63 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -36,7 +36,7 @@ static void sdcardfs_put_super(struct super_block *sb) if (!spd) return; - if(spd->obbpath_s) { + if (spd->obbpath_s) { kfree(spd->obbpath_s); path_put(&spd->obbpath); } @@ -64,7 +64,7 @@ static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (sbi->options.reserved_mb) { /* Invalid statfs informations. */ if (buf->f_bsize == 0) { - printk(KERN_ERR "Returned block size is zero.\n"); + pr_err("Returned block size is zero.\n"); return -EINVAL; } @@ -100,8 +100,7 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options * SILENT, but anything else left over is an error. */ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { - printk(KERN_ERR - "sdcardfs: remount flags 0x%x unsupported\n", *flags); + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); err = -EINVAL; } @@ -125,29 +124,33 @@ static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, * SILENT, but anything else left over is an error. 
*/ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) { - printk(KERN_ERR - "sdcardfs: remount flags 0x%x unsupported\n", *flags); + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); err = -EINVAL; } - printk(KERN_INFO "Remount options were %s for vfsmnt %p.\n", options, mnt); + pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt); err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); return err; } -static void* sdcardfs_clone_mnt_data(void *data) { - struct sdcardfs_vfsmount_options* opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); - struct sdcardfs_vfsmount_options* old = data; - if(!opt) return NULL; +static void *sdcardfs_clone_mnt_data(void *data) +{ + struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); + struct sdcardfs_vfsmount_options *old = data; + + if (!opt) + return NULL; opt->gid = old->gid; opt->mask = old->mask; return opt; } -static void sdcardfs_copy_mnt_data(void *data, void *newdata) { - struct sdcardfs_vfsmount_options* old = data; - struct sdcardfs_vfsmount_options* new = newdata; +static void sdcardfs_copy_mnt_data(void *data, void *newdata) +{ + struct sdcardfs_vfsmount_options *old = data; + struct sdcardfs_vfsmount_options *new = newdata; + old->gid = new->gid; old->mask = new->mask; } @@ -218,8 +221,7 @@ int sdcardfs_init_inode_cache(void) /* sdcardfs inode cache destructor */ void sdcardfs_destroy_inode_cache(void) { - if (sdcardfs_inode_cachep) - kmem_cache_destroy(sdcardfs_inode_cachep); + kmem_cache_destroy(sdcardfs_inode_cachep); } /* @@ -235,7 +237,8 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struct dentry *root) +static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, + struct dentry *root) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; @@ -248,7 +251,7 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struc if (vfsopts->gid != 0) seq_printf(m, ",gid=%u", vfsopts->gid); if (opts->multiuser) - seq_printf(m, ",multiuser"); + seq_puts(m, ",multiuser"); if (vfsopts->mask) seq_printf(m, ",mask=%u", vfsopts->mask); if (opts->fs_user_id) diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index ffb093e72b6c..6dd158a216f4 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -26,34 +26,6 @@ config SQUASHFS If unsure, say N. choice - prompt "File decompression options" - depends on SQUASHFS - help - Squashfs now supports two options for decompressing file - data. Traditionally Squashfs has decompressed into an - intermediate buffer and then memcopied it into the page cache. - Squashfs now supports the ability to decompress directly into - the page cache. - - If unsure, select "Decompress file data into an intermediate buffer" - -config SQUASHFS_FILE_CACHE - bool "Decompress file data into an intermediate buffer" - help - Decompress file data into an intermediate buffer and then - memcopy it into the page cache. - -config SQUASHFS_FILE_DIRECT - bool "Decompress files directly into the page cache" - help - Directly decompress file data into the page cache. - Doing so can significantly improve performance because - it eliminates a memcpy and it also removes the lock contention - on the single buffer. 
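For context, the intermediate-buffer path being removed here amounts to one extra pass over the data: decompress into a scratch buffer, then copy page-sized chunks into the page cache. Roughly, and only as a sketch (this is not the deleted file_cache.c; copy_block_to_pages, buffer, pages and nr_pages are stand-ins):

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

/* One extra pass: the block was already decompressed into 'buffer';
 * copy it out, one page-sized chunk at a time. */
static void copy_block_to_pages(void *buffer, int bytes,
				struct page **pages, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages && bytes > 0; i++) {
		int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
		void *dst = kmap_atomic(pages[i]);

		memcpy(dst, buffer + i * PAGE_CACHE_SIZE, avail);
		kunmap_atomic(dst);
		flush_dcache_page(pages[i]);
		SetPageUptodate(pages[i]);
		bytes -= avail;
	}
}

Decompressing straight into the page-cache pages through a squashfs_page_actor removes both this memcpy() and the serialization on the shared buffer, which is the performance argument the removed help text makes for the now-unconditional direct path.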
- -endchoice - -choice prompt "Decompressor parallelisation options" depends on SQUASHFS help diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 246a6f329d89..fe51f1507ed1 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,8 +5,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o -squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o -squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-y += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 0cea9b9236d0..2eb66decc5ab 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -28,9 +28,12 @@ #include <linux/fs.h> #include <linux/vfs.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/pagemap.h> #include <linux/buffer_head.h> +#include <linux/workqueue.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -38,177 +41,434 @@ #include "decompressor.h" #include "page_actor.h" -/* - * Read the metadata block length, this is stored in the first two - * bytes of the metadata block. - */ -static struct buffer_head *get_block_length(struct super_block *sb, - u64 *cur_index, int *offset, int *length) +static struct workqueue_struct *squashfs_read_wq; + +struct squashfs_read_request { + struct super_block *sb; + u64 index; + int length; + int compressed; + int offset; + u64 read_end; + struct squashfs_page_actor *output; + enum { + SQUASHFS_COPY, + SQUASHFS_DECOMPRESS, + SQUASHFS_METADATA, + } data_processing; + bool synchronous; + + /* + * If the read is synchronous, it is possible to retrieve information + * about the request by setting these pointers. 
+ */ + int *res; + int *bytes_read; + int *bytes_uncompressed; + + int nr_buffers; + struct buffer_head **bh; + struct work_struct offload; +}; + +struct squashfs_bio_request { + struct buffer_head **bh; + int nr_buffers; +}; + +static int squashfs_bio_submit(struct squashfs_read_request *req); + +int squashfs_init_read_wq(void) { - struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head *bh; + squashfs_read_wq = create_workqueue("SquashFS read wq"); + return !!squashfs_read_wq; +} + +void squashfs_destroy_read_wq(void) +{ + flush_workqueue(squashfs_read_wq); + destroy_workqueue(squashfs_read_wq); +} + +static void free_read_request(struct squashfs_read_request *req, int error) +{ + if (!req->synchronous) + squashfs_page_actor_free(req->output, error); + if (req->res) + *(req->res) = error; + kfree(req->bh); + kfree(req); +} + +static void squashfs_process_blocks(struct squashfs_read_request *req) +{ + int error = 0; + int bytes, i, length; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; + struct squashfs_page_actor *actor = req->output; + struct buffer_head **bh = req->bh; + int nr_buffers = req->nr_buffers; + + for (i = 0; i < nr_buffers; ++i) { + if (!bh[i]) + continue; + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + error = -EIO; + } + if (error) + goto cleanup; + + if (req->data_processing == SQUASHFS_METADATA) { + /* Extract the length of the metadata block */ + if (req->offset != msblk->devblksize - 1) + length = *((u16 *)(bh[0]->b_data + req->offset)); + else { + length = bh[0]->b_data[req->offset]; + length |= bh[1]->b_data[0] << 8; + } + req->compressed = SQUASHFS_COMPRESSED(length); + req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS + : SQUASHFS_COPY; + length = SQUASHFS_COMPRESSED_SIZE(length); + if (req->index + length + 2 > req->read_end) { + for (i = 0; i < nr_buffers; ++i) + put_bh(bh[i]); + kfree(bh); + req->length = length; + req->index += 2; + squashfs_bio_submit(req); + return; + } + req->length = length; + req->offset = (req->offset + 2) % PAGE_SIZE; + if (req->offset < 2) { + put_bh(bh[0]); + ++bh; + --nr_buffers; + } + } + if (req->bytes_read) + *(req->bytes_read) = req->length; - bh = sb_bread(sb, *cur_index); - if (bh == NULL) - return NULL; - - if (msblk->devblksize - *offset == 1) { - *length = (unsigned char) bh->b_data[*offset]; - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *length |= (unsigned char) bh->b_data[0] << 8; - *offset = 1; - } else { - *length = (unsigned char) bh->b_data[*offset] | - (unsigned char) bh->b_data[*offset + 1] << 8; - *offset += 2; - - if (*offset == msblk->devblksize) { - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *offset = 0; + if (req->data_processing == SQUASHFS_COPY) { + squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset, + req->length, msblk->devblksize); + } else if (req->data_processing == SQUASHFS_DECOMPRESS) { + req->length = squashfs_decompress(msblk, bh, nr_buffers, + req->offset, req->length, actor); + if (req->length < 0) { + error = -EIO; + goto cleanup; } } - return bh; + /* Last page may have trailing bytes not filled */ + bytes = req->length % PAGE_SIZE; + if (bytes && actor->page[actor->pages - 1]) + zero_user_segment(actor->page[actor->pages - 1], bytes, + PAGE_SIZE); + +cleanup: + if (req->bytes_uncompressed) + *(req->bytes_uncompressed) = req->length; + if (error) { + for (i = 0; i < nr_buffers; ++i) + if (bh[i]) + put_bh(bh[i]); + } + free_read_request(req, error); } +static void 
read_wq_handler(struct work_struct *work) +{ + squashfs_process_blocks(container_of(work, + struct squashfs_read_request, offload)); +} -/* - * Read and decompress a metadata block or datablock. Length is non-zero - * if a datablock is being read (the size is stored elsewhere in the - * filesystem), otherwise the length is obtained from the first two bytes of - * the metadata block. A bit in the length field indicates if the block - * is stored uncompressed in the filesystem (usually because compression - * generated a larger block - this does occasionally happen with compression - * algorithms). - */ -int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) +static void squashfs_bio_end_io(struct bio *bio) { - struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head **bh; - int offset = index & ((1 << msblk->devblksize_log2) - 1); - u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, avail, i; + int i; + int error = bio->bi_error; + struct squashfs_bio_request *bio_req = bio->bi_private; + + bio_put(bio); + + for (i = 0; i < bio_req->nr_buffers; ++i) { + if (!bio_req->bh[i]) + continue; + if (!error) + set_buffer_uptodate(bio_req->bh[i]); + else + clear_buffer_uptodate(bio_req->bh[i]); + unlock_buffer(bio_req->bh[i]); + } + kfree(bio_req); +} + +static int bh_is_optional(struct squashfs_read_request *req, int idx) +{ + int start_idx, end_idx; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; - bh = kcalloc(((output->length + msblk->devblksize - 1) - >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); - if (bh == NULL) + start_idx = (idx * msblk->devblksize - req->offset) / PAGE_CACHE_SIZE; + end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) / PAGE_CACHE_SIZE; + if (start_idx >= req->output->pages) + return 1; + if (start_idx < 0) + start_idx = end_idx; + if (end_idx >= req->output->pages) + end_idx = start_idx; + return !req->output->page[start_idx] && !req->output->page[end_idx]; +} + +static int actor_getblks(struct squashfs_read_request *req, u64 block) +{ + int i; + + req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO); + if (!req->bh) return -ENOMEM; - if (length) { + for (i = 0; i < req->nr_buffers; ++i) { /* - * Datablock. + * When dealing with an uncompressed block, the actor may + * contain NULL pages. There's no need to read the buffers + * associated with these pages. */ - bytes = -offset; - compressed = SQUASHFS_COMPRESSED_BLOCK(length); - length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); - if (next_index) - *next_index = index + length; - - TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, output->length); - - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto read_failure; - - for (b = 0; bytes < length; b++, cur_index++) { - bh[b] = sb_getblk(sb, cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; + if (!req->compressed && bh_is_optional(req, i)) { + req->bh[i] = NULL; + continue; } - ll_rw_block(READ, b, bh); - } else { - /* - * Metadata block. 
- */ - if ((index + 2) > msblk->bytes_used) - goto read_failure; + req->bh[i] = sb_getblk(req->sb, block + i); + if (!req->bh[i]) { + while (--i) { + if (req->bh[i]) + put_bh(req->bh[i]); + } + return -1; + } + } + return 0; +} - bh[0] = get_block_length(sb, &cur_index, &offset, &length); - if (bh[0] == NULL) - goto read_failure; - b = 1; +static int squashfs_bio_submit(struct squashfs_read_request *req) +{ + struct bio *bio = NULL; + struct buffer_head *bh; + struct squashfs_bio_request *bio_req = NULL; + int b = 0, prev_block = 0; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; - bytes = msblk->devblksize - offset; - compressed = SQUASHFS_COMPRESSED(length); - length = SQUASHFS_COMPRESSED_SIZE(length); - if (next_index) - *next_index = index + length + 2; + u64 read_start = round_down(req->index, msblk->devblksize); + u64 read_end = round_up(req->index + req->length, msblk->devblksize); + sector_t block = read_start >> msblk->devblksize_log2; + sector_t block_end = read_end >> msblk->devblksize_log2; + int offset = read_start - round_down(req->index, PAGE_SIZE); + int nr_buffers = block_end - block; + int blksz = msblk->devblksize; + int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES + : nr_buffers; - TRACE("Block @ 0x%llx, %scompressed size %d\n", index, - compressed ? "" : "un", length); + /* Setup the request */ + req->read_end = read_end; + req->offset = req->index - read_start; + req->nr_buffers = nr_buffers; + if (actor_getblks(req, block) < 0) + goto getblk_failed; - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto block_release; + /* Create and submit the BIOs */ + for (b = 0; b < nr_buffers; ++b, offset += blksz) { + bh = req->bh[b]; + if (!bh || !trylock_buffer(bh)) + continue; + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + continue; + } + offset %= PAGE_SIZE; - for (; bytes < length; b++) { - bh[b] = sb_getblk(sb, ++cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; + /* Append the buffer to the current BIO if it is contiguous */ + if (bio && bio_req && prev_block + 1 == b) { + if (bio_add_page(bio, bh->b_page, blksz, offset)) { + bio_req->nr_buffers += 1; + prev_block = b; + continue; + } } - ll_rw_block(READ, b - 1, bh + 1); + + /* Otherwise, submit the current BIO and create a new one */ + if (bio) + submit_bio(READ, bio); + bio_req = kcalloc(1, sizeof(struct squashfs_bio_request), + GFP_NOIO); + if (!bio_req) + goto req_alloc_failed; + bio_req->bh = &req->bh[b]; + bio = bio_alloc(GFP_NOIO, bio_max_pages); + if (!bio) + goto bio_alloc_failed; + bio->bi_bdev = req->sb->s_bdev; + bio->bi_iter.bi_sector = (block + b) + << (msblk->devblksize_log2 - 9); + bio->bi_private = bio_req; + bio->bi_end_io = squashfs_bio_end_io; + + bio_add_page(bio, bh->b_page, blksz, offset); + bio_req->nr_buffers += 1; + prev_block = b; } + if (bio) + submit_bio(READ, bio); - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; + if (req->synchronous) + squashfs_process_blocks(req); + else { + INIT_WORK(&req->offload, read_wq_handler); + schedule_work(&req->offload); } + return 0; - if (compressed) { - length = squashfs_decompress(msblk, bh, b, offset, length, - output); - if (length < 0) - goto read_failure; - } else { - /* - * Block is uncompressed. 
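squashfs_bio_submit() above coalesces runs of contiguous, not-yet-uptodate buffers into a single BIO and starts a new one at each gap or when the BIO is full. A standalone model of just that batching decision; the flag array stands in for the buffer_head array (1 = must be read, 0 = skipped as optional or already uptodate):

	#include <stdio.h>

	#define MAX_PER_IO 4	/* stands in for BIO_MAX_PAGES */

	static void submit(int first, int count)
	{
		printf("one I/O: blocks %d..%d\n", first, first + count - 1);
	}

	int main(void)
	{
		int need[] = { 1, 1, 0, 1, 1, 1, 1, 1, 0, 1 };
		int n = sizeof(need) / sizeof(need[0]);
		int start = 0, len = 0, i;

		for (i = 0; i < n; i++) {
			if (!need[i] || len == MAX_PER_IO) {	/* gap or BIO full */
				if (len)
					submit(start, len);
				len = 0;
				if (!need[i])
					continue;
			}
			if (!len)
				start = i;
			len++;
		}
		if (len)
			submit(start, len);
		return 0;
	}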
- */ - int in, pg_offset = 0; - void *data = squashfs_first_page(output); - - for (bytes = length; k < b; k++) { - in = min(bytes, msblk->devblksize - offset); - bytes -= in; - while (in) { - if (pg_offset == PAGE_CACHE_SIZE) { - data = squashfs_next_page(output); - pg_offset = 0; - } - avail = min_t(int, in, PAGE_CACHE_SIZE - - pg_offset); - memcpy(data + pg_offset, bh[k]->b_data + offset, - avail); - in -= avail; - pg_offset += avail; - offset += avail; - } - offset = 0; - put_bh(bh[k]); - } - squashfs_finish_page(output); +bio_alloc_failed: + kfree(bio_req); +req_alloc_failed: + unlock_buffer(bh); + while (--nr_buffers >= b) + if (req->bh[nr_buffers]) + put_bh(req->bh[nr_buffers]); + while (--b >= 0) + if (req->bh[b]) + wait_on_buffer(req->bh[b]); +getblk_failed: + free_read_request(req, -ENOMEM); + return -ENOMEM; +} + +static int read_metadata_block(struct squashfs_read_request *req, + u64 *next_index) +{ + int ret, error, bytes_read = 0, bytes_uncompressed = 0; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; + + if (req->index + 2 > msblk->bytes_used) { + free_read_request(req, -EINVAL); + return -EINVAL; + } + req->length = 2; + + /* Do not read beyond the end of the device */ + if (req->index + req->length > msblk->bytes_used) + req->length = msblk->bytes_used - req->index; + req->data_processing = SQUASHFS_METADATA; + + /* + * Reading metadata is always synchronous because we don't know the + * length in advance and the function is expected to update + * 'next_index' and return the length. + */ + req->synchronous = true; + req->res = &error; + req->bytes_read = &bytes_read; + req->bytes_uncompressed = &bytes_uncompressed; + + TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n", + req->index, req->compressed ? "" : "un", bytes_read, + req->output->length); + + ret = squashfs_bio_submit(req); + if (ret) + return ret; + if (error) + return error; + if (next_index) + *next_index += 2 + bytes_read; + return bytes_uncompressed; +} + +static int read_data_block(struct squashfs_read_request *req, int length, + u64 *next_index, bool synchronous) +{ + int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0; + + req->compressed = SQUASHFS_COMPRESSED_BLOCK(length); + req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); + req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS + : SQUASHFS_COPY; + + req->synchronous = synchronous; + if (synchronous) { + req->res = &error; + req->bytes_read = &bytes_read; + req->bytes_uncompressed = &bytes_uncompressed; + } + + TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n", + req->index, req->compressed ? "" : "un", req->length, + req->output->length); + + ret = squashfs_bio_submit(req); + if (ret) + return ret; + if (synchronous) + ret = error ? error : bytes_uncompressed; + if (next_index) + *next_index += length; + return ret; +} + +/* + * Read and decompress a metadata block or datablock. Length is non-zero + * if a datablock is being read (the size is stored elsewhere in the + * filesystem), otherwise the length is obtained from the first two bytes of + * the metadata block. A bit in the length field indicates if the block + * is stored uncompressed in the filesystem (usually because compression + * generated a larger block - this does occasionally happen with compression + * algorithms). 
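read_metadata_block() above has to be synchronous: the stored size sits in the block's first two bytes, so the request length is only known after the read, and the caller's next_index is then advanced by the two-byte header plus that size. A standalone model of walking length-prefixed records the same way (the layout here is illustrative, not the on-disk SquashFS format):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint8_t disk[64];	/* three fake blocks: u16 le size + payload */
		int sizes[] = { 5, 3, 7 }, i;
		uint64_t index = 0, pos = 0;

		memset(disk, 0, sizeof(disk));
		for (i = 0; i < 3; i++) {
			disk[pos] = sizes[i] & 0xff;
			disk[pos + 1] = sizes[i] >> 8;
			pos += 2 + sizes[i];
		}

		for (i = 0; i < 3; i++) {
			uint16_t len = disk[index] | (disk[index + 1] << 8);

			printf("block @ %llu, stored size %u\n",
			       (unsigned long long)index, len);
			index += 2 + len;	/* mirrors *next_index += 2 + bytes_read */
		}
		return 0;
	}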
+ */ +static int __squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output, bool sync) +{ + struct squashfs_read_request *req; + + req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL); + if (!req) { + if (!sync) + squashfs_page_actor_free(output, -ENOMEM); + return -ENOMEM; + } + + req->sb = sb; + req->index = index; + req->output = output; + + if (next_index) + *next_index = index; + + if (length) + length = read_data_block(req, length, next_index, sync); + else + length = read_metadata_block(req, next_index); + + if (length < 0) { + ERROR("squashfs_read_data failed to read block 0x%llx\n", + (unsigned long long)index); + return -EIO; } - kfree(bh); return length; +} -block_release: - for (; k < b; k++) - put_bh(bh[k]); +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) +{ + return __squashfs_read_data(sb, index, length, next_index, output, + true); +} + +int squashfs_read_data_async(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) +{ -read_failure: - ERROR("squashfs_read_data failed to read block 0x%llx\n", - (unsigned long long) index); - kfree(bh); - return -EIO; + return __squashfs_read_data(sb, index, length, next_index, output, + false); } diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1cb70a0b2168..6785d086ab38 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry) */ void squashfs_cache_delete(struct squashfs_cache *cache) { - int i, j; + int i; if (cache == NULL) return; for (i = 0; i < cache->entries; i++) { - if (cache->entry[i].data) { - for (j = 0; j < cache->pages; j++) - kfree(cache->entry[i].data[j]); - kfree(cache->entry[i].data); - } + if (cache->entry[i].page) + free_page_array(cache->entry[i].page, cache->pages); kfree(cache->entry[i].actor); } @@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) { - int i, j; + int i; struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) { @@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, init_waitqueue_head(&cache->entry[i].wait_queue); entry->cache = cache; entry->block = SQUASHFS_INVALID_BLK; - entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); - if (entry->data == NULL) { + entry->page = alloc_page_array(cache->pages, GFP_KERNEL); + if (!entry->page) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } - - for (j = 0; j < cache->pages; j++) { - entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (entry->data[j] == NULL) { - ERROR("Failed to allocate %s buffer\n", name); - goto cleanup; - } - } - - entry->actor = squashfs_page_actor_init(entry->data, - cache->pages, 0); + entry->actor = squashfs_page_actor_init(entry->page, + cache->pages, 0, NULL); if (entry->actor == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; @@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, return min(length, entry->length - offset); while (offset < entry->length) { - void *buff = entry->data[offset / PAGE_CACHE_SIZE] - + (offset % PAGE_CACHE_SIZE); + void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE]) + + (offset % PAGE_CACHE_SIZE); int bytes = min_t(int, 
entry->length - offset, PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); + kunmap_atomic(buff); remaining = 0; break; } memcpy(buffer, buff, bytes); + kunmap_atomic(buff); buffer += bytes; remaining -= bytes; offset += bytes; @@ -416,43 +406,38 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, void *squashfs_read_table(struct super_block *sb, u64 block, int length) { int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - int i, res; - void *table, *buffer, **data; + struct page **page; + void *buff; + int res; struct squashfs_page_actor *actor; - table = buffer = kmalloc(length, GFP_KERNEL); - if (table == NULL) + page = alloc_page_array(pages, GFP_KERNEL); + if (!page) return ERR_PTR(-ENOMEM); - data = kcalloc(pages, sizeof(void *), GFP_KERNEL); - if (data == NULL) { - res = -ENOMEM; - goto failed; - } - - actor = squashfs_page_actor_init(data, pages, length); + actor = squashfs_page_actor_init(page, pages, length, NULL); if (actor == NULL) { res = -ENOMEM; - goto failed2; + goto failed; } - for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) - data[i] = buffer; - res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); - kfree(data); - kfree(actor); - if (res < 0) - goto failed; + goto failed2; - return table; + buff = kmalloc(length, GFP_KERNEL); + if (!buff) + goto failed2; + squashfs_actor_to_buf(actor, buff, length); + squashfs_page_actor_free(actor, 0); + free_page_array(page, pages); + return buff; failed2: - kfree(data); + squashfs_page_actor_free(actor, 0); failed: - kfree(table); + free_page_array(page, pages); return ERR_PTR(res); } diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index e9034bf6e5ae..7de35bf297aa 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -24,7 +24,8 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/buffer_head.h> +#include <linux/highmem.h> +#include <linux/fs.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -94,40 +95,44 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *buffer = NULL, *comp_opts; + void *comp_opts, *buffer = NULL; + struct page *page; struct squashfs_page_actor *actor = NULL; int length = 0; + if (!SQUASHFS_COMP_OPTS(flags)) + return squashfs_comp_opts(msblk, buffer, length); + /* * Read decompressor specific options from file system if present */ - if (SQUASHFS_COMP_OPTS(flags)) { - buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } - - actor = squashfs_page_actor_init(&buffer, 1, 0); - if (actor == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } - - length = squashfs_read_data(sb, - sizeof(struct squashfs_super_block), 0, NULL, actor); - - if (length < 0) { - comp_opts = ERR_PTR(length); - goto out; - } + + page = alloc_page(GFP_KERNEL); + if (!page) + return ERR_PTR(-ENOMEM); + + actor = squashfs_page_actor_init(&page, 1, 0, NULL); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto actor_error; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); + + if (length < 0) { + comp_opts = ERR_PTR(length); + goto read_error; } + buffer = kmap_atomic(page); comp_opts = squashfs_comp_opts(msblk, buffer, length); + 
kunmap_atomic(buffer); -out: - kfree(actor); - kfree(buffer); +read_error: + squashfs_page_actor_free(actor, 0); +actor_error: + __free_page(page); return comp_opts; } diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e5c9689062ba..6f5ef8d7e55a 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -47,12 +47,16 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> +#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +// Backported from 4.5 +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) + /* * Locate cache slot in range [offset, index] for specified inode. If * there's more than one return the slot closest to index. @@ -438,6 +442,21 @@ static int squashfs_readpage_fragment(struct page *page) return res; } +static int squashfs_readpages_fragment(struct page *page, + struct list_head *readahead_pages, struct address_space *mapping) +{ + if (!page) { + page = lru_to_page(readahead_pages); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, page->index, + mapping_gfp_constraint(mapping, GFP_KERNEL))) { + put_page(page); + return 0; + } + } + return squashfs_readpage_fragment(page); +} + static int squashfs_readpage_sparse(struct page *page, int index, int file_end) { struct inode *inode = page->mapping->host; @@ -450,54 +469,105 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end) return 0; } -static int squashfs_readpage(struct file *file, struct page *page) +static int squashfs_readpages_sparse(struct page *page, + struct list_head *readahead_pages, int index, int file_end, + struct address_space *mapping) { - struct inode *inode = page->mapping->host; + if (!page) { + page = lru_to_page(readahead_pages); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, page->index, + mapping_gfp_constraint(mapping, GFP_KERNEL))) { + put_page(page); + return 0; + } + } + return squashfs_readpage_sparse(page, index, file_end); +} + +static int __squashfs_readpages(struct file *file, struct page *page, + struct list_head *readahead_pages, unsigned int nr_pages, + struct address_space *mapping) +{ + struct inode *inode = mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int res; - void *pageaddr; - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); + do { + struct page *cur_page = page ? 
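squashfs_readpages_fragment() and squashfs_readpages_sparse() above share one idiom: take the next page off the readahead list, try to add it to the page cache, and quietly drop it when the insert fails because another thread already cached that index. A toy standalone model of the drop-on-collision behaviour (the hash-slot cache is an assumption for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	#define CACHE_SLOTS 8

	static int cache[CACHE_SLOTS];	/* 0 = empty, else page index + 1 */

	/* Toy stand-in for add_to_page_cache_lru(): fails if the slot is taken. */
	static bool cache_insert(int pgidx)
	{
		int slot = pgidx % CACHE_SLOTS;

		if (cache[slot])
			return false;
		cache[slot] = pgidx + 1;
		return true;
	}

	int main(void)
	{
		int readahead[] = { 3, 9, 4, 3 };	/* index 3 arrives twice */
		int i;

		for (i = 0; i < 4; i++) {
			if (!cache_insert(readahead[i])) {
				/* mirrors put_page(page); return 0; */
				printf("page %d already cached, dropped\n",
				       readahead[i]);
				continue;
			}
			printf("page %d inserted, will be read\n", readahead[i]);
		}
		return 0;
	}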
page + : lru_to_page(readahead_pages); + int page_index = cur_page->index; + int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT); + + if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + return 1; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + + if (bsize < 0) + return -1; + + if (bsize == 0) { + res = squashfs_readpages_sparse(page, + readahead_pages, index, file_end, + mapping); + } else { + res = squashfs_readpages_block(page, + readahead_pages, &nr_pages, mapping, + page_index, block, bsize); + } + } else { + res = squashfs_readpages_fragment(page, + readahead_pages, mapping); + } + if (res) + return 0; + page = NULL; + } while (readahead_pages && !list_empty(readahead_pages)); - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; + return 0; +} - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; +static int squashfs_readpage(struct file *file, struct page *page) +{ + int ret; - if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(page->mapping->host)->start); + + get_page(page); + + ret = __squashfs_readpages(file, page, NULL, 1, page->mapping); + if (ret) { + flush_dcache_page(page); + if (ret < 0) + SetPageError(page); else - res = squashfs_readpage_block(page, block, bsize); - } else - res = squashfs_readpage_fragment(page); - - if (!res) - return 0; - -error_out: - SetPageError(page); -out: - pageaddr = kmap_atomic(page); - memset(pageaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(pageaddr); - flush_dcache_page(page); - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); + SetPageUptodate(page); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + unlock_page(page); + put_page(page); + } return 0; } +static int squashfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n", + nr_pages, lru_to_page(pages)->index); + __squashfs_readpages(file, NULL, pages, nr_pages, mapping); + return 0; +} + const struct address_space_operations squashfs_aops = { - .readpage = squashfs_readpage + .readpage = squashfs_readpage, + .readpages = squashfs_readpages, }; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c deleted file mode 100644 index f2310d2a2019..000000000000 --- a/fs/squashfs/file_cache.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. 
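Both the error path in squashfs_readpage() above and the short-read handling elsewhere finish with zero_user_segment() so no stale page contents leak to userspace. A standalone model of that tail-zeroing rule, assuming a 4K page:

	#include <stdio.h>
	#include <string.h>

	#define PAGE_SZ 4096

	int main(void)
	{
		char page[PAGE_SZ];
		int produced = 1000;	/* bytes actually written into the last page */

		memset(page, 'A', PAGE_SZ);	/* stale contents */
		/* equivalent of zero_user_segment(page, produced, PAGE_SZ) */
		memset(page + produced, 0, PAGE_SZ - produced);

		printf("byte[%d]=%d byte[%d]=%d\n",
		       produced - 1, page[produced - 1], produced, page[produced]);
		return 0;
	}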
- */ - -#include <linux/fs.h> -#include <linux/vfs.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/pagemap.h> -#include <linux/mutex.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" -#include "squashfs.h" - -/* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) -{ - struct inode *i = page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int res = buffer->error; - - if (res) - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - else - squashfs_copy_cache(page, buffer, buffer->length, 0); - - squashfs_cache_put(buffer); - return res; -} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 43e7a7eddac0..c97af4c6ccd0 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -13,6 +13,7 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> +#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -20,157 +21,139 @@ #include "squashfs.h" #include "page_actor.h" -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); - -/* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +// Backported from 4.5 +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +static void release_actor_pages(struct page **page, int pages, int error) { - struct inode *inode = target_page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int i; - int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int start_index = target_page->index & ~mask; - int end_index = start_index | mask; - int i, n, pages, missing_pages, bytes, res = -ENOMEM; + for (i = 0; i < pages; i++) { + if (!page[i]) + continue; + flush_dcache_page(page[i]); + if (!error) + SetPageUptodate(page[i]); + else { + SetPageError(page[i]); + zero_user_segment(page[i], 0, PAGE_CACHE_SIZE); + } + unlock_page(page[i]); + put_page(page[i]); + } + kfree(page); +} + +/* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ +static struct squashfs_page_actor *actor_from_page_cache( + unsigned int actor_pages, struct page *target_page, + struct list_head *rpages, unsigned int *nr_pages, int start_index, + struct address_space *mapping) +{ struct page **page; struct squashfs_page_actor *actor; - void *pageaddr; - - if (end_index > file_end) - end_index = file_end; - - pages = end_index - start_index + 1; - - page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); - if (page == NULL) - return res; - - /* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ - actor = squashfs_page_actor_init_special(page, pages, 0); - if (actor == NULL) - goto out; - - /* Try to grab all the pages covered by the Squashfs block */ - for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { - page[i] = (n == target_page->index) ? 
target_page : - grab_cache_page_nowait(target_page->mapping, n); + int i, n; + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + + page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL); + if (!page) + return NULL; + + for (i = 0, n = start_index; i < actor_pages; i++, n++) { + if (target_page == NULL && rpages && !list_empty(rpages)) { + struct page *cur_page = lru_to_page(rpages); + + if (cur_page->index < start_index + actor_pages) { + list_del(&cur_page->lru); + --(*nr_pages); + if (add_to_page_cache_lru(cur_page, mapping, + cur_page->index, gfp)) + put_page(cur_page); + else + target_page = cur_page; + } else + rpages = NULL; + } - if (page[i] == NULL) { - missing_pages++; - continue; + if (target_page && target_page->index == n) { + page[i] = target_page; + target_page = NULL; + } else { + page[i] = grab_cache_page_nowait(mapping, n); + if (page[i] == NULL) + continue; } if (PageUptodate(page[i])) { unlock_page(page[i]); - page_cache_release(page[i]); + put_page(page[i]); page[i] = NULL; - missing_pages++; } } - if (missing_pages) { - /* - * Couldn't get one or more pages, this page has either - * been VM reclaimed, but others are still in the page cache - * and uptodate, or we're racing with another thread in - * squashfs_readpage also trying to grab them. Fall back to - * using an intermediate buffer. - */ - res = squashfs_read_cache(target_page, block, bsize, pages, - page); - if (res < 0) - goto mark_errored; - - goto out; + actor = squashfs_page_actor_init(page, actor_pages, 0, + release_actor_pages); + if (!actor) { + release_actor_pages(page, actor_pages, -ENOMEM); + kfree(page); + return NULL; } - - /* Decompress directly into the page cache buffers */ - res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - if (res < 0) - goto mark_errored; - - /* Last page may have trailing bytes not filled */ - bytes = res % PAGE_CACHE_SIZE; - if (bytes) { - pageaddr = kmap_atomic(page[pages - 1]); - memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); - kunmap_atomic(pageaddr); - } - - /* Mark pages as uptodate, unlock and release */ - for (i = 0; i < pages; i++) { - flush_dcache_page(page[i]); - SetPageUptodate(page[i]); - unlock_page(page[i]); - if (page[i] != target_page) - page_cache_release(page[i]); - } - - kfree(actor); - kfree(page); - - return 0; - -mark_errored: - /* Decompression failed, mark pages as errored. 
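The page range covered by one SquashFS block is pure mask arithmetic: with 128K blocks and 4K pages, 32 pages share a block, so the low five bits of a page index select the page within its block. A standalone check of that arithmetic (both sizes are assumptions for illustration):

	#include <stdio.h>

	int main(void)
	{
		int block_log = 17, page_shift = 12;	/* 128K blocks, 4K pages */
		int mask = (1 << (block_log - page_shift)) - 1;	/* 31 */
		int page_index = 77;

		int start = page_index & ~mask;	/* first page of the enclosing block */
		int end = start | mask;		/* last page of the enclosing block */

		printf("pages %d..%d cover the block holding page %d\n",
		       start, end, page_index);
		return 0;
	}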
Target_page is - * dealt with by the caller - */ - for (i = 0; i < pages; i++) { - if (page[i] == NULL || page[i] == target_page) - continue; - flush_dcache_page(page[i]); - SetPageError(page[i]); - unlock_page(page[i]); - page_cache_release(page[i]); - } - -out: - kfree(actor); - kfree(page); - return res; + return actor; } +int squashfs_readpages_block(struct page *target_page, + struct list_head *readahead_pages, + unsigned int *nr_pages, + struct address_space *mapping, + int page_index, u64 block, int bsize) -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) { - struct inode *i = target_page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; - - if (res) { - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - goto out; - } - - for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { - int avail = min_t(int, bytes, PAGE_CACHE_SIZE); - - if (page[n] == NULL) - continue; + struct squashfs_page_actor *actor; + struct inode *inode = mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int start_index, end_index, file_end, actor_pages, res; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); - unlock_page(page[n]); - if (page[n] != target_page) - page_cache_release(page[n]); + /* + * If readpage() is called on an uncompressed datablock, we can just + * read the pages instead of fetching the whole block. + * This greatly improves the performance when a process keep doing + * random reads because we only fetch the necessary data. + * The readahead algorithm will take care of doing speculative reads + * if necessary. + * We can't read more than 1 block even if readahead provides use more + * pages because we don't know yet if the next block is compressed or + * not. + */ + if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) { + u64 block_end = block + msblk->block_size; + + block += (page_index & mask) * PAGE_CACHE_SIZE; + actor_pages = (block_end - block) / PAGE_CACHE_SIZE; + if (*nr_pages < actor_pages) + actor_pages = *nr_pages; + start_index = page_index; + bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages) + | SQUASHFS_COMPRESSED_BIT_BLOCK); + } else { + file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + start_index = page_index & ~mask; + end_index = start_index | mask; + if (end_index > file_end) + end_index = file_end; + actor_pages = end_index - start_index + 1; } -out: - squashfs_cache_put(buffer); - return res; + actor = actor_from_page_cache(actor_pages, target_page, + readahead_pages, nr_pages, start_index, + mapping); + if (!actor) + return -ENOMEM; + + res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL, + actor); + return res < 0 ? 
res : 0; } diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index c31e2bc9c081..df4fa3c7ddd0 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -94,39 +94,17 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - struct squashfs_lz4 *stream = strm; - void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int res; size_t dest_len = output->length; + struct squashfs_lz4 *stream = strm; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); - buff += avail; - bytes -= avail; - offset = 0; - put_bh(bh[i]); - } - + squashfs_bh_to_buf(bh, b, stream->input, offset, length, + msblk->devblksize); res = lz4_decompress_unknownoutputsize(stream->input, length, stream->output, &dest_len); if (res) return -EIO; - - bytes = dest_len; - data = squashfs_first_page(output); - buff = stream->output; - while (data) { - if (bytes <= PAGE_CACHE_SIZE) { - memcpy(data, buff, bytes); - break; - } - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; - data = squashfs_next_page(output); - } - squashfs_finish_page(output); + squashfs_buf_to_actor(stream->output, output, dest_len); return dest_len; } diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 244b9fbfff7b..2c844d53a59e 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -79,45 +79,19 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - struct squashfs_lzo *stream = strm; - void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int res; size_t out_len = output->length; + struct squashfs_lzo *stream = strm; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); - buff += avail; - bytes -= avail; - offset = 0; - put_bh(bh[i]); - } - + squashfs_bh_to_buf(bh, b, stream->input, offset, length, + msblk->devblksize); res = lzo1x_decompress_safe(stream->input, (size_t)length, stream->output, &out_len); if (res != LZO_E_OK) - goto failed; + return -EIO; + squashfs_buf_to_actor(stream->output, output, out_len); - res = bytes = (int)out_len; - data = squashfs_first_page(output); - buff = stream->output; - while (data) { - if (bytes <= PAGE_CACHE_SIZE) { - memcpy(data, buff, bytes); - break; - } else { - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; - data = squashfs_next_page(output); - } - } - squashfs_finish_page(output); - - return res; - -failed: - return -EIO; + return out_len; } const struct squashfs_decompressor squashfs_lzo_comp_ops = { diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 5a1c11f56441..53863508e400 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -9,39 +9,11 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/buffer_head.h> #include "page_actor.h" -/* - * This file contains implementations of page_actor for decompressing into - * an intermediate buffer, and for decompressing directly into the - * page cache. - * - * Calling code should avoid sleeping between calls to squashfs_first_page() - * and squashfs_finish_page(). 
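With squashfs_bh_to_buf() and squashfs_buf_to_actor() in place, every linear decompressor wrapper above reduces to the same three steps: gather the buffer_heads into the codec's input buffer, run the library call, scatter the output into the page actor. A standalone sketch of that shape, with string copying standing in for the codec:

	#include <stdio.h>
	#include <string.h>

	/* Stand-in for squashfs_bh_to_buf(): gather fragments into one buffer. */
	static void gather(const char **frags, int n, char *in)
	{
		int i;

		for (i = 0; i < n; i++)
			strcat(in, frags[i]);
	}

	/* Stand-in for the codec call (lzo1x_decompress_safe() etc.). */
	static int codec(const char *in, char *out)
	{
		strcpy(out, in);
		return (int)strlen(out);
	}

	int main(void)
	{
		const char *frags[] = { "hel", "lo ", "wor", "ld" };
		char in[64] = "", out[64];
		int len;

		gather(frags, 4, in);
		len = codec(in, out);
		/* squashfs_buf_to_actor(stream->output, output, len) would follow */
		printf("%d bytes: %s\n", len, out);
		return 0;
	}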
- */ - -/* Implementation of page_actor for decompressing into intermediate buffer */ -static void *cache_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->buffer[0]; -} - -static void *cache_next_page(struct squashfs_page_actor *actor) -{ - if (actor->next_page == actor->pages) - return NULL; - - return actor->buffer[actor->next_page++]; -} - -static void cache_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} - -struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, - int pages, int length) +struct squashfs_page_actor *squashfs_page_actor_init(struct page **page, + int pages, int length, void (*release_pages)(struct page **, int, int)) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); @@ -49,52 +21,133 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, return NULL; actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->buffer = buffer; + actor->page = page; actor->pages = pages; actor->next_page = 0; - actor->squashfs_first_page = cache_first_page; - actor->squashfs_next_page = cache_next_page; - actor->squashfs_finish_page = cache_finish_page; + actor->pageaddr = NULL; + actor->release_pages = release_pages; return actor; } -/* Implementation of page_actor for decompressing directly into page cache. */ -static void *direct_first_page(struct squashfs_page_actor *actor) +void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error) +{ + if (!actor) + return; + + if (actor->release_pages) + actor->release_pages(actor->page, actor->pages, error); + kfree(actor); +} + +void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf, + int length) { - actor->next_page = 1; - return actor->pageaddr = kmap_atomic(actor->page[0]); + void *pageaddr; + int pos = 0, avail, i; + + for (i = 0; i < actor->pages && pos < length; ++i) { + avail = min_t(int, length - pos, PAGE_CACHE_SIZE); + if (actor->page[i]) { + pageaddr = kmap_atomic(actor->page[i]); + memcpy(buf + pos, pageaddr, avail); + kunmap_atomic(pageaddr); + } + pos += avail; + } } -static void *direct_next_page(struct squashfs_page_actor *actor) +void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor, + int length) { - if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + void *pageaddr; + int pos = 0, avail, i; + + for (i = 0; i < actor->pages && pos < length; ++i) { + avail = min_t(int, length - pos, PAGE_CACHE_SIZE); + if (actor->page[i]) { + pageaddr = kmap_atomic(actor->page[i]); + memcpy(pageaddr, buf + pos, avail); + kunmap_atomic(pageaddr); + } + pos += avail; + } +} - return actor->pageaddr = actor->next_page == actor->pages ? 
NULL :
-		kmap_atomic(actor->page[actor->next_page++]);
+void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
+	struct squashfs_page_actor *actor, int offset, int length, int blksz)
+{
+	void *kaddr = NULL;
+	int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
+
+	while (bytes < length) {
+		if (actor->page[p]) {
+			kaddr = kmap_atomic(actor->page[p]);
+			while (pgoff < PAGE_CACHE_SIZE && bytes < length) {
+				avail = min_t(int, blksz - offset,
+						PAGE_CACHE_SIZE - pgoff);
+				memcpy(kaddr + pgoff, bh[b]->b_data + offset,
+						avail);
+				pgoff += avail;
+				bytes += avail;
+				offset = (offset + avail) % blksz;
+				if (!offset) {
+					put_bh(bh[b]);
+					++b;
+				}
+			}
+			kunmap_atomic(kaddr);
+			pgoff = 0;
+		} else {
+			for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) {
+				if (bh[b])
+					put_bh(bh[b]);
+				++b;
+			}
+			bytes += PAGE_CACHE_SIZE;
+		}
+		++p;
+	}
 }
-static void direct_finish_page(struct squashfs_page_actor *actor)
+void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
+	int offset, int length, int blksz)
 {
-	if (actor->pageaddr)
-		kunmap_atomic(actor->pageaddr);
+	int i, avail, bytes = 0;
+
+	for (i = 0; i < nr_buffers && bytes < length; ++i) {
+		avail = min_t(int, length - bytes, blksz - offset);
+		if (bh[i]) {
+			memcpy(buf + bytes, bh[i]->b_data + offset, avail);
+			put_bh(bh[i]);
+		}
+		bytes += avail;
+		offset = 0;
+	}
 }
-struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
-	int pages, int length)
+void free_page_array(struct page **page, int nr_pages)
 {
-	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+	int i;
-	if (actor == NULL)
-		return NULL;
+	for (i = 0; i < nr_pages; ++i)
+		__free_page(page[i]);
+	kfree(page);
+}
-	actor->length = length ? : pages * PAGE_CACHE_SIZE;
-	actor->page = page;
-	actor->pages = pages;
-	actor->next_page = 0;
-	actor->pageaddr = NULL;
-	actor->squashfs_first_page = direct_first_page;
-	actor->squashfs_next_page = direct_next_page;
-	actor->squashfs_finish_page = direct_finish_page;
-	return actor;
+struct page **alloc_page_array(int nr_pages, int gfp_mask)
+{
+	int i;
+	struct page **page;
+
+	page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
+	if (!page)
+		return NULL;
+	for (i = 0; i < nr_pages; ++i) {
+		page[i] = alloc_page(gfp_mask);
+		if (!page[i]) {
+			free_page_array(page, i);
+			return NULL;
+		}
+	}
+	return page;
 }
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 26dd82008b82..aa1ed790b5a3 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -5,77 +5,61 @@
  * Phillip Lougher <phillip@squashfs.org.uk>
  *
  * This work is licensed under the terms of the GNU GPL, version 2. See
  * the COPYING file in the top-level directory.
  */
-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
 struct squashfs_page_actor {
-	void	**page;
+	struct page	**page;
+	void	*pageaddr;
 	int	pages;
 	int	length;
 	int	next_page;
+	void	(*release_pages)(struct page **, int, int);
 };
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
-	int pages, int length)
-{
-	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
-	if (actor == NULL)
-		return NULL;
+extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
+	int, int, void (*)(struct page **, int, int));
+extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
-	actor->length = length ?
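alloc_page_array() above uses a tidy all-or-nothing idiom: when allocation fails part-way, it passes the count of pages it did get straight back to free_page_array(). The same idiom in standalone form with malloc/free:

	#include <stdio.h>
	#include <stdlib.h>

	static void free_buf_array(void **buf, int n)
	{
		int i;

		for (i = 0; i < n; i++)
			free(buf[i]);
		free(buf);
	}

	static void **alloc_buf_array(int n, size_t size)
	{
		void **buf = calloc(n, sizeof(*buf));
		int i;

		if (!buf)
			return NULL;
		for (i = 0; i < n; i++) {
			buf[i] = malloc(size);
			if (!buf[i]) {
				free_buf_array(buf, i);	/* free only what we got */
				return NULL;
			}
		}
		return buf;
	}

	int main(void)
	{
		void **buf = alloc_buf_array(4, 4096);

		printf("%s\n", buf ? "allocated" : "failed");
		if (buf)
			free_buf_array(buf, 4);
		return 0;
	}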
: pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - return actor; -} +extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int); +extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int); +extern void squashfs_bh_to_actor(struct buffer_head **, int, + struct squashfs_page_actor *, int, int, int); +extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int, + int); +/* + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { actor->next_page = 1; - return actor->page[0]; + return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0]) + : NULL; } static inline void *squashfs_next_page(struct squashfs_page_actor *actor) { - return actor->next_page == actor->pages ? NULL : - actor->page[actor->next_page++]; -} + if (!IS_ERR_OR_NULL(actor->pageaddr)) + kunmap_atomic(actor->pageaddr); -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} -#else -struct squashfs_page_actor { - union { - void **buffer; - struct page **page; - }; - void *pageaddr; - void *(*squashfs_first_page)(struct squashfs_page_actor *); - void *(*squashfs_next_page)(struct squashfs_page_actor *); - void (*squashfs_finish_page)(struct squashfs_page_actor *); - int pages; - int length; - int next_page; -}; + if (actor->next_page == actor->pages) + return actor->pageaddr = ERR_PTR(-ENODATA); -extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); -extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page - **, int, int); -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_first_page(actor); -} -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_next_page(actor); + actor->pageaddr = actor->page[actor->next_page] ? + kmap_atomic(actor->page[actor->next_page]) : NULL; + ++actor->next_page; + return actor->pageaddr; } + static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { - actor->squashfs_finish_page(actor); + if (!IS_ERR_OR_NULL(actor->pageaddr)) + kunmap_atomic(actor->pageaddr); } -#endif + +extern struct page **alloc_page_array(int, int); +extern void free_page_array(struct page **, int); + #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..6093579c6c5d 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,14 @@ #define WARNING(s, args...) 
pr_warn("SQUASHFS: "s, ## args) /* block.c */ +extern int squashfs_init_read_wq(void); +extern void squashfs_destroy_read_wq(void); extern int squashfs_read_data(struct super_block *, u64, int, u64 *, struct squashfs_page_actor *); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); +extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -70,8 +76,9 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -/* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +/* file_direct.c */ +extern int squashfs_readpages_block(struct page *, struct list_head *, + unsigned int *, struct address_space *, int, u64, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..8a6995de0277 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -49,7 +49,7 @@ struct squashfs_cache_entry { int num_waiters; wait_queue_head_t wait_queue; struct squashfs_cache *cache; - void **data; + struct page **page; struct squashfs_page_actor *actor; }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 5056babe00df..61cd0b39ed0e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -444,9 +444,15 @@ static int __init init_squashfs_fs(void) if (err) return err; + if (!squashfs_init_read_wq()) { + destroy_inodecache(); + return -ENOMEM; + } + err = register_filesystem(&squashfs_fs_type); if (err) { destroy_inodecache(); + squashfs_destroy_read_wq(); return err; } @@ -460,6 +466,7 @@ static void __exit exit_squashfs_fs(void) { unregister_filesystem(&squashfs_fs_type); destroy_inodecache(); + squashfs_destroy_read_wq(); } diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index c609624e4b8a..14cd373e1897 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, struct comp_opts *opts; int err = 0, n; - opts = kmalloc(sizeof(*opts), GFP_KERNEL); + opts = kmalloc(sizeof(*opts), GFP_ATOMIC); if (opts == NULL) { err = -ENOMEM; goto out2; @@ -136,6 +136,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, enum xz_ret xz_err; int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; + void *buf = NULL; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -156,12 +157,20 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (stream->buf.out != NULL) { + if (!IS_ERR(stream->buf.out)) { stream->buf.out_pos = 0; total += PAGE_CACHE_SIZE; } } + if (!stream->buf.out) { + if (!buf) { + buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); + if (!buf) + goto out; + } + stream->buf.out = buf; + } xz_err = xz_dec_run(stream->state, &stream->buf); if (stream->buf.in_pos == stream->buf.in_size && k < b) @@ -173,11 +182,13 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (xz_err != XZ_STREAM_END || k < b) goto out; + kfree(buf); return total + stream->buf.out_pos; out: for (; k < b; k++) put_bh(bh[k]); + kfree(buf); return -EIO; } diff --git 
a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 8727caba6882..09c892b5308e 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -66,6 +66,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { + void *buf = NULL; int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; @@ -84,10 +85,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (stream->next_out != NULL) + if (!IS_ERR(stream->next_out)) stream->avail_out = PAGE_CACHE_SIZE; } + if (!stream->next_out) { + if (!buf) { + buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); + if (!buf) + goto out; + } + stream->next_out = buf; + } + if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { @@ -115,11 +125,13 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (k < b) goto out; + kfree(buf); return stream->total_out; out: for (; k < b; k++) put_bh(bh[k]); + kfree(buf); return -EIO; } diff --git a/fs/sync.c b/fs/sync.c index dd5d1711c7ac..452179e31c39 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -218,6 +218,7 @@ static int do_fsync(unsigned int fd, int datasync) if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); + inc_syscfs(current); } return ret; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 1aabfda669b0..7183b7ea065b 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -299,6 +299,14 @@ xfs_dinode_verify( if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; + /* don't allow invalid i_size */ + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + return false; + + /* No zero-length symlinks. */ + if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) + return false; + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return true; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index dbae6490a79a..832764ee035a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1713,6 +1713,7 @@ xfs_swap_extents( xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; + xfs_extnum_t nextents; int src_log_flags, target_log_flags; int error = 0; int aforkblks = 0; @@ -1899,7 +1900,8 @@ xfs_swap_extents( * pointer. Otherwise it's already NULL or * pointing to the extent. */ - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } @@ -1918,7 +1920,8 @@ xfs_swap_extents( * pointer. Otherwise it's already NULL or * pointing to the extent. 
*/ - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eb1b8c8acfcb..8146b0cf20ce 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -375,6 +375,7 @@ retry: out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 4b4b056a6eb0..5148150cc80b 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -1,6 +1,8 @@ #ifndef __ASM_MEMORY_MODEL_H #define __ASM_MEMORY_MODEL_H +#include <linux/pfn.h> + #ifndef __ASSEMBLY__ #if defined(CONFIG_FLATMEM) @@ -72,7 +74,7 @@ /* * Convert a physical address to a Page Frame Number and back */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) +#define __phys_to_pfn(paddr) PHYS_PFN(paddr) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define page_to_pfn __page_to_pfn diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index c9fe145f7dd3..04661e1fb625 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -202,6 +202,9 @@ int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, struct crypto_aead *tfm, unsigned int blocksize); +int blkcipher_ablkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_ablkcipher *tfm); int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err); diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index da2530e34b26..230760aef93b 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -43,7 +43,7 @@ --------------------------------------------------------------------------- Issue Date: 31/01/2006 - An implementation of field multiplication in Galois Field GF(128) + An implementation of field multiplication in Galois Field GF(2^128) */ #ifndef _CRYPTO_GF128MUL_H @@ -65,7 +65,7 @@ * are left and the lsb's are right. char b[16] is an array and b[0] is * the first octet. * - * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 + * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 * b[0] b[1] b[2] b[3] b[13] b[14] b[15] * * Every bit is a coefficient of some power of X. We can store the bits @@ -99,21 +99,21 @@ * * bbe on a little endian machine u32 x[4]: * - * MS x[0] LS MS x[1] LS + * MS x[0] LS MS x[1] LS * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 * - * MS x[2] LS MS x[3] LS + * MS x[2] LS MS x[3] LS * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 * * ble on a little endian machine * - * MS x[0] LS MS x[1] LS + * MS x[0] LS MS x[1] LS * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 * - * MS x[2] LS MS x[3] LS + * MS x[2] LS MS x[3] LS * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 * @@ -127,7 +127,7 @@ * machines this will automatically aligned to wordsize and on a 64-bit * machine also. */ -/* Multiply a GF128 field element by x. Field elements are held in arrays +/* Multiply a GF128 field element by x. 
Field elements are held in arrays of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower indexed bits placed in the more numerically significant bit positions within bytes. @@ -135,62 +135,65 @@ On little endian machines the bit indexes translate into the bit positions within four 32-bit words in the following way - MS x[0] LS MS x[1] LS + MS x[0] LS MS x[1] LS ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 - MS x[2] LS MS x[3] LS + MS x[2] LS MS x[3] LS ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 On big endian machines the bit indexes translate into the bit positions within four 32-bit words in the following way - MS x[0] LS MS x[1] LS + MS x[0] LS MS x[1] LS ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 - MS x[2] LS MS x[3] LS + MS x[2] LS MS x[3] LS ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 */ -/* A slow generic version of gf_mul, implemented for lle and bbe - * It multiplies a and b and puts the result in a */ +/* A slow generic version of gf_mul, implemented for lle, bbe, and ble. + * It multiplies a and b and puts the result in a + */ void gf128mul_lle(be128 *a, const be128 *b); - void gf128mul_bbe(be128 *a, const be128 *b); +void gf128mul_ble(be128 *a, const be128 *b); -/* multiply by x in ble format, needed by XTS */ +/* multiply by x in ble format, needed by XTS and HEH */ void gf128mul_x_ble(be128 *a, const be128 *b); /* 4k table optimization */ - struct gf128mul_4k { be128 t[256]; }; struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); +struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g); void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { - kfree(t); + kzfree(t); } -/* 64k table optimization, implemented for lle and bbe */ +/* 64k table optimization, implemented for lle, ble, and bbe */ struct gf128mul_64k { struct gf128mul_4k *t[16]; }; -/* first initialize with the constant factor with which you - * want to multiply and then call gf128_64k_lle with the other - * factor in the first argument, the table in the second and a - * scratch register in the third. Afterwards *a = *r. */ +/* First initialize with the constant factor with which you + * want to multiply and then call gf128mul_64k_bbe with the other + * factor in the first argument, and the table in the second. + * Afterwards, the result is stored in *a. 
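gf128mul_x_ble() is the multiply-by-x primitive that XTS (and HEH) use to step the tweak: shift the 128-bit element up one bit and, when a bit falls off the top, fold it back in with the reduction polynomial x^128 + x^7 + x^2 + x + 1, i.e. the constant 0x87. A standalone version over an explicit lo/hi pair; the kernel's be128 word ordering is deliberately abstracted away here:

	#include <stdint.h>
	#include <stdio.h>

	struct u128 { uint64_t lo, hi; };

	/* Multiply by x in GF(2^128), reduction polynomial 0x87. */
	static struct u128 mul_x(struct u128 v)
	{
		struct u128 r;
		uint64_t carry = v.hi >> 63;	/* bit 127 about to fall off */

		r.hi = (v.hi << 1) | (v.lo >> 63);
		r.lo = (v.lo << 1) ^ (carry ? 0x87 : 0);
		return r;
	}

	int main(void)
	{
		struct u128 t = { 1, 0 };	/* the field element "1" */
		int i;

		for (i = 0; i < 130; i++)	/* passes x^128, exercising reduction */
			t = mul_x(t);
		printf("x^130 = %016llx%016llx\n",
		       (unsigned long long)t.hi, (unsigned long long)t.lo);
		return 0;
	}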
+ */ struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 3b4af1d7c7e9..476d99d0edb7 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -102,6 +102,8 @@ int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst); void shash_free_instance(struct crypto_instance *inst); +int crypto_grab_shash(struct crypto_shash_spawn *spawn, + const char *name, u32 type, u32 mask); int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, struct shash_alg *alg, struct crypto_instance *inst); @@ -111,6 +113,12 @@ static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) crypto_drop_spawn(&spawn->base); } +static inline struct shash_alg *crypto_spawn_shash_alg( + struct crypto_shash_spawn *spawn) +{ + return container_of(spawn->base.alg, struct shash_alg, base); +} + struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 702b6c53c12f..2043af2141c1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,8 +161,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); - void (*d_canonical_path)(const struct path *, struct path *); struct dentry *(*d_real)(struct dentry *, struct inode *); + void (*d_canonical_path)(const struct path *, struct path *); } ____cacheline_aligned; /* diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf60..68449293c4b6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1129d68cb6cc..cc69810fcc32 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; @@ -36,6 +37,7 @@ struct ipv6_devconf { __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO + __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif diff --git a/include/linux/log2.h b/include/linux/log2.h index fd7ff3d91e6a..f38fae23bdac 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -16,12 +16,6 @@ #include <linux/bitops.h> /* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - -/* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented * more efficiently than using fls() and fls64() @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 
4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 31cc6f40baa5..8e76f46adde3 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -131,6 +131,9 @@ struct mmc_ext_csd { #define MMC_BKOPS_URGENCY_MASK 0x3 u8 raw_bkops_status; /* 246 */ u8 raw_sectors[4]; /* 212 - 4 bytes */ + u8 pre_eol_info; /* 267 */ + u8 device_life_time_est_typ_a; /* 268 */ + u8 device_life_time_est_typ_b; /* 269 */ u8 cmdq_depth; /* 307 */ u8 cmdq_support; /* 308 */ u8 barrier_support; /* 486 */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9cd5afaef626..307a8e4be91f 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -282,6 +282,9 @@ struct _mmc_csd { #define EXT_CSD_CACHE_SIZE 249 /* RO, 4 bytes */ #define EXT_CSD_PWR_CL_DDR_200_360 253 /* RO */ #define EXT_CSD_FIRMWARE_VERSION 254 /* RO, 8 bytes */ +#define EXT_CSD_PRE_EOL_INFO 267 /* RO */ +#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A 268 /* RO */ +#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B 269 /* RO */ #define EXT_CSD_CMDQ_DEPTH 307 /* RO */ #define EXT_CSD_CMDQ_SUPPORT 308 /* RO */ #define EXT_CSD_BARRIER_SUPPORT 486 /* RO */ diff --git a/include/linux/pfn.h b/include/linux/pfn.h index 7646637221f3..97f3e88aead4 100644 --- a/include/linux/pfn.h +++ b/include/linux/pfn.h @@ -9,5 +9,6 @@ #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b56a62a0a58..9cb0c1712792 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3446,6 +3446,11 @@ static inline void inc_syscw(struct task_struct *tsk) { tsk->ioac.syscw++; } + +static inline void inc_syscfs(struct task_struct *tsk) +{ + tsk->ioac.syscfs++; +} #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { @@ -3462,6 +3467,9 @@ static inline void inc_syscr(struct task_struct *tsk) static inline void inc_syscw(struct task_struct *tsk) { } +static inline void inc_syscfs(struct task_struct *tsk) +{ +} #endif #ifndef TASK_SIZE_OF diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index bdf855c2856f..2dd338fdf881 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h @@ -18,6 +18,8 @@ struct task_io_accounting { u64 syscr; /* # of write syscalls */ u64 syscw; + /* # of fsync syscalls */ + u64 syscfs; #endif /* CONFIG_TASK_XACCT */ #ifdef CONFIG_TASK_IO_ACCOUNTING diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h index 4d090f9ee608..1b505c804af3 100644 --- a/include/linux/task_io_accounting_ops.h +++ b/include/linux/task_io_accounting_ops.h @@ -96,6 +96,7 @@ static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, dst->wchar += src->wchar; dst->syscr += src->syscr; dst->syscw += src->syscw; + dst->syscfs += src->syscfs; } #else static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, diff --git a/include/linux/usb.h b/include/linux/usb.h index 7e6e4665212f..a55f127d6836 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -371,14 +371,13 @@ struct usb_bus { int devnum_next; /* Next open device number in * round-robin allocation */ + 
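The log2.h hunk above drops the ____ilog2_NaN() link-error trick: a compile-time-constant ilog2(0) or ilog2(1) now simply folds to 0 via the new (n) < 2 case, matching what the fls()-based runtime helpers return for 1. A quick userspace approximation of the macro's behaviour for nonzero inputs:

	#include <stdio.h>

	/* Userspace stand-in for the kernel's ilog2() on nonzero 32-bit values. */
	static int ilog2_u32(unsigned int n)
	{
		return 31 - __builtin_clz(n);	/* equivalent to fls(n) - 1 */
	}

	int main(void)
	{
		printf("ilog2(1)=%d ilog2(2)=%d ilog2(3)=%d ilog2(4096)=%d\n",
		       ilog2_u32(1), ilog2_u32(2), ilog2_u32(3), ilog2_u32(4096));
		return 0;
	}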
struct mutex devnum_next_mutex; /* devnum_next mutex */ struct usb_devmap devmap; /* device address allocation map */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ struct list_head bus_list; /* list of busses */ - struct mutex usb_address0_mutex; /* unaddressed device mutex */ - int bandwidth_allocated; /* on this bus: how much of the time * reserved for periodic (intr/iso) * requests is used, on average? diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index cef429cf3dce..e888eb9a2eb9 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -180,6 +180,7 @@ struct usb_hcd { * bandwidth_mutex should be dropped after a successful control message * to the device, or resetting the bandwidth after a failed attempt. */ + struct mutex *address0_mutex; struct mutex *bandwidth_mutex; struct usb_hcd *shared_hcd; struct usb_hcd *primary_hcd; diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e4..de2a722fe3cf 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3275ddf9f00d..d540657819ef 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,9 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) +#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 1c8b6820b694..515352c6280a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -201,6 +201,7 @@ struct inet6_dev { struct ipv6_devstat stats; struct timer_list rs_timer; + __s32 rs_interval; /* in jiffies */ __u8 rs_probes; __u8 addr_gen_mode; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 4d1c46aac331..c7b1dc713cdd 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ diff --git a/include/trace/events/android_fs_template.h b/include/trace/events/android_fs_template.h index 4e61ffe7a814..b23d17b56c63 100644 --- a/include/trace/events/android_fs_template.h +++ b/include/trace/events/android_fs_template.h @@ -18,11 +18,18 @@ DECLARE_EVENT_CLASS(android_fs_data_start_template, ), TP_fast_assign( { + /* + * Replace the spaces in filenames and cmdlines + * because they screw up the tooling that parses + * the traces.
+ */ __assign_str(pathbuf, pathname); + (void)strreplace(__get_str(pathbuf), ' ', '_'); __entry->offset = offset; __entry->bytes = bytes; __entry->i_size = i_size_read(inode); __assign_str(cmdline, command); + (void)strreplace(__get_str(cmdline), ' ', '_'); __entry->pid = pid; __entry->ino = inode->i_ino; } diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 51f891fb1b18..7668b5791c91 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -132,6 +132,7 @@ enum { /* struct binder_fd_array_object - object describing an array of fds in a buffer * @hdr: common header structure + * @pad: padding to ensure correct alignment * @num_fds: number of file descriptors in the buffer * @parent: index in offset array to buffer holding the fd array * @parent_offset: start offset of fd array in the buffer @@ -152,6 +153,7 @@ enum { */ struct binder_fd_array_object { struct binder_object_header hdr; + __u32 pad; binder_size_t num_fds; binder_size_t parent; binder_size_t parent_offset; diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h index b04000a2296a..2b65efd19a46 100644 --- a/include/uapi/linux/hw_breakpoint.h +++ b/include/uapi/linux/hw_breakpoint.h @@ -4,7 +4,11 @@ enum { HW_BREAKPOINT_LEN_1 = 1, HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_3 = 3, HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_5 = 5, + HW_BREAKPOINT_LEN_6 = 6, + HW_BREAKPOINT_LEN_7 = 7, HW_BREAKPOINT_LEN_8 = 8, }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 35bf6f1b186d..f0c24f24f0c3 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,6 +176,16 @@ enum { DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_ACCEPT_RA_PREFIX_ROUTE, + DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + DEVCONF_DROP_UNSOLICITED_NA, + DEVCONF_KEEP_ADDR_ON_DOWN, + DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, + DEVCONF_ENHANCED_DAD, + DEVCONF_ADDR_GEN_MODE, + DEVCONF_DISABLE_POLICY, + DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_MAX }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index ee4581d652f4..a9d1772199bf 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -107,8 +107,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 @@ -134,6 +136,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -145,6 +148,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif diff --git 
a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6f..0c5d5dd61b6a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 33c6cde783ce..01eb22ca6b3d 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -571,6 +571,7 @@ enum { NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, + NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, __NET_IPV6_MAX }; diff --git a/kernel/events/core.c b/kernel/events/core.c index f9c6f554460e..06db8b64129d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9455,7 +9455,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -9471,7 +9471,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -9499,6 +9499,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); diff --git a/kernel/fork.c b/kernel/fork.c index 75573eeb49b2..622571d2a833 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -333,13 +333,14 @@ void set_task_stack_end_magic(struct task_struct *tsk) *stackend = STACK_END_MAGIC; /* for overflow detection */ } -static struct task_struct *dup_task_struct(struct task_struct *orig) +static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; struct thread_info *ti; - int node = tsk_fork_get_node(orig); int err; + if (node == NUMA_NO_NODE) + node = tsk_fork_get_node(orig); tsk = alloc_task_struct_node(node); if (!tsk) return NULL; @@ -1276,7 +1277,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, int __user *child_tidptr, struct pid *pid, int trace, - unsigned long tls) + unsigned long tls, + int node) { int retval; struct task_struct *p; @@ -1329,7 +1331,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; retval = -ENOMEM; - p = dup_task_struct(current); + p = dup_task_struct(current, node); if (!p) goto fork_out; @@ -1706,7 +1708,8 @@ static inline void init_idle_pids(struct pid_link *links) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0); + task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, + cpu_to_node(cpu)); if (!IS_ERR(task)) { init_idle_pids(task->pids); init_idle(task, cpu, false); @@ -1751,7 +1754,7 @@ long _do_fork(unsigned long clone_flags, } p = copy_process(clone_flags, stack_start, stack_size, - child_tidptr, NULL, trace, tls); + child_tidptr, NULL, trace, tls, NUMA_NO_NODE); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. 
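The kernel/fork.c hunk above threads an explicit NUMA node id from fork_idle() through copy_process() into dup_task_struct(), so each idle task can be allocated on its own CPU's memory node, while the ordinary _do_fork() path passes NUMA_NO_NODE and keeps the old tsk_fork_get_node() behavior. A minimal user-space sketch of that fallback pattern follows; the helper names are illustrative, not kernel API:

#include <stdio.h>

#define NUMA_NO_NODE (-1)	/* same sentinel value the kernel uses */

/* Illustrative stand-in for tsk_fork_get_node(): derive a preferred
 * node from the parent when the caller expressed no preference. */
static int parent_preferred_node(int parent_node)
{
	return parent_node;
}

/* Mirrors the dup_task_struct() change: honor an explicit node when
 * the caller (fork_idle) supplies one, otherwise fall back to the
 * parent's preference, as before the patch. */
static int pick_alloc_node(int requested_node, int parent_node)
{
	if (requested_node == NUMA_NO_NODE)
		return parent_preferred_node(parent_node);
	return requested_node;
}

int main(void)
{
	/* _do_fork() path: no explicit placement requested */
	printf("fork: node %d\n", pick_alloc_node(NUMA_NO_NODE, 2));
	/* fork_idle(cpu) path: allocate on cpu_to_node(cpu), say node 1 */
	printf("fork_idle: node %d\n", pick_alloc_node(1, 2));
	return 0;
}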
diff --git a/kernel/futex.c b/kernel/futex.c index beb042dcc332..af29863f3349 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2690,7 +2690,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2774,6 +2773,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -2782,6 +2783,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2805,18 +2808,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. 
We could restart this syscall, but diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 685ae83b2bfa..3d55ec89c400 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1843,12 +1843,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) #ifdef CONFIG_SMP if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); -#else +#endif if (dl_task(rq->curr)) check_preempt_curr_dl(rq, p, 0); else resched_curr(rq); -#endif } } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8d5353906c8d..4d96380b35e8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6060,7 +6060,7 @@ long group_norm_util(struct energy_env *eenv, struct sched_group *sg) } static int find_new_capacity(struct energy_env *eenv, - const struct sched_group_energy const *sge) + const struct sched_group_energy * const sge) { int idx; unsigned long util = group_max_util(eenv); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 07b2c63e4983..29345ed74069 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2425,10 +2425,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) #ifdef CONFIG_SMP if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); -#else +#endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) resched_curr(rq); -#endif /* CONFIG_SMP */ } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 75500042fd32..276a2387f06f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1011,7 +1011,7 @@ struct sched_group { unsigned int group_weight; struct sched_group_capacity *sgc; - const struct sched_group_energy const *sge; + const struct sched_group_energy *sge; /* * The CPUs this group covers. diff --git a/mm/percpu.c b/mm/percpu.c index 1f376bce413c..ef6353f0adbd 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1012,8 +1012,11 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); } - if (chunk != pcpu_reserved_chunk) + if (chunk != pcpu_reserved_chunk) { + spin_lock_irqsave(&pcpu_lock, flags); pcpu_nr_empty_pop_pages -= occ_pages; + spin_unlock_irqrestore(&pcpu_lock, flags); + } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f7fba74108a9..e24754a0e052 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7ddbe7ec81d6..97fc19f001bf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -516,21 +516,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. 
*/ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -901,12 +886,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .priority = NF_BR_PRI_BRNF, }, { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, - { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ddc3573894b0..bc95e48d5cfb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1265,7 +1265,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; ret = set_primary_affinity(map, osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); if (ret) diff --git a/net/core/dev.c b/net/core/dev.c index 51aed87e8eec..2587d7f30191 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1679,27 +1679,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5dd643d524d6..fedcee8263b6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3704,13 +3704,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, if (!skb_may_tx_timestamp(sk, false)) return; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. 
+ */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); @@ -3761,7 +3762,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3771,14 +3772,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); diff --git a/net/core/sock.c b/net/core/sock.c index a84a154cdf0c..4efaa3b6633d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1463,6 +1463,11 @@ static void __sk_destruct(struct rcu_head *head) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -1564,6 +1569,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. + */ + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; @@ -2706,11 +2717,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0759f5b9180e..6467bf392e1b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. 
*/ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 27c4e81efa24..8113ad58fcb4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1994f8af646b..68eed344b471 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } @@ -145,6 +146,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -159,7 +167,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -185,20 +193,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; - - return inet_csk_complete_hashdance(sk, child, req, own_req); + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -249,6 +257,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7e30c7b50a28..a353d1d92f01 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1081,7 +1081,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + if (skb->len < nlmsg_total_size(sizeof(*frn)) || + skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ace04d14e30..9af0f461b00d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1963,6 +1963,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. 
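The one-line net/ipv4/route.c change above masks the caller-supplied TOS with IPTOS_RT_MASK before the input route lookup, so the two low-order bits of the field, which ECN reuses, can no longer steer packets onto different routes. A stand-alone sketch of the masking; the 0x1c value matches the kernel's IPTOS_TOS_MASK & ~3, and the program is only an illustration:

#include <stdio.h>

/* Kernel definition: IPTOS_RT_MASK == (IPTOS_TOS_MASK & ~3) == 0x1c,
 * i.e. the RFC 1349 TOS bits with the two ECN bits cleared. */
#define IPTOS_RT_MASK 0x1c

int main(void)
{
	unsigned char not_ect = 0x10;		/* lowdelay, Not-ECT */
	unsigned char ce      = 0x10 | 0x03;	/* lowdelay, ECN CE  */

	/* After the ip_route_input_noref() change both values are masked
	 * before the fib lookup, so an ECN marking alone can no longer
	 * select a different route: both lines print 0x10. */
	printf("%#x -> %#x\n", not_ect, not_ect & IPTOS_RT_MASK);
	printf("%#x -> %#x\n", ce, ce & IPTOS_RT_MASK);
	return 0;
}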
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e7e227f6760f..3f87c731477f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5437,6 +5437,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5649,7 +5650,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 51a77e20f6c6..1660613ddae4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -271,10 +271,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -420,7 +423,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9475a2748a9a..019db68bdb9f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -472,6 +472,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); newtp->rtt_min[0].rtt = ~0U; newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_time_stamp; newtp->packets_out = 0; newtp->retrans_out = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ce20968de667..4aef80d30fab 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -257,7 +257,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -538,7 +539,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01455f492e17..3d72aeffa3f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 
2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -202,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -233,6 +256,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -248,6 +272,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -3502,7 +3527,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3511,11 +3536,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? 
idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3743,7 +3770,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3765,10 +3792,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4685,6 +4713,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -4706,6 +4736,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -4893,7 +4924,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -4918,8 +4949,10 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... 
*/ @@ -5557,6 +5590,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec_jiffies, }, { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), @@ -5666,6 +5706,13 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_ROUTE_INFO { + .procname = "accept_ra_rt_info_min_plen", + .data = &ipv6_devconf.accept_ra_rt_info_min_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 34cf46d74554..85bf86458706 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -903,6 +903,8 @@ add: ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd96a01032a2..cf90a9bf26a3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -742,13 +742,14 @@ slow_path: * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -793,6 +794,10 @@ slow_path: */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. 
*/ diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 24fb9c0efd00..5b7433887eda 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -681,6 +681,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 84afb9a77278..3452f9037ad4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1358,6 +1358,8 @@ skip_linkparms: if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 108b39967694..8532768b4eaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -377,10 +377,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 445b7cd0826a..48ab93842322 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); - return -1; + return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 881bc2072809..52cfc4478511 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1567,6 +1567,7 @@ static void mpls_net_exit(struct net *net) for (index = 0; index < platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); RCU_INIT_POINTER(platform_label[index], NULL); + mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } rtnl_unlock(); diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c index ececa65868ef..e0d8e9ad315b 100644 --- a/net/netfilter/xt_qtaguid.c +++ b/net/netfilter/xt_qtaguid.c @@ -1802,8 +1802,11 @@ ret_res: } #ifdef DDEBUG -/* This function is not in xt_qtaguid_print.c because of locks visibility */ -static void prdebug_full_state(int indent_level, const char *fmt, ...) +/* + * This function is not in xt_qtaguid_print.c because of locks visibility. + * The lock of sock_tag_list must be acquired before calling this function. + */ +static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) { va_list args; char *fmt_buff; @@ -1824,16 +1827,12 @@ static void prdebug_full_state(int indent_level, const char *fmt, ...)
kfree(buff); va_end(args); - spin_lock_bh(&sock_tag_list_lock); prdebug_sock_tag_tree(indent_level, &sock_tag_tree); - spin_unlock_bh(&sock_tag_list_lock); - spin_lock_bh(&sock_tag_list_lock); spin_lock_bh(&uid_tag_data_tree_lock); prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); spin_unlock_bh(&uid_tag_data_tree_lock); - spin_unlock_bh(&sock_tag_list_lock); spin_lock_bh(&iface_stat_list_lock); prdebug_iface_stat_list(indent_level, &iface_stat_list); @@ -1842,7 +1841,7 @@ static void prdebug_full_state(int indent_level, const char *fmt, ...) pr_debug("qtaguid: %s(): }\n", __func__); } #else -static void prdebug_full_state(int indent_level, const char *fmt, ...) {} +static void prdebug_full_state_locked(int indent_level, const char *fmt, ...) {} #endif struct proc_ctrl_print_info { @@ -1965,8 +1964,11 @@ static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v) (u64)atomic64_read(&qtu_events.match_no_sk), (u64)atomic64_read(&qtu_events.match_no_sk_file)); - /* Count the following as part of the last item_index */ - prdebug_full_state(0, "proc ctrl"); + /* Count the following as part of the last item_index. No need + * to lock the sock_tag_list here since it is already locked when + * starting the seq_file operation + */ + prdebug_full_state_locked(0, "proc ctrl"); } return 0; @@ -2875,8 +2877,10 @@ static int qtudev_release(struct inode *inode, struct file *file) sock_tag_tree_erase(&st_to_free_tree); - prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, + spin_lock_bh(&sock_tag_list_lock); + prdebug_full_state_locked(0, "%s(): pid=%u tgid=%u", __func__, current->pid, current->tgid); + spin_unlock_bh(&sock_tag_list_lock); return 0; } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. 
- config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f4dd706c6cd1..96fe1c103bf9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!PAGE_ALIGNED(req->nm_block_size)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - 
flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, ring->head); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_has_valid_frame(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_VALID) - return true; - pos = pos != 0 ? pos - 1 : ring->frame_max; - } while (pos != head); - - return false; -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - /* We could already have received frames in the normal receive - * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, - * so if mask contains pollin/etc already, there's no point - * walking the ring. 
- */ - if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - if (netlink_has_valid_frame(&nlk->rx_ring)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct scm_cookie *scm) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring 
= &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -927,18 +325,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1185,9 +571,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_destruct = netlink_sock_destruct; @@ -1725,8 +1108,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + test_bit(NETLINK_S_CONGESTED, &nlk->state))) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1764,14 +1146,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if 
(netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1795,9 +1170,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1877,71 +1249,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, unsigned int ldiff, u32 dst_portid, gfp_t gfp_mask) { -#ifdef CONFIG_NETLINK_MMAP - unsigned int maxlen, linear_size; - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - /* We need to account the full linear size needed as a ring - * slot cannot have non-linear parts. - */ - linear_size = size + ldiff; - if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < linear_size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif return alloc_skb(size, gfp_mask); } EXPORT_SYMBOL_GPL(__netlink_alloc_skb); @@ -2222,8 +1529,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2276,25 +1582,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ case NETLINK_LISTEN_ALL_NSID: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) return -EPERM; @@ -2464,18 +1751,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) smp_rmb(); } - /* It's a really convoluted way for userland to ask for mmaped - * sendmsg(), but that's what we've got... 
- */ - if (netlink_tx_is_mmaped(sk) && - iter_is_iovec(&msg->msg_iter) && - msg->msg_iter.nr_segs == 1 && - msg->msg_iter.iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - &scm); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2792,8 +2067,7 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2882,16 +2156,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. - */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -3235,7 +2500,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3243,7 +2508,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index b0c1ddc97260..15e62973cfc6 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -45,12 +45,6 @@ struct netlink_sock { int (*netlink_bind)(struct net *net, int group); void (*netlink_unbind)(struct net *net, int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; struct work_struct work; @@ -61,15 +55,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 3ee63a3cff30..8dd836a8dd60 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -8,41 +8,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); 
- mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - nlmsg_end(skb, nlh); return 0; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1bd4a45ca2d..d26b28def310 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: - SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, nla_get_in6_addr(a), is_mask); ipv6 = true; break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d805cd577a60..3975ac809934 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 06e7c4a37245..694a06f1e0d5 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -820,10 +820,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -840,7 +838,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(a.ops->owner); err_out: -noflush_out: kfree_skb(skb); return err; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index bb41699c6c49..7ecb14f3db54 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -109,6 +109,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + if (!tb[TCA_CONNMARK_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(parm->index, a, bind)) { diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6a0d48525fcf..c36757e72844 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) @@ -341,6 +342,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). 
+ */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -350,14 +359,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index adc62ec5507a..074cf0d25558 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -509,21 +509,17 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, { int err; - rtnl_lock(); - if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); - if (IS_ERR(*wdev)) { - err = PTR_ERR(*wdev); - goto out_unlock; - } + if (IS_ERR(*wdev)) + return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; @@ -533,10 +529,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -547,21 +541,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - if (!*wdev) { - err = -ENODEV; - goto out_unlock; - } + if (!*wdev) + return -ENODEV; } return 0; - out_unlock: - rtnl_unlock(); - return err; -} - -static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) -{ - rtnl_unlock(); } /* IE validation */ @@ -4278,9 +4262,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!wdev->netdev) { err = -EINVAL; @@ -4316,7 +4301,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5033,9 +5018,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -5069,7 +5055,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5229,9 +5215,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -5264,7 +5251,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -7333,9 +7320,12 @@ static int nl80211_dump_scan(struct sk_buff 
*skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); - if (err) + if (err) { + rtnl_unlock(); return err; + } wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -7358,7 +7348,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return skb->len; } @@ -7442,9 +7432,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + rtnl_lock(); res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) - return res; + goto out_err; /* prepare_wdev_dump parsed the attributes */ radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -7485,7 +7476,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return res; } @@ -10561,17 +10552,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, void *data = NULL; unsigned int data_len = 0; - rtnl_lock(); - if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -10592,13 +10579,11 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || - !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { - err = -EINVAL; - goto out_unlock; - } + !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); @@ -10607,10 +10592,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_unlock; - } + if (IS_ERR(*rdev)) + return PTR_ERR(*rdev); vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -10623,19 +10606,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (!vcmd->dumpit) + return -EOPNOTSUPP; vcmd_idx = i; break; } - if (vcmd_idx < 0) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (vcmd_idx < 0) + return -EOPNOTSUPP; if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -10652,9 +10631,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, /* keep rtnl locked in successful case */ return 0; - out_unlock: - rtnl_unlock(); - return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -10669,9 +10645,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, int err; struct nlattr *vendor_data; + rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; @@ -10680,18 +10657,26 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { - if 
(!wdev) - return -EINVAL; + if (!wdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && - !wdev->netdev) - return -EINVAL; + !wdev->netdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) - return -ENETDOWN; + !netif_running(wdev->netdev)) { + err = -ENETDOWN; + goto out; + } + if (!wdev->netdev && !wdev->p2p_started) { + err = -ENETDOWN; + goto out; + } } } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cf0193b74ae3..d5b4ac7bf0d8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3027,6 +3027,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3043,11 +3048,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 805681a7d356..7a5a64e70b4d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. 
*/ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 58e79e02f217..c67f9c212dd1 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1921,6 +1921,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client, info.output_pool != client->pool->size)) { if (snd_seq_write_pool_allocated(client)) { /* remove all existing cells */ + snd_seq_pool_mark_closing(client->pool); snd_seq_queue_client_leave_cells(client->number); snd_seq_pool_done(client->pool); } diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 86240d02b530..3f4efcb85df5 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -70,6 +70,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo) return; *fifo = NULL; + if (f->pool) + snd_seq_pool_mark_closing(f->pool); + snd_seq_fifo_clear(f); /* wake up clients if any */ diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index dfa5156f3585..5847c4475bf3 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -414,6 +414,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool) return 0; } +/* refuse the further insertion to the pool */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool) +{ + unsigned long flags; + + if (snd_BUG_ON(!pool)) + return; + spin_lock_irqsave(&pool->lock, flags); + pool->closing = 1; + spin_unlock_irqrestore(&pool->lock, flags); +} + /* remove events */ int snd_seq_pool_done(struct snd_seq_pool *pool) { @@ -424,10 +436,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool) return -EINVAL; /* wait for closing all threads */ - spin_lock_irqsave(&pool->lock, flags); - pool->closing = 1; - spin_unlock_irqrestore(&pool->lock, flags); - if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); @@ -484,6 +492,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool) *ppool = NULL; if (pool == NULL) return 0; + snd_seq_pool_mark_closing(pool); snd_seq_pool_done(pool); kfree(pool); return 0; diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h index 4a2ec779b8a7..32f959c17786 100644 --- a/sound/core/seq/seq_memory.h +++ b/sound/core/seq/seq_memory.h @@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool) int snd_seq_pool_init(struct snd_seq_pool *pool); /* done pool - free events */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool); int snd_seq_pool_done(struct snd_seq_pool *pool); /* create pool */ diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index ab4cdab5cfa5..79edd88d5cd0 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); } else { dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cf0785ddbd14..1d4f34379f56 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6040,6 +6040,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC295_STANDARD_PINS, {0x17, 0x21014040}, {0x18, 0x21a19050}), 
+	SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+		ALC295_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC298_STANDARD_PINS,
 		{0x17, 0x90170110}),
diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
index 41446668ccce..d5677d39c1e4 100644
--- a/tools/include/linux/log2.h
+++ b/tools/include/linux/log2.h
@@ -13,12 +13,6 @@
 #define _TOOLS_LINUX_LOG2_H
 
 /*
- * deal with unrepresentable constant logarithms
- */
-extern __attribute__((const, noreturn))
-int ____ilog2_NaN(void);
-
-/*
  * non-constant log of base 2 calculators
  * - the arch may override these in asm/bitops.h if they can be implemented
  *   more efficiently than using fls() and fls64()
@@ -78,7 +72,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 #define ilog2(n)				\
 (						\
 	__builtin_constant_p(n) ? (		\
-		(n) < 1 ? ____ilog2_NaN() :	\
+		(n) < 2 ? 0 :			\
 		(n) & (1ULL << 63) ? 63 :	\
 		(n) & (1ULL << 62) ? 62 :	\
 		(n) & (1ULL << 61) ? 61 :	\
@@ -141,10 +135,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
 		(n) & (1ULL << 4) ? 4 :		\
 		(n) & (1ULL << 3) ? 3 :		\
 		(n) & (1ULL << 2) ? 2 :		\
-		(n) & (1ULL << 1) ? 1 :		\
-		(n) & (1ULL << 0) ? 0 :		\
-		____ilog2_NaN()			\
-	) :					\
+		1 ) :				\
 	(sizeof(n) <= 4) ?			\
 	__ilog2_u32(n) :			\
 	__ilog2_u64(n)				\
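
Several of the fixes above are easiest to sanity-check with small standalone models; the sketches that follow are illustrative userspace C, not kernel code. First, the tools/include/linux/log2.h change: ilog2() of a constant 0 or 1 now folds to 0 instead of referencing the deleted ____ilog2_NaN() link-error stub. A reduced model of the new constant path, with the 64-entry ladder cut down to a toy 4-bit version and __builtin_clzll() standing in for the __ilog2_u32()/__ilog2_u64() runtime helpers:

    /* Toy model of the patched ilog2(): constant inputs below 2 now
     * evaluate to 0; the ladder is truncated to 4 bits for brevity. */
    #include <stdio.h>

    #define TOY_ILOG2(n)			\
    (						\
    	__builtin_constant_p(n) ? (		\
    		(n) < 2 ? 0 :			\
    		(n) & (1ULL << 3) ? 3 :		\
    		(n) & (1ULL << 2) ? 2 :		\
    		1 ) :				\
    	63 - __builtin_clzll(n)			\
    )

    int main(void)
    {
    	unsigned long long v = 12;	/* runtime value, exercises clzll */

    	printf("%d %d %d %d\n",
    	       TOY_ILOG2(0),	/* 0: formerly a deliberate link error */
    	       TOY_ILOG2(1),	/* 0 */
    	       TOY_ILOG2(8),	/* 3 */
    	       TOY_ILOG2(v));	/* 3: floor(log2(12)) */
    	return 0;
    }

The old ladder turned ilog2(0) into an undefined reference so the mistake was caught at build time; the new form simply returns 0.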
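
Next, the packet_bind_spkt() change: sockaddr.sa_data is a fixed 14-byte field that arrives from userspace with no NUL guarantee, so the old strlcpy() into char name[15] could read past the end of the source while searching for a terminator. The same pattern in plain C, with ifname_from_sockaddr() as a hypothetical stand-in for the kernel code:

    /* sa_data may be unterminated: a bounded copy plus an explicit
     * terminator avoids the over-read that strlcpy() would perform. */
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    static void ifname_from_sockaddr(const struct sockaddr *uaddr, char *name)
    {
    	/* name must have room for sizeof(uaddr->sa_data) + 1 bytes */
    	memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
    	name[sizeof(uaddr->sa_data)] = '\0';
    }

    int main(void)
    {
    	struct sockaddr sa;
    	char name[sizeof(sa.sa_data) + 1];

    	memset(&sa, 'A', sizeof(sa));	/* deliberately unterminated */
    	ifname_from_sockaddr(&sa, name);
    	printf("%s\n", name);		/* fourteen 'A's, then NUL */
    	return 0;
    }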
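
The xfrm_replay_verify_len() hardening follows the same defensive theme: the user-supplied ESN state must agree with the kernel's bitmap length, and the advertised replay window must fit inside that bitmap, otherwise later bitmap updates can index out of bounds. A model of just the added checks, cut down to the two fields involved (esn_state and esn_check() are illustrative names, not the kernel types):

    /* bmp_len counts 32-bit words; replay_window is in bits. */
    #include <errno.h>
    #include <stdint.h>

    struct esn_state {
    	uint32_t bmp_len;
    	uint32_t replay_window;
    };

    static int esn_check(const struct esn_state *kern,
    		     const struct esn_state *user)
    {
    	if (user->bmp_len != kern->bmp_len)
    		return -EINVAL;		/* bitmap size mismatch */
    	if ((uint64_t)user->replay_window > (uint64_t)user->bmp_len * 32)
    		return -EINVAL;		/* window larger than bitmap */
    	return 0;
    }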
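
The nl80211 hunks all apply one locking idiom: rather than a prepare helper that returns with the RTNL held on success but dropped on failure, the caller now takes the lock unconditionally and funnels every exit, success or error, through a single unlock. A compilable sketch of the shape, with a pthread mutex standing in for the RTNL and stub functions in place of the real nl80211 symbols:

    /* Lock in the caller, one exit label: no path can forget the
     * unlock and no helper needs a conditional-unlock contract. */
    #include <pthread.h>

    static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;

    static int prepare_dump(void) { return 0; }	/* stub: parse, look up dev */
    static int do_dump(void) { return 0; }	/* stub: emit entries */

    static int dump_one(void)
    {
    	int err;

    	pthread_mutex_lock(&rtnl);
    	err = prepare_dump();	/* may fail; the lock stays held */
    	if (err)
    		goto out;
    	err = do_dump();
    out:
    	pthread_mutex_unlock(&rtnl);
    	return err;
    }

    int main(void)
    {
    	return dump_one();
    }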
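
Finally, since the bulk of this series removes CONFIG_NETLINK_MMAP, userspace that used to probe for ring support is left with copy mode as the only path. A sketch of such a probe, assuming the legacy NETLINK_RX_RING constant and struct nl_mmap_req are still visible in the installed uapi headers; on a kernel with this series applied the setsockopt() simply fails and the caller stays on recvmsg():

    /* Probe for mapped-ring RX support and fall back to copy mode.
     * The ring geometry (4 blocks of 4096 bytes, 256-byte frames,
     * 64 frames total) is an arbitrary example. */
    #include <stdbool.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270		/* from <linux/socket.h> */
    #endif

    static bool try_rx_ring(int fd)
    {
    #ifdef NETLINK_RX_RING
    	struct nl_mmap_req req = {
    		.nm_block_size	= 4096,
    		.nm_block_nr	= 4,
    		.nm_frame_size	= 256,
    		.nm_frame_nr	= 64,
    	};

    	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING,
    		       &req, sizeof(req)) == 0)
    		return true;	/* old kernel: mapped I/O available */
    #endif
    	(void)fd;
    	return false;		/* stay on plain recvmsg() */
    }

    int main(void)
    {
    	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    	if (fd < 0)
    		return 1;
    	if (!try_rx_ring(fd)) {
    		/* copy mode: read with recvmsg() as before */
    	}
    	return 0;
    }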