diff options
Diffstat (limited to 'arch/arm64')
104 files changed, 5509 insertions, 2613 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f8d50d8c5379..e0536945f835 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -82,9 +82,12 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS select IOMMU_DMA if (IOMMU_SUPPORT && !ARCH_QCOM) + select HAVE_KPROBES + select HAVE_KRETPROBES if HAVE_KPROBES select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA @@ -99,6 +102,7 @@ config ARM64 select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING + select HAVE_ARM_SMCCC help ARM 64-bit (AArch64) Linux support. @@ -807,6 +811,14 @@ config SETEND_EMULATION If unsure, say Y endif +config ARM64_SW_TTBR0_PAN + bool "Emulate Priviledged Access Never using TTBR0_EL1 switching" + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved + zeroed area and reserved ASID. The user access routines + restore the valid TTBR0_EL1 temporarily. + menu "ARMv8.1 architectural features" config ARM64_HW_AFDBM @@ -893,7 +905,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS + select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -1053,6 +1065,14 @@ menu "Power management options" source "kernel/power/Kconfig" +config ARCH_HIBERNATION_POSSIBLE + def_bool y + depends on CPU_PM + +config ARCH_HIBERNATION_HEADER + def_bool y + depends on HIBERNATION + config ARCH_SUSPEND_POSSIBLE def_bool y diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 01c86385fbb6..365e13004651 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -226,3 +226,6 @@ CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y CONFIG_CRYPTO_CRC32_ARM64=y +CONFIG_HIBERNATION=y +CONFIG_KPROBES=y +CONFIG_CORESIGHT=y diff --git a/arch/arm64/configs/msm-perf_defconfig b/arch/arm64/configs/msm-perf_defconfig index 03c5bc89b6f5..32b44c8c3d18 100644 --- a/arch/arm64/configs/msm-perf_defconfig +++ b/arch/arm64/configs/msm-perf_defconfig @@ -327,8 +327,6 @@ CONFIG_POWER_RESET_QCOM=y CONFIG_QCOM_DLOAD_MODE=y CONFIG_POWER_RESET_XGENE=y CONFIG_POWER_RESET_SYSCON=y -CONFIG_QPNP_SMBCHARGER=y -CONFIG_QPNP_FG=y CONFIG_SMB135X_CHARGER=y CONFIG_SMB1351_USB_CHARGER=y CONFIG_MSM_BCL_CTL=y @@ -461,7 +459,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_MSM=y CONFIG_LEDS_QPNP=y -CONFIG_LEDS_QPNP_FLASH=y +CONFIG_LEDS_QPNP_FLASH_V2=y CONFIG_LEDS_QPNP_WLED=y CONFIG_LEDS_TRIGGERS=y CONFIG_SWITCH=y diff --git a/arch/arm64/configs/msm_defconfig b/arch/arm64/configs/msm_defconfig index b4ddd9a2bed5..988f1fbf8ea3 100644 --- a/arch/arm64/configs/msm_defconfig +++ b/arch/arm64/configs/msm_defconfig @@ -314,8 +314,6 @@ CONFIG_POWER_RESET_QCOM=y CONFIG_QCOM_DLOAD_MODE=y CONFIG_POWER_RESET_XGENE=y CONFIG_POWER_RESET_SYSCON=y -CONFIG_QPNP_SMBCHARGER=y -CONFIG_QPNP_FG=y CONFIG_SMB135X_CHARGER=y CONFIG_SMB1351_USB_CHARGER=y CONFIG_MSM_BCL_CTL=y @@ -449,7 +447,7 @@ CONFIG_MMC_SPI=y CONFIG_MMC_DW=y CONFIG_MMC_DW_EXYNOS=y CONFIG_LEDS_QPNP=y -CONFIG_LEDS_QPNP_FLASH=y +CONFIG_LEDS_QPNP_FLASH_V2=y CONFIG_LEDS_QPNP_WLED=y CONFIG_LEDS_SYSCON=y CONFIG_LEDS_TRIGGERS=y diff --git a/arch/arm64/configs/msmcortex-perf_defconfig b/arch/arm64/configs/msmcortex-perf_defconfig index 5c07a8b80354..91224fc7ae68 100644 --- a/arch/arm64/configs/msmcortex-perf_defconfig +++ b/arch/arm64/configs/msmcortex-perf_defconfig @@ -479,6 +479,11 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_SWITCH=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_QPNP=y +CONFIG_ESOC=y +CONFIG_ESOC_DEV=y +CONFIG_ESOC_CLIENT=y +CONFIG_ESOC_MDM_4x=y +CONFIG_ESOC_MDM_DRV=y CONFIG_DMADEVICES=y CONFIG_QCOM_SPS_DMA=y CONFIG_UIO=y diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig index f5b163eaec0b..d5f30e9a8b16 100644 --- a/arch/arm64/configs/msmcortex_defconfig +++ b/arch/arm64/configs/msmcortex_defconfig @@ -489,6 +489,13 @@ CONFIG_EDAC_CORTEX_ARM64=y CONFIG_EDAC_CORTEX_ARM64_PANIC_ON_UE=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_QPNP=y +CONFIG_ESOC=y +CONFIG_ESOC_DEV=y +CONFIG_ESOC_CLIENT=y +CONFIG_ESOC_DEBUG=y +CONFIG_ESOC_MDM_4x=y +CONFIG_ESOC_MDM_DRV=y +CONFIG_ESOC_MDM_DBG_ENG=y CONFIG_DMADEVICES=y CONFIG_QCOM_SPS_DMA=y CONFIG_UIO=y diff --git a/arch/arm64/configs/msmcortex_mediabox_defconfig b/arch/arm64/configs/msmcortex_mediabox_defconfig index 967edf184f1b..9ebe740c7d91 100644 --- a/arch/arm64/configs/msmcortex_mediabox_defconfig +++ b/arch/arm64/configs/msmcortex_mediabox_defconfig @@ -298,7 +298,6 @@ CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO_SERPORT is not set -# CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y @@ -349,7 +348,6 @@ CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_I2C_PMIC=y CONFIG_WCD9335_CODEC=y CONFIG_WCD934X_CODEC=y -CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_RPM_SMD=y CONFIG_REGULATOR_QPNP=y @@ -407,15 +405,8 @@ CONFIG_DVB_MPQ=m CONFIG_DVB_MPQ_DEMUX=m CONFIG_DVB_MPQ_MEDIA_BOX_DEMUX=y CONFIG_TSPP=m -CONFIG_QCOM_KGSL=y -CONFIG_FB=y -CONFIG_FB_VIRTUAL=y -CONFIG_FB_MSM=y -CONFIG_FB_MSM_MDSS=y -CONFIG_FB_MSM_MDSS_WRITEBACK=y -CONFIG_FB_MSM_MDSS_HDMI_PANEL=y -CONFIG_FB_MSM_MDSS_DP_PANEL=y -CONFIG_FB_MSM_MDSS_XLOG_DEBUG=y +CONFIG_DRM=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -497,6 +488,7 @@ CONFIG_STAGING=y CONFIG_ASHMEM=y CONFIG_ANDROID_TIMED_GPIO=y CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y CONFIG_ION=y CONFIG_ION_MSM=y CONFIG_QPNP_REVID=y @@ -545,7 +537,6 @@ CONFIG_MSM_SYSMON_GLINK_COMM=y CONFIG_MSM_IPC_ROUTER_GLINK_XPRT=y CONFIG_MSM_GLINK_PKT=y CONFIG_MSM_SPM=y -CONFIG_QCOM_SCM=y CONFIG_QCOM_WATCHDOG_V2=y CONFIG_QCOM_IRQ_HELPER=y CONFIG_QCOM_MEMORY_DUMP_V2=y @@ -608,7 +599,6 @@ CONFIG_EXT4_FS_ICE_ENCRYPTION=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y -CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_EFIVAR_FS=y CONFIG_ECRYPT_FS=y diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig new file mode 100644 index 000000000000..00eb346e0928 --- /dev/null +++ b/arch/arm64/configs/ranchu64_defconfig @@ -0,0 +1,311 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_ARCH_MMAP_RND_BITS=24 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=16 +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_NR_CPUS=4 +CONFIG_PREEMPT=y +CONFIG_KSM=y +CONFIG_SECCOMP=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_SWP_EMULATION=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_SETEND_EMULATION=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_INET_ESP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_TARGET_ECN=y +CONFIG_IP_NF_TARGET_TTL=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_BRIDGE=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +CONFIG_SMC91X=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +CONFIG_KEYBOARD_GOLDFISH_EVENTS=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_BATTERY_GOLDFISH=y +# CONFIG_HWMON is not set +CONFIG_MEDIA_SUPPORT=y +CONFIG_FB=y +CONFIG_FB_GOLDFISH=y +CONFIG_FB_SIMPLE=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ION=y +CONFIG_GOLDFISH_AUDIO=y +CONFIG_GOLDFISH=y +CONFIG_GOLDFISH_PIPE=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_EXT2_FS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_RODATA=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y diff --git a/arch/arm64/configs/sdm660-perf_defconfig b/arch/arm64/configs/sdm660-perf_defconfig index 29b2c75e70f3..b0bdabf58d62 100644 --- a/arch/arm64/configs/sdm660-perf_defconfig +++ b/arch/arm64/configs/sdm660-perf_defconfig @@ -527,6 +527,7 @@ CONFIG_IOMMU_DEBUG_TRACKING=y CONFIG_IOMMU_TESTS=y CONFIG_MSM_SMEM=y CONFIG_QPNP_HAPTIC=y +CONFIG_QPNP_PBS=y CONFIG_MSM_SMD=y CONFIG_MSM_GLINK=y CONFIG_MSM_GLINK_LOOPBACK_SERVER=y diff --git a/arch/arm64/configs/sdm660_defconfig b/arch/arm64/configs/sdm660_defconfig index adfcc6c9531c..7b21409c3266 100644 --- a/arch/arm64/configs/sdm660_defconfig +++ b/arch/arm64/configs/sdm660_defconfig @@ -541,6 +541,7 @@ CONFIG_IOMMU_TESTS=y CONFIG_QCOM_COMMON_LOG=y CONFIG_MSM_SMEM=y CONFIG_QPNP_HAPTIC=y +CONFIG_QPNP_PBS=y CONFIG_MSM_SMD=y CONFIG_MSM_SMD_DEBUG=y CONFIG_MSM_GLINK=y @@ -568,7 +569,6 @@ CONFIG_QCOM_WATCHDOG_V2=y CONFIG_QCOM_IRQ_HELPER=y CONFIG_QCOM_MEMORY_DUMP_V2=y CONFIG_ICNSS=y -CONFIG_ICNSS_DEBUG=y CONFIG_MSM_GLADIATOR_ERP_V2=y CONFIG_PANIC_ON_GLADIATOR_ERROR_V2=y CONFIG_MSM_GLADIATOR_HANG_DETECT=y @@ -687,6 +687,7 @@ CONFIG_PID_IN_CONTEXTIDR=y CONFIG_DEBUG_SET_MODULE_RONX=y CONFIG_DEBUG_RODATA=y CONFIG_FREE_PAGES_RDONLY=y +CONFIG_ARM64_STRICT_BREAK_BEFORE_MAKE=y CONFIG_CORESIGHT=y CONFIG_CORESIGHT_EVENT=y CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 05d9e16c0dfd..6a51dfccfe71 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -211,7 +211,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbdefa106..8746ff6abd77 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,27 +116,27 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index f8856a450852..dfc8416d8896 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -1,5 +1,5 @@ /* - * Based on arch/arm/include/asm/assembler.h + * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S * * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. @@ -23,6 +23,10 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm/asm-offsets.h> +#include <asm/cpufeature.h> +#include <asm/page.h> +#include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -49,6 +53,15 @@ msr daifclr, #2 .endm + .macro save_and_disable_irq, flags + mrs \flags, daif + msr daifset, #2 + .endm + + .macro restore_irq, flags + msr daif, \flags + .endm + /* * Save/disable and restore interrupts. */ @@ -224,6 +237,111 @@ lr .req x30 // link register .endm /* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm + +/* + * copy_page - copy src to dest using temp registers t1-t8 + */ + .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req +9998: ldp \t1, \t2, [\src] + ldp \t3, \t4, [\src, #16] + ldp \t5, \t6, [\src, #32] + ldp \t7, \t8, [\src, #48] + add \src, \src, #64 + stnp \t1, \t2, [\dest] + stnp \t3, \t4, [\dest, #16] + stnp \t5, \t6, [\dest, #32] + stnp \t7, \t8, [\dest, #48] + add \dest, \dest, #64 + tst \src, #(PAGE_SIZE - 1) + b.ne 9998b + .endm + +/* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. */ @@ -265,4 +383,28 @@ lr .req x30 // link register movk \reg, :abs_g0_nc:\val .endm +/* + * Return the current thread_info. + */ + .macro get_thread_info, rd + mrs \rd, sp_el0 + .endm + +/* + * Errata workaround post TTBR0_EL1 update. + */ + .macro post_ttbr0_update_workaround +#ifdef CONFIG_CAVIUM_ERRATUM_27456 +alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 + nop + nop + nop +alternative_else + ic iallu + dsb nsh + isb +alternative_endif +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fb1013f9bb86..29f8d7fd627d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -34,9 +34,9 @@ #define ARM64_HAS_UAO 9 #define ARM64_ALT_PAN_NOT_UAO 10 -#define ARM64_NCAPS 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 - +#define ARM64_WORKAROUND_CAVIUM_27456 11 +#define ARM64_HAS_VIRT_HOST_EXTN 12 +#define ARM64_NCAPS 13 #ifndef __ASSEMBLY__ @@ -83,7 +83,7 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void *); /* Called on all active CPUs */ + int (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; @@ -191,6 +191,12 @@ static inline bool system_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); } +static inline bool system_uses_ttbr0_pan(void) +{ + return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && + !cpus_have_cap(ARM64_HAS_PAN); +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7c876c..4b6b3f72a215 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,6 +66,11 @@ #define CACHE_FLUSH_IS_SAFE 1 +/* kprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_MASK 0xFFFF +#define BRK64_ESR_KPROBES 0x0004 +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) + /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index ef572206f1c3..932f5a56d1a6 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -1,8 +1,11 @@ #ifndef _ASM_EFI_H #define _ASM_EFI_H +#include <asm/cpufeature.h> #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/neon.h> +#include <asm/tlbflush.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -10,6 +13,8 @@ extern void efi_init(void); #define efi_init() #endif +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + #define efi_call_virt(f, ...) \ ({ \ efi_##f##_t *__f; \ @@ -63,6 +68,34 @@ extern void efi_init(void); * Services are enabled and the EFI_RUNTIME_SERVICES bit set. */ +static inline void efi_set_pgd(struct mm_struct *mm) +{ + __switch_mm(mm); + + if (system_uses_ttbr0_pan()) { + if (mm != current->active_mm) { + /* + * Update the current thread's saved ttbr0 since it is + * restored as part of a return from exception. Set + * the hardware TTBR0_EL1 using cpu_switch_mm() + * directly to enable potential errata workarounds. + */ + update_saved_ttbr0(current, mm); + cpu_switch_mm(mm->pgd, mm); + } else { + /* + * Defer the switch to the current thread's TTBR0_EL1 + * until uaccess_enable(). Restore the current + * thread's saved ttbr0 corresponding to its active_mm + * (if different from init_mm). + */ + cpu_set_reserved_ttbr0(); + if (current->active_mm != &init_mm) + update_saved_ttbr0(current, current->active_mm); + } + } +} + void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2cc648f..f772e15c4766 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index f2585cdd32c2..71dfa3b42313 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -27,9 +27,9 @@ #include <asm/sysreg.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ +do { \ + uaccess_enable(); \ asm volatile( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ @@ -44,11 +44,11 @@ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -118,8 +118,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" -ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -134,10 +134,10 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " .popsection\n" _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) -ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); + uaccess_disable(); *uval = val; return ret; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 30e50eb54a67..1dbaa901d7e5 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -120,6 +120,29 @@ enum aarch64_insn_register { AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */ }; +enum aarch64_insn_special_register { + AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200, + AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201, + AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208, + AARCH64_INSN_SPCLREG_SPSEL = 0xC210, + AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212, + AARCH64_INSN_SPCLREG_DAIF = 0xDA11, + AARCH64_INSN_SPCLREG_NZCV = 0xDA10, + AARCH64_INSN_SPCLREG_FPCR = 0xDA20, + AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28, + AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29, + AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200, + AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201, + AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208, + AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218, + AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219, + AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A, + AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B, + AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200, + AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201, + AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -223,8 +246,15 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } +__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) +__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) +__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) +__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -273,10 +303,15 @@ __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) +__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) +__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS @@ -286,6 +321,8 @@ bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +bool aarch64_insn_uses_literal(u32 insn); +bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -367,9 +404,13 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT_OFFSET 12 #define A32_RT2_OFFSET 0 +u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); + +typedef bool (pstate_check_t)(unsigned long); +extern pstate_check_t * const aarch32_opcode_cond_checks[16]; #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 5c6375d8528b..7803343e5881 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -19,6 +19,8 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include <asm/pgtable.h> +#include <asm/sparsemem.h> /* * The linear mapping and the start of memory are both 2M aligned (per @@ -53,6 +55,12 @@ #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +#define RESERVED_TTBR0_SIZE (PAGE_SIZE) +#else +#define RESERVED_TTBR0_SIZE (0) +#endif + /* Initial memory map size */ #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT @@ -86,10 +94,24 @@ * (64k granule), or a multiple that can be mapped using contiguous bits * in the page tables: 32 * PMD_SIZE (16k granule) */ -#ifdef CONFIG_ARM64_64K_PAGES -#define ARM64_MEMSTART_ALIGN SZ_512M +#if defined(CONFIG_ARM64_4K_PAGES) +#define ARM64_MEMSTART_SHIFT PUD_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5) +#else +#define ARM64_MEMSTART_SHIFT PMD_SHIFT +#endif + +/* + * sparsemem vmemmap imposes an additional requirement on the alignment of + * memstart_addr, due to the fact that the base of the vmemmap region + * has a direct correspondence, and needs to appear sufficiently aligned + * in the virtual address space. + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS +#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) #else -#define ARM64_MEMSTART_ALIGN SZ_1G +#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) #endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h new file mode 100644 index 000000000000..1737aecfcc5e --- /dev/null +++ b/arch/arm64/include/asm/kprobes.h @@ -0,0 +1,60 @@ +/* + * arch/arm64/include/asm/kprobes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KPROBES_H +#define _ARM_KPROBES_H + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include <asm/probes.h> + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_irqflag; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; + struct pt_regs jprobe_saved_regs; +}; + +void arch_remove_kprobe(struct kprobe *); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); +int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); + +#endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 2d960f8588b0..8b709f53f874 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -83,17 +83,6 @@ #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) -/* Hyp System Control Register (SCTLR_EL2) bits */ -#define SCTLR_EL2_EE (1 << 25) -#define SCTLR_EL2_WXN (1 << 19) -#define SCTLR_EL2_I (1 << 12) -#define SCTLR_EL2_SA (1 << 3) -#define SCTLR_EL2_C (1 << 2) -#define SCTLR_EL2_A (1 << 1) -#define SCTLR_EL2_M 1 -#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ - SCTLR_EL2_SA | SCTLR_EL2_I) - /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) @@ -123,6 +112,7 @@ #define VTCR_EL2_SL0_LVL1 (1 << 6) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_VS 19 /* * We configure the Stage-2 page tables to always restrict the IPA space to be @@ -167,7 +157,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (UL(48)) -#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_T(x) (1 << x) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 419bc6661b5c..36a30c80032d 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,84 +20,10 @@ #include <asm/virt.h> -/* - * 0 is reserved as an invalid value. - * Order *must* be kept in sync with the hyp switch code. - */ -#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ -#define CSSELR_EL1 2 /* Cache Size Selection Register */ -#define SCTLR_EL1 3 /* System Control Register */ -#define ACTLR_EL1 4 /* Auxiliary Control Register */ -#define CPACR_EL1 5 /* Coprocessor Access Control */ -#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ -#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ -#define TCR_EL1 8 /* Translation Control Register */ -#define ESR_EL1 9 /* Exception Syndrome Register */ -#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ -#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ -#define FAR_EL1 12 /* Fault Address Register */ -#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ -#define VBAR_EL1 14 /* Vector Base Address Register */ -#define CONTEXTIDR_EL1 15 /* Context ID Register */ -#define TPIDR_EL0 16 /* Thread ID, User R/W */ -#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ -#define TPIDR_EL1 18 /* Thread ID, Privileged */ -#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ -#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define PAR_EL1 21 /* Physical Address Register */ -#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ - -/* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 24 /* Domain Access Control Register */ -#define IFSR32_EL2 25 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define NR_SYS_REGS 28 - -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE 2 #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) @@ -105,11 +31,27 @@ #define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset) #ifndef __ASSEMBLY__ +#if __GNUC__ > 4 +#define kvm_ksym_shift (PAGE_OFFSET - KIMAGE_VADDR) +#else +/* + * GCC versions 4.9 and older will fold the constant below into the addend of + * the reference to 'sym' above if kvm_ksym_shift is declared static or if the + * constant is used directly. However, since we use the small code model for + * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair, + * with a +/- 4 GB range, resulting in linker relocation errors if the shift + * is sufficiently large. So prevent the compiler from folding the shift into + * the addend, by making the shift a variable with external linkage. + */ +__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR; +#endif + struct kvm; struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; +extern char __kvm_hyp_reset[]; extern char __kvm_hyp_vector[]; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 25a40213bd9b..3066328cd86b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,6 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 90c6368ad7c8..3be7a7b52d80 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,7 +25,6 @@ #include <linux/types.h> #include <linux/kvm_types.h> #include <asm/kvm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -45,6 +44,7 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); +phys_addr_t kvm_hyp_reset_entry(void); struct kvm_arch { /* The VMID generation used for the virt. memory system */ @@ -85,6 +85,86 @@ struct kvm_vcpu_fault_info { u64 hpfar_el2; /* Hyp IPA Fault Address Register */ }; +/* + * 0 is reserved as an invalid value. + * Order should be kept in sync with the save/restore code. + */ +enum vcpu_sysreg { + __INVALID_SYSREG__, + MPIDR_EL1, /* MultiProcessor Affinity Register */ + CSSELR_EL1, /* Cache Size Selection Register */ + SCTLR_EL1, /* System Control Register */ + ACTLR_EL1, /* Auxiliary Control Register */ + CPACR_EL1, /* Coprocessor Access Control */ + TTBR0_EL1, /* Translation Table Base Register 0 */ + TTBR1_EL1, /* Translation Table Base Register 1 */ + TCR_EL1, /* Translation Control Register */ + ESR_EL1, /* Exception Syndrome Register */ + AFSR0_EL1, /* Auxilary Fault Status Register 0 */ + AFSR1_EL1, /* Auxilary Fault Status Register 1 */ + FAR_EL1, /* Fault Address Register */ + MAIR_EL1, /* Memory Attribute Indirection Register */ + VBAR_EL1, /* Vector Base Address Register */ + CONTEXTIDR_EL1, /* Context ID Register */ + TPIDR_EL0, /* Thread ID, User R/W */ + TPIDRRO_EL0, /* Thread ID, User R/O */ + TPIDR_EL1, /* Thread ID, Privileged */ + AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ + CNTKCTL_EL1, /* Timer Control Register (EL1) */ + PAR_EL1, /* Physical Address Register */ + MDSCR_EL1, /* Monitor Debug System Control Register */ + MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + + /* 32bit specific registers. Keep them at the end of the range */ + DACR32_EL2, /* Domain Access Control Register */ + IFSR32_EL2, /* Instruction Fault Status Register */ + FPEXC32_EL2, /* Floating-Point Exception Control Register */ + DBGVCR32_EL2, /* Debug Vector Catch Register */ + + NR_SYS_REGS /* Nothing after this line! */ +}; + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + +#define NR_COPRO_REGS (NR_SYS_REGS * 2) + struct kvm_cpu_context { struct kvm_regs gp_regs; union { @@ -247,7 +327,21 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } -static inline void kvm_arch_hardware_disable(void) {} +static inline void __cpu_init_stage2(void) +{ +} + +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t phys_idmap_start) +{ + /* + * Call reset code, and switch back to stub hyp vectors. + * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation. + */ + __kvm_call_hyp((void *)kvm_hyp_reset_entry(), + boot_pgd_ptr, phys_idmap_start); +} + static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 889c908ee631..fe612a962576 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -19,7 +19,6 @@ #define __ARM64_KVM_MMIO_H__ #include <linux/kvm_host.h> -#include <asm/kvm_asm.h> #include <asm/kvm_arm.h> /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 61505676d085..342a5ac2f3da 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -20,6 +20,7 @@ #include <asm/page.h> #include <asm/memory.h> +#include <asm/cpufeature.h> /* * As we only have the TTBR0_EL2 register, we cannot express @@ -98,6 +99,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); +phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -158,7 +160,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) #endif #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) @@ -302,5 +303,12 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); } +static inline unsigned int kvm_get_vmid_bits(void) +{ + int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + + return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 12f8a00fb3f1..ae11e8fdbfd2 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -71,6 +71,9 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) +#define KERNEL_START _text +#define KERNEL_END _end + /* * The size of the KASAN shadow region. This should be 1/8th of the * size of the entire kernel virtual address space. @@ -193,7 +196,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 21d5a6e45891..5d5baf8096b9 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -23,6 +23,7 @@ #include <linux/sched.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/proc-fns.h> #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> @@ -118,7 +119,7 @@ static inline void cpu_uninstall_idmap(void) local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); - if (mm != &init_mm) + if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); } @@ -178,20 +179,26 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -/* - * This is the actual mm switch as far as the scheduler - * is concerned. No registers are touched. We avoid - * calling the CPU specific function when the mm hasn't - * actually changed. - */ -static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) { - unsigned int cpu = smp_processor_id(); + if (system_uses_ttbr0_pan()) { + BUG_ON(mm->pgd == swapper_pg_dir); + task_thread_info(tsk)->ttbr0 = + virt_to_phys(mm->pgd) | ASID(mm) << 48; + } +} +#else +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) +{ +} +#endif - if (prev == next) - return; +static inline void __switch_mm(struct mm_struct *next) +{ + unsigned int cpu = smp_processor_id(); /* * init_mm.pgd does not contain any user mappings and it is always @@ -205,7 +212,23 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, check_and_switch_context(next, cpu); } +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (prev != next) + __switch_mm(next); + + /* + * Update the saved TTBR0_EL1 of the scheduled-in task as the previous + * value may have not been initialised yet (activate_mm caller) or the + * ASID has changed since the last run (following the context switch + * of another thread of the same process). + */ + update_saved_ttbr0(tsk, next); +} + #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) +#define activate_mm(prev,next) switch_mm(prev, next, current) #endif diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 9b2f5a9d019d..fbafd0ad16df 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -19,6 +19,8 @@ #ifndef __ASM_PAGE_H #define __ASM_PAGE_H +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ /* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0a456bef8c79..8a336852eeba 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -44,48 +44,44 @@ static inline unsigned long __percpu_##op(void *ptr, \ \ switch (size) { \ case 1: \ - do { \ - asm ("//__per_cpu_" #op "_1\n" \ - "ldxrb %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_1\n" \ + "1: ldxrb %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrb %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u8 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrb %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 2: \ - do { \ - asm ("//__per_cpu_" #op "_2\n" \ - "ldxrh %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_2\n" \ + "1: ldxrh %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrh %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u16 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrh %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 4: \ - do { \ - asm ("//__per_cpu_" #op "_4\n" \ - "ldxr %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_4\n" \ + "1: ldxr %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxr %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u32 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 8: \ - do { \ - asm ("//__per_cpu_" #op "_8\n" \ - "ldxr %[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_8\n" \ + "1: ldxr %[ret], %[ptr]\n" \ #asm_op " %[ret], %[ret], %[val]\n" \ - "stxr %w[loop], %[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u64 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ break; \ default: \ BUILD_BUG(); \ @@ -150,44 +146,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, switch (size) { case 1: - do { - asm ("//__percpu_xchg_1\n" - "ldxrb %w[ret], %[ptr]\n" - "stxrb %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u8 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_1\n" + "1: ldxrb %w[ret], %[ptr]\n" + " stxrb %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u8 *)ptr) + : [val] "r" (val)); break; case 2: - do { - asm ("//__percpu_xchg_2\n" - "ldxrh %w[ret], %[ptr]\n" - "stxrh %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u16 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_2\n" + "1: ldxrh %w[ret], %[ptr]\n" + " stxrh %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); break; case 4: - do { - asm ("//__percpu_xchg_4\n" - "ldxr %w[ret], %[ptr]\n" - "stxr %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u32 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_4\n" + "1: ldxr %w[ret], %[ptr]\n" + " stxr %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); break; case 8: - do { - asm ("//__percpu_xchg_8\n" - "ldxr %[ret], %[ptr]\n" - "stxr %w[loop], %[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u64 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_8\n" + "1: ldxr %[ret], %[ptr]\n" + " stxr %w[loop], %[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h new file mode 100644 index 000000000000..5af574d632fa --- /dev/null +++ b/arch/arm64/include/asm/probes.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/include/asm/probes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef _ARM_PROBES_H +#define _ARM_PROBES_H + +#include <asm/opcodes.h> + +struct kprobe; +struct arch_specific_insn; + +typedef u32 kprobe_opcode_t; +typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t *insn; + pstate_check_t *pstate_cc; + kprobes_handler_t *handler; + /* restore address after step xol */ + unsigned long restore; +}; + +#endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index c97d01eb219f..a0053d23b35a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -193,7 +193,7 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(void *__unused); -void cpu_enable_uao(void *__unused); +int cpu_enable_pan(void *__unused); +int cpu_enable_uao(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 7f94755089e2..5eedfd83acc7 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,6 +21,8 @@ #include <uapi/asm/ptrace.h> +#define _PSR_PAN_BIT 22 + /* Current Exception Level values, as contained in CurrentEL */ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) @@ -121,6 +123,8 @@ struct pt_regs { u64 unused; // maintain 16 byte alignment }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) + #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT @@ -146,9 +150,57 @@ struct pt_regs { #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define user_stack_pointer(regs) \ +#define GET_USP(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +#define SET_USP(ptregs, value) \ + (!compat_user_mode(regs) ? ((regs)->sp = value) : ((regs)->compat_sp = value)) + +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of a register whose offset from @regs. + * The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + u64 val = 0; + + offset >>= 3; + switch (offset) { + case 0 ... 30: + val = regs->regs[offset]; + break; + case offsetof(struct pt_regs, sp) >> 3: + val = regs->sp; + break; + case offsetof(struct pt_regs, pc) >> 3: + val = regs->pc; + break; + case offsetof(struct pt_regs, pstate) >> 3: + val = regs->pstate; + break; + default: + val = 0; + } + + return val; +} + +/* Valid only for Kernel mode traps. */ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->regs[0]; @@ -158,8 +210,15 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define instruction_pointer(regs) ((unsigned long)(regs)->pc) +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) + +#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) +#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) + +#include <asm-generic/ptrace.h> +#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 7b369923b2eb..19afa01911dd 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -37,13 +37,17 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) "2: ldaxr %w0, %2\n" " eor %w1, %w0, %w0, ror #16\n" " cbnz %w1, 1b\n" + /* Serialise against any concurrent lockers */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ - /* LSE atomics */ " nop\n" -" nop\n") +" nop\n", + /* LSE atomics */ +" mov %w1, %w0\n" +" cas %w0, %w0, %2\n" +" eor %w1, %w1, %w0\n") +" cbnz %w1, 2b\n" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : : "memory"); @@ -330,4 +334,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() +/* + * Accesses appearing in program order before a spin_lock() operation + * can be reordered with accesses inside the critical section, by virtue + * of arch_spin_lock being constructed using acquire semantics. + * + * In cases where this is problematic (e.g. try_to_wake_up), an + * smp_mb__before_spinlock() can restore the required ordering. + */ +#define smp_mb__before_spinlock() smp_mb() + #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 59a5b0f1e81c..024d623f662e 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,8 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 11 +#define NR_CTX_REGS 10 +#define NR_CALLEE_SAVED_REGS 12 /* * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on @@ -16,11 +17,34 @@ struct cpu_suspend_ctx { u64 sp; } __aligned(16); -struct sleep_save_sp { - phys_addr_t *save_ptr_stash; - phys_addr_t save_ptr_stash_phys; +/* + * Memory to save the cpu state is allocated on the stack by + * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter(). + * This data must survive until cpu_resume() is called. + * + * This struct desribes the size and the layout of the saved cpu state. + * The layout of the callee_saved_regs is defined by the implementation + * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed + * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it + * returns, and the data would be subsequently corrupted by the call to the + * finisher. + */ +struct sleep_stack_data { + struct cpu_suspend_ctx system_regs; + unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS]; }; +extern unsigned long *sleep_save_stash; + extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); +int __cpu_suspend_enter(struct sleep_stack_data *state); +void __cpu_suspend_exit(void); +void _cpu_resume(void); + +int swsusp_arch_suspend(void); +int swsusp_arch_resume(void); +int arch_hibernation_header_save(void *addr, unsigned int max_size); +int arch_hibernation_header_restore(void *addr); + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b9fd8ec79033..0961a24e8d48 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <linux/stringify.h> + #include <asm/opcodes.h> /* @@ -84,10 +86,21 @@ #define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ (!!x)<<8 | 0x1f) -/* SCTLR_EL1 */ -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) -#define SCTLR_EL1_SPAN (0x1 << 23) +/* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_I (1 << 12) +#define SCTLR_ELx_SA (1 << 3) +#define SCTLR_ELx_C (1 << 2) +#define SCTLR_ELx_A (1 << 1) +#define SCTLR_ELx_M 1 + +#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_CP15BEN (1 << 5) /* id_aa64isar0 */ @@ -215,6 +228,8 @@ #else +#include <linux/types.h> + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ .L__reg_num_x\\num, \\num\n" @@ -239,6 +254,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 4a9e82e4f724..4b4292147584 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -47,6 +47,9 @@ typedef unsigned long mm_segment_t; struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ +#endif struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index c3d445b42351..c37c064d7cdd 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,8 @@ #ifndef __ASM_UACCESS_H #define __ASM_UACCESS_H +#ifndef __ASSEMBLY__ + /* * User space memory access functions */ @@ -26,6 +28,7 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/kernel-pgtable.h> #include <asm/ptrace.h> #include <asm/sysreg.h> #include <asm/errno.h> @@ -124,6 +127,85 @@ static inline void set_fs(mm_segment_t fs) " .popsection\n" /* + * User access enabling/disabling. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void uaccess_ttbr0_disable(void) +{ + unsigned long ttbr; + + /* reserved_ttbr0 placed at the end of swapper_pg_dir */ + ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; + write_sysreg(ttbr, ttbr0_el1); + isb(); +} + +static inline void uaccess_ttbr0_enable(void) +{ + unsigned long flags; + + /* + * Disable interrupts to avoid preemption between reading the 'ttbr0' + * variable and the MSR. A context switch could trigger an ASID + * roll-over and an update of 'ttbr0'. + */ + local_irq_save(flags); + write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + isb(); + local_irq_restore(flags); +} +#else +static inline void uaccess_ttbr0_disable(void) +{ +} + +static inline void uaccess_ttbr0_enable(void) +{ +} +#endif + +#define __uaccess_disable(alt) \ +do { \ + if (system_uses_ttbr0_pan()) \ + uaccess_ttbr0_disable(); \ + else \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +#define __uaccess_enable(alt) \ +do { \ + if (system_uses_ttbr0_pan()) \ + uaccess_ttbr0_enable(); \ + else \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +static inline void uaccess_disable(void) +{ + __uaccess_disable(ARM64_HAS_PAN); +} + +static inline void uaccess_enable(void) +{ + __uaccess_enable(ARM64_HAS_PAN); +} + +/* + * These functions are no-ops when UAO is present. + */ +static inline void uaccess_disable_not_uao(void) +{ + __uaccess_disable(ARM64_ALT_PAN_NOT_UAO); +} + +static inline void uaccess_enable_not_uao(void) +{ + __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); +} + +/* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" * call. @@ -150,8 +232,7 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -172,9 +253,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -219,8 +299,7 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -241,8 +320,7 @@ do { \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_disable_not_uao(); \ } while (0) #define __put_user(x, ptr) \ @@ -327,4 +405,73 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +#else /* __ASSEMBLY__ */ + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/kernel-pgtable.h> + +/* + * User access enabling/disabling macros. + */ + .macro uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + +/* + * These macros are no-ops when UAO is present. + */ + .macro uaccess_disable_not_uao, tmp1 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +alternative_if_not ARM64_HAS_PAN + uaccess_ttbr0_disable \tmp1 +alternative_else + nop + nop + nop + nop +alternative_endif +#endif +alternative_if_not ARM64_ALT_PAN_NOT_UAO + nop +alternative_else + SET_PSTATE_PAN(1) +alternative_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else + nop + nop + nop + nop + nop + nop + nop +alternative_endif +#endif +alternative_if_not ARM64_ALT_PAN_NOT_UAO + nop +alternative_else + SET_PSTATE_PAN(0) +alternative_endif + .endm + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7a5df5252dd7..06e6a5238c4c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,11 +18,29 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H +/* + * The arm64 hcall implementation uses x0 to specify the hcall type. A value + * less than 0xfff indicates a special hcall, such as get/set vector. + * Any other value is used as a pointer to the function to call. + */ + +/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */ +#define HVC_GET_VECTORS 0 + +/* + * HVC_SET_VECTORS - Set the value of the vbar_el2 register. + * + * @x1: Physical address of the new vector table. + */ +#define HVC_SET_VECTORS 1 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ +#include <asm/ptrace.h> + /* * __boot_cpu_mode records what mode CPUs were booted in. * A correctly-implemented bootloader must start all CPUs in the same mode: @@ -50,6 +68,14 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +static inline bool is_kernel_in_hyp_mode(void) +{ + u64 el; + + asm("mrs %0, CurrentEL" : "=r" (el)); + return el == CurrentEL_EL2; +} + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index f6e0269de4c7..d3cfa681654f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,10 +14,10 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ + hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ - smp.o smp_spin_table.o topology.o + smp.o smp_spin_table.o topology.o smccc-call.o extra-$(CONFIG_EFI) := efi-entry.o @@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o \ - ../../arm/kernel/opcodes.o + sys_compat.o entry32.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o @@ -44,10 +43,12 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o -arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o +arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o -obj-y += $(arm64-obj-y) vdso/ +obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 65919bfe6cf5..546ce8979b3a 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -26,6 +26,8 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/kprobes.h> +#include <linux/arm-smccc.h> #include <asm/cacheflush.h> #include <asm/checksum.h> @@ -68,8 +70,13 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); +NOKPROBE_SYMBOL(_mcount); #endif /* caching functions */ EXPORT_SYMBOL(__dma_inv_range); EXPORT_SYMBOL(__dma_clean_range); EXPORT_SYMBOL(__dma_flush_range); + + /* arm-smccc */ +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..a0a0f2b20608 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -281,9 +281,9 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) * Error-checking SWP macros implemented using ldxr{b}/stxr{b} */ #define __user_swpX_asm(data, addr, res, temp, B) \ +do { \ + uaccess_enable(); \ __asm__ __volatile__( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ "0: ldxr"B" %w2, [%3]\n" \ "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ @@ -299,11 +299,11 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) " .popsection" \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) #define __user_swp_asm(data, addr, res, temp) \ __user_swpX_asm(data, addr, res, temp, "") @@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((*aarch32_opcode_cond_checks[cc_bits])(psr)) + return ARM_OPCODE_CONDTEST_PASS; + else + return ARM_OPCODE_CONDTEST_FAIL; + } + return ARM_OPCODE_CONDTEST_UNCOND; +} + /* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and @@ -380,7 +395,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr) type = instr & TYPE_SWPB; - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -461,7 +476,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 087cf9a65359..dac70c160289 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -22,12 +22,14 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/suspend.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> +#include <linux/arm-smccc.h> int main(void) { @@ -36,6 +38,9 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TI_TTBR0, offsetof(struct thread_info, ttbr0)); +#endif DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); BLANK(); @@ -49,6 +54,17 @@ int main(void) DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); + DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); + DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); + DEFINE(S_X14, offsetof(struct pt_regs, regs[14])); + DEFINE(S_X16, offsetof(struct pt_regs, regs[16])); + DEFINE(S_X18, offsetof(struct pt_regs, regs[18])); + DEFINE(S_X20, offsetof(struct pt_regs, regs[20])); + DEFINE(S_X22, offsetof(struct pt_regs, regs[22])); + DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); + DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); + DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); #ifdef CONFIG_COMPAT @@ -109,58 +125,25 @@ int main(void) DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); - DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); - DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); - DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); - DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); - DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); - DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); - DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); - DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); - DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); - DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); - DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); - DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); - DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); - DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); - DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); - DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); - DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); - DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); - DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); - DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); - DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); - DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); - DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); - DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif + DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); + DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); + DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); return 0; } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7566cad9fa1d..cdf1dca64133 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -19,13 +19,16 @@ #define pr_fmt(fmt) "CPU features: " fmt #include <linux/bsearch.h> +#include <linux/cpumask.h> #include <linux/sort.h> +#include <linux/stop_machine.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/processor.h> #include <asm/sysreg.h> +#include <asm/virt.h> unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -43,6 +46,7 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcaps); #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ @@ -646,6 +650,11 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); } +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +{ + return is_kernel_in_hyp_mode(); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -698,6 +707,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ + { + .desc = "Virtualization Host Extensions", + .capability = ARM64_HAS_VIRT_HOST_EXTN, + .matches = runs_at_el2, + }, {}, }; @@ -812,7 +826,13 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) for (i = 0; caps[i].matches; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + /* + * Use stop_machine() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which + * uses an IPI, giving us a PSTATE that disappears when + * we return. + */ + stop_machine(caps[i].enable, NULL, cpu_online_mask); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index e51f27ac13fd..6de6d9f43b95 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -23,6 +23,7 @@ #include <linux/hardirq.h> #include <linux/init.h> #include <linux/ptrace.h> +#include <linux/kprobes.h> #include <linux/stat.h> #include <linux/uaccess.h> @@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr) asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); local_dbg_restore(flags); } +NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { @@ -55,6 +57,7 @@ static u32 mdscr_read(void) asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); return mdscr; } +NOKPROBE_SYMBOL(mdscr_read); /* * Allow root to disable self-hosted debug from userspace. @@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { @@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. @@ -173,6 +178,7 @@ static void set_regs_spsr_ss(struct pt_regs *regs) spsr |= DBG_SPSR_SS; regs->pstate = spsr; } +NOKPROBE_SYMBOL(set_regs_spsr_ss); static void clear_regs_spsr_ss(struct pt_regs *regs) { @@ -182,6 +188,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) spsr &= ~DBG_SPSR_SS; regs->pstate = spsr; } +NOKPROBE_SYMBOL(clear_regs_spsr_ss); /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); @@ -225,6 +232,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +NOKPROBE_SYMBOL(call_step_hook); static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -253,6 +261,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return 0; +#endif if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; @@ -266,6 +278,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(single_step_handler); /* * Breakpoint handler is re-entrant as another breakpoint can @@ -303,6 +316,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) return fn ? fn(regs, esr) : DBG_HOOK_ERROR; } +NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -318,13 +332,21 @@ static int brk_handler(unsigned long addr, unsigned int esr, }; force_sig_info(SIGTRAP, &info, current); - } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { - pr_warning("Unexpected kernel BRK exception at EL1\n"); + } +#ifdef CONFIG_KPROBES + else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) + return -EFAULT; + } +#endif + else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } return 0; } +NOKPROBE_SYMBOL(brk_handler); int aarch32_break_handler(struct pt_regs *regs) { @@ -369,6 +391,7 @@ int aarch32_break_handler(struct pt_regs *regs) force_sig_info(SIGTRAP, &info, current); return 0; } +NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { @@ -390,6 +413,7 @@ void user_rewind_single_step(struct task_struct *task) if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { @@ -405,6 +429,7 @@ void kernel_enable_single_step(struct pt_regs *regs) mdscr_write(mdscr_read() | DBG_MDSCR_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { @@ -412,21 +437,27 @@ void kernel_disable_single_step(void) mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); return mdscr_read() & DBG_MDSCR_SS; } +NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) { - set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); - set_regs_spsr_ss(task_pt_regs(task)); + struct thread_info *ti = task_thread_info(task); + + if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_enable_single_step); void user_disable_single_step(struct task_struct *task) { clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); } +NOKPROBE_SYMBOL(user_disable_single_step); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4eeb17198cfa..b6abc852f2a1 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,317 +11,34 @@ * */ -#include <linux/atomic.h> #include <linux/dmi.h> #include <linux/efi.h> -#include <linux/export.h> -#include <linux/memblock.h> -#include <linux/mm_types.h> -#include <linux/bootmem.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/preempt.h> -#include <linux/rbtree.h> -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> +#include <linux/init.h> -#include <asm/cacheflush.h> #include <asm/efi.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -struct efi_memory_map memmap; - -static u64 efi_system_table; - -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), -}; - -static int __init is_normal_ram(efi_memory_desc_t *md) -{ - if (md->attribute & EFI_MEMORY_WB) - return 1; - return 0; -} - -/* - * Translate a EFI virtual address into a physical address: this is necessary, - * as some data members of the EFI system table are virtually remapped after - * SetVirtualAddressMap() has been called. - */ -static phys_addr_t efi_to_phys(unsigned long addr) +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - efi_memory_desc_t *md; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - /* no virtual mapping has been installed by the stub */ - break; - if (md->virt_addr <= addr && - (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) - return md->phys_addr + addr - md->virt_addr; - } - return addr; -} - -static int __init uefi_init(void) -{ - efi_char16_t *c16; - void *config_tables; - u64 table_size; - char vendor[100] = "unknown"; - int i, retval; - - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); - if (efi.systab == NULL) { - pr_warn("Unable to map EFI system table.\n"); - return -ENOMEM; - } - - set_bit(EFI_BOOT, &efi.flags); - set_bit(EFI_64BIT, &efi.flags); + pteval_t prot_val; /* - * Verify the EFI Table + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. */ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { - pr_err("System table signature incorrect\n"); - retval = -EINVAL; - goto out; - } - if ((efi.systab->hdr.revision >> 16) < 2) - pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); - if (c16) { - for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = c16[i]; - vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); - } - - pr_info("EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); - if (config_tables == NULL) { - pr_warn("Unable to map EFI config table array.\n"); - retval = -ENOMEM; - goto out; - } - retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, - sizeof(efi_config_table_64_t), NULL); - - early_memunmap(config_tables, table_size); -out: - early_memunmap(efi.systab, sizeof(efi_system_table_t)); - return retval; -} - -/* - * Return true for RAM regions we want to permanently reserve. - */ -static __init int is_reserve_region(efi_memory_desc_t *md) -{ - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - case EFI_PERSISTENT_MEMORY: - return 0; - default: - break; - } - return is_normal_ram(md); -} - -static __init void reserve_regions(void) -{ - efi_memory_desc_t *md; - u64 paddr, npages, size; - - if (efi_enabled(EFI_DBG)) - pr_info("Processing EFI memory map:\n"); - - for_each_efi_memory_desc(&memmap, md) { - paddr = md->phys_addr; - npages = md->num_pages; - - if (efi_enabled(EFI_DBG)) { - char buf[64]; - - pr_info(" 0x%012llx-0x%012llx %s", - paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); - } - - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - early_init_dt_add_memory_arch(paddr, size); - - if (is_reserve_region(md)) { - memblock_reserve(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); - } - - set_bit(EFI_MEMMAP, &efi.flags); -} - -void __init efi_init(void) -{ - struct efi_fdt_params params; - - /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms)) - return; - - efi_system_table = params.system_table; - - memblock_reserve(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); - if (memmap.map == NULL) { - /* - * If we are booting via UEFI, the UEFI memory map is the only - * description of memory we have, so there is little point in - * proceeding if we cannot access it. - */ - panic("Unable to map EFI memory map.\n"); - } - memmap.map_end = memmap.map + params.mmap_size; - memmap.desc_size = params.desc_size; - memmap.desc_version = params.desc_ver; - - if (uefi_init() < 0) - return; - - reserve_regions(); - early_memunmap(memmap.map, params.mmap_size); -} - -static bool __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - init_new_context(NULL, &efi_mm); - - for_each_efi_memory_desc(&memmap, md) { - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - return false; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE || - !PAGE_ALIGNED(md->phys_addr)) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr, - md->num_pages << EFI_PAGE_SHIFT, - __pgprot(pgprot_val(prot) | PTE_NG)); - } - return true; -} - -/* - * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., - * non-early mapping of the UEFI system table and virtual mappings for all - * EFI_MEMORY_RUNTIME regions. - */ -static int __init arm64_enable_runtime_services(void) -{ - u64 mapsize; - - if (!efi_enabled(EFI_BOOT)) { - pr_info("EFI services will not be available.\n"); - return 0; - } - - if (efi_runtime_disabled()) { - pr_info("EFI runtime services will be disabled.\n"); - return 0; - } - - pr_info("Remapping and enabling EFI services.\n"); - - mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache(memmap.phys_map, - mapsize); - if (!memmap.map) { - pr_err("Failed to remap EFI memory map\n"); - return -ENOMEM; - } - memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; - - efi.systab = (__force void *)ioremap_cache(efi_system_table, - sizeof(efi_system_table_t)); - if (!efi.systab) { - pr_err("Failed to remap EFI System Table\n"); - return -ENOMEM; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - - if (!efi_virtmap_init()) { - pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); - return -ENOMEM; - } - - /* Set up runtime services function pointers */ - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - efi.runtime_version = efi.systab->hdr.revision; - + if ((md->attribute & EFI_MEMORY_WB) == 0) + prot_val = PROT_DEVICE_nGnRE; + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) + prot_val = pgprot_val(PAGE_KERNEL_EXEC); + else + prot_val = pgprot_val(PAGE_KERNEL); + + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + __pgprot(prot_val | PTE_NG)); return 0; } -early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -337,23 +54,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static void efi_set_pgd(struct mm_struct *mm) -{ - switch_mm(NULL, mm, NULL); -} - -void efi_virtmap_load(void) -{ - preempt_disable(); - efi_set_pgd(&efi_mm); -} - -void efi_virtmap_unload(void) -{ - efi_set_pgd(current->active_mm); - preempt_enable(); -} - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d311c635a00e..7f4e5cba0b13 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,7 +29,9 @@ #include <asm/esr.h> #include <asm/irq.h> #include <asm/memory.h> +#include <asm/ptrace.h> #include <asm/thread_info.h> +#include <asm/uaccess.h> #include <asm/unistd.h> /* @@ -109,6 +111,34 @@ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Set the TTBR0 PAN bit in SPSR. When the exception is taken from + * EL0, there is no need to check the state of TTBR0_EL1 since + * accesses are always enabled. + * Note that the meaning of this bit differs from the ARMv8.1 PAN + * feature as all TTBR0_EL1 accesses are disabled, not just those to + * user mappings. + */ +alternative_if_not ARM64_HAS_PAN + nop +alternative_else + b 1f // skip TTBR0 PAN +alternative_endif + + .if \el != 0 + mrs x21, ttbr0_el1 + tst x21, #0xffff << 48 // Check for the reserved ASID + orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR + b.eq 1f // TTBR0 access already disabled + and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR + .endif + + uaccess_ttbr0_disable x21 +1: +#endif + stp x22, x23, [sp, #S_PC] /* @@ -147,6 +177,42 @@ ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter + .endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR + * PAN bit checking. + */ +alternative_if_not ARM64_HAS_PAN + nop +alternative_else + b 2f // skip TTBR0 PAN +alternative_endif + + .if \el != 0 + tbnz x22, #_PSR_PAN_BIT, 1f // Skip re-enabling TTBR0 access if previously disabled + .endif + + uaccess_ttbr0_enable x0 + + .if \el == 0 + /* + * Enable errata workarounds only if returning to user. The only + * workaround currently required for TTBR0_EL1 changes are for the + * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache + * corruption). + */ + post_ttbr0_update_workaround + .endif +1: + .if \el != 0 + and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit + .endif +2: +#endif + + .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -168,6 +234,7 @@ alternative_else alternative_endif #endif .endif + msr elr_el1, x21 // set up the return data msr spsr_el1, x22 ldp x0, x1, [sp, #16 * 0] @@ -190,10 +257,6 @@ alternative_endif eret // return to kernel .endm - .macro get_thread_info, rd - mrs \rd, sp_el0 - .endm - .macro irq_stack_entry mov x19, sp // preserve the original sp @@ -258,6 +321,7 @@ tsk .req x28 // current thread_info /* * Exception vectors. */ + .pushsection ".entry.text", "ax" .align 11 ENTRY(vectors) @@ -293,7 +357,7 @@ END(vectors) * Invalid mode handlers */ .macro inv_entry, el, reason, regsize = 64 - kernel_entry el, \regsize + kernel_entry \el, \regsize mov x0, sp mov x1, #\reason mrs x2, esr_el1 @@ -355,6 +419,8 @@ el1_sync: lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da + cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 + b.eq el1_ia cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception @@ -366,6 +432,11 @@ el1_sync: cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv + +el1_ia: + /* + * Fall through to the Data abort case + */ el1_da: /* * Data abort handling @@ -551,7 +622,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user @@ -820,6 +891,8 @@ __ni_sys_trace: bl do_ni_syscall b __sys_trace_return + .popsection // .entry.text + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9890d04a96cb..8cfd5ab37743 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -50,9 +50,6 @@ #error TEXT_OFFSET must be less than 2MB #endif -#define KERNEL_START _text -#define KERNEL_END _end - /* * Kernel startup entry point. * --------------------------- @@ -321,14 +318,14 @@ __create_page_tables: * dirty cache lines being evicted. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + add x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -407,7 +404,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE dmb sy bl __inval_cache_range @@ -551,8 +548,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to - msr mdcr_el2, x0 // all PMU counters from EL1 4: + csel x0, xzr, x0, lt // all PMU counters from EL1 + msr mdcr_el2, x0 // (if they exist) /* Stage-2 translation */ msr vttbr_el2, xzr @@ -665,7 +663,7 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ .section ".idmap.text", "ax" -__enable_mmu: +ENTRY(__enable_mmu) mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 @@ -697,6 +695,9 @@ __enable_mmu: isb bl __create_page_tables // recreate kernel mapping + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + msr sctlr_el1, x19 // re-enable the MMU isb ic iallu // flush instructions fetched diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 000000000000..46f29b6560ec --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse <james.morse@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <linux/errno.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through over-writing other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU). + * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to over-write memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernels page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 000000000000..f8df75d740f4 --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,487 @@ +/*: + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> +#include <linux/version.h> + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/irqflags.h> +#include <asm/memory.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/virt.h> + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with either KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbols alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* + * Start/end of the hibernate exit code, this must be copied to a 'safe' + * location in memory, and executed from there. + */ +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be know across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr; + +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintentance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* Load our new page tables */ + asm volatile("msr ttbr0_el1, %0;" + "isb;" + "tlbi vmalle1is;" + "dsb ish;" + "isb" : : "r"(virt_to_phys(pgd))); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +} + + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel to PoC for secondary core startup */ + __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + if (!pte_none(*src_pte)) + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +} + +/* + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code, + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous affects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors, that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check. + */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b45c95d34b83..367a954f9937 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> +#include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> @@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -654,6 +659,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -756,6 +762,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index a272f335c289..8727f4490772 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,8 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/ptrace.h> #include <asm/virt.h> @@ -53,15 +55,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -101,10 +114,18 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) - mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x0, #HVC_GET_VECTORS hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x1, x0 + mov x0, #HVC_SET_VECTORS + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 62152a48e65f..5f72243e5ba7 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,6 +30,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/fixmap.h> +#include <asm/opcodes.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -163,6 +164,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -1176,6 +1203,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. + */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; @@ -1201,3 +1236,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index bcac81e600b9..814d0c51b2f9 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/kprobes.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -218,6 +219,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -226,12 +228,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 000000000000..ce06312e3d34 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..f7931d900bca --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,174 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <asm/kprobes.h> +#include <asm/insn.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) { + if (aarch64_insn_is_branch(insn) || + aarch64_insn_is_msr_imm(insn) || + aarch64_insn_is_msr_reg(insn) || + aarch64_insn_is_exception(insn) || + aarch64_insn_is_eret(insn)) + return false; + + /* + * The MRS instruction may not return a correct value when + * executing in the single-stepping environment. We do make one + * exception, for reading the DAIF bits. + */ + if (aarch64_insn_is_mrs(insn)) + return aarch64_insn_extract_system_reg(insn) + != AARCH64_INSN_SPCLREG_DAIF; + + /* + * The HINT instruction is is problematic when single-stepping, + * except for the NOP case. + */ + if (aarch64_insn_is_hint(insn)) + return aarch64_insn_is_nop(insn); + + return true; + } + + /* + * Instructions which load PC relative literals are not going to work + * when executed from an XOL slot. Instructions doing an exclusive + * load/store are not going to complete successfully when single-step + * exception handling happens in the middle of the sequence. + */ + if (aarch64_insn_uses_literal(insn) || + aarch64_insn_is_exclusive(insn)) + return false; + + return true; +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +static enum kprobe_insn __kprobes +arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* + * Instructions reading or modifying the PC won't work from the XOL + * slot. + */ + if (aarch64_insn_is_steppable(insn)) + return INSN_GOOD; + + if (aarch64_insn_is_bcond(insn)) { + asi->handler = simulate_b_cond; + } else if (aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn)) { + asi->handler = simulate_cbz_cbnz; + } else if (aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn)) { + asi->handler = simulate_tbz_tbnz; + } else if (aarch64_insn_is_adr_adrp(insn)) { + asi->handler = simulate_adr_adrp; + } else if (aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn)) { + asi->handler = simulate_b_bl; + } else if (aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_ret(insn)) { + asi->handler = simulate_br_blr_ret; + } else if (aarch64_insn_is_ldr_lit(insn)) { + asi->handler = simulate_ldr_literal; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + asi->handler = simulate_ldrsw_literal; + } else { + /* + * Instruction cannot be stepped out-of-line and we don't + * (yet) simulate it. + */ + return INSN_REJECTED; + } + + return INSN_GOOD_NO_SLOT; +} + +static bool __kprobes +is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) +{ + while (scan_start > scan_end) { + /* + * atomic region starts from exclusive load and ends with + * exclusive store. + */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->module_init; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->module_core; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..d438289646a6 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 000000000000..1ee93c7c5a75 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,657 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> +#include <linux/stringify.h> +#include <asm/traps.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/debug-monitors.h> +#include <asm/system_misc.h> +#include <asm/insn.h> +#include <asm/uaccess.h> +#include <asm/irq.h> +#include <asm-generic/sections.h> + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + /* prepare insn slot */ + p->ainsn.insn[0] = cpu_to_le32(p->opcode); + + flush_icache_range((uintptr_t) (p->ainsn.insn), + (uintptr_t) (p->ainsn.insn) + + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + /* + * Needs restoring of return address after stepping xol. + */ + p->ainsn.restore = (unsigned long) p->addr + + sizeof(kprobe_opcode_t); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + /* This instructions is not executed xol. No need to adjust the PC */ + p->ainsn.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.handler) + p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + + /* single step simulated, now go for post processing */ + post_kprobe_handler(kcb, regs); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + extern char __start_rodata[]; + extern char __end_rodata[]; + + if (probe_addr & 0x3) + return -EINVAL; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + if (in_exception_text(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + /* decode instruction */ + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + break; + }; + + /* prepare the instruction */ + if (p->ainsn.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = (void *)addr; + insns[0] = (u32)opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + +/* arm kprobe: install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, BRK64_OPCODE_KPROBES); +} + +/* disarm kprobe: remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * The D-flag (Debug mask) is set (masked) upon debug exception entry. + * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive + * probe i.e. when probe hit from kprobe handler context upon + * executing the pre/post handlers. In this case we return with + * D-flag clear so that single-stepping can be carried-out. + * + * Leave D-flag set in all other cases. + */ +static void __kprobes +spsr_set_debug_flag(struct pt_regs *regs, int mask) +{ + unsigned long spsr = regs->pstate; + + if (mask) + spsr |= PSR_D_BIT; + else + spsr &= ~PSR_D_BIT; + + regs->pstate = spsr; +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. + */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_irqflag = regs->pstate; + regs->pstate |= PSR_I_BIT; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + if (kcb->saved_irqflag & PSR_I_BIT) + regs->pstate |= PSR_I_BIT; + else + regs->pstate &= ~PSR_I_BIT; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + + if (p->ainsn.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; + + set_ss_context(kcb, slot); /* mark pending ss */ + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); + else + WARN_ON(regs->pstate & PSR_D_BIT); + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. + * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + kcb->jprobe_saved_regs = *regs; + /* + * Since we can't be sure where in the stack frame "stacked" + * pass-by-value arguments are stored we just don't try to + * duplicate any of the stack. Do not use jprobes on functions that + * use more than 64 bytes (after padding each to an 8 byte boundary) + * of arguments, or pass individual arguments larger than 16 bytes. + */ + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a special PC to identify it from the other kprobes. + * -restore stack addr to original saved pt_regs + */ + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + preempt_enable_no_resched(); + return 1; +} + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + + return false; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..5d6e7f14638c --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. + */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..8977ce9d009d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..050bde683c2d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index daa573479421..b9e7c42cd8eb 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,7 +20,6 @@ #include <linux/smp.h> #include <linux/delay.h> #include <linux/psci.h> -#include <linux/slab.h> #include <uapi/linux/psci.h> @@ -28,73 +27,6 @@ #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> -#include <asm/suspend.h> - -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) -{ - int i, ret, count = 0; - u32 *psci_states; - struct device_node *state_node, *cpu_node; - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %s missing arm,psci-suspend-param property\n", - state_node->full_name); - of_node_put(state_node); - goto free_mem; - } - - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} static int __init cpu_psci_cpu_init(unsigned int cpu) { @@ -179,34 +111,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) } #endif -static int psci_suspend_finisher(unsigned long state_id) -{ - return psci_ops.cpu_suspend(state_id, virt_to_phys(cpu_resume)); -} - -static int __maybe_unused cpu_psci_cpu_suspend(unsigned long state_id) -{ - int ret; - /* - * idle state id 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!state_id)) - return -EINVAL; - - if (!psci_power_state_loses_context(state_id)) - ret = psci_ops.cpu_suspend(state_id, 0); - else - ret = cpu_suspend(state_id, psci_suspend_finisher); - - return ret; -} - const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE - .cpu_init_idle = cpu_psci_cpu_init_idle, - .cpu_suspend = cpu_psci_cpu_suspend, + .cpu_init_idle = psci_cpu_init_idle, + .cpu_suspend = psci_cpu_suspend_enter, #endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index fc779ec6f051..df4707380e66 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -49,6 +49,106 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 326c53b7fe0b..30af178c640c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -183,7 +183,6 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -361,6 +360,15 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Make sure init_thread_info.ttbr0 always generates translation + * faults in case uaccess_enable() is inadvertently called by the init + * thread. + */ + init_thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#endif + #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index e33fe33876ab..c2bf5a58039f 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,39 +49,32 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]! - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -93,70 +86,28 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 - bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS + stp x29, lr, [sp, #-16]! + bl cpu_do_suspend + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg -/* - * x0 must contain the sctlr value retrieved from restored context - */ - .pushsection ".idmap.text", "ax" -ENTRY(cpu_resume_mmu) - ldr x3, =cpu_resume_after_mmu - msr sctlr_el1, x0 // restore sctlr_el1 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - br x3 // global jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: - mov x0, #0 // return zero on success - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 - ret -ENDPROC(cpu_resume_after_mmu) - ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -166,20 +117,27 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - /* load physical address of identity map page table in x1 */ - adrp x1, idmap_pg_dir mov sp, x2 /* save thread_info */ and x2, x2, #~(THREAD_SIZE - 1) msr sp_el0, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + bl cpu_do_resume + + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S new file mode 100644 index 000000000000..ae0496fa4235 --- /dev/null +++ b/arch/arm64/kernel/smccc-call.S @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License Version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .macro SMCCC instr + .cfi_startproc + \instr #0 + ldr x4, [sp] + stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] + stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] + ret + .cfi_endproc + .endm + +/* + * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC smc +ENDPROC(arm_smccc_smc) + +/* + * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC hvc +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 85aea381fbf6..cb3eec8e8e50 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -69,6 +69,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); + kasan_enable_current(); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk && tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { @@ -112,8 +114,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) } } - kasan_enable_current(); - return 0; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 66055392f445..5a0b1088c17c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,7 +1,9 @@ #include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> #include <asm/memory.h> @@ -10,30 +12,11 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: CPU context virtual address - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' the contains a particular CPUs state. */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(ptr); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM. - */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -51,6 +34,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -60,8 +67,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -77,34 +85,26 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with the idmap active in TTBR0_EL1. - * We must uninstall the idmap and restore the expected MMU - * state before we can possibly return to userspace. - */ - cpu_uninstall_idmap(); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. + * PSTATE was not saved over suspend/resume, re-enable any + * detected features that might not have been set correctly. */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, + CONFIG_ARM64_PAN)); /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled * through local_dbg_restore. */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); @@ -119,22 +119,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - return 0; } early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 48b75ece4c17..d621d1de0ac2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -68,8 +68,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -115,21 +114,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -143,8 +133,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -505,7 +505,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 76ff67d44fb5..b94ad40ecf7b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -47,6 +47,16 @@ jiffies = jiffies_64; *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -109,11 +119,14 @@ SECTIONS *(.exception.text) __exception_text_end = .; IRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT + HIBERNATE_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -182,6 +195,11 @@ SECTIONS swapper_pg_dir = .; . += SWAPPER_DIR_SIZE; +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + _end = .; STABS_DEBUG @@ -197,6 +215,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1949fe5f5424..caee9ee8e12a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,7 @@ KVM=../../../virt/kvm ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o @@ -22,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3039f080e2d5..e5ee8880d5d9 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -28,7 +28,6 @@ #include <asm/cputype.h> #include <asm/uaccess.h> #include <asm/kvm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 15f0477b0d2a..25006a7a5316 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,6 +23,7 @@ #include <linux/kvm_host.h> #include <asm/esr.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> @@ -182,6 +183,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, exit_handler = kvm_get_exit_handler(vcpu); return exit_handler(vcpu, run); + case ARM_EXCEPTION_HYP_GONE: + /* + * EL2 has been reset to the hyp-stub. This happens when a guest + * is pre-empted by kvm_reboot()'s shutdown call. + */ + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + return 0; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 84c338f017b2..d87635e678b7 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -21,6 +21,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> +#include <asm/sysreg.h> .text .pushsection .hyp.idmap.text, "ax" @@ -96,6 +97,14 @@ __do_hyp_init: ldr x4, =VTCR_EL2_FLAGS bfi x4, x5, #16, #3 + /* + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR1_EL1 + ubfx x5, x5, #5, #1 + lsl x5, x5, #VTCR_EL2_VS + orr x4, x4, x5 msr vtcr_el2, x4 @@ -108,8 +117,8 @@ __do_hyp_init: dsb sy mrs x4, sctlr_el2 - and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 - ldr x5, =SCTLR_EL2_FLAGS + and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2 + ldr x5, =SCTLR_ELx_FLAGS orr x4, x4, x5 msr sctlr_el2, x4 isb @@ -143,6 +152,44 @@ merged: eret ENDPROC(__kvm_hyp_init) + /* + * x0: HYP boot pgd + * x1: HYP phys_idmap_start + */ +ENTRY(__kvm_hyp_reset) + /* We're in trampoline code in VA, switch back to boot page tables */ + msr ttbr0_el2, x0 + isb + + /* Ensure the PA branch doesn't find a stale tlb entry or stale code. */ + ic iallu + tlbi alle2 + dsb sy + isb + + /* Branch into PA space */ + adr x0, 1f + bfi x1, x0, #0, #PAGE_SHIFT + br x1 + + /* We're now in idmap, disable MMU */ +1: mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 // Clear SCTL_M and etc + msr sctlr_el2, x0 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Install stub vectors */ + adr_l x0, __hyp_stub_vectors + msr vbar_el2, x0 + + eret +ENDPROC(__kvm_hyp_reset) + .ltorg .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 309e3479dc2c..7ce931565151 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -18,909 +18,8 @@ #include <linux/linkage.h> #include <asm/alternative.h> -#include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/cpufeature.h> -#include <asm/debug-monitors.h> -#include <asm/esr.h> -#include <asm/fpsimdmacros.h> -#include <asm/kvm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> -#include <asm/memory.h> - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - .align PAGE_SHIFT - -.macro save_common_regs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_XREG_OFFSET(19) - stp x19, x20, [x3] - stp x21, x22, [x3, #16] - stp x23, x24, [x3, #32] - stp x25, x26, [x3, #48] - stp x27, x28, [x3, #64] - stp x29, lr, [x3, #80] - - mrs x19, sp_el0 - mrs x20, elr_el2 // pc before entering el2 - mrs x21, spsr_el2 // pstate before entering el2 - - stp x19, x20, [x3, #96] - str x21, [x3, #112] - - mrs x22, sp_el1 - mrs x23, elr_el1 - mrs x24, spsr_el1 - - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] -.endm - -.macro restore_common_regs - // x2: base address for cpu context - // x3: tmp register - - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] - - msr sp_el1, x22 - msr elr_el1, x23 - msr spsr_el1, x24 - - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 - ldp x19, x20, [x3] - ldr x21, [x3, #16] - - msr sp_el0, x19 - msr elr_el2, x20 // pc on return from el2 - msr spsr_el2, x21 // pstate on return from el2 - - add x3, x2, #CPU_XREG_OFFSET(19) - ldp x19, x20, [x3] - ldp x21, x22, [x3, #16] - ldp x23, x24, [x3, #32] - ldp x25, x26, [x3, #48] - ldp x27, x28, [x3, #64] - ldp x29, lr, [x3, #80] -.endm - -.macro save_host_regs - save_common_regs -.endm - -.macro restore_host_regs - restore_common_regs -.endm - -.macro save_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_save x3, 4 -.endm - -.macro restore_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_restore x3, 4 -.endm - -.macro save_guest_regs - // x0 is the vcpu address - // x1 is the return code, do not corrupt! - // x2 is the cpu context - // x3 is a tmp register - // Guest's x0-x3 are on the stack - - // Compute base to save registers - add x3, x2, #CPU_XREG_OFFSET(4) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - str x18, [x3, #112] - - pop x6, x7 // x2, x3 - pop x4, x5 // x0, x1 - - add x3, x2, #CPU_XREG_OFFSET(0) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - save_common_regs -.endm - -.macro restore_guest_regs - // x0 is the vcpu address. - // x2 is the cpu context - // x3 is a tmp register - - // Prepare x0-x3 for later restore - add x3, x2, #CPU_XREG_OFFSET(0) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - push x4, x5 // Push x0-x3 on the stack - push x6, x7 - - // x4-x18 - ldp x4, x5, [x3, #32] - ldp x6, x7, [x3, #48] - ldp x8, x9, [x3, #64] - ldp x10, x11, [x3, #80] - ldp x12, x13, [x3, #96] - ldp x14, x15, [x3, #112] - ldp x16, x17, [x3, #128] - ldr x18, [x3, #144] - - // x19-x29, lr, sp*, elr*, spsr* - restore_common_regs - - // Last bits of the 64bit state - pop x2, x3 - pop x0, x1 - - // Do not touch any register after this! -.endm - -/* - * Macros to perform system register save/restore. - * - * Ordering here is absolutely critical, and must be kept consistent - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, - * and in kvm_asm.h. - * - * In other words, don't touch any of these unless you know what - * you are doing. - */ -.macro save_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - mrs x4, vmpidr_el2 - mrs x5, csselr_el1 - mrs x6, sctlr_el1 - mrs x7, actlr_el1 - mrs x8, cpacr_el1 - mrs x9, ttbr0_el1 - mrs x10, ttbr1_el1 - mrs x11, tcr_el1 - mrs x12, esr_el1 - mrs x13, afsr0_el1 - mrs x14, afsr1_el1 - mrs x15, far_el1 - mrs x16, mair_el1 - mrs x17, vbar_el1 - mrs x18, contextidr_el1 - mrs x19, tpidr_el0 - mrs x20, tpidrro_el0 - mrs x21, tpidr_el1 - mrs x22, amair_el1 - mrs x23, cntkctl_el1 - mrs x24, par_el1 - mrs x25, mdscr_el1 - - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - stp x18, x19, [x3, #112] - stp x20, x21, [x3, #128] - stp x22, x23, [x3, #144] - stp x24, x25, [x3, #160] -.endm - -.macro save_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - mrs x21, \type\()15_el1 - mrs x20, \type\()14_el1 - mrs x19, \type\()13_el1 - mrs x18, \type\()12_el1 - mrs x17, \type\()11_el1 - mrs x16, \type\()10_el1 - mrs x15, \type\()9_el1 - mrs x14, \type\()8_el1 - mrs x13, \type\()7_el1 - mrs x12, \type\()6_el1 - mrs x11, \type\()5_el1 - mrs x10, \type\()4_el1 - mrs x9, \type\()3_el1 - mrs x8, \type\()2_el1 - mrs x7, \type\()1_el1 - mrs x6, \type\()0_el1 - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - str x21, [x4, #(15 * 8)] - str x20, [x4, #(14 * 8)] - str x19, [x4, #(13 * 8)] - str x18, [x4, #(12 * 8)] - str x17, [x4, #(11 * 8)] - str x16, [x4, #(10 * 8)] - str x15, [x4, #(9 * 8)] - str x14, [x4, #(8 * 8)] - str x13, [x4, #(7 * 8)] - str x12, [x4, #(6 * 8)] - str x11, [x4, #(5 * 8)] - str x10, [x4, #(4 * 8)] - str x9, [x4, #(3 * 8)] - str x8, [x4, #(2 * 8)] - str x7, [x4, #(1 * 8)] - str x6, [x4, #(0 * 8)] -.endm - -.macro restore_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - ldp x8, x9, [x3, #32] - ldp x10, x11, [x3, #48] - ldp x12, x13, [x3, #64] - ldp x14, x15, [x3, #80] - ldp x16, x17, [x3, #96] - ldp x18, x19, [x3, #112] - ldp x20, x21, [x3, #128] - ldp x22, x23, [x3, #144] - ldp x24, x25, [x3, #160] - - msr vmpidr_el2, x4 - msr csselr_el1, x5 - msr sctlr_el1, x6 - msr actlr_el1, x7 - msr cpacr_el1, x8 - msr ttbr0_el1, x9 - msr ttbr1_el1, x10 - msr tcr_el1, x11 - msr esr_el1, x12 - msr afsr0_el1, x13 - msr afsr1_el1, x14 - msr far_el1, x15 - msr mair_el1, x16 - msr vbar_el1, x17 - msr contextidr_el1, x18 - msr tpidr_el0, x19 - msr tpidrro_el0, x20 - msr tpidr_el1, x21 - msr amair_el1, x22 - msr cntkctl_el1, x23 - msr par_el1, x24 - msr mdscr_el1, x25 -.endm - -.macro restore_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - ldr x21, [x4, #(15 * 8)] - ldr x20, [x4, #(14 * 8)] - ldr x19, [x4, #(13 * 8)] - ldr x18, [x4, #(12 * 8)] - ldr x17, [x4, #(11 * 8)] - ldr x16, [x4, #(10 * 8)] - ldr x15, [x4, #(9 * 8)] - ldr x14, [x4, #(8 * 8)] - ldr x13, [x4, #(7 * 8)] - ldr x12, [x4, #(6 * 8)] - ldr x11, [x4, #(5 * 8)] - ldr x10, [x4, #(4 * 8)] - ldr x9, [x4, #(3 * 8)] - ldr x8, [x4, #(2 * 8)] - ldr x7, [x4, #(1 * 8)] - ldr x6, [x4, #(0 * 8)] - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - msr \type\()15_el1, x21 - msr \type\()14_el1, x20 - msr \type\()13_el1, x19 - msr \type\()12_el1, x18 - msr \type\()11_el1, x17 - msr \type\()10_el1, x16 - msr \type\()9_el1, x15 - msr \type\()8_el1, x14 - msr \type\()7_el1, x13 - msr \type\()6_el1, x12 - msr \type\()5_el1, x11 - msr \type\()4_el1, x10 - msr \type\()3_el1, x9 - msr \type\()2_el1, x8 - msr \type\()1_el1, x7 - msr \type\()0_el1, x6 -.endm - -.macro skip_32bit_state tmp, target - // Skip 32bit state if not needed - mrs \tmp, hcr_el2 - tbnz \tmp, #HCR_RW_SHIFT, \target -.endm - -.macro skip_tee_state tmp, target - // Skip ThumbEE state if not needed - mrs \tmp, id_pfr0_el1 - tbz \tmp, #12, \target -.endm - -.macro skip_debug_state tmp, target - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target -.endm - -/* - * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) - */ -.macro skip_fpsimd_state tmp, target - mrs \tmp, cptr_el2 - tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target -.endm - -.macro compute_debug_state target - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY - // is set, we do a full save/restore cycle and disable trapping. - add x25, x0, #VCPU_CONTEXT - - // Check the state of MDSCR_EL1 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] - and x26, x25, #DBG_MDSCR_KDE - and x25, x25, #DBG_MDSCR_MDE - adds xzr, x25, x26 - b.eq 9998f // Nothing to see there - - // If any interesting bits was set, we must set the flag - mov x26, #KVM_ARM64_DEBUG_DIRTY - str x26, [x0, #VCPU_DEBUG_FLAGS] - b 9999f // Don't skip restore - -9998: - // Otherwise load the flags from memory in case we recently - // trapped - skip_debug_state x25, \target -9999: -.endm - -.macro save_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - mrs x4, spsr_abt - mrs x5, spsr_und - mrs x6, spsr_irq - mrs x7, spsr_fiq - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - mrs x4, dacr32_el2 - mrs x5, ifsr32_el2 - stp x4, x5, [x3] - - skip_fpsimd_state x8, 2f - mrs x6, fpexc32_el2 - str x6, [x3, #16] -2: - skip_debug_state x8, 1f - mrs x7, dbgvcr32_el2 - str x7, [x3, #24] -1: -.endm - -.macro restore_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - msr spsr_abt, x4 - msr spsr_und, x5 - msr spsr_irq, x6 - msr spsr_fiq, x7 - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - ldp x4, x5, [x3] - msr dacr32_el2, x4 - msr ifsr32_el2, x5 - - skip_debug_state x8, 1f - ldr x7, [x3, #24] - msr dbgvcr32_el2, x7 -1: -.endm - -.macro activate_traps - ldr x2, [x0, #VCPU_HCR_EL2] - - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - */ - tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state - mov x3, #(1 << 30) - msr fpexc32_el2, x3 - isb -99: - msr hcr_el2, x2 - mov x2, #CPTR_EL2_TTA - orr x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - - mov x2, #(1 << 15) // Trap CP15 Cr=15 - msr hstr_el2, x2 - - // Monitor Debug Config - see kvm_arm_setup_debug() - ldr x2, [x0, #VCPU_MDCR_EL2] - msr mdcr_el2, x2 -.endm - -.macro deactivate_traps - mov x2, #HCR_RW - msr hcr_el2, x2 - msr hstr_el2, xzr - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - msr mdcr_el2, x2 -.endm - -.macro activate_vm - ldr x1, [x0, #VCPU_KVM] - kern_hyp_va x1 - ldr x2, [x1, #KVM_VTTBR] - msr vttbr_el2, x2 -.endm - -.macro deactivate_vm - msr vttbr_el2, xzr -.endm - -/* - * Call into the vgic backend for state saving - */ -.macro save_vgic_state -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __save_vgic_v2_state -alternative_else - bl __save_vgic_v3_state -alternative_endif - mrs x24, hcr_el2 - mov x25, #HCR_INT_OVERRIDE - neg x25, x25 - and x24, x24, x25 - msr hcr_el2, x24 -.endm - -/* - * Call into the vgic backend for state restoring - */ -.macro restore_vgic_state - mrs x24, hcr_el2 - ldr x25, [x0, #VCPU_IRQ_LINES] - orr x24, x24, #HCR_INT_OVERRIDE - orr x24, x24, x25 - msr hcr_el2, x24 -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __restore_vgic_v2_state -alternative_else - bl __restore_vgic_v3_state -alternative_endif -.endm - -.macro save_timer_state - // x0: vcpu pointer - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - mrs x3, cntv_ctl_el0 - and x3, x3, #3 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] - - isb - - mrs x3, cntv_cval_el0 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] - -1: - // Disable the virtual timer - msr cntv_ctl_el0, xzr - - // Allow physical timer/counter access for the host - mrs x2, cnthctl_el2 - orr x2, x2, #3 - msr cnthctl_el2, x2 - - // Clear cntvoff for the host - msr cntvoff_el2, xzr -.endm - -.macro restore_timer_state - // x0: vcpu pointer - // Disallow physical timer access for the guest - // Physical counter access is allowed - mrs x2, cnthctl_el2 - orr x2, x2, #1 - bic x2, x2, #2 - msr cnthctl_el2, x2 - - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - ldr x3, [x2, #KVM_TIMER_CNTVOFF] - msr cntvoff_el2, x3 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] - msr cntv_cval_el0, x2 - isb - - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] - and x2, x2, #3 - msr cntv_ctl_el0, x2 -1: -.endm - -__save_sysregs: - save_sysregs - ret - -__restore_sysregs: - restore_sysregs - ret - -/* Save debug state */ -__save_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - save_debug dbgbcr - add x4, x3, #DEBUG_BVR - save_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - save_debug dbgwcr - add x4, x3, #DEBUG_WVR - save_debug dbgwvr - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - ret - -/* Restore debug state */ -__restore_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - restore_debug dbgbcr - add x4, x3, #DEBUG_BVR - restore_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - restore_debug dbgwcr - add x4, x3, #DEBUG_WVR - restore_debug dbgwvr - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 - - ret - -__save_fpsimd: - skip_fpsimd_state x3, 1f - save_fpsimd -1: ret - -__restore_fpsimd: - skip_fpsimd_state x3, 1f - restore_fpsimd -1: ret - -switch_to_guest_fpsimd: - push x4, lr - - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - isb - - mrs x0, tpidr_el2 - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - bl __save_fpsimd - - add x2, x0, #VCPU_CONTEXT - bl __restore_fpsimd - - skip_32bit_state x3, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] - msr fpexc32_el2, x4 -1: - pop x4, lr - pop x2, x3 - pop x0, x1 - - eret - -/* - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); - * - * This is the world switch. The first half of the function - * deals with entering the guest, and anything from __kvm_vcpu_return - * to the end of the function deals with reentering the host. - * On the enter path, only x0 (vcpu pointer) must be preserved until - * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception - * code) must both be preserved until the epilogue. - * In both cases, x2 points to the CPU context we're saving/restoring from/to. - */ -ENTRY(__kvm_vcpu_run) - kern_hyp_va x0 - msr tpidr_el2, x0 // Save the vcpu register - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - save_host_regs - bl __save_sysregs - - compute_debug_state 1f - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __save_debug -1: - activate_traps - activate_vm - - restore_vgic_state - restore_timer_state - - // Guest context - add x2, x0, #VCPU_CONTEXT - - // We must restore the 32-bit state before the sysregs, thanks - // to Cortex-A57 erratum #852523. - restore_guest_32bit_state - bl __restore_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __restore_debug -1: - restore_guest_regs - - // That's it, no more messing around. - eret - -__kvm_vcpu_return: - // Assume x0 is the vcpu pointer, x1 the return code - // Guest's x0-x3 are on the stack - - // Guest context - add x2, x0, #VCPU_CONTEXT - - save_guest_regs - bl __save_fpsimd - bl __save_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __save_debug -1: - save_guest_32bit_state - - save_timer_state - save_vgic_state - - deactivate_traps - deactivate_vm - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - bl __restore_fpsimd - /* Clear FPSIMD and Trace trapping */ - msr cptr_el2, xzr - - skip_debug_state x3, 1f - // Clear the dirty flag for the next run, as all the state has - // already been saved. Note that we nuke the whole 64bit word. - // If we ever add more flags, we'll have to be more careful... - str xzr, [x0, #VCPU_DEBUG_FLAGS] - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __restore_debug -1: - restore_host_regs - - mov x0, x1 - ret -END(__kvm_vcpu_run) - -// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -ENTRY(__kvm_tlb_flush_vmid_ipa) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - lsr x1, x1, #12 - tlbi ipas2e1is, x1 - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb ish - tlbi vmalle1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * @struct kvm *kvm - pointer to kvm structure - * - * Invalidates all Stage 1 and 2 TLB entries for current VMID. - */ -ENTRY(__kvm_tlb_flush_vmid) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - tlbi vmalls12e1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid) - -ENTRY(__kvm_flush_vm_context) - dsb ishst - tlbi alle1is - ic ialluis - dsb ish - ret -ENDPROC(__kvm_flush_vm_context) - -__kvm_hyp_panic: - // Stash PAR_EL1 before corrupting it in __restore_sysregs - mrs x0, par_el1 - push x0, xzr - - // Guess the context by looking at VTTBR: - // If zero, then we're already a host. - // Otherwise restore a minimal host context before panicing. - mrs x0, vttbr_el2 - cbz x0, 1f - - mrs x0, tpidr_el2 - - deactivate_traps - deactivate_vm - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - - /* - * Make sure we have a valid host stack, and don't leave junk in the - * frame pointer that will give us a misleading host stack unwinding. - */ - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - msr sp_el1, x22 - mov x29, xzr - -1: adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 - mrs x1, spsr_el2 - mrs x2, elr_el2 - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - pop x6, xzr // active context PAR_EL1 - mrs x7, tpidr_el2 - - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET -ENDPROC(__kvm_hyp_panic) - -__hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" - - .align 2 /* * u64 __kvm_call_hyp(void *hypfn, ...); @@ -934,189 +33,23 @@ __hyp_panic_str: * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in x0. * - * A function pointer with a value of 0 has a special meaning, and is - * used to implement __hyp_get_vectors in the same way as in + * A function pointer with a value less than 0xfff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. + * HVC behaves as a 'bl' call and will clobber lr. */ ENTRY(__kvm_call_hyp) +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + str lr, [sp, #-16]! hvc #0 + ldr lr, [sp], #16 ret -ENDPROC(__kvm_call_hyp) - -.macro invalid_vector label, target - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic - -el1_sync: // Guest trapped into EL2 - push x0, x1 - push x2, x3 - - mrs x1, esr_el2 - lsr x2, x1, #ESR_ELx_EC_SHIFT - - cmp x2, #ESR_ELx_EC_HVC64 - b.ne el1_trap - - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC - - /* Here, we're pretty sure the host called HVC. */ - pop x2, x3 - pop x0, x1 - - /* Check for __hyp_get_vectors */ - cbnz x0, 1f - mrs x0, vbar_el2 - b 2f - -1: push lr, xzr - - /* - * Compute the function address in EL2, and shuffle the parameters. - */ - kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - - pop lr, xzr -2: eret - -el1_trap: - /* - * x1: ESR - * x2: ESR_EC - */ - - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ - cmp x2, #ESR_ELx_EC_FP_ASIMD - b.eq switch_to_guest_fpsimd - - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ -alternative_if_not ARM64_WORKAROUND_834220 - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault alternative_else - nop // Use the permission fault path to - nop // check for a valid S1 translation, - nop // regardless of the ESR value. + b __vhe_hyp_call + nop + nop + nop alternative_endif - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - push x3, xzr - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - pop x0, xzr // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - - mov x1, #ARM_EXCEPTION_TRAP - b __kvm_vcpu_return - - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. - */ -3: pop x2, x3 - pop x0, x1 - - eret - -el1_irq: - push x0, x1 - push x2, x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ - b __kvm_vcpu_return - - .ltorg - - .align 11 - -ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - - -ENTRY(__kvm_get_mdcr_el2) - mrs x0, mdcr_el2 - ret -ENDPROC(__kvm_get_mdcr_el2) - - .popsection +ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile new file mode 100644 index 000000000000..826032bc3945 --- /dev/null +++ b/arch/arm64/kvm/hyp/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c new file mode 100644 index 000000000000..2f8bca8af295 --- /dev/null +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + case 14: ptr[14] = read_debug(reg, 14); \ + case 13: ptr[13] = read_debug(reg, 13); \ + case 12: ptr[12] = read_debug(reg, 12); \ + case 11: ptr[11] = read_debug(reg, 11); \ + case 10: ptr[10] = read_debug(reg, 10); \ + case 9: ptr[9] = read_debug(reg, 9); \ + case 8: ptr[8] = read_debug(reg, 8); \ + case 7: ptr[7] = read_debug(reg, 7); \ + case 6: ptr[6] = read_debug(reg, 6); \ + case 5: ptr[5] = read_debug(reg, 5); \ + case 4: ptr[4] = read_debug(reg, 4); \ + case 3: ptr[3] = read_debug(reg, 3); \ + case 2: ptr[2] = read_debug(reg, 2); \ + case 1: ptr[1] = read_debug(reg, 1); \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + case 14: write_debug(ptr[14], reg, 14); \ + case 13: write_debug(ptr[13], reg, 13); \ + case 12: write_debug(ptr[12], reg, 12); \ + case 11: write_debug(ptr[11], reg, 11); \ + case 10: write_debug(ptr[10], reg, 10); \ + case 9: write_debug(ptr[9], reg, 9); \ + case 8: write_debug(ptr[8], reg, 8); \ + case 7: write_debug(ptr[7], reg, 7); \ + case 6: write_debug(ptr[6], reg, 6); \ + case 5: write_debug(ptr[5], reg, 5); \ + case 4: write_debug(ptr[4], reg, 4); \ + case 3: write_debug(ptr[3], reg, 3); \ + case 2: write_debug(ptr[2], reg, 2); \ + case 1: write_debug(ptr[1], reg, 1); \ + default: write_debug(ptr[0], reg, 0); \ + } + +void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +{ + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform + * a full save/restore cycle. */ + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); +} + +void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +{ + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +static u32 __hyp_text __debug_read_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} + +__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S new file mode 100644 index 000000000000..fd0fbe9b7e6a --- /dev/null +++ b/arch/arm64/kvm/hyp/entry.S @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/fpsimdmacros.h> +#include <asm/kvm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + +.macro save_callee_saved_regs ctxt + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +/* + * u64 __guest_enter(struct kvm_vcpu *vcpu, + * struct kvm_cpu_context *host_ctxt); + */ +ENTRY(__guest_enter) + // x0: vcpu + // x1: host/guest context + // x2-x18: clobbered by macros + + // Store the host regs + save_callee_saved_regs x1 + + // Preserve vcpu & host_ctxt for use at exit time + stp x0, x1, [sp, #-16]! + + add x1, x0, #VCPU_CONTEXT + + // Prepare x0-x1 for later restore by pushing them onto the stack + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x2, x3, [sp, #-16]! + + // x2-x18 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + ldr x18, [x1, #CPU_XREG_OFFSET(18)] + + // x19-x29, lr + restore_callee_saved_regs x1 + + // Last bits of the 64bit state + ldp x0, x1, [sp], #16 + + // Do not touch any register after this! + eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + // x0: vcpu + // x1: return code + // x2-x3: free + // x4-x29,lr: vcpu regs + // vcpu x0-x3 on the stack + + add x2, x0, #VCPU_CONTEXT + + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] + str x18, [x2, #CPU_XREG_OFFSET(18)] + + ldp x6, x7, [sp], #16 // x2, x3 + ldp x4, x5, [sp], #16 // x0, x1 + + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + + save_callee_saved_regs x2 + + // Restore vcpu & host_ctxt from the stack + // (preserving return code in x1) + ldp x0, x2, [sp], #16 + // Now restore the host regs + restore_callee_saved_regs x2 + + mov x0, x1 + ret +ENDPROC(__guest_exit) + +ENTRY(__fpsimd_guest_restore) + stp x4, lr, [sp, #-16]! + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x3, tpidr_el2 + + ldr x0, [x3, #VCPU_HOST_CONTEXT] + kern_hyp_va x0 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_save_state + + add x2, x3, #VCPU_CONTEXT + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_restore_state + + // Skip restoring fpexc32 for AArch64 guests + mrs x1, hcr_el2 + tbnz x1, #HCR_RW_SHIFT, 1f + ldr x4, [x3, #VCPU_FPEXC32_EL2] + msr fpexc32_el2, x4 +1: + ldp x4, lr, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 + + eret +ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm64/kvm/hyp/fpsimd.S index cf83e61cd3b5..da3f22c7f14a 100644 --- a/arch/arm64/kernel/psci-call.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -1,4 +1,7 @@ /* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -8,21 +11,23 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * Copyright (C) 2015 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/linkage.h> -/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - hvc #0 +#include <asm/fpsimdmacros.h> + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__fpsimd_save_state) + fpsimd_save x0, 1 ret -ENDPROC(__invoke_psci_fn_hvc) +ENDPROC(__fpsimd_save_state) -/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_smc) - smc #0 +ENTRY(__fpsimd_restore_state) + fpsimd_restore x0, 1 ret -ENDPROC(__invoke_psci_fn_smc) +ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S new file mode 100644 index 000000000000..44c79fd81ad1 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/cpufeature.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.text, "ax" + +.macro save_x0_to_x3 + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! +.endm + +.macro restore_x0_to_x3 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + +.macro do_el2_call + /* + * Shuffle the parameters before calling the function + * pointed to in x0. Assumes parameters in x[1,2,3]. + */ + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr +.endm + +ENTRY(__vhe_hyp_call) + str lr, [sp, #-16]! + do_el2_call + ldr lr, [sp], #16 + /* + * We used to rely on having an exception return to get + * an implicit isb. In the E2H case, we don't have it anymore. + * rather than changing all the leaf functions, just do it here + * before returning to the rest of the kernel. + */ + isb + ret +ENDPROC(__vhe_hyp_call) + +el1_sync: // Guest trapped into EL2 + save_x0_to_x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_ELx_EC_SHIFT + + cmp x2, #ESR_ELx_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + restore_x0_to_x3 + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 2f + +1: + /* + * Perform the EL2 call + */ + kern_hyp_va x0 + do_el2_call + +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq __fpsimd_guest_restore + + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 + and x2, x1, #ESR_ELx_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + stp x3, xzr, [sp, #-16]! + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __guest_exit + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: restore_x0_to_x3 + + eret + +el1_irq: + save_x0_to_x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __guest_exit + +ENTRY(__hyp_do_panic) + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret +ENDPROC(__hyp_do_panic) + +.macro invalid_vector label, target = __hyp_panic + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid + invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el2h_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + + .ltorg + + .align 11 + +ENTRY(__kvm_hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h new file mode 100644 index 000000000000..fb275178b6af --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> +#include <asm/sysreg.h> + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) +#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + + PAGE_OFFSET) + +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. + * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); + +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +static inline bool __fpsimd_enabled(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); + +#endif /* __ARM64_KVM_HYP_H__ */ + diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c new file mode 100644 index 000000000000..ca8f5a5e2f96 --- /dev/null +++ b/arch/arm64/kvm/hyp/switch.c @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + */ + val = vcpu->arch.hcr_el2; + if (!(val & HCR_RW)) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } + write_sysreg(val, hcr_el2); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + write_sysreg(HCR_RW, hcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); + write_sysreg(0, cptr_el2); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__vgic_call_save_state, + __vgic_v2_save_state, __vgic_v3_save_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static hyp_alternate_select(__vgic_call_restore_state, + __vgic_v2_restore_state, __vgic_v3_restore_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_call_save_state()(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + val = read_sysreg(hcr_el2); + val |= HCR_INT_OVERRIDE; + val |= vcpu->arch.irq_lines; + write_sysreg(val, hcr_el2); + + __vgic_call_restore_state()(vcpu); +} + +static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, tpidr_el2); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __debug_cond_save_host_state(vcpu); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to Cortex-A57 erratum #852523. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state(guest_ctxt); + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! */ + + fp_enabled = __fpsimd_enabled(); + + __sysreg_save_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state(host_ctxt); + + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __debug_cond_restore_host_state(vcpu); + + return exit_code; +} + +__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +void __hyp_text __noreturn __hyp_panic(void) +{ + unsigned long str_va = (unsigned long)__hyp_panic_string; + u64 spsr = read_sysreg(spsr_el2); + u64 elr = read_sysreg(elr_el2); + u64 par = read_sysreg(par_el1); + + if (read_sysreg(vttbr_el2)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg(far_el2), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); + + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c new file mode 100644 index 000000000000..425630980229 --- /dev/null +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* ctxt is already in the HYP VA space */ +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); +} + +void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (__fpsimd_enabled()) + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 000000000000..1051e5d7320f --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + if (kvm->arch.timer.enabled) { + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); + timer->cntv_cval = read_sysreg(cntv_cval_el0); + } + + /* Disable the virtual timer */ + write_sysreg(0, cntv_ctl_el0); + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + + /* Clear cntvoff for the host */ + write_sysreg(0, cntvoff_el2); +} + +void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + + if (kvm->arch.timer.enabled) { + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(timer->cntv_cval, cntv_cval_el0); + isb(); + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); + } +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c new file mode 100644 index 000000000000..2a7e0d838698 --- /dev/null +++ b/arch/arm64/kvm/hyp/tlb.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + asm volatile("tlbi vmalle1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, + phys_addr_t ipa); + +static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalls12e1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); + +static void __hyp_text __tlb_flush_vm_context(void) +{ + dsb(ishst); + asm volatile("tlbi alle1is \n" + "ic ialluis ": : ); + dsb(ish); +} + +__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c new file mode 100644 index 000000000000..e71761238cfc --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u32 eisr0, eisr1, elrsr0, elrsr1; + int i, nr_lr; + + if (!base) + return; + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); + eisr0 = readl_relaxed(base + GICH_EISR0); + elrsr0 = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(nr_lr > 32)) { + eisr1 = readl_relaxed(base + GICH_EISR1); + elrsr1 = readl_relaxed(base + GICH_ELRSR1); + } else { + eisr1 = elrsr1 = 0; + } +#ifdef CONFIG_CPU_BIG_ENDIAN + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; +#else + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; + cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; +#endif + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + + writel_relaxed(0, base + GICH_HCR); + + for (i = 0; i < nr_lr; i++) + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); +} + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + int i, nr_lr; + + if (!base) + return; + + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + for (i = 0; i < nr_lr; i++) + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); +} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..9142e082f5f3 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) + +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface. + */ + dsb(st); + + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (max_lr_idx) { + case 15: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); + case 14: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); + case 13: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); + case 12: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); + case 11: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); + case 10: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); + case 9: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); + case 8: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); + case 7: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); + case 6: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); + case 5: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); + case 4: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); + case 3: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); + case 2: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); + case 1: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); + case 0: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + write_gicreg(1, ICC_SRE_EL1); +} + +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC. + */ + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); + isb(); + + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (max_lr_idx) { + case 15: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); + case 14: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); + case 13: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); + case 12: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); + case 11: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); + case 10: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); + case 9: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); + case 8: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); + case 7: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); + case 6: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); + case 5: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); + case 4: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); + case 3: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); + case 2: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); + case 1: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); + case 0: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + } + + /* + * Ensures that the above will have reached the + * (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + isb(); + dsb(sy); + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + if (!cpu_if->vgic_sre) { + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } +} + +static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f34745cb3d23..d6e155a212dc 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -29,7 +29,9 @@ #include <asm/cputype.h> #include <asm/ptrace.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> /* * ARMv8 Reset Values @@ -123,3 +125,15 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } + +extern char __hyp_idmap_text_start[]; + +phys_addr_t kvm_hyp_reset_entry(void) +{ + unsigned long offset; + + offset = (unsigned long)__kvm_hyp_reset + - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK); + + return TRAMPOLINE_VA + offset; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2650e84faf2..eec3598b4184 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -29,6 +29,7 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> @@ -219,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the * hyp.S code switches between host and guest values in future. */ -static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void reg_to_dbg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { u64 val = p->regval; @@ -234,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } -static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void dbg_to_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { p->regval = *dbg_reg; if (p->is_32bit) p->regval &= 0xffffffffUL; } -static inline bool trap_bvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -279,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; } -static inline bool trap_bcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -322,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; } -static inline bool trap_wvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -365,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; } -static inline bool trap_wcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -407,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; } @@ -722,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, * system is in. */ -static inline bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_xvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S deleted file mode 100644 index 3f000712a85d..000000000000 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -ENTRY(__save_vgic_v2_state) -__save_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] -CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) -CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) -CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) -CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__save_vgic_v2_state) - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -ENTRY(__restore_vgic_v2_state) -__restore_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__restore_vgic_v2_state) - - .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S deleted file mode 100644 index 3c20730ddff5..000000000000 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic-v3.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * We store LRs in reverse order to let the CPU deal with streaming - * access. Use this macro to make it look saner... - */ -#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -.macro save_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Make sure stores to the GIC via the memory mapped interface - // are now visible to the system register interface - dsb st - - // Save all interesting registers - mrs_s x5, ICH_VMCR_EL2 - mrs_s x6, ICH_MISR_EL2 - mrs_s x7, ICH_EISR_EL2 - mrs_s x8, ICH_ELSR_EL2 - - str w5, [x3, #VGIC_V3_CPU_VMCR] - str w6, [x3, #VGIC_V3_CPU_MISR] - str w7, [x3, #VGIC_V3_CPU_EISR] - str w8, [x3, #VGIC_V3_CPU_ELRSR] - - msr_s ICH_HCR_EL2, xzr - - mrs_s x21, ICH_VTR_EL2 - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - mrs_s x20, ICH_LR15_EL2 - mrs_s x19, ICH_LR14_EL2 - mrs_s x18, ICH_LR13_EL2 - mrs_s x17, ICH_LR12_EL2 - mrs_s x16, ICH_LR11_EL2 - mrs_s x15, ICH_LR10_EL2 - mrs_s x14, ICH_LR9_EL2 - mrs_s x13, ICH_LR8_EL2 - mrs_s x12, ICH_LR7_EL2 - mrs_s x11, ICH_LR6_EL2 - mrs_s x10, ICH_LR5_EL2 - mrs_s x9, ICH_LR4_EL2 - mrs_s x8, ICH_LR3_EL2 - mrs_s x7, ICH_LR2_EL2 - mrs_s x6, ICH_LR1_EL2 - mrs_s x5, ICH_LR0_EL2 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - str x20, [x3, #LR_OFFSET(15)] - str x19, [x3, #LR_OFFSET(14)] - str x18, [x3, #LR_OFFSET(13)] - str x17, [x3, #LR_OFFSET(12)] - str x16, [x3, #LR_OFFSET(11)] - str x15, [x3, #LR_OFFSET(10)] - str x14, [x3, #LR_OFFSET(9)] - str x13, [x3, #LR_OFFSET(8)] - str x12, [x3, #LR_OFFSET(7)] - str x11, [x3, #LR_OFFSET(6)] - str x10, [x3, #LR_OFFSET(5)] - str x9, [x3, #LR_OFFSET(4)] - str x8, [x3, #LR_OFFSET(3)] - str x7, [x3, #LR_OFFSET(2)] - str x6, [x3, #LR_OFFSET(1)] - str x5, [x3, #LR_OFFSET(0)] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP0R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs_s x19, ICH_AP0R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs_s x18, ICH_AP0R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs_s x17, ICH_AP0R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP0R] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP1R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs_s x19, ICH_AP1R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs_s x18, ICH_AP1R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs_s x17, ICH_AP1R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP1R] - - // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs_s x5, ICC_SRE_EL2 - orr x5, x5, #ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 - isb - mov x5, #1 - msr_s ICC_SRE_EL1, x5 -.endm - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -.macro restore_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Restore all interesting registers - ldr w4, [x3, #VGIC_V3_CPU_HCR] - ldr w5, [x3, #VGIC_V3_CPU_VMCR] - ldr w25, [x3, #VGIC_V3_CPU_SRE] - - msr_s ICC_SRE_EL1, x25 - - // make sure SRE is valid before writing the other registers - isb - - msr_s ICH_HCR_EL2, x4 - msr_s ICH_VMCR_EL2, x5 - - mrs_s x21, ICH_VTR_EL2 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr_s ICH_AP1R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr_s ICH_AP1R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr_s ICH_AP1R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr_s ICH_AP1R0_EL2, x17 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr_s ICH_AP0R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr_s ICH_AP0R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr_s ICH_AP0R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr_s ICH_AP0R0_EL2, x17 - - and w22, w21, #0xf - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - ldr x20, [x3, #LR_OFFSET(15)] - ldr x19, [x3, #LR_OFFSET(14)] - ldr x18, [x3, #LR_OFFSET(13)] - ldr x17, [x3, #LR_OFFSET(12)] - ldr x16, [x3, #LR_OFFSET(11)] - ldr x15, [x3, #LR_OFFSET(10)] - ldr x14, [x3, #LR_OFFSET(9)] - ldr x13, [x3, #LR_OFFSET(8)] - ldr x12, [x3, #LR_OFFSET(7)] - ldr x11, [x3, #LR_OFFSET(6)] - ldr x10, [x3, #LR_OFFSET(5)] - ldr x9, [x3, #LR_OFFSET(4)] - ldr x8, [x3, #LR_OFFSET(3)] - ldr x7, [x3, #LR_OFFSET(2)] - ldr x6, [x3, #LR_OFFSET(1)] - ldr x5, [x3, #LR_OFFSET(0)] - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - msr_s ICH_LR15_EL2, x20 - msr_s ICH_LR14_EL2, x19 - msr_s ICH_LR13_EL2, x18 - msr_s ICH_LR12_EL2, x17 - msr_s ICH_LR11_EL2, x16 - msr_s ICH_LR10_EL2, x15 - msr_s ICH_LR9_EL2, x14 - msr_s ICH_LR8_EL2, x13 - msr_s ICH_LR7_EL2, x12 - msr_s ICH_LR6_EL2, x11 - msr_s ICH_LR5_EL2, x10 - msr_s ICH_LR4_EL2, x9 - msr_s ICH_LR3_EL2, x8 - msr_s ICH_LR2_EL2, x7 - msr_s ICH_LR1_EL2, x6 - msr_s ICH_LR0_EL2, x5 - - // Ensure that the above will have reached the - // (re)distributors. This ensure the guest will read - // the correct values from the memory-mapped interface. - isb - dsb sy - - // Prevent the guest from touching the GIC system registers - // if SRE isn't enabled for GICv3 emulation - cbnz x25, 1f - mrs_s x5, ICC_SRE_EL2 - and x5, x5, #~ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 -1: -.endm - -ENTRY(__save_vgic_v3_state) - save_vgic_v3_state - ret -ENDPROC(__save_vgic_v3_state) - -ENTRY(__restore_vgic_v3_state) - restore_vgic_v3_state - ret -ENDPROC(__restore_vgic_v3_state) - -ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs_s x0, ICH_VTR_EL2 - ret -ENDPROC(__vgic_v3_get_ich_vtr_el2) - - .popsection diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 5d1cad3ce6d6..08b5f18ba604 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,10 +17,10 @@ */ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> .text @@ -33,8 +33,7 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x2, x3 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -54,8 +53,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x2 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 0b90497d4424..6505ec81f1da 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -16,11 +16,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -67,12 +67,10 @@ end .req x5 ENTRY(__arch_copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index f7292dd08c84..9b04ff3ab610 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -18,11 +18,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy from user space to user space (alignment handled by the hardware) @@ -68,12 +68,10 @@ end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7a7efe255034..8077e4f34d56 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -16,11 +16,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -66,12 +66,10 @@ end .req x5 ENTRY(__arch_copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2f06997dcd59..6c2401aeb5ce 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,8 +24,6 @@ #include <asm/cpufeature.h> #include <asm/alternative.h> -#include "proc-macros.S" - /* * __flush_dcache_all() * @@ -127,7 +125,7 @@ ENTRY(__flush_cache_user_range) sub x3, x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 7275628ba59f..25128089c386 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -182,7 +182,12 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: - cpu_switch_mm(mm->pgd, mm); + /* + * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when + * emulating PAN. + */ + if (!system_uses_ttbr0_pan()) + cpu_switch_mm(mm->pgd, mm); } static int asids_init(void) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a65dec4b001b..c3efb77d1229 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -29,7 +29,9 @@ #include <linux/sched.h> #include <linux/highmem.h> #include <linux/perf_event.h> +#include <linux/preempt.h> +#include <asm/bug.h> #include <asm/cpufeature.h> #include <asm/exception.h> #include <asm/debug-monitors.h> @@ -45,6 +47,28 @@ static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. */ @@ -135,6 +159,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma, } #endif +static bool is_el1_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; +} + /* * The kernel tried to access some page that wasn't present. */ @@ -143,8 +172,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, { /* * Are we prepared to handle this kernel fault? + * We are almost certainly not prepared to handle instruction faults. */ - if (fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; /* @@ -208,8 +238,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -248,12 +276,24 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + else + return fsc_type == ESR_ELx_FSC_PERM; +} + +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, @@ -265,6 +305,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -282,7 +325,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if (((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) || ((esr & ESR_ELx_CM) && !(mm_flags & FAULT_FLAG_USER))) { @@ -290,11 +333,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { + if (addr < USER_DS && is_permission_fault(esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + if (is_el1_instruction_abort(esr)) + die("Attempting to execute userspace memory", regs, esr); + if (!search_exception_tables(regs->pc)) die("Accessing user space memory outside uaccess.h routines", regs, esr); } @@ -460,7 +506,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; @@ -486,10 +532,10 @@ static struct fault_info { { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous parity error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, @@ -639,11 +685,20 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return 0; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void *__unused) +int cpu_enable_pan(void *__unused) { + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + config_sctlr_el1(SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); + return 0; } #endif /* CONFIG_ARM64_PAN */ @@ -654,7 +709,7 @@ void cpu_enable_pan(void *__unused) * We need to enable the feature at runtime (instead of adding it to * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. */ -void cpu_enable_uao(void *__unused) +int cpu_enable_uao(void *__unused) { asm(SET_PSTATE_UAO(1)); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9b9b13ba603d..b4ef9ea679a1 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -131,7 +131,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) int pfn_valid(unsigned long pfn) { - return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_map_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif @@ -192,8 +192,12 @@ void __init arm64_memblock_init(void) */ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), ULLONG_MAX); - if (memblock_end_of_DRAM() > linear_region_size) - memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); + if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) { + /* ensure that memstart_addr remains sufficiently aligned */ + memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size, + ARM64_MEMSTART_ALIGN); + memblock_remove(0, memstart_addr); + } /* * Apply the memory limit if it was set. Since the kernel may be loaded diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c3a3deb76ae2..6c444d968323 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -444,6 +444,8 @@ static void __init map_mem(pgd_t *pgd) if (start >= end) break; + if (memblock_is_nomap(reg)) + continue; __map_memblock(pgd, start, end); } diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S deleted file mode 100644 index 984edcda1850..000000000000 --- a/arch/arm64/mm/proc-macros.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Based on arch/arm/mm/proc-macros.S - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register. - */ - .macro dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm \tmp, \tmp, #16, #19 // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register. - */ - .macro icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map - */ - .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 - ldr_l \tmpreg, idmap_t0sz - bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif - .endm - -/* - * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present - */ - .macro reset_pmuserenr_el0, tmpreg - mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx \tmpreg, \tmpreg, #8, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f - msr pmuserenr_el0, xzr // Disable PMU access from EL0 -9000: - .endm - -/* - * Macro to perform a data cache maintenance for the interval - * [kaddr, kaddr + size) - * - * op: operation passed to dc instruction - * domain: domain used in dsb instruciton - * kaddr: starting virtual address of the region - * size: size of the region - * Corrupts: kaddr, size, tmp1, tmp2 - */ - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 - dcache_line_size \tmp1, \tmp2 - add \size, \kaddr, \size - sub \tmp2, \tmp1, #1 - bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr - add \kaddr, \kaddr, \tmp1 - cmp \kaddr, \size - b.lo 9998b - dsb \domain - .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index ec968a076919..81a0de4e457d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,13 +23,11 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hwcap.h> -#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> -#include "proc-macros.S" - #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K #elif defined(CONFIG_ARM64_16K_PAGES) @@ -112,62 +110,50 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, mair_el1 - mrs x6, cpacr_el1 - mrs x7, ttbr1_el1 - mrs x8, tcr_el1 - mrs x9, vbar_el1 - mrs x10, mdscr_el1 - mrs x11, oslsr_el1 - mrs x12, sctlr_el1 + mrs x5, cpacr_el1 + mrs x6, tcr_el1 + mrs x7, vbar_el1 + mrs x8, mdscr_el1 + mrs x9, oslsr_el1 + mrs x10, sctlr_el1 stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - str x12, [x0, #80] + stp x4, xzr, [x0, #16] + stp x5, x6, [x0, #32] + stp x7, x8, [x0, #48] + stp x9, x10, [x0, #64] ret ENDPROC(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context * - * x0: Physical address of context pointer - * x1: ttbr0_el1 to be restored - * - * Returns: - * sctlr_el1 value in x0 + * x0: Address of context pointer */ ENTRY(cpu_do_resume) - /* - * Invalidate local tlb entries before turning on MMU - */ - tlbi vmalle1 ldp x2, x3, [x0] ldp x4, x5, [x0, #16] - ldp x6, x7, [x0, #32] - ldp x8, x9, [x0, #48] - ldp x10, x11, [x0, #64] - ldr x12, [x0, #80] + ldp x6, x8, [x0, #32] + ldp x9, x10, [x0, #48] + ldp x11, x12, [x0, #64] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 - msr mair_el1, x5 msr cpacr_el1, x6 - msr ttbr0_el1, x1 - msr ttbr1_el1, x7 - tcr_set_idmap_t0sz x8, x7 + + /* Don't change t0sz here, mask those bits when restoring */ + mrs x5, tcr_el1 + bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 + msr sctlr_el1, x12 /* * Restore oslsr_el1 by writing oslar_el1 */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - mov x0, x12 - dsb nsh // Make sure local tlb invalidation completed isb ret ENDPROC(cpu_do_resume) @@ -185,17 +171,8 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 - ret - nop - nop - nop -alternative_else - ic iallu - dsb nsh - isb + post_ttbr0_update_workaround ret -alternative_endif ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 8bbe9401f4f0..6d6e4af1a4bf 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,6 +49,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/uaccess.h> #include <xen/interface/xen.h> @@ -89,6 +90,24 @@ ENTRY(privcmd_call) mov x2, x3 mov x3, x4 mov x4, x5 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Privcmd calls are issued by the userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN disabled). + */ + uaccess_enable_not_uao x6, x7 +#endif hvc XEN_IMM + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Disable userspace access from kernel once the hyp call completed. + */ + uaccess_disable_not_uao x6 +#endif ret ENDPROC(privcmd_call); |
