Diffstat (limited to 'kernel')
53 files changed, 935 insertions, 186 deletions
diff --git a/kernel/async.c b/kernel/async.c index 4c3773c0bf63..f1fd155abff6 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -84,20 +84,24 @@ static atomic_t entry_count; static async_cookie_t lowest_in_progress(struct async_domain *domain) { - struct list_head *pending; + struct async_entry *first = NULL; async_cookie_t ret = ASYNC_COOKIE_MAX; unsigned long flags; spin_lock_irqsave(&async_lock, flags); - if (domain) - pending = &domain->pending; - else - pending = &async_global_pending; + if (domain) { + if (!list_empty(&domain->pending)) + first = list_first_entry(&domain->pending, + struct async_entry, domain_list); + } else { + if (!list_empty(&async_global_pending)) + first = list_first_entry(&async_global_pending, + struct async_entry, global_list); + } - if (!list_empty(pending)) - ret = list_first_entry(pending, struct async_entry, - domain_list)->cookie; + if (first) + ret = first->cookie; spin_unlock_irqrestore(&async_lock, flags); return ret; diff --git a/kernel/audit.c b/kernel/audit.c index e228b88dfd23..d8e9fba58cbc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -64,7 +64,6 @@ #include <linux/security.h> #endif #include <linux/freezer.h> -#include <linux/tty.h> #include <linux/pid_namespace.h> #include <net/netns/generic.h> @@ -1882,21 +1881,14 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; char comm[sizeof(tsk->comm)]; - char *tty; + struct tty_struct *tty; if (!ab) return; /* tsk == current */ cred = current_cred(); - - spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) - tty = tsk->signal->tty->name; - else - tty = "(none)"; - spin_unlock_irq(&tsk->sighand->siglock); - + tty = audit_get_tty(tsk); audit_log_format(ab, " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" @@ -1912,11 +1904,11 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), - tty, audit_get_sessionid(tsk)); - + tty ? tty_name(tty) : "(none)", + audit_get_sessionid(tsk)); + audit_put_tty(tty); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, get_task_comm(comm, tsk)); - audit_log_d_path_exe(ab, tsk->mm); audit_log_task_context(ab); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 63f0e495f517..34a57d57bcb1 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1976,21 +1976,26 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, { struct audit_buffer *ab; uid_t uid, oldloginuid, loginuid; + struct tty_struct *tty; if (!audit_enabled) return; + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + if (!ab) + return; + uid = from_kuid(&init_user_ns, task_uid(current)); oldloginuid = from_kuid(&init_user_ns, koldloginuid); loginuid = from_kuid(&init_user_ns, kloginuid), + tty = audit_get_tty(current); - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); - if (!ab) - return; audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid); audit_log_task_context(ab); - audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d", - oldloginuid, loginuid, oldsessionid, sessionid, !rc); + audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d", + oldloginuid, loginuid, tty ? 
tty_name(tty) : "(none)", + oldsessionid, sessionid, !rc); + audit_put_tty(tty); audit_log_end(ab); } @@ -2244,10 +2249,11 @@ int __audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = uid; security_task_getsecid(tsk, &audit_sig_sid); } - if (!audit_signals || audit_dummy_context()) - return 0; } + if (!audit_signals || audit_dummy_context()) + return 0; + /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3608fa1aec8a..0eb11b4ac4c7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -102,7 +102,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - u32 index = *(u32 *)key; + u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 34777b3746fa..a35abe048239 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -169,12 +169,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) struct hlist_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; - int i; + int i = 0; WARN_ON_ONCE(!rcu_read_lock_held()); key_size = map->key_size; + if (!key) + goto find_first_elem; + hash = htab_map_hash(key, key_size); head = select_bucket(htab, hash); @@ -182,10 +185,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) /* lookup the key */ l = lookup_elem_raw(head, hash, key, key_size); - if (!l) { - i = 0; + if (!l) goto find_first_elem; - } /* key was found, get next key in the same bucket */ next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 424accd20c2d..4b9bbfe764e8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -390,14 +390,18 @@ static int map_get_next_key(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) - goto err_put; - - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + if (ukey) { + err = -ENOMEM; + key = kmalloc(map->key_size, GFP_USER); + if (!key) + goto err_put; + + err = -EFAULT; + if (copy_from_user(key, ukey, map->key_size) != 0) + goto free_key; + } else { + key = NULL; + } err = -ENOMEM; next_key = kmalloc(map->key_size, GFP_USER); @@ -673,7 +677,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) + if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) return -EPERM; if (!access_ok(VERIFY_READ, uattr, 1)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c14003840bc5..79e3c21a35d0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1135,7 +1135,8 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].type = UNKNOWN_VALUE; regs[insn->dst_reg].map_ptr = NULL; } - } else { + } else if (BPF_CLASS(insn->code) == BPF_ALU64 || + insn->imm >= 0) { /* case: R = imm * remember the value we stored into this reg */ diff --git a/kernel/configs/android-base.config b/kernel/configs/android-base.config new file mode 100644 index 
000000000000..d70829033bb7 --- /dev/null +++ b/kernel/configs/android-base.config @@ -0,0 +1,160 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_DEVKMEM is not set +# CONFIG_DEVMEM is not set +# CONFIG_FHANDLE is not set +# CONFIG_INET_LRO is not set +# CONFIG_NFSD is not set +# CONFIG_NFS_FS is not set +# CONFIG_OABI_COMPAT is not set +# CONFIG_SYSVIPC is not set +# CONFIG_USELIB is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ARMV8_DEPRECATED=y +CONFIG_ASHMEM=y +CONFIG_AUDIT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_SCHED=y +CONFIG_CP15_BARRIER_EMULATION=y +CONFIG_DEFAULT_SECURITY_SELINUX=y +CONFIG_EMBEDDED=y +CONFIG_FB=y +CONFIG_HARDENED_USERCOPY=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_INET_ESP=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IPV6=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_TPROXY=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_KEY=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y 
+CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_NAT=y +CONFIG_NO_HZ=y +CONFIG_PACKET=y +CONFIG_PM_AUTOSLEEP=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PREEMPT=y +CONFIG_QUOTA=y +CONFIG_RANDOMIZE_BASE=y +CONFIG_RTC_CLASS=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECCOMP=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SETEND_EMULATION=y +CONFIG_STAGING=y +CONFIG_SWP_EMULATION=y +CONFIG_SYNC=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_USB_GADGET=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_USB_OTG_WAKELOCK=y +CONFIG_XFRM_USER=y diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config new file mode 100644 index 000000000000..297756be369c --- /dev/null +++ b/kernel/configs/android-recommended.config @@ -0,0 +1,125 @@ +# KEEP ALPHABETICALLY SORTED +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_PM_WAKELOCKS_GC is not set +# CONFIG_VT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_COMPACTION=y +CONFIG_DEBUG_RODATA=y +CONFIG_DM_CRYPT=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_DRAGONRISE_FF=y +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FUSE_FS=y +CONFIG_GREENASIA_FF=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WACOM=y +CONFIG_HID_WALTOP=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_GPIO=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_UINPUT=y +CONFIG_ION=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KSM=y +CONFIG_LOGIG940_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGITECH_FF=y +CONFIG_MD=y +CONFIG_MEDIA_SUPPORT=y +CONFIG_MSDOS_FS=y +CONFIG_PANIC_TIMEOUT=5 +CONFIG_PANTHERLORD_FF=y +CONFIG_PERF_EVENTS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_RUNTIME=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +CONFIG_POWER_SUPPLY=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_SCHEDSTATS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_SND=y +CONFIG_SOUND=y +CONFIG_SUSPEND_TIME=y 
+CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TIMER_STATS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_UHID=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_USBNET=y +CONFIG_VFAT_FS=y diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 9c418002b8c1..75f835d353db 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -107,14 +107,8 @@ int get_callchain_buffers(void) goto exit; } - if (count > 1) { - /* If the allocation failed, give up */ - if (!callchain_cpus_entries) - err = -ENOMEM; - goto exit; - } - - err = alloc_callchain_buffers(); + if (count == 1) + err = alloc_callchain_buffers(); exit: if (err) atomic_dec(&nr_callchain_events); diff --git a/kernel/events/core.c b/kernel/events/core.c index 322f63370038..1a73b31b5fe7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -237,7 +237,7 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(table, write, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; @@ -5387,9 +5387,6 @@ static void perf_output_read_one(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } -/* - * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. - */ static void perf_output_read_group(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) @@ -5434,6 +5431,13 @@ static void perf_output_read_group(struct perf_output_handle *handle, #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ PERF_FORMAT_TOTAL_TIME_RUNNING) +/* + * XXX PERF_SAMPLE_READ vs inherited events seems difficult. + * + * The problem is that its both hard and excessively expensive to iterate the + * child list, not to mention that its impossible to IPI the children running + * on another CPU, from interrupt/NMI context. + */ static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { @@ -8167,9 +8171,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, local64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_FORMAT_GROUP on inherited events + * We currently do not support PERF_SAMPLE_READ on inherited events. + * See perf_output_read(). */ - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) goto err_ns; if (!has_branch_stack(event)) @@ -8337,9 +8342,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, * __u16 sample size limit. 
*/ if (attr->sample_stack_user >= USHRT_MAX) - ret = -EINVAL; + return -EINVAL; else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) - ret = -EINVAL; + return -EINVAL; } if (attr->sample_type & PERF_SAMPLE_REGS_INTR) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 7da5b674d16e..d97a5430d580 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -427,16 +427,9 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify * @attr: new breakpoint attributes - * @triggered: callback to trigger when we hit the breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs */ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { - u64 old_addr = bp->attr.bp_addr; - u64 old_len = bp->attr.bp_len; - int old_type = bp->attr.bp_type; - int err = 0; - /* * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it * will not be possible to raise IPIs that invoke __perf_event_disable. @@ -451,27 +444,18 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att bp->attr.bp_addr = attr->bp_addr; bp->attr.bp_type = attr->bp_type; bp->attr.bp_len = attr->bp_len; + bp->attr.disabled = 1; - if (attr->disabled) - goto end; - - err = validate_hw_breakpoint(bp); - if (!err) - perf_event_enable(bp); + if (!attr->disabled) { + int err = validate_hw_breakpoint(bp); - if (err) { - bp->attr.bp_addr = old_addr; - bp->attr.bp_type = old_type; - bp->attr.bp_len = old_len; - if (!bp->attr.disabled) - perf_event_enable(bp); + if (err) + return err; - return err; + perf_event_enable(bp); + bp->attr.disabled = 0; } -end: - bp->attr.disabled = attr->disabled; - return 0; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 8c60a4eb4080..f4b9a369c8c3 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/circ_buf.h> #include <linux/poll.h> +#include <linux/nospec.h> #include "internal.h" @@ -781,8 +782,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) return NULL; /* AUX space */ - if (pgoff >= rb->aux_pgoff) - return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); + if (pgoff >= rb->aux_pgoff) { + int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages); + return virt_to_page(rb->aux_pages[aux_pgoff]); + } } return __perf_mmap_to_page(rb, pgoff); diff --git a/kernel/exit.c b/kernel/exit.c index 6aebd44b3a9b..f75f7cef0760 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,7 @@ #include <linux/writeback.h> #include <linux/shm.h> #include <linux/kcov.h> +#include <linux/cpufreq_times.h> #include "sched/tune.h" @@ -173,6 +174,9 @@ void release_task(struct task_struct *p) { struct task_struct *leader; int zap_leader; +#ifdef CONFIG_CPU_FREQ_TIMES + cpufreq_task_times_exit(p); +#endif repeat: /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. 
But shut RCU-lockdep up */ @@ -764,7 +768,7 @@ void do_exit(long code) disassociate_ctty(1); exit_task_namespaces(tsk); exit_task_work(tsk); - exit_thread(); + exit_thread(tsk); /* * Flush inherited counters to the parent - before the parent @@ -1626,6 +1630,10 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; + /* -INT_MIN is not defined */ + if (upid == INT_MIN) + return -ESRCH; + if (upid == -1) type = PIDTYPE_MAX; else if (upid < 0) { diff --git a/kernel/futex.c b/kernel/futex.c index a09c1dd1f659..aedb36c0fd92 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; + struct address_space *mapping; int err, ro = 0; /* @@ -555,7 +556,19 @@ again: } #endif - lock_page(page_head); + /* + * The treatment of mapping from this point on is critical. The page + * lock protects many things but in this context the page lock + * stabilizes mapping, prevents inode freeing in the shared + * file-backed region case and guards against movement to swap cache. + * + * Strictly speaking the page lock is not needed in all cases being + * considered here and page lock forces unnecessarily serialization + * From this point on, mapping will be re-verified if necessary and + * page lock will be acquired only if it is unavoidable + */ + + mapping = READ_ONCE(page_head->mapping); /* * If page_head->mapping is NULL, then it cannot be a PageAnon @@ -572,18 +585,31 @@ again: * shmem_writepage move it from filecache to swapcache beneath us: * an unlikely race, but we do need to retry for page_head->mapping. */ - if (!page_head->mapping) { - int shmem_swizzled = PageSwapCache(page_head); + if (unlikely(!mapping)) { + int shmem_swizzled; + + /* + * Page lock is required to identify which special case above + * applies. If this is really a shmem page then the page lock + * will prevent unexpected transitions. + */ + lock_page(page); + shmem_swizzled = PageSwapCache(page) || page->mapping; unlock_page(page_head); put_page(page_head); + if (shmem_swizzled) goto again; + return -EFAULT; } /* * Private mappings are handled in a simple way. * + * If the futex key is stored on an anonymous page, then the associated + * object is the mm which is implicitly pinned by the calling process. + * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to * the object not the particular process. @@ -601,16 +627,75 @@ again: key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; + + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + } else { + struct inode *inode; + + /* + * The associated futex object in this case is the inode and + * the page->mapping must be traversed. Ordinarily this should + * be stabilised under page lock but it's not strictly + * necessary in this case as we just want to pin the inode, not + * update the radix tree or anything like that. + * + * The RCU read lock is taken as the inode is finally freed + * under RCU. If the mapping still matches expectations then the + * mapping->host can be safely accessed as being a valid inode. 
+ */ + rcu_read_lock(); + + if (READ_ONCE(page_head->mapping) != mapping) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + inode = READ_ONCE(mapping->host); + if (!inode) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* + * Take a reference unless it is about to be freed. Previously + * this reference was taken by ihold under the page lock + * pinning the inode in place so i_lock was unnecessary. The + * only way for this check to fail is if the inode was + * truncated in parallel which is almost certainly an + * application bug. In such a case, just retry. + * + * We are not calling into get_futex_key_refs() in file-backed + * cases, therefore a successful atomic_inc return below will + * guarantee that get_futex_key() will still imply smp_mb(); (B). + */ + if (!atomic_inc_not_zero(&inode->i_count)) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* Should be impossible but lets be paranoid for now */ + if (WARN_ON_ONCE(inode->i_mapping != mapping)) { + err = -EFAULT; + rcu_read_unlock(); + iput(inode); + + goto out; + } + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = page_head->mapping->host; + key->shared.inode = inode; key->shared.pgoff = basepage_index(page); + rcu_read_unlock(); } - get_futex_key_refs(key); /* implies MB (B) */ - out: - unlock_page(page_head); put_page(page_head); return err; } @@ -1368,6 +1453,45 @@ out: return ret; } +static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) +{ + unsigned int op = (encoded_op & 0x70000000) >> 28; + unsigned int cmp = (encoded_op & 0x0f000000) >> 24; + int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); + int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); + int oldval, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { + if (oparg < 0 || oparg > 31) + return -EINVAL; + oparg = 1 << oparg; + } + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); + if (ret) + return ret; + + switch (cmp) { + case FUTEX_OP_CMP_EQ: + return oldval == cmparg; + case FUTEX_OP_CMP_NE: + return oldval != cmparg; + case FUTEX_OP_CMP_LT: + return oldval < cmparg; + case FUTEX_OP_CMP_GE: + return oldval >= cmparg; + case FUTEX_OP_CMP_LE: + return oldval <= cmparg; + case FUTEX_OP_CMP_GT: + return oldval > cmparg; + default: + return -ENOSYS; + } +} + /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2c2effdb4437..0eb9b064779c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -854,7 +854,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) * This code is triggered unconditionally. Check the affinity * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. 
*/ - if (desc->irq_common_data.affinity) + if (cpumask_available(desc->irq_common_data.affinity)) cpumask_copy(mask, desc->irq_common_data.affinity); else valid = false; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 695763516908..bbe9dd0886bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -125,7 +125,7 @@ static void *alloc_insn_page(void) return module_alloc(PAGE_SIZE); } -static void free_insn_page(void *page) +void __weak free_insn_page(void *page) { module_memfree(page); } diff --git a/kernel/module.c b/kernel/module.c index 71e277d72d37..a0eeedb3e5cd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2875,6 +2875,15 @@ static struct module *setup_load_info(struct load_info *info, int flags) return mod; } +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); @@ -2901,6 +2910,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/kernel/pid.c b/kernel/pid.c index b17263be9082..5fe7cdb6d05f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -322,8 +322,10 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) + if (pid_ns_prepare_proc(ns)) { + disable_pid_allocation(ns); goto out_free; + } } get_pid_ns(ns); diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c index d5760c42f042..61d41ca41844 100644 --- a/kernel/printk/braille.c +++ b/kernel/printk/braille.c @@ -2,12 +2,13 @@ #include <linux/kernel.h> #include <linux/console.h> +#include <linux/errno.h> #include <linux/string.h> #include "console_cmdline.h" #include "braille.h" -char *_braille_console_setup(char **str, char **brl_options) +int _braille_console_setup(char **str, char **brl_options) { if (!strncmp(*str, "brl,", 4)) { *brl_options = ""; @@ -15,14 +16,14 @@ char *_braille_console_setup(char **str, char **brl_options) } else if (!strncmp(*str, "brl=", 4)) { *brl_options = *str + 4; *str = strchr(*brl_options, ','); - if (!*str) + if (!*str) { pr_err("need port name after brl=\n"); - else - *((*str)++) = 0; - } else - return NULL; + return -EINVAL; + } + *((*str)++) = 0; + } - return *str; + return 0; } int diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h index 769d771145c8..749a6756843a 100644 --- a/kernel/printk/braille.h +++ b/kernel/printk/braille.h @@ -9,7 +9,14 @@ braille_set_options(struct console_cmdline *c, char *brl_options) c->brl_options = brl_options; } -char * +/* + * Setup console according to braille options. + * Return -EINVAL on syntax error, 0 on success (or no braille option was + * actually given). + * Modifies str to point to the serial options + * Sets brl_options to the parsed braille options. 
+ */ +int _braille_console_setup(char **str, char **brl_options); int @@ -25,10 +32,10 @@ braille_set_options(struct console_cmdline *c, char *brl_options) { } -static inline char * +static inline int _braille_console_setup(char **str, char **brl_options) { - return NULL; + return 0; } static inline int diff --git a/kernel/profile.c b/kernel/profile.c index 99513e1160e5..9cd8e18e6f18 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -44,7 +44,7 @@ int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); static cpumask_var_t prof_cpu_mask; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); static DEFINE_PER_CPU(int, cpu_profile_flip); static DEFINE_MUTEX(profile_flip_mutex); @@ -201,7 +201,7 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n) } EXPORT_SYMBOL_GPL(profile_event_unregister); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) /* * Each cpu has a pair of open-addressed hashtables for pending * profile hits. read_profile() IPI's all cpus to request them diff --git a/kernel/resource.c b/kernel/resource.c index c09d484f7b5f..73348f574163 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -611,7 +611,8 @@ static int __find_resource(struct resource *root, struct resource *old, alloc.start = constraint->alignf(constraint->alignf_data, &avail, size, constraint->align); alloc.end = alloc.start + size - 1; - if (resource_contains(&avail, &alloc)) { + if (alloc.start <= alloc.end && + resource_contains(&avail, &alloc)) { new->start = alloc.start; new->end = alloc.end; return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 41a8fdb1436c..c1ecb07de762 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -77,6 +77,7 @@ #include <linux/compiler.h> #include <linux/irq.h> #include <linux/sched/core_ctl.h> +#include <linux/cpufreq_times.h> #include <asm/switch_to.h> #include <asm/tlb.h> @@ -628,7 +629,8 @@ void resched_cpu(int cpu) unsigned long flags; raw_spin_lock_irqsave(&rq->lock, flags); - resched_curr(rq); + if (cpu_online(cpu) || cpu == smp_processor_id()) + resched_curr(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -2364,6 +2366,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif +#ifdef CONFIG_CPU_FREQ_TIMES + cpufreq_task_times_init(p); +#endif + RB_CLEAR_NODE(&p->dl.rb_node); init_dl_task_timer(&p->dl); __dl_clear_params(p); @@ -3262,9 +3268,24 @@ notrace unsigned long get_parent_ip(unsigned long addr) #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) +/* + * preemptoff stack tracing threshold in ns. + * default: 1ms + */ +unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000UL; + +struct preempt_store { + u64 ts; + unsigned long caddr[4]; + bool irqs_disabled; +}; + +static DEFINE_PER_CPU(struct preempt_store, the_ps); void preempt_count_add(int val) { + struct preempt_store *ps = &per_cpu(the_ps, raw_smp_processor_id()); + #ifdef CONFIG_DEBUG_PREEMPT /* * Underflow? 
@@ -3285,6 +3306,13 @@ void preempt_count_add(int val) #ifdef CONFIG_DEBUG_PREEMPT current->preempt_disable_ip = ip; #endif + ps->ts = sched_clock(); + ps->caddr[0] = CALLER_ADDR0; + ps->caddr[1] = CALLER_ADDR1; + ps->caddr[2] = CALLER_ADDR2; + ps->caddr[3] = CALLER_ADDR3; + ps->irqs_disabled = irqs_disabled(); + trace_preempt_off(CALLER_ADDR0, ip); } } @@ -3307,8 +3335,22 @@ void preempt_count_sub(int val) return; #endif - if (preempt_count() == val) + if (preempt_count() == val) { + struct preempt_store *ps = &per_cpu(the_ps, + raw_smp_processor_id()); + u64 delta = sched_clock() - ps->ts; + + /* + * Trace preempt disable stack if preemption + * is disabled for more than the threshold. + */ + if (delta > sysctl_preemptoff_tracing_threshold_ns) + trace_sched_preempt_disable(delta, ps->irqs_disabled, + ps->caddr[0], ps->caddr[1], + ps->caddr[2], ps->caddr[3]); + trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); + } __preempt_count_sub(val); } EXPORT_SYMBOL(preempt_count_sub); @@ -6631,6 +6673,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) call_rcu_sched(&old_rd->rcu, free_rootdomain); } +void sched_get_rd(struct root_domain *rd) +{ + atomic_inc(&rd->refcount); +} + +void sched_put_rd(struct root_domain *rd) +{ + if (!atomic_dec_and_test(&rd->refcount)) + return; + + call_rcu_sched(&rd->rcu, free_rootdomain); +} + static int init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 692d1f888f17..bd4ef2bb551e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,6 +4,7 @@ #include <linux/kernel_stat.h> #include <linux/static_key.h> #include <linux/context_tracking.h> +#include <linux/cpufreq_times.h> #include "sched.h" @@ -160,6 +161,11 @@ void account_user_time(struct task_struct *p, cputime_t cputime, /* Account for user time used */ acct_account_cputime(p); + +#ifdef CONFIG_CPU_FREQ_TIMES + /* Account power usage for user time */ + cpufreq_acct_update_power(p, cputime); +#endif } /* @@ -210,6 +216,10 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, /* Account for system time used */ acct_account_cputime(p); +#ifdef CONFIG_CPU_FREQ_TIMES + /* Account power usage for system time */ + cpufreq_acct_update_power(p, cputime); +#endif } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2ea3a4337dde..f962ab1eb046 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2402,7 +2402,8 @@ void task_numa_work(struct callback_head *work) return; - down_read(&mm->mmap_sem); + if (!down_read_trylock(&mm->mmap_sem)) + return; vma = find_vma(mm, start); if (!vma) { reset_ptenuma_scan(p); @@ -2641,6 +2642,7 @@ u32 sched_get_wake_up_idle(struct task_struct *p) return !!enabled; } +EXPORT_SYMBOL(sched_get_wake_up_idle); int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle) { @@ -2653,6 +2655,7 @@ int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle) return 0; } +EXPORT_SYMBOL(sched_set_wake_up_idle); static const u32 runnable_avg_yN_inv[] = { 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, @@ -6596,10 +6599,22 @@ static int sched_group_energy(struct energy_env *eenv) { struct cpumask visit_cpus; u64 total_energy = 0; + int cpu_count; WARN_ON(!eenv->sg_top->sge); cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + /* If a cpu is hotplugged in while we are in this function, + * it does not appear in the existing visit_cpus mask + * which came from the sched_group pointer of the 
+ * sched_domain pointed at by sd_ea for either the prev + * or next cpu and was dereferenced in __energy_diff. + * Since we will dereference sd_scs later as we iterate + * through the CPUs we expect to visit, new CPUs can + * be present which are not in the visit_cpus mask. + * Guard this with cpu_count. + */ + cpu_count = cpumask_weight(&visit_cpus); while (!cpumask_empty(&visit_cpus)) { struct sched_group *sg_shared_cap = NULL; @@ -6609,6 +6624,8 @@ static int sched_group_energy(struct energy_env *eenv) /* * Is the group utilization affected by cpus outside this * sched_group? + * This sd may have groups with cpus which were not present + * when we took visit_cpus. */ sd = rcu_dereference(per_cpu(sd_scs, cpu)); @@ -6658,8 +6675,24 @@ static int sched_group_energy(struct energy_env *eenv) total_energy += sg_busy_energy + sg_idle_energy; - if (!sd->child) + if (!sd->child) { + /* + * cpu_count here is the number of + * cpus we expect to visit in this + * calculation. If we race against + * hotplug, we can have extra cpus + * added to the groups we are + * iterating which do not appear in + * the visit_cpus mask. In that case + * we are not able to calculate energy + * without restarting so we will bail + * out and use prev_cpu this time. + */ + if (!cpu_count) + return -EINVAL; cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); + cpu_count--; + } if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) goto next_cpu; @@ -6671,6 +6704,9 @@ static int sched_group_energy(struct energy_env *eenv) * If we raced with hotplug and got an sd NULL-pointer; * returning a wrong energy estimation is better than * entering an infinite loop. + * Specifically: If a cpu is unplugged after we took + * the visit_cpus mask, it no longer has an sd_scs + * pointer, so when we dereference it, we get NULL. */ if (cpumask_test_cpu(cpu, &visit_cpus)) return -EINVAL; diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c index ea066ab8376b..d9f0669ff683 100644 --- a/kernel/sched/hmp.c +++ b/kernel/sched/hmp.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -2881,11 +2881,15 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event, update_task_burst(p, rq, event, runtime); update_cpu_busy_time(p, rq, event, wallclock, irqtime); update_task_pred_demand(rq, p, event); -done: + + if (exiting_task(p)) + goto done; + trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime, rq->cc.cycles, rq->cc.time, p->grp ? &rq->grp_time : NULL); +done: p->ravg.mark_start = wallclock; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index baad64b94f15..fb7f36fd6e5b 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2276,9 +2276,8 @@ static void push_rt_tasks(struct rq *rq) * the rt_loop_next will cause the iterator to perform another scan. * */ -static int rto_next_cpu(struct rq *rq) +static int rto_next_cpu(struct root_domain *rd) { - struct root_domain *rd = rq->rd; int next; int cpu; @@ -2354,19 +2353,24 @@ static void tell_cpu_to_push(struct rq *rq) * Otherwise it is finishing up and an ipi needs to be sent. 
*/ if (rq->rd->rto_cpu < 0) - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rq->rd); raw_spin_unlock(&rq->rd->rto_lock); rto_start_unlock(&rq->rd->rto_loop_start); - if (cpu >= 0) + if (cpu >= 0) { + /* Make sure the rd does not get freed while pushing */ + sched_get_rd(rq->rd); irq_work_queue_on(&rq->rd->rto_push_work, cpu); + } } /* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work) { + struct root_domain *rd = + container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu; @@ -2382,18 +2386,20 @@ void rto_push_irq_work_func(struct irq_work *work) raw_spin_unlock(&rq->lock); } - raw_spin_lock(&rq->rd->rto_lock); + raw_spin_lock(&rd->rto_lock); /* Pass the IPI to the next rt overloaded queue */ - cpu = rto_next_cpu(rq); + cpu = rto_next_cpu(rd); - raw_spin_unlock(&rq->rd->rto_lock); + raw_spin_unlock(&rd->rto_lock); - if (cpu < 0) + if (cpu < 0) { + sched_put_rd(rd); return; + } /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rq->rd->rto_push_work, cpu); + irq_work_queue_on(&rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ @@ -2594,7 +2600,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ - if (p->prio < rq->curr->prio) + if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq))) resched_curr(rq); } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6ef3a59612a9..b6cd12998f16 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -673,6 +673,8 @@ struct root_domain { }; extern struct root_domain def_root_domain; +extern void sched_get_rd(struct root_domain *rd); +extern void sched_put_rd(struct root_domain *rd); #ifdef HAVE_RT_PUSH_IPI extern void rto_push_irq_work_func(struct irq_work *work); @@ -1272,6 +1274,11 @@ static inline bool is_max_capacity_cpu(int cpu) return cpu_max_possible_capacity(cpu) == max_possible_capacity; } +static inline bool is_min_capacity_cpu(int cpu) +{ + return cpu_max_possible_capacity(cpu) == min_max_possible_capacity; +} + /* * 'load' is in reference to "best cpu" at its best frequency. * Scale that in reference to a given cpu, accounting for how bad it is diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c index ba5a326a9fd8..f03ed685f102 100644 --- a/kernel/sched/sched_avg.c +++ b/kernel/sched/sched_avg.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, 2015-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012, 2015-2017, 2018 The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -119,6 +119,43 @@ void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg, } EXPORT_SYMBOL(sched_get_nr_running_avg); +static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0); + +#define BUSY_NR_RUN 3 +#define BUSY_LOAD_FACTOR 10 + +#ifdef CONFIG_SCHED_HMP +static inline void update_last_busy_time(int cpu, bool dequeue, + unsigned long prev_nr_run, u64 curr_time) +{ + bool nr_run_trigger = false, load_trigger = false; + + if (!hmp_capable() || is_min_capacity_cpu(cpu)) + return; + + if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN) + nr_run_trigger = true; + + if (dequeue) { + u64 load; + + load = cpu_rq(cpu)->hmp_stats.cumulative_runnable_avg; + load = scale_load_to_cpu(load, cpu); + + if (load * BUSY_LOAD_FACTOR > sched_ravg_window) + load_trigger = true; + } + + if (nr_run_trigger || load_trigger) + atomic64_set(&per_cpu(last_busy_time, cpu), curr_time); +} +#else +static inline void update_last_busy_time(int cpu, bool dequeue, + unsigned long prev_nr_run, u64 curr_time) +{ +} +#endif + /** * sched_update_nr_prod * @cpu: The core id of the nr running driver. @@ -147,9 +184,16 @@ void sched_update_nr_prod(int cpu, long delta, bool inc) if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu)) per_cpu(nr_max, cpu) = per_cpu(nr, cpu); + update_last_busy_time(cpu, !inc, nr_running, curr_time); + per_cpu(nr_prod_sum, cpu) += nr_running * diff; per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff; per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff; spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags); } EXPORT_SYMBOL(sched_update_nr_prod); + +u64 sched_get_cpu_last_busy_time(int cpu) +{ + return atomic64_read(&per_cpu(last_busy_time, cpu)); +} diff --git a/kernel/signal.c b/kernel/signal.c index 4a548c6a4118..7d75bc2d042f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2495,6 +2495,13 @@ void __set_current_blocked(const sigset_t *newset) { struct task_struct *tsk = current; + /* + * In case the signal mask hasn't changed, there is nothing we need + * to do. The current->blocked shouldn't be modified by other task. 
+ */ + if (sigequalsets(&tsk->blocked, newset)) + return; + spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, newset); spin_unlock_irq(&tsk->sighand->siglock); diff --git a/kernel/softirq.c b/kernel/softirq.c index 615ba59dbc10..d69b77fc7cc1 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -234,8 +234,16 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -#define long_softirq_pending() (local_softirq_pending() & LONG_SOFTIRQ_MASK) -#define defer_for_rt() (long_softirq_pending() && cpupri_check_rt()) +#define softirq_deferred_for_rt(pending) \ +({ \ + __u32 deferred = 0; \ + if (cpupri_check_rt()) { \ + deferred = pending & LONG_SOFTIRQ_MASK; \ + pending &= ~LONG_SOFTIRQ_MASK; \ + } \ + deferred; \ +}) + asmlinkage __visible void __softirq_entry __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; @@ -243,6 +251,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) int max_restart = MAX_SOFTIRQ_RESTART; struct softirq_action *h; bool in_hardirq; + __u32 deferred; __u32 pending; int softirq_bit; @@ -254,14 +263,14 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); + deferred = softirq_deferred_for_rt(pending); account_irq_enter_time(current); - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); in_hardirq = lockdep_softirq_start(); restart: /* Reset the pending bitmask before enabling irqs */ - set_softirq_pending(0); + set_softirq_pending(deferred); __this_cpu_write(active_softirqs, pending); local_irq_enable(); @@ -297,15 +306,16 @@ restart: local_irq_disable(); pending = local_softirq_pending(); + deferred = softirq_deferred_for_rt(pending); + if (pending) { if (time_before(jiffies, end) && !need_resched() && - !defer_for_rt() && --max_restart) goto restart; - - wakeup_softirqd(); } + if (pending | deferred) + wakeup_softirqd(); lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); @@ -352,7 +362,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (!force_irqthreads && !defer_for_rt()) { + if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* * We can safely execute softirq on the current stack if diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bc4ca30ddc21..14f19af9d79a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -291,6 +291,22 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_IRQSOFF_TRACER) + { + .procname = "preemptoff_tracing_threshold_ns", + .data = &sysctl_preemptoff_tracing_threshold_ns, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "irqsoff_tracing_threshold_ns", + .data = &sysctl_irqsoff_tracing_threshold_ns, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_SCHED_HMP { .procname = "sched_freq_reporting_policy", diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 21f82c29c914..11cc757795cd 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -54,7 +54,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] 
= { +/* + * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family. + * Make sure they are always aligned. + */ +static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 0605755c246f..79fadcad21ff 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -435,6 +435,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); } +EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else static inline void debug_hrtimer_init(struct hrtimer *timer) { } @@ -1118,7 +1119,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, cpu_base = raw_cpu_ptr(&hrtimer_bases); - if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) + /* + * POSIX magic: Relative CLOCK_REALTIME timers are not affected by + * clock modifications, so they needs to become CLOCK_MONOTONIC to + * ensure POSIX compliance. + */ + if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; base = hrtimer_clockid_to_base(clock_id); diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 9cff0ab82b63..e24008c098c6 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -300,14 +300,17 @@ out: static int pc_clock_gettime(clockid_t id, struct timespec *ts) { struct posix_clock_desc cd; + struct timespec64 ts64; int err; err = get_clock_desc(id, &cd); if (err) return err; - if (cd.clk->ops.clock_gettime) - err = cd.clk->ops.clock_gettime(cd.clk, ts); + if (cd.clk->ops.clock_gettime) { + err = cd.clk->ops.clock_gettime(cd.clk, &ts64); + *ts = timespec64_to_timespec(ts64); + } else err = -EOPNOTSUPP; @@ -319,14 +322,17 @@ static int pc_clock_gettime(clockid_t id, struct timespec *ts) static int pc_clock_getres(clockid_t id, struct timespec *ts) { struct posix_clock_desc cd; + struct timespec64 ts64; int err; err = get_clock_desc(id, &cd); if (err) return err; - if (cd.clk->ops.clock_getres) - err = cd.clk->ops.clock_getres(cd.clk, ts); + if (cd.clk->ops.clock_getres) { + err = cd.clk->ops.clock_getres(cd.clk, &ts64); + *ts = timespec64_to_timespec(ts64); + } else err = -EOPNOTSUPP; @@ -337,6 +343,7 @@ static int pc_clock_getres(clockid_t id, struct timespec *ts) static int pc_clock_settime(clockid_t id, const struct timespec *ts) { + struct timespec64 ts64 = timespec_to_timespec64(*ts); struct posix_clock_desc cd; int err; @@ -350,7 +357,7 @@ static int pc_clock_settime(clockid_t id, const struct timespec *ts) } if (cd.clk->ops.clock_settime) - err = cd.clk->ops.clock_settime(cd.clk, ts); + err = cd.clk->ops.clock_settime(cd.clk, &ts64); else err = -EOPNOTSUPP; out: @@ -403,29 +410,36 @@ static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts) { clockid_t id = kit->it_clock; struct posix_clock_desc cd; + struct itimerspec64 ts64; if (get_clock_desc(id, &cd)) return; - if (cd.clk->ops.timer_gettime) - cd.clk->ops.timer_gettime(cd.clk, kit, ts); - + if (cd.clk->ops.timer_gettime) { + cd.clk->ops.timer_gettime(cd.clk, kit, &ts64); + *ts = itimerspec64_to_itimerspec(&ts64); + } put_clock_desc(&cd); } static int pc_timer_settime(struct k_itimer *kit, int flags, struct itimerspec *ts, struct itimerspec *old) { + struct itimerspec64 ts64 = itimerspec_to_itimerspec64(ts); clockid_t id = kit->it_clock; struct posix_clock_desc cd; + struct itimerspec64 old64; int err; err = get_clock_desc(id, &cd); if (err) return err; - if 
(cd.clk->ops.timer_settime) - err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old); + if (cd.clk->ops.timer_settime) { + err = cd.clk->ops.timer_settime(cd.clk, kit, flags, &ts64, &old64); + if (old) + *old = itimerspec64_to_itimerspec(&old64); + } else err = -EOPNOTSUPP; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index f2826c35e918..fc7c37ad90a0 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -507,17 +507,22 @@ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; - if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || - !same_thread_group(rtn, current) || - (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + switch (event->sigev_notify) { + case SIGEV_SIGNAL | SIGEV_THREAD_ID: + rtn = find_task_by_vpid(event->sigev_notify_thread_id); + if (!rtn || !same_thread_group(rtn, current)) + return NULL; + /* FALLTHRU */ + case SIGEV_SIGNAL: + case SIGEV_THREAD: + if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) + return NULL; + /* FALLTHRU */ + case SIGEV_NONE: + return task_pid(rtn); + default: return NULL; - - if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) - return NULL; - - return task_pid(rtn); + } } void posix_timers_register_clock(const clockid_t clock_id, @@ -745,8 +750,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) /* interval timer ? */ if (iv.tv64) cur_setting->it_interval = ktime_to_timespec(iv); - else if (!hrtimer_active(timer) && - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE) return; now = timer->base->get_time(); @@ -757,7 +761,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * expiry is > now. */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + timr->it_sigev_notify == SIGEV_NONE)) timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); remaining = __hrtimer_expires_remaining_adjusted(timer, now); @@ -767,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) * A single shot SIGEV_NONE timer must return 0, when * it is expired ! */ - if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + if (timr->it_sigev_notify != SIGEV_NONE) cur_setting->it_value.tv_nsec = 1; } else cur_setting->it_value = ktime_to_timespec(remaining); @@ -865,7 +869,7 @@ common_timer_set(struct k_itimer *timr, int flags, timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); /* SIGEV_NONE timers are not queued ! 
See common_timer_get */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { + if (timr->it_sigev_notify == SIGEV_NONE) { /* Setup correct expiry time for relative timers */ if (mode == HRTIMER_MODE_REL) { hrtimer_add_expires(timer, timer->base->get_time()); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 699aff70bbfa..1986ab6e5943 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -210,6 +210,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) update_clock_read_data(&rd); + if (sched_clock_timer.function != NULL) { + /* update timeout for clock wrap */ + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + } + r = rate; if (r >= 4000000) { r /= 1000000; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d2a20e83ebae..22d7454b387b 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -610,6 +610,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { + /* + * Required for !SMP because for_each_cpu() reports + * unconditionally CPU0 as set on UP kernels. + */ + if (!IS_ENABLED(CONFIG_SMP) && + cpumask_empty(tick_broadcast_oneshot_mask)) + break; + td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event.tv64 <= now.tv64) { cpumask_set_cpu(cpu, tmpmask); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fc86fdcce932..7902ecbce8ec 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -633,9 +633,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->ktime_sec = seconds; /* Update the monotonic raw base */ - seconds = tk->raw_sec; - nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); - tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* must hold timekeeper_lock */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 83aa1f867b97..998d730401b3 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> +#include <linux/nmi.h> #include <asm/uaccess.h> @@ -86,6 +87,9 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; + + touch_nmi_watchdog(); + raw_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = timerqueue_getnext(&base->active); @@ -197,6 +201,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) { struct clock_event_device *dev = td->evtdev; + touch_nmi_watchdog(); + SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); if (cpu < 0) SEQ_printf(m, "Broadcast device\n"); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7b6127653a37..b674a7a8d655 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -57,7 +57,8 @@ static struct tracer_flags blk_tracer_flags = { }; /* Global reference count of probes */ -static atomic_t blk_probes_ref = ATOMIC_INIT(0); +static DEFINE_MUTEX(blk_probe_mutex); +static int blk_probes_ref; static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); @@ -300,11 +301,26 @@ static void blk_trace_free(struct blk_trace *bt) kfree(bt); } +static void get_probe_ref(void) +{ + mutex_lock(&blk_probe_mutex); + if (++blk_probes_ref == 1) + blk_register_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fc0051fd672d..ac758a53fcea 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3845,7 +3845,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	func_g.type = filter_parse_regex(glob, strlen(glob),
 					 &func_g.search, &not);
 	func_g.len = strlen(func_g.search);
-	func_g.search = glob;
 
 	/* we do not support '!' for function probes */
 	if (WARN_ON(not))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9510d540b48e..05ebb4cac6b4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2739,13 +2739,14 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
 		return;
 
-	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
+	if (cpumask_available(iter->started) &&
+	    cpumask_test_cpu(iter->cpu, iter->started))
 		return;
 
 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
 		return;
 
-	if (iter->started)
+	if (cpumask_available(iter->started))
 		cpumask_set_cpu(iter->cpu, iter->started);
 
 	/* Don't print started cpu buffer for the first entry of the trace */
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f0e5408499b6..1ab2db6c127b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -322,6 +322,9 @@ static int regex_match_full(char *str, struct regex *r, int len)
 
 static int regex_match_front(char *str, struct regex *r, int len)
 {
+	if (len < r->len)
+		return 0;
+
 	if (strncmp(str, r->pattern, r->len) == 0)
 		return 1;
 	return 0;
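The trace_events_filter.c hunk above guards the prefix match with a length check, so a string shorter than the pattern can never be reported as a match. A self-contained userspace illustration follows; struct regex here is a simplified stand-in for the kernel's, and len is taken to be the length of the string under test.

    #include <stdio.h>
    #include <string.h>

    struct regex {
        const char *pattern;
        int len;                        /* length of pattern */
    };

    /* Return 1 if the first len bytes of str start with r->pattern. */
    static int regex_match_front(const char *str, const struct regex *r, int len)
    {
        if (len < r->len)               /* str shorter than pattern: cannot match */
            return 0;
        return strncmp(str, r->pattern, r->len) == 0;
    }

    int main(void)
    {
        struct regex r = { .pattern = "sched_", .len = 6 };

        printf("%d\n", regex_match_front("sched_switch", &r, 12));  /* 1 */
        printf("%d\n", regex_match_front("sch", &r, 3));            /* 0, guarded */
        return 0;
    }

Without the guard, a short input relies on the tested buffer being NUL-terminated before the pattern length is reached, which fixed-size event fields do not guarantee.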
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 21b162c07e83..c00137ea939e 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -13,6 +13,7 @@
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
+#include <linux/sched/sysctl.h>
 
 #include "trace.h"
 
@@ -39,6 +40,12 @@ static int save_flags;
 static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
 static int start_irqsoff_tracer(struct trace_array *tr, int graph);
 
+/*
+ * irqsoff stack tracing threshold in ns.
+ * default: 1ms
+ */
+unsigned int sysctl_irqsoff_tracing_threshold_ns = 1000000UL;
+
 #ifdef CONFIG_PREEMPT_TRACER
 
 static inline int preempt_trace(void)
@@ -454,17 +461,52 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
 
 #else /* !CONFIG_PROVE_LOCKING */
 
+#ifdef CONFIG_PREEMPTIRQ_EVENTS
+struct irqsoff_store {
+	u64 ts;
+	unsigned long caddr[4];
+};
+
+static DEFINE_PER_CPU(struct irqsoff_store, the_irqsoff);
+#endif /* CONFIG_PREEMPTIRQ_EVENTS */
+
 /*
  * We are only interested in hardirq on/off events:
  */
 static inline void tracer_hardirqs_on(void)
 {
+#ifdef CONFIG_PREEMPTIRQ_EVENTS
+	struct irqsoff_store *is = &per_cpu(the_irqsoff,
+					    raw_smp_processor_id());
+
+	if (!is->ts) {
+		is->ts = sched_clock();
+		is->caddr[0] = CALLER_ADDR0;
+		is->caddr[1] = CALLER_ADDR1;
+		is->caddr[2] = CALLER_ADDR2;
+		is->caddr[3] = CALLER_ADDR3;
+	}
+#endif /* CONFIG_PREEMPTIRQ_EVENTS */
 	if (!preempt_trace() && irq_trace())
 		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 
 static inline void tracer_hardirqs_off(void)
 {
+#ifdef CONFIG_PREEMPTIRQ_EVENTS
+	struct irqsoff_store *is = &per_cpu(the_irqsoff,
+					    raw_smp_processor_id());
+	u64 delta = 0;
+
+	if (is->ts) {
+		delta = sched_clock() - is->ts;
+		is->ts = 0;
+	}
+	if (delta > sysctl_irqsoff_tracing_threshold_ns)
+		trace_irqs_disable(delta, is->caddr[0], is->caddr[1],
+				   is->caddr[2], is->caddr[3]);
+#endif /* CONFIG_PREEMPTIRQ_EVENTS */
+
 	if (!preempt_trace() && irq_trace())
 		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
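The trace_irqsoff.c additions above store a per-CPU timestamp and caller addresses at one interrupt-state transition, compute the elapsed time at the opposite transition, and emit a trace event only when the delta exceeds sysctl_irqsoff_tracing_threshold_ns (1 ms by default). The same record-then-compare idea, transplanted to userspace with clock_gettime() and printf() standing in for sched_clock() and the trace event, might look like this sketch; all names here are invented, nothing is kernel API.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    #define THRESHOLD_NS 1000000ULL     /* report sections longer than 1 ms */

    static uint64_t now_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    static uint64_t section_start;

    static void section_enter(void)     /* record a timestamp on entry */
    {
        if (!section_start)
            section_start = now_ns();
    }

    static void section_exit(void)      /* compare against the threshold on exit */
    {
        uint64_t delta;

        if (!section_start)
            return;
        delta = now_ns() - section_start;
        section_start = 0;
        if (delta > THRESHOLD_NS)
            printf("long section: %llu ns\n", (unsigned long long)delta);
    }

    int main(void)
    {
        struct timespec two_ms = { .tv_sec = 0, .tv_nsec = 2000000 };

        section_enter();
        nanosleep(&two_ms, NULL);       /* pretend work inside the section */
        section_exit();                 /* exceeds 1 ms, so it is reported */
        return 0;
    }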
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e9092a0247bf..f2682799c215 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -599,7 +599,7 @@ static int create_trace_kprobe(int argc, char **argv)
 	bool is_return = false, is_delete = false;
 	char *symbol = NULL, *event = NULL, *group = NULL;
 	char *arg;
-	unsigned long offset = 0;
+	long offset = 0;
 	void *addr = NULL;
 	char buf[MAX_EVENT_NAME_LEN];
 
@@ -667,7 +667,7 @@ static int create_trace_kprobe(int argc, char **argv)
 		symbol = argv[1];
 		/* TODO: support .init module functions */
 		ret = traceprobe_split_symbol_offset(symbol, &offset);
-		if (ret) {
+		if (ret || offset < 0 || offset > UINT_MAX) {
 			pr_info("Failed to parse either an address or a symbol.\n");
 			return ret;
 		}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1769a81da8a7..741c00b90fdc 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -293,7 +293,7 @@ static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
 }
 
 /* Split symbol and offset. */
-int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
+int traceprobe_split_symbol_offset(char *symbol, long *offset)
 {
 	char *tmp;
 	int ret;
@@ -301,13 +301,11 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
 	if (!offset)
 		return -EINVAL;
 
-	tmp = strchr(symbol, '+');
+	tmp = strpbrk(symbol, "+-");
 	if (tmp) {
-		/* skip sign because kstrtoul doesn't accept '+' */
-		ret = kstrtoul(tmp + 1, 0, offset);
+		ret = kstrtol(tmp, 0, offset);
 		if (ret)
 			return ret;
-		*tmp = '\0';
 	} else
 		*offset = 0;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f6398db09114..0afe921df8c8 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -335,7 +335,7 @@ extern int traceprobe_conflict_field_name(const char *name,
 extern void traceprobe_update_arg(struct probe_arg *arg);
 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
 
-extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
+extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
 
 extern ssize_t traceprobe_probes_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos,
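With the trace_probe.c change above, the offset separator is found with strpbrk(symbol, "+-") and parsed with kstrtol(), so probe points such as symbol+0x10 and symbol-8 both work and the sign is consumed as part of the number. A rough userspace equivalent using strtol() is sketched below; split_symbol_offset() is a local helper name, not the kernel function, and the terminating '\0' cut is added here only so the demo can print the bare symbol.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /*
     * Split "name", "name+0x10" or "name-8" into a symbol and a signed offset.
     * Returns 0 on success, -1 on parse failure.
     */
    static int split_symbol_offset(char *symbol, long *offset)
    {
        char *tmp, *end;

        if (!offset)
            return -1;

        tmp = strpbrk(symbol, "+-");
        if (tmp) {
            *offset = strtol(tmp, &end, 0); /* sign is part of the number */
            if (*end != '\0')
                return -1;                  /* trailing junk */
            *tmp = '\0';                    /* cut symbol at the sign (demo only) */
        } else {
            *offset = 0;
        }
        return 0;
    }

    int main(void)
    {
        char a[] = "vfs_read+0x10", b[] = "vfs_read-8", c[] = "vfs_read";
        long off;

        split_symbol_offset(a, &off);
        printf("%s %ld\n", a, off);     /* vfs_read 16 */
        split_symbol_offset(b, &off);
        printf("%s %ld\n", b, off);     /* vfs_read -8 */
        split_symbol_offset(c, &off);
        printf("%s %ld\n", c, off);     /* vfs_read 0 */
        return 0;
    }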
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 23515a716748..64304388993e 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -149,6 +149,8 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
 		return;
 
 	ret = strncpy_from_user(dst, src, maxlen);
+	if (ret == maxlen)
+		dst[--ret] = '\0';
 
 	if (ret < 0) {	/* Failed to fetch string */
 		((u8 *)get_rloc_data(dest))[0] = '\0';
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ecd536de603a..eda85bbf1c2e 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -202,7 +202,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_add(&tp_funcs, func, prio);
 	if (IS_ERR(old)) {
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
 		return PTR_ERR(old);
 	}
 
@@ -235,7 +235,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_remove(&tp_funcs, func);
 	if (IS_ERR(old)) {
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
 		return PTR_ERR(old);
 	}
diff --git a/kernel/user.c b/kernel/user.c
index b069ccbfb0b0..41e94e453836 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
 
 /*
@@ -201,6 +202,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 		}
 		spin_unlock_irq(&uidhash_lock);
 	}
+	proc_register_uid(uid);
 
 	return up;
 
@@ -222,6 +224,7 @@ static int __init uid_cache_init(void)
 	spin_lock_irq(&uidhash_lock);
 	uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
 	spin_unlock_irq(&uidhash_lock);
+	proc_register_uid(GLOBAL_ROOT_UID);
 
 	return 0;
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a719a4ad2e74..acccc6c7437f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4070,6 +4070,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
 
 /**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+	struct worker *worker = current_wq_worker();
+
+	return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
+/**
  * current_is_workqueue_rescuer - is %current workqueue rescuer?
  *
 * Determine whether %current is a workqueue rescuer. Can be used from
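current_work(), added to workqueue.c above, lets a caller ask whether the running task is a workqueue worker and, if so, which work item it is currently executing (for example, to detect being called from inside one's own work item). A loose userspace analogue with a thread-local pointer standing in for the worker bookkeeping is sketched below; struct work_struct and run_work() are simplified stand-ins, not the kernel's.

    #include <stdio.h>
    #include <pthread.h>

    struct work_struct {
        void (*func)(struct work_struct *work);
    };

    /* Per-thread record of the work item currently being executed, if any. */
    static _Thread_local struct work_struct *current_work_item;

    static struct work_struct *current_work(void)
    {
        return current_work_item;       /* NULL when not inside a work item */
    }

    static void run_work(struct work_struct *work)
    {
        current_work_item = work;       /* what a worker thread would track */
        work->func(work);
        current_work_item = NULL;
    }

    static void my_work_fn(struct work_struct *work)
    {
        /* Inside the handler, current_work() reports the running item. */
        printf("inside work item: %s\n", current_work() == work ? "yes" : "no");
    }

    static void *worker_thread(void *arg)
    {
        run_work(arg);
        return NULL;
    }

    int main(void)
    {
        struct work_struct w = { .func = my_work_fn };
        pthread_t t;

        printf("outside work item: %p\n", (void *)current_work()); /* null */
        pthread_create(&t, NULL, worker_thread, &w);
        pthread_join(t, NULL);
        return 0;
    }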
