From b1b38278e12b04cf9a227f6af2c24651cf6e8a85 Mon Sep 17 00:00:00 2001 From: David Weinehall Date: Wed, 20 May 2015 17:00:13 +0300 Subject: drm/i915: add a context parameter to {en, dis}able zero address mapping Export a new context parameter that can be set/queried through the context_{get,set}param ioctls. This parameter is passed as a context flag and decides whether or not a GPU address mapping is allowed to be made at address zero. The default is to allow such mappings. Signed-off-by: David Weinehall Acked-by: "Zou, Nanhai" Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6e1a2ed116cb..92d61a7c942a 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1106,6 +1106,7 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 __u64 value; }; -- cgit v1.2.3 From 49e4d842f0d0892c3d26c93a81b9f22c1467030e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jun 2015 12:23:48 +0100 Subject: drm/i915: Report to userspace if we have a (presumed) working GPU reset In igt, we want to test handling of GPU hangs, both for recovery purposes and for reporting. However, we don't want to inject a genuine GPU hang onto a machine that cannot recover and so be permenantly wedged. Rather than embed heuristics into igt, have the kernel report exactly when it expects the GPU reset to work. This can also be usefully extended in future to indicate different levels of fine-grained resets. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Tim Gore Cc: Tomas Elf Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 92d61a7c942a..f88cc1cac5d9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -354,6 +354,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_REVISION 32 #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_GPU_RESET 35 typedef struct drm_i915_getparam { int param; -- cgit v1.2.3 From a9ed33ca075f712cc7fd96eb84e3d322012fcaaf Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Wed, 1 Jul 2015 10:12:23 +0300 Subject: drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag and getparam Ensures that the batch buffer is executed by the resource streamer. And will let userspace know whether Resource Streamer is supported in the kernel. v2: Don't skip 1<<15 for the exec flags (Jani Nikula) v3: Use HAS_RESOURCE_STREAMER macro for execbuf validation (Chris Wilson) (from getparam patch) v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after I915_PARAM_HAS_GPU_RESET. v3: Only advertise RS support for hardware that supports it. v4: Add HAS_RESOURCE_STREAMER() macro (Chris) Testcase: igt/gem_exec_params Cc: Jani Nikula Cc: Kenneth Graunke Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue [danvet: squash in getparam patch since it'd break bisect, suggested by Chris.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f88cc1cac5d9..e7c29f1659ad 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET 35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { int param; @@ -765,7 +766,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1 (1<<13) #define I915_EXEC_BSD_RING2 (2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From 9aee1ae3312daf0de4c9c614680d06d557133317 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 16 May 2015 15:11:40 -0300 Subject: [media] media: uapi: vsp1: Use __u32 instead of u32 Don't use the kernel types in uapi headers. Signed-off-by: Joe Perches Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/vsp1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vsp1.h b/include/uapi/linux/vsp1.h index e18858f6e865..9a823696d816 100644 --- a/include/uapi/linux/vsp1.h +++ b/include/uapi/linux/vsp1.h @@ -28,7 +28,7 @@ _IOWR('V', BASE_VIDIOC_PRIVATE + 1, struct vsp1_lut_config) struct vsp1_lut_config { - u32 lut[256]; + __u32 lut[256]; }; #endif /* __VSP1_USER_H__ */ -- cgit v1.2.3 From 1055e0687f581cf988bb2239a00d1396e18ef114 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 9 Jul 2015 01:38:42 -0700 Subject: drm/fourcc: Add formats R8, RG88, GR88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Kodi/XBMC developers want to transcode NV12 to RGB with OpenGL shaders, importing the two source planes through EGL_EXT_image_dma_buf_import. That requires importing the Y plane as an R8 EGLImage and the UV plane as either an RG88 or GR88 EGLImage. CC: Peter Frühberger Cc: Rainer Hochecker Cc: Benjamin Widawsky Reviewed-by: Pekka Paalanen Signed-off-by: Chad Versace --- include/uapi/drm/drm_fourcc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2f295cde657e..8c5e8b91a3cb 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -34,6 +34,13 @@ /* color index */ #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ +/* 8 bpp Red */ +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +/* 16 bpp RG */ +#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ + /* 8 bpp RGB */ #define DRM_FORMAT_RGB332 fourcc_code('R', 'G', 'B', '8') /* [7:0] R:G:B 3:3:2 */ #define DRM_FORMAT_BGR233 fourcc_code('B', 'G', 'R', '8') /* [7:0] B:G:R 2:3:3 */ -- cgit v1.2.3 From 74fe61f17e999a458d5f64ca2aa9a0282ca32198 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 10 Jul 2015 08:02:08 -0700 Subject: bridge: mdb: add vlan support for user entries Until now all user mdb entries were added in vlan 0, this patch adds support to allow the user to specify the vlan for the entry. About the uapi change a hole in struct br_mdb_entry is used so the size and offsets are kept the same (verified with pahole and tested with older iproute2). Example: $ bridge mdb dev br0 port eth1 grp 239.0.0.1 permanent vlan 2000 dev br0 port eth1 grp 239.0.0.1 permanent vlan 200 dev br0 port eth1 grp 239.0.0.1 permanent Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index eaaea6208b42..3635b7797508 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -182,6 +182,7 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; + __u16 vid; struct { union { __be32 ip4; -- cgit v1.2.3 From 346add7834557b5b9628b9bf2387106d42e631d4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 14 Jul 2015 18:07:30 +0200 Subject: drm/i915: Use expcitly fixed type in compat32 structs I was confused shortly whether the compat was needed for the int, until I noticed the pointer in the original. Also remove typedef. v2: Review from Chris. - Add comments. - Also change the int param in the original structure. Cc: Chris Wilson Signed-off-by: Daniel Vetter Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e7c29f1659ad..192027b4f031 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -358,7 +358,11 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { - int param; + s32 param; + /* + * WARNING: Using pointers instead of fixed-size u64 means we need to write + * compat32 code. Don't repeat this mistake. + */ int __user *value; } drm_i915_getparam_t; -- cgit v1.2.3 From 13c4a90119d28cfcb6b5bdd820c233b86c2b0237 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Sat, 13 Jun 2015 09:02:48 -0600 Subject: seccomp: add ptrace options for suspend/resume This patch is the first step in enabling checkpoint/restore of processes with seccomp enabled. One of the things CRIU does while dumping tasks is inject code into them via ptrace to collect information that is only available to the process itself. However, if we are in a seccomp mode where these processes are prohibited from making these syscalls, then what CRIU does kills the task. This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp filters to disable (and re-enable) seccomp filters for another task so that they can be successfully dumped (and restored). We restrict the set of processes that can disable seccomp through ptrace because although today ptrace can be used to bypass seccomp, there is some discussion of closing this loophole in the future and we would like this patch to not depend on that behavior and be future proofed for when it is removed. Note that seccomp can be suspended before any filters are actually installed; this behavior is useful on criu restore, so that we can suspend seccomp, restore the filters, unmap our restore code from the restored process' address space, and then resume the task by detaching and have the filters resumed as well. v2 changes: * require that the tracer have no seccomp filters installed * drop TIF_NOTSC manipulation from the patch * change from ptrace command to a ptrace option and use this ptrace option as the flag to check. This means that as soon as the tracer detaches/dies, seccomp is re-enabled and as a corrollary that one can not disable seccomp across PTRACE_ATTACHs. v3 changes: * get rid of various #ifdefs everywhere * report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly used v4 changes: * get rid of may_suspend_seccomp() in favor of a capable() check in ptrace directly v5 changes: * check that seccomp is not enabled (or suspended) on the tracer Signed-off-by: Tycho Andersen CC: Will Drewry CC: Roland McGrath CC: Pavel Emelyanov CC: Serge E. Hallyn Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski [kees: access seccomp.mode through seccomp_mode() instead] Signed-off-by: Kees Cook --- include/uapi/linux/ptrace.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index cf1019e15f5b..a7a697986614 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -89,9 +89,11 @@ struct ptrace_peeksiginfo_args { #define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP) /* eventless options */ -#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_SUSPEND_SECCOMP (1 << 21) -#define PTRACE_O_MASK (0x000000ff | PTRACE_O_EXITKILL) +#define PTRACE_O_MASK (\ + 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP) #include -- cgit v1.2.3 From 88d6378bd6c096cb8440face3ae3f33d55a2e6e4 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 14 Jul 2015 13:43:20 -0700 Subject: netlink: changes for setting and clearing protodown via netlink. Signed-off-by: Anuradha Karuppiah Signed-off-by: Andy Gospodarek Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2c7e8e3d3981..24d68b797c59 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -148,6 +148,7 @@ enum { IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, IFLA_PHYS_PORT_NAME, + IFLA_PROTO_DOWN, __IFLA_MAX }; -- cgit v1.2.3 From d32d98642de66048f9534a05f3641558e811bbc9 Mon Sep 17 00:00:00 2001 From: Mats Randgaard Date: Thu, 9 Jul 2015 05:45:47 -0300 Subject: [media] Driver for Toshiba TC358743 HDMI to CSI-2 bridge The driver is tested on our hardware and all the implemented features works as expected. Missing features: - CEC support - HDCP repeater support - IR support Signed-off-by: Mats Randgaard [hans.verkuil@cisco.com: updated copyright year to 2015] [hans.verkuil@cisco.com: update confusing confctl_mutex comment] Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 9f6e108ff4a0..d448c536b49d 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -174,6 +174,10 @@ enum v4l2_colorfx { * We reserve 16 controls for this driver. */ #define V4L2_CID_USER_ADV7180_BASE (V4L2_CID_USER_BASE + 0x1070) +/* The base for the tc358743 driver controls. + * We reserve 16 controls for this driver. */ +#define V4L2_CID_USER_TC358743_BASE (V4L2_CID_USER_BASE + 0x1080) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From eff3cddc222c88943ff515ae9335687c9e2cbaf6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 22 Apr 2015 14:40:30 -0700 Subject: clarify implementation of ethtool's get_ts_info op This patch adds some clarification about the intended way to implement both SIOCSHWTSTAMP and ethtool's get_ts_info. The HWTSTAMP API has several Rx filters which are very specific, as well as more general filters. The specific filters really only exist to support some broken hardware which can't fully implement the generic filters. This patch adds clarification that it is okay to support the specific filters in SIOCSHWTSTAMP by upscaling them to the generic filters. In addition, update the header for ethtool_ts_info to specify that drivers ought to only report the filters they support without upscaling in this manner. Signed-off-by: Jacob Keller Acked-by: Richard Cochran Tested-by: Phil Schmitt Reviewed-by: Aaron Brown Signed-off-by: Jeff Kirsher --- include/uapi/linux/ethtool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index cd67aec187d9..cd1629170103 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1093,6 +1093,11 @@ struct ethtool_sfeatures { * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, * respectively. For example, if the device supports HWTSTAMP_TX_ON, * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set. + * + * Drivers should only report the filters they actually support without + * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for + * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the + * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op. */ struct ethtool_ts_info { __u32 cmd; -- cgit v1.2.3 From 7b8f4586532f36c5541a15d072576e7e89a5df75 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 3 Jul 2015 19:39:02 +0800 Subject: nfsd: Add macro NFS_ACL_MASK for ACL Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/uapi/linux/nfsacl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfsacl.h b/include/uapi/linux/nfsacl.h index 9bb9771a107f..552726631162 100644 --- a/include/uapi/linux/nfsacl.h +++ b/include/uapi/linux/nfsacl.h @@ -22,6 +22,7 @@ #define NFS_ACLCNT 0x0002 #define NFS_DFACL 0x0004 #define NFS_DFACLCNT 0x0008 +#define NFS_ACL_MASK 0x000f /* Flag for Default ACL entries */ #define NFS_ACL_DEFAULT 0x1000 -- cgit v1.2.3 From 8d20aabe1c76cccac544d9fcc3ad7823d9e98a2d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Jul 2015 14:21:42 +0200 Subject: ebpf: add helper to retrieve net_cls's classid cookie It would be very useful to retrieve the net_cls's classid from an eBPF program to allow for a more fine-grained classification, it could be directly used or in conjunction with additional policies. I.e. docker, but also tooling such as cgexec, can easily run applications via net_cls cgroups: cgcreate -g net_cls:/foo echo 42 > foo/net_cls.classid cgexec -g net_cls:foo Thus, their respecitve classid cookie of foo can then be looked up on the egress path to apply further policies. The helper is desigend such that a non-zero value returns the cgroup id. Signed-off-by: Daniel Borkmann Cc: Thomas Graf Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 29ef6f99e43d..2de87e58b12b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -249,6 +249,13 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_get_current_comm, + + /** + * bpf_get_cgroup_classid(skb) - retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + */ + BPF_FUNC_get_cgroup_classid, __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 4e10df9a60d96ced321dd2af71da558c6b750078 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 20 Jul 2015 20:34:18 -0700 Subject: bpf: introduce bpf_skb_vlan_push/pop() helpers Allow eBPF programs attached to TC qdiscs call skb_vlan_push/pop via helper functions. These functions may change skb->data/hlen which are cached by some JITs to improve performance of ld_abs/ld_ind instructions. Therefore JITs need to recognize bpf_skb_vlan_push/pop() calls, re-compute header len and re-cache skb->data/hlen back into cpu registers. Note, skb->data/hlen are not directly accessible from the programs, so any changes to skb->data done either by these helpers or by other TC actions are safe. eBPF JIT supported by three architectures: - arm64 JIT is using bpf_load_pointer() without caching, so it's ok as-is. - x64 JIT re-caches skb->data/hlen unconditionally after vlan_push/pop calls (experiments showed that conditional re-caching is slower). - s390 JIT falls back to interpreter for now when bpf_skb_vlan_push() is present in the program (re-caching is tbd). These helpers allow more scalable handling of vlan from the programs. Instead of creating thousands of vlan netdevs on top of eth0 and attaching TC+ingress+bpf to all of them, the program can be attached to eth0 directly and manipulate vlans as necessary. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2de87e58b12b..2f6c83d714e9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -256,6 +256,8 @@ enum bpf_func_id { * Return: classid if != 0 */ BPF_FUNC_get_cgroup_classid, + BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ + BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 8ab30c1538b14424015e45063c41d509b24c1dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 7 Jul 2015 17:29:53 +0100 Subject: KVM: add comments for kvm_debug_exit_arch struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring into line with the comments for the other structures and their KVM_EXIT_* cases. Also update api.txt to reflect use in kvm_run documentation. Signed-off-by: Alex Bennée Reviewed-by: David Hildenbrand Reviewed-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 716ad4ae4d4b..4ab3c6a8d563 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -237,6 +237,7 @@ struct kvm_run { __u32 count; __u64 data_offset; /* relative to kvm_run start */ } io; + /* KVM_EXIT_DEBUG */ struct { struct kvm_debug_exit_arch arch; } debug; @@ -285,6 +286,7 @@ struct kvm_run { __u32 data; __u8 is_write; } dcr; + /* KVM_EXIT_INTERNAL_ERROR */ struct { __u32 suberror; /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ @@ -295,6 +297,7 @@ struct kvm_run { struct { __u64 gprs[32]; } osi; + /* KVM_EXIT_PAPR_HCALL */ struct { __u64 nr; __u64 ret; -- cgit v1.2.3 From 5540546bc93b49f98a0466fe3f96615286c76574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 7 Jul 2015 17:30:01 +0100 Subject: KVM: arm64: guest debug, HW assisted debug support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for userspace to control the HW debug registers for guest debug. In the debug ioctl we copy an IMPDEF registers into a new register set called host_debug_state. We use the recently introduced vcpu parameter debug_ptr to select which register set is copied into the real registers when world switch occurs. I've made some helper functions from hw_breakpoint.c more widely available for re-use. As with single step we need to tweak the guest registers to enable the exceptions so we need to save and restore those bits. Two new capabilities have been added to the KVM_EXTENSION ioctl to allow userspace to query the number of hardware break and watch points available on the host hardware. Signed-off-by: Alex Bennée Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4ab3c6a8d563..a1e08e7bbf20 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -820,6 +820,8 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 #define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_GUEST_DEBUG_HW_BPS 119 +#define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a0d9a8604f29ee3340126ec3f90c9421f930aa50 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 21 Jul 2015 10:43:45 +0200 Subject: rtnetlink: introduce new RTA_ENCAP_TYPE and RTA_ENCAP attributes This patch introduces two new RTA attributes to attach encap data to fib routes. Example iproute2 command to attach mpls encap data to ipv4 routes $ip route add 10.1.1.0/30 encap mpls 200 via inet 10.1.1.1 dev swp1 Signed-off-by: Roopa Prabhu Suggested-by: Eric W. Biederman Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fdd8f07f1d34..0d3d3cc43356 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -308,6 +308,8 @@ enum rtattr_type_t { RTA_VIA, RTA_NEWDST, RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, __RTA_MAX }; -- cgit v1.2.3 From 499a24256862714539e902c0499b67da2bb3ab72 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 21 Jul 2015 10:43:46 +0200 Subject: lwtunnel: infrastructure for handling light weight tunnels like mpls Provides infrastructure to parse/dump/store encap information for light weight tunnels like mpls. Encap information for such tunnels is associated with fib routes. This infrastructure is based on previous suggestions from Eric Biederman to follow the xfrm infrastructure. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/uapi/linux/lwtunnel.h (limited to 'include/uapi') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h new file mode 100644 index 000000000000..aa611d931a31 --- /dev/null +++ b/include/uapi/linux/lwtunnel.h @@ -0,0 +1,15 @@ +#ifndef _UAPI_LWTUNNEL_H_ +#define _UAPI_LWTUNNEL_H_ + +#include + +enum lwtunnel_encap_types { + LWTUNNEL_ENCAP_NONE, + LWTUNNEL_ENCAP_MPLS, + __LWTUNNEL_ENCAP_MAX, +}; + +#define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1) + + +#endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From e3e4712ec0961ed586a8db340bd994c4ad7f5dba Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 21 Jul 2015 10:43:53 +0200 Subject: mpls: ip tunnel support This implementation uses lwtunnel infrastructure to register hooks for mpls tunnel encaps. It picks cues from iptunnel_encaps infrastructure and previous mpls iptunnel RFC patches from Eric W. Biederman and Robert Shearman Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/mpls_iptunnel.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/uapi/linux/mpls_iptunnel.h (limited to 'include/uapi') diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h new file mode 100644 index 000000000000..d80a0498f77e --- /dev/null +++ b/include/uapi/linux/mpls_iptunnel.h @@ -0,0 +1,28 @@ +/* + * mpls tunnel api + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_MPLS_IPTUNNEL_H +#define _UAPI_LINUX_MPLS_IPTUNNEL_H + +/* MPLS tunnel attributes + * [RTA_ENCAP] = { + * [MPLS_IPTUNNEL_DST] + * } + */ +enum { + MPLS_IPTUNNEL_UNSPEC, + MPLS_IPTUNNEL_DST, + __MPLS_IPTUNNEL_MAX, +}; +#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_MPLS_IPTUNNEL_H */ -- cgit v1.2.3 From 1d8fff907342d2339796dbd27ea47d0e76a6a2d0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:43:54 +0200 Subject: ip_tunnel: Make ovs_tunnel_info and ovs_key_ipv4_tunnel generic Rename the tunnel metadata data structures currently internal to OVS and make them generic for use by all IP tunnels. Both structures are kernel internal and will stay that way. Their members are exposed to user space through individual Netlink attributes by OVS. It will therefore be possible to extend/modify these structures without affecting user ABI. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 1dab77601c21..d6b885460187 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -321,7 +321,7 @@ enum ovs_key_attr { * the accepted length of the array. */ #ifdef __KERNEL__ - OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ #endif __OVS_KEY_ATTR_MAX }; -- cgit v1.2.3 From ee122c79d4227f6ec642157834b6a90fcffa4382 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:43:58 +0200 Subject: vxlan: Flow based tunneling Allows putting a VXLAN device into a new flow-based mode in which skbs with a ip_tunnel_info dst metadata attached will be encapsulated according to the instructions stored in there with the VXLAN device defaults taken into consideration. Similar on the receive side, if the VXLAN_F_COLLECT_METADATA flag is set, the packet processing will populate a ip_tunnel_info struct for each packet received and attach it to the skb using the new metadata dst. The metadata structure will contain the outer header and tunnel header fields which have been stripped off. Layers further up in the stack such as routing, tc or netfitler can later match on these fields and perform forwarding. It is the responsibility of upper layers to ensure that the flag is set if the metadata is needed. The flag limits the additional cost of metadata collecting based on demand. This prepares the VXLAN device to be steered by the routing and other subsystems which allows to support encapsulation for a large number of tunnel endpoints and tunnel ids through a single net_device which improves the scalability. It also allows for OVS to leverage this mode which in turn allows for the removal of the OVS specific VXLAN code. Because the skb is currently scrubed in vxlan_rcv(), the attachment of the new dst metadata is postponed until after scrubing which requires the temporary addition of a new member to vxlan_metadata. This member is removed again in a later commit after the indirect VXLAN receive API has been removed. Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 24d68b797c59..9eeb5d9cf8f0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -382,6 +382,7 @@ enum { IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, + IFLA_VXLAN_FLOWBASED, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From 3093fbe7ff4bc7d1571fc217dade1cf80330a714 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:44:00 +0200 Subject: route: Per route IP tunnel metadata via lightweight tunnel This introduces a new IP tunnel lightweight tunnel type which allows to specify IP tunnel instructions per route. Only IPv4 is supported at this point. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/rtnetlink.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index aa611d931a31..31377bbea3f8 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -6,6 +6,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_NONE, LWTUNNEL_ENCAP_MPLS, + LWTUNNEL_ENCAP_IP, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0d3d3cc43356..47d24cb3fbc1 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -286,6 +286,21 @@ enum rt_class_t { /* Routing message attributes */ +enum ip_tunnel_t { + IP_TUN_UNSPEC, + IP_TUN_ID, + IP_TUN_DST, + IP_TUN_SRC, + IP_TUN_TTL, + IP_TUN_TOS, + IP_TUN_SPORT, + IP_TUN_DPORT, + IP_TUN_FLAGS, + __IP_TUN_MAX, +}; + +#define IP_TUN_MAX (__IP_TUN_MAX - 1) + enum rtattr_type_t { RTA_UNSPEC, RTA_DST, -- cgit v1.2.3 From e7030878fc8448492b6e5cecd574043f63271298 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:44:01 +0200 Subject: fib: Add fib rule match on tunnel id This add the ability to select a routing table based on the tunnel id which allows to maintain separate routing tables for each virtual tunnel network. ip rule add from all tunnel-id 100 lookup 100 ip rule add from all tunnel-id 200 lookup 200 A new static key controls the collection of metadata at tunnel level upon demand. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/fib_rules.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 2b82d7e30974..96161b8202b5 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -43,7 +43,7 @@ enum { FRA_UNUSED5, FRA_FWMARK, /* mark */ FRA_FLOW, /* flow/class id */ - FRA_UNUSED6, + FRA_TUN_ID, FRA_SUPPRESS_IFGROUP, FRA_SUPPRESS_PREFIXLEN, FRA_TABLE, /* Extended table id */ -- cgit v1.2.3 From b56ea2985d389a3676638203323ebe22c261b7fe Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Tue, 21 Jul 2015 16:14:13 -0700 Subject: net: track success and failure of TCP PMTU probing Track success and failure of TCP PMTU probing. Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index eee8968407f0..25a9ad8bcef1 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -278,6 +278,8 @@ enum LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */ LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ + LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */ + LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From 3985e8a3611a93bb36789f65db862e5700aab65e Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Wed, 22 Jul 2015 16:38:25 +0900 Subject: ipv6: sysctl to restrict candidate source addresses Per RFC 6724, section 4, "Candidate Source Addresses": It is RECOMMENDED that the candidate source addresses be the set of unicast addresses assigned to the interface that will be used to send to the destination (the "outgoing" interface). Add a sysctl to enable this behaviour. Signed-off-by: Erik Kline Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5efa54ae567c..641a146ead7d 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -171,6 +171,7 @@ enum { DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, + DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_MAX }; -- cgit v1.2.3 From 2ce7918990641b07e70e1b25752d666369e2016e Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 3 Jul 2015 15:01:41 +0300 Subject: kvm/x86: add sending hyper-v crash notification to user space Sending of notification is done by exiting vcpu to user space if KVM_REQ_HV_CRASH is enabled for vcpu. At exit to user space the kvm_run structure contains system_event with type KVM_SYSTEM_EVENT_CRASH to notify about guest crash occurred. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 716ad4ae4d4b..9ef19ebd9df4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -317,6 +317,7 @@ struct kvm_run { struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; __u64 flags; } system_event; -- cgit v1.2.3 From 45c6df4471edf5404463756df1014fe7e36db3d3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 23 Jun 2015 19:09:59 +0200 Subject: tty: linux/gsmmux.h needs linux/types.h We use __u8 in linux/gsmmux.h, so include linux/types.h to have that defined. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/gsmmux.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/gsmmux.h b/include/uapi/linux/gsmmux.h index c06742d52856..ab055d8cddef 100644 --- a/include/uapi/linux/gsmmux.h +++ b/include/uapi/linux/gsmmux.h @@ -3,6 +3,7 @@ #include #include +#include struct gsm_config { -- cgit v1.2.3 From 45ac1403f564f411c6a383a2448688ba8dd705a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Jul 2015 12:44:02 +0300 Subject: perf: Add PERF_RECORD_SWITCH to indicate context switches There are already two events for context switches, namely the tracepoint sched:sched_switch and the software event context_switches. Unfortunately neither are suitable for use by non-privileged users for the purpose of synchronizing hardware trace data (e.g. Intel PT) to the context switch. Tracepoints are no good at all for non-privileged users because they need either CAP_SYS_ADMIN or /proc/sys/kernel/perf_event_paranoid <= -1. On the other hand, kernel software events need either CAP_SYS_ADMIN or /proc/sys/kernel/perf_event_paranoid <= 1. Now many distributions do default perf_event_paranoid to 1 making context_switches a contender, except it has another problem (which is also shared with sched:sched_switch) which is that it happens before perf schedules events out instead of after perf schedules events in. Whereas a privileged user can see all the events anyway, a non-privileged user only sees events for their own processes, in other words they see when their process was scheduled out not when it was scheduled in. That presents two problems to use the event: 1. the information comes too late, so tools have to look ahead in the event stream to find out what the current state is 2. if they are unlucky tracing might have stopped before the context-switches event is recorded. This new PERF_RECORD_SWITCH event does not have those problems and it also has a couple of other small advantages. It is easier to use because it is an auxiliary event (like mmap, comm and task events) which can be enabled by setting a single bit. It is smaller than sched:sched_switch and easier to parse. To make the event useful for privileged users also, if the context is cpu-wide then the event record will be PERF_RECORD_SWITCH_CPU_WIDE which is the same as PERF_RECORD_SWITCH except it also provides the next or previous pid/tid. Signed-off-by: Adrian Hunter Acked-by: Peter Zijlstra (Intel) Tested-by: Jiri Olsa Cc: Andi Kleen Cc: Mathieu Poirier Cc: Pawel Moll Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1437471846-26995-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d97f84c080da..022d0acf7df0 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -330,7 +330,8 @@ struct perf_event_attr { mmap2 : 1, /* include mmap with inode data */ comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ - __reserved_1 : 38; + context_switch : 1, /* context switch data */ + __reserved_1 : 37; union { __u32 wakeup_events; /* wakeup every n events */ @@ -572,9 +573,11 @@ struct perf_event_mmap_page { /* * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on * different events so can reuse the same bit position. + * Ditto PERF_RECORD_MISC_SWITCH_OUT. */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also @@ -818,6 +821,32 @@ enum perf_event_type { */ PERF_RECORD_LOST_SAMPLES = 13, + /* + * Records a context switch in or out (flagged by + * PERF_RECORD_MISC_SWITCH_OUT). See also + * PERF_RECORD_SWITCH_CPU_WIDE. + * + * struct { + * struct perf_event_header header; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH = 14, + + /* + * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and + * next_prev_tid that are the next (switching out) or previous + * (switching in) pid/tid. + * + * struct { + * struct perf_event_header header; + * u32 next_prev_pid; + * u32 next_prev_tid; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH_CPU_WIDE = 15, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3 From fc5462f8525b47fa219452289ecb22c921c16823 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 22 Jul 2015 18:09:11 -0600 Subject: toshiba_acpi: Add /dev/toshiba_acpi device There were previous attempts to "merge" the toshiba SMM module to the toshiba_acpi one, they were trying to imitate what the old toshiba module does, however, some models (TOS1900 devices) come with a "crippled" implementation and do not provide all the "features" a "genuine" Toshiba BIOS does. This patch adds a new device called toshiba_acpi, which aim is to enable userspace to access the SMM on Toshiba laptops via ACPI calls. Creating a new convenience _IOWR command to access the SCI functions by opening/closing the SCI internally to avoid buggy BIOS, while at the same time providing backwards compatibility. Older programs (and new) who wish to access the SMM on newer models can do it by pointing their path to /dev/toshiba_acpi (instead of /dev/toshiba) as the toshiba.h header was modified to reflect these changes as well as adds all the toshiba_acpi paths and command, however, it is strongly recommended to use the new IOCTL for any SCI command to avoid any buggy BIOS. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- include/uapi/linux/toshiba.h | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/toshiba.h b/include/uapi/linux/toshiba.h index e9bef5b2f91e..c58bf4b5bb26 100644 --- a/include/uapi/linux/toshiba.h +++ b/include/uapi/linux/toshiba.h @@ -1,6 +1,7 @@ /* toshiba.h -- Linux driver for accessing the SMM on Toshiba laptops * * Copyright (c) 1996-2000 Jonathan A. Buzzard (jonathan@buzzard.org.uk) + * Copyright (c) 2015 Azael Avalos * * Thanks to Juergen Heinzl for the pointers * on making sure the structure is aligned and packed. @@ -20,9 +21,18 @@ #ifndef _UAPI_LINUX_TOSHIBA_H #define _UAPI_LINUX_TOSHIBA_H -#define TOSH_PROC "/proc/toshiba" -#define TOSH_DEVICE "/dev/toshiba" -#define TOSH_SMM _IOWR('t', 0x90, int) /* broken: meant 24 bytes */ +/* + * Toshiba modules paths + */ + +#define TOSH_PROC "/proc/toshiba" +#define TOSH_DEVICE "/dev/toshiba" +#define TOSHIBA_ACPI_PROC "/proc/acpi/toshiba" +#define TOSHIBA_ACPI_DEVICE "/dev/toshiba_acpi" + +/* + * Toshiba SMM structure + */ typedef struct { unsigned int eax; @@ -33,5 +43,21 @@ typedef struct { unsigned int edi __attribute__ ((packed)); } SMMRegisters; +/* + * IOCTLs (0x90 - 0x91) + */ + +#define TOSH_SMM _IOWR('t', 0x90, SMMRegisters) +/* + * Convenience toshiba_acpi command. + * + * The System Configuration Interface (SCI) is opened/closed internally + * to avoid userspace of buggy BIOSes. + * + * The toshiba_acpi module checks whether the eax register is set with + * SCI_GET (0xf300) or SCI_SET (0xf400), returning -EINVAL if not. + */ +#define TOSHIBA_ACPI_SCI _IOWR('t', 0x91, SMMRegisters) + #endif /* _UAPI_LINUX_TOSHIBA_H */ -- cgit v1.2.3 From e0910bace663b78c026b73bbd711a24ccf410531 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Jul 2015 15:43:56 +0200 Subject: lwtunnel: export linux/lwtunnel.h to userspace Note also that include/linux/lwtunnel.h is not needed. CC: Thomas Graf CC: Roopa Prabhu Fixes: 499a24256862 ("lwtunnel: infrastructure for handling light weight tunnels like mpls") Signed-off-by: Nicolas Dichtel Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1ff9942718fe..aafb9937b162 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -243,6 +243,7 @@ header-y += limits.h header-y += llc.h header-y += loop.h header-y += lp.h +header-y += lwtunnel.h header-y += magic.h header-y += major.h header-y += map_to_7segment.h -- cgit v1.2.3 From ec92777f2ba93c00387b8fe53780c25adc57c744 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 9 Jul 2015 13:25:35 -0600 Subject: libnvdimm: Update name of the ars_status_record mask field The spec suggests that this is a simple 'length' field, not a mask. Update the name accordingly. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 2b94ea2287bb..e94bc20016b2 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -87,7 +87,7 @@ struct nd_cmd_ars_status { __u32 handle; __u32 flags; __u64 err_address; - __u64 mask; + __u64 length; } __packed records[0]; } __packed; -- cgit v1.2.3 From 39c686b862cdb2049b90e095b6c6c727b2a7ab60 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 9 Jul 2015 13:25:36 -0600 Subject: libnvdimm: Add DSM support for Address Range Scrub commands Add support for the three ARS DSM commands: - Query ARS Capabilities - Queries the firmware to check if a given range supports scrub, and if so, which type (persistent vs. volatile) - Start ARS - Starts a scrub for a given range/type - Query ARS Status - Checks status of a previously started scrub, and provides the error logs if any. The commands are described by the example DSM spec at: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Also add these commands to the nfit_test test framework, and return canned data. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e94bc20016b2..5b4a4be06e2b 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -111,6 +111,11 @@ enum { ND_CMD_VENDOR = 9, }; +enum { + ND_ARS_VOLATILE = 1, + ND_ARS_PERSISTENT = 2, +}; + static inline const char *nvdimm_bus_cmd_name(unsigned cmd) { static const char * const names[] = { @@ -194,4 +199,9 @@ enum nd_driver_flags { enum { ND_MIN_NAMESPACE_SIZE = 0x00400000, }; + +enum ars_masks { + ARS_STATUS_MASK = 0x0000FFFF, + ARS_EXT_STATUS_SHIFT = 16, +}; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From a37281b63681015b12c3b7322e6bd681c0ea1ef4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Nov 2014 13:45:08 +0100 Subject: KVM: s390: more irq names for trace events This patch adds names for missing irq types to the trace events. In order to identify adapter irqs, the define is moved from interrupt.c to the other basic irq defines in uapi/linux/kvm.h. Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9ef19ebd9df4..0d831f94f8a8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -482,6 +482,7 @@ struct kvm_s390_psw { ((ai) << 26)) #define KVM_S390_INT_IO_MIN 0x00000000u #define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u struct kvm_s390_interrupt { -- cgit v1.2.3 From 8486a0bba6fc13ae60a8e402cf77c60a7d4e7a04 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 9 Jul 2015 15:18:38 +0800 Subject: usb: add usb_otg20_descriptor for OTG 2.0 and above OTG 2.0 introduces bcdOTG in otg descriptor to identify the OTG and EH supplement release number with which the OTG device is compliant, this patch adds structure usb_otg20_descriptor for OTG 2.0 and above. Signed-off-by: Macpaul Lin Signed-off-by: Li Jun Reviewed-by: Roger Quadros Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/ch9.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index aa33fd1b2d4f..aec689941c18 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -674,6 +674,17 @@ struct usb_otg_descriptor { __u8 bmAttributes; /* support for HNP, SRP, etc */ } __attribute__ ((packed)); +/* USB_DT_OTG (from OTG 2.0 supplement) */ +struct usb_otg20_descriptor { + __u8 bLength; + __u8 bDescriptorType; + + __u8 bmAttributes; /* support for HNP, SRP and ADP, etc */ + __le16 bcdOTG; /* OTG and EH supplement release number + * in binary-coded decimal(i.e. 2.0 is 0200H) + */ +} __attribute__ ((packed)); + /* from usb_otg_descriptor.bmAttributes */ #define USB_OTG_SRP (1 << 0) #define USB_OTG_HNP (1 << 1) /* swap host/device roles */ -- cgit v1.2.3 From 5d701cef9b40188c11c39be3a401bc4feeb154b8 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 9 Jul 2015 15:18:39 +0800 Subject: usb: add USB_OTG_ADP definition Add USB_OTG_ADP definition for usb_otg_descriptor.bmAttributes. Signed-off-by: Macpaul Lin Signed-off-by: Li Jun Acked-by: Peter Chen Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/ch9.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index aec689941c18..f7adc6e01f9e 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -688,6 +688,7 @@ struct usb_otg20_descriptor { /* from usb_otg_descriptor.bmAttributes */ #define USB_OTG_SRP (1 << 0) #define USB_OTG_HNP (1 << 1) /* swap host/device roles */ +#define USB_OTG_ADP (1 << 2) /* support ADP */ /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From d7ee3519042798be6224e97f259ed47a63da4620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Fri, 17 Jul 2015 16:17:56 +0200 Subject: netfilter: nf_ct_sctp: minimal multihoming support Currently nf_conntrack_proto_sctp module handles only packets between primary addresses used to establish the connection. Any packets between secondary addresses are classified as invalid so that usual firewall configurations drop them. Allowing HEARTBEAT and HEARTBEAT-ACK chunks to establish a new conntrack would allow traffic between secondary addresses to pass through. A more sophisticated solution based on the addresses advertised in the initial handshake (and possibly also later dynamic address addition and removal) would be much harder to implement. Moreover, in general we cannot assume to always see the initial handshake as it can be routed through a different path. The patch adds two new conntrack states: SCTP_CONNTRACK_HEARTBEAT_SENT - a HEARTBEAT chunk seen but not acked SCTP_CONNTRACK_HEARTBEAT_ACKED - a HEARTBEAT acked by HEARTBEAT-ACK State transition rules: - HEARTBEAT_SENT responds to usual chunks the same way as NONE (so that the behaviour changes as little as possible) - HEARTBEAT_ACKED responds to usual chunks the same way as ESTABLISHED does, except the resulting state is HEARTBEAT_ACKED rather than ESTABLISHED - previously existing states except NONE are preserved when HEARTBEAT or HEARTBEAT-ACK is seen - NONE (in the initial direction) changes to HEARTBEAT_SENT on HEARTBEAT and to CLOSED on HEARTBEAT-ACK - HEARTBEAT_SENT changes to HEARTBEAT_ACKED on HEARTBEAT-ACK in the reply direction - HEARTBEAT_SENT and HEARTBEAT_ACKED are preserved on HEARTBEAT and HEARTBEAT-ACK otherwise Normally, vtag is set from the INIT chunk for the reply direction and from the INIT-ACK chunk for the originating direction (i.e. each of these defines vtag value for the opposite direction). For secondary conntracks, we can't rely on seeing INIT/INIT-ACK and even if we have seen them, we would need to connect two different conntracks. Therefore simplified logic is applied: vtag of first packet in each direction (HEARTBEAT in the originating and HEARTBEAT-ACK in reply direction) is saved and all following packets in that direction are compared with this saved value. While INIT and INIT-ACK define vtag for the opposite direction, vtags extracted from HEARTBEAT and HEARTBEAT-ACK are always for their direction. Default timeout values for new states are HEARTBEAT_SENT: 30 seconds (default hb_interval) HEARTBEAT_ACKED: 210 seconds (hb_interval * path_max_retry + max_rto) (We cannot expect to see the shutdown sequence so that, unlike ESTABLISHED, the HEARTBEAT_ACKED timeout shouldn't be too long.) Signed-off-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_sctp.h | 2 ++ include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index ceeefe6681b5..ed4e776e1242 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -13,6 +13,8 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_SENT, SCTP_CONNTRACK_SHUTDOWN_RECD, SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, + SCTP_CONNTRACK_HEARTBEAT_SENT, + SCTP_CONNTRACK_HEARTBEAT_ACKED, SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 1ab0b97b3a1e..f2c10dc140d6 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -92,6 +92,8 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) -- cgit v1.2.3 From a0ddef81f4aeeeec3326f6b6a255d8ea13b41908 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 22 Jul 2015 14:30:14 -0400 Subject: tile: enable full SECCOMP support Signed-off-by: Chris Metcalf --- include/uapi/linux/audit.h | 3 +++ include/uapi/linux/elf-em.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d3475e1f15ec..1f977dd4c370 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -382,6 +382,9 @@ enum { #define AUDIT_ARCH_SHEL64 (EM_SH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_SPARC (EM_SPARC) #define AUDIT_ARCH_SPARC64 (EM_SPARCV9|__AUDIT_ARCH_64BIT) +#define AUDIT_ARCH_TILEGX (EM_TILEGX|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_TILEGX32 (EM_TILEGX|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_TILEPRO (EM_TILEPRO|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_PERM_EXEC 1 diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index b08829667ed7..3429a3ba382b 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -38,6 +38,8 @@ #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ #define EM_AARCH64 183 /* ARM 64 bit */ +#define EM_TILEPRO 188 /* Tilera TILEPro */ +#define EM_TILEGX 191 /* Tilera TILE-Gx */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ -- cgit v1.2.3 From 8013d1d7eafb0589ca766db6b74026f76b7f5cb4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Jul 2015 14:28:42 +0800 Subject: net/ipv6: add sysctl option accept_ra_min_hop_limit Commit 6fd99094de2b ("ipv6: Don't reduce hop limit for an interface") disabled accept hop limit from RA if it is smaller than the current hop limit for security stuff. But this behavior kind of break the RFC definition. RFC 4861, 6.3.4. Processing Received Router Advertisements A Router Advertisement field (e.g., Cur Hop Limit, Reachable Time, and Retrans Timer) may contain a value denoting that it is unspecified. In such cases, the parameter should be ignored and the host should continue using whatever value it is already using. If the received Cur Hop Limit value is non-zero, the host SHOULD set its CurHopLimit variable to the received value. So add sysctl option accept_ra_min_hop_limit to let user choose the minimum hop limit value they can accept from RA. And set default to 1 to meet RFC standards. Signed-off-by: Hangbin Liu Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 641a146ead7d..80f3b74446a1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -172,6 +172,7 @@ enum { DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, DEVCONF_USE_OIF_ADDRS_ONLY, + DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_MAX }; -- cgit v1.2.3 From d3aa45ce6b94c65b83971257317867db13e5f492 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Jul 2015 15:36:57 -0700 Subject: bpf: add helpers to access tunnel metadata Introduce helpers to let eBPF programs attached to TC manipulate tunnel metadata: bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) skb: pointer to skb key: pointer to 'struct bpf_tunnel_key' size: size of 'struct bpf_tunnel_key' flags: room for future extensions First eBPF program that uses these helpers will allocate per_cpu metadata_dst structures that will be used on TX. On RX metadata_dst is allocated by tunnel driver. Typical usage for TX: struct bpf_tunnel_key tkey; ... populate tkey ... bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); RX: struct bpf_tunnel_key tkey = {}; bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0); ... lookup or redirect based on tkey ... 'struct bpf_tunnel_key' will be extended in the future by adding elements to the end and the 'size' argument will indicate which fields are populated, thereby keeping backwards compatibility. The 'flags' argument may be used as well when the 'size' is not enough or to indicate completely different layout of bpf_tunnel_key. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2f6c83d714e9..bc0d27d3fbdd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -258,6 +258,18 @@ enum bpf_func_id { BPF_FUNC_get_cgroup_classid, BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ + + /** + * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Retrun: 0 on success + */ + BPF_FUNC_skb_get_tunnel_key, + BPF_FUNC_skb_set_tunnel_key, __BPF_FUNC_MAX_ID, }; @@ -280,4 +292,9 @@ struct __sk_buff { __u32 cb[5]; }; +struct bpf_tunnel_key { + __u32 tunnel_id; + __u32 remote_ipv4; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From f8a9b1bc1b238eed9987da747a0e52f5bb009980 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Jul 2015 20:10:22 -0700 Subject: vxlan: expose COLLECT_METADATA flag to user space Two vxlan driver flags FLOWBASED and COLLECT_METADATA need to be set to make use of its new flow mode. The former already exposed. Expose the latter. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9eeb5d9cf8f0..24e22cd4be79 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -383,6 +383,7 @@ enum { IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_FLOWBASED, + IFLA_VXLAN_COLLECT_METADATA, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From 0f7bffd9e512b77279bbce704fad3cb1d6887958 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 31 Jul 2015 16:49:43 +0200 Subject: bonding: add tlb_dynamic_lb netlink support tlb_dynamic_lb could be set only via sysfs, this patch allows it to be set via netlink. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 24e22cd4be79..ea047480a1f0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -434,6 +434,7 @@ enum { IFLA_BOND_AD_ACTOR_SYS_PRIO, IFLA_BOND_AD_USER_PORT_KEY, IFLA_BOND_AD_ACTOR_SYSTEM, + IFLA_BOND_TLB_DYNAMIC_LB, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From ba7591d8b28bd16a2eface5d009ab0b60c7629a4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 1 Aug 2015 00:46:29 +0200 Subject: ebpf: add skb->hash to offset map for usage in {cls, act}_bpf or filters Add skb->hash to the __sk_buff offset map, so it can be accessed from an eBPF program. We currently already do this for classic BPF filters, but not yet on eBPF, it might be useful as a demuxer in combination with helpers like bpf_clone_redirect(), toy example: __section("cls-lb") int ingress_main(struct __sk_buff *skb) { unsigned int which = 3 + (skb->hash & 7); /* bpf_skb_store_bytes(skb, ...); */ /* bpf_l{3,4}_csum_replace(skb, ...); */ bpf_clone_redirect(skb, which, 0); return -1; } I was thinking whether to add skb_get_hash(), but then concluded the raw skb->hash seems fine in this case: we can directly access the hash w/o extra eBPF helper function call, it's filled out by many NICs on ingress, and in case the entropy level would not be sufficient, people can still implement their own specific sw fallback hash mix anyway. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc0d27d3fbdd..2ce13c109b00 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -290,6 +290,7 @@ struct __sk_buff { __u32 ifindex; __u32 tc_index; __u32 cb[5]; + __u32 hash; }; struct bpf_tunnel_key { -- cgit v1.2.3 From 3c7c8468e5d993dfe377a67e41cbb23cda93af9e Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 26 Jul 2015 09:54:20 +0300 Subject: mei: add async event notification ioctls Add ioctl IOCTL_MEI_NOTIFY_SET for enabling and disabling async event notification. Add ioctl IOCTL_MEI_NOTIFY_GET for receiving and acking an event notification. Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mei.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h index bc0d8b69c49e..7c3b64f6a215 100644 --- a/include/uapi/linux/mei.h +++ b/include/uapi/linux/mei.h @@ -107,4 +107,23 @@ struct mei_connect_client_data { }; }; +/** + * DOC: set and unset event notification for a connected client + * + * The IOCTL argument is 1 for enabling event notification and 0 for + * disabling the service + * Return: -EOPNOTSUPP if the devices doesn't support the feature + */ +#define IOCTL_MEI_NOTIFY_SET _IOW('H', 0x02, __u32) + +/** + * DOC: retrieve notification + * + * The IOCTL output argument is 1 if an event was is pending and 0 otherwise + * the ioctl has to be called in order to acknowledge pending event + * + * Return: -EOPNOTSUPP if the devices doesn't support the feature + */ +#define IOCTL_MEI_NOTIFY_GET _IOR('H', 0x03, __u32) + #endif /* _LINUX_MEI_H */ -- cgit v1.2.3 From a6affd24f439feddec04bab4d1e3ad6579868367 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Mon, 3 Aug 2015 17:50:04 +0100 Subject: mpls: Use definition for reserved label checks In multiple locations there are checks for whether the label in hand is a reserved label or not using the arbritray value of 16. Factor this out into a #define for better maintainability and for documentation. Signed-off-by: Robert Shearman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/mpls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h index 139d4dd1cab8..24a6cb1aec86 100644 --- a/include/uapi/linux/mpls.h +++ b/include/uapi/linux/mpls.h @@ -41,4 +41,6 @@ struct mpls_label { #define MPLS_LABEL_OAMALERT 14 /* RFC3429 */ #define MPLS_LABEL_EXTENSION 15 /* RFC7274 */ +#define MPLS_LABEL_FIRST_UNRESERVED 16 /* RFC3032 */ + #endif /* _UAPI_MPLS_H */ -- cgit v1.2.3 From 71ef3c6b9d4665ee7afbbe4c208a98917dcfc32f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 10 May 2015 12:22:42 -0700 Subject: perf: Add cycles to branch_info Intel Skylake supports reporting the time in cycles a branch in the LBR took, to give a rough indication of the basic block performance. Export the cycle information in the branch_info structure. This can be done by just reusing some currently zero padding. This is just the generic header change. The architecture still needs to fill it in. There's no attempt to convert to real time, as we really want cycles here. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285767-27027-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 022d0acf7df0..2881145cda86 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -951,6 +951,7 @@ union perf_mem_data_src { * * in_tx: running in a hardware transaction * abort: aborting a hardware transaction + * cycles: cycles from last branch (or 0 if not supported) */ struct perf_branch_entry { __u64 from; @@ -959,7 +960,8 @@ struct perf_branch_entry { predicted:1,/* target predicted */ in_tx:1, /* in transaction */ abort:1, /* transaction abort */ - reserved:60; + cycles:16, /* cycle count to last branch */ + reserved:44; }; #endif /* _UAPI_LINUX_PERF_EVENT_H */ -- cgit v1.2.3 From 35c051258e8fd7cb97222f4aa887bcd404c156d0 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Fri, 26 Jun 2015 01:42:06 -0700 Subject: drm/vmwgfx: Implement screen targets Add support for the screen target device interface. Add a getparam parameter and bump minor to signal availability. Signed-off-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- include/uapi/drm/vmwgfx_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index c472bedbe38e..c8a863180174 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -88,6 +88,7 @@ #define DRM_VMW_PARAM_3D_CAPS_SIZE 8 #define DRM_VMW_PARAM_MAX_MOB_MEMORY 9 #define DRM_VMW_PARAM_MAX_MOB_SIZE 10 +#define DRM_VMW_PARAM_SCREEN_TARGET 11 /** * enum drm_vmw_handle_type - handle type for ref ioctls -- cgit v1.2.3 From 34d99af52ad40bd498ba66970579a5bc1fb1a3bc Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 5 Aug 2015 16:29:37 -0400 Subject: audit: implement audit by executable This adds the ability audit the actions of a not-yet-running process. This patch implements the ability to filter on the executable path. Instead of just hard coding the ino and dev of the executable we care about at the moment the rule is inserted into the kernel, use the new audit_fsnotify infrastructure to manage this dynamically. This means that if the filename does not yet exist but the containing directory does, or if the inode in question is unlinked and creat'd (aka updated) the rule will just continue to work. If the containing directory is moved or deleted or the filesystem is unmounted, the rule is deleted automatically. A future enhancement would be to have the rule survive across directory disruptions. This is a heavily modified version of a patch originally submitted by Eric Paris with some ideas from Peter Moody. Cc: Peter Moody Cc: Eric Paris Signed-off-by: Richard Guy Briggs [PM: minor whitespace clean to satisfy ./scripts/checkpatch] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d3475e1f15ec..f6ff62c24aba 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -266,6 +266,7 @@ #define AUDIT_OBJ_UID 109 #define AUDIT_OBJ_GID 110 #define AUDIT_FIELD_COMPARE 111 +#define AUDIT_EXE 112 #define AUDIT_ARG0 200 #define AUDIT_ARG1 (AUDIT_ARG0+1) @@ -324,8 +325,10 @@ enum { #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 +#define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH 0x00000004 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \ - AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME) + AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \ + AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH) /* deprecated: AUDIT_VERSION_* */ #define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL -- cgit v1.2.3 From d877f07112f1e5a247c6b585c971a93895c9f738 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 31 May 2015 18:04:11 +0200 Subject: netfilter: nf_tables: add nft_dup expression This new expression uses the nf_dup engine to clone packets to a given gateway. Unlike xt_TEE, we use an index to indicate output interface which should be fine at this stage. Moreover, change to the preemtion-safe this_cpu_read(nf_skb_duplicated) from nf_dup_ipv{4,6} to silence a lockdep splat. Based on the original tee expression from Arturo Borrero Gonzalez, although this patch has diverted quite a bit from this initial effort due to the change to support maps. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a99e6a997140..2ef35f2c9bda 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -935,6 +935,20 @@ enum nft_redir_attributes { }; #define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) +/** + * enum nft_dup_attributes - nf_tables dup expression netlink attributes + * + * @NFTA_DUP_SREG_ADDR: source register of address (NLA_U32: nft_registers) + * @NFTA_DUP_SREG_DEV: source register of output interface (NLA_U32: nft_register) + */ +enum nft_dup_attributes { + NFTA_DUP_UNSPEC, + NFTA_DUP_SREG_ADDR, + NFTA_DUP_SREG_DEV, + __NFTA_DUP_MAX +}; +#define NFTA_DUP_MAX (__NFTA_DUP_MAX - 1) + /** * enum nft_gen_attributes - nf_tables ruleset generation attributes * -- cgit v1.2.3 From 3e87baafa4f476017b2453e52a1ca7308b4e6ad5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 2 Aug 2015 18:02:14 +0200 Subject: netfilter: nft_limit: add burst parameter This patch adds the burst parameter. This burst indicates the number of packets that can exceed the limit. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2ef35f2c9bda..cafd78943c7d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -761,11 +761,13 @@ enum nft_ct_attributes { * * @NFTA_LIMIT_RATE: refill rate (NLA_U64) * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + * @NFTA_LIMIT_BURST: burst (NLA_U32) */ enum nft_limit_attributes { NFTA_LIMIT_UNSPEC, NFTA_LIMIT_RATE, NFTA_LIMIT_UNIT, + NFTA_LIMIT_BURST, __NFTA_LIMIT_MAX }; #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) -- cgit v1.2.3 From d2168e849ebf617b2b7feae44c0c0baf739cb610 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 5 Aug 2015 12:38:44 +0200 Subject: netfilter: nft_limit: add per-byte limiting This patch adds a new NFTA_LIMIT_TYPE netlink attribute to indicate the type of limiting. Contrary to per-packet limiting, the cost is calculated from the packet path since this depends on the packet length. The burst attribute indicates the number of bytes in which the rate can be exceeded. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index cafd78943c7d..d8c8a7c9d88a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -756,18 +756,25 @@ enum nft_ct_attributes { }; #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) +enum nft_limit_type { + NFT_LIMIT_PKTS, + NFT_LIMIT_PKT_BYTES +}; + /** * enum nft_limit_attributes - nf_tables limit expression netlink attributes * * @NFTA_LIMIT_RATE: refill rate (NLA_U64) * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) * @NFTA_LIMIT_BURST: burst (NLA_U32) + * @NFTA_LIMIT_TYPE: type of limit (NLA_U32: enum nft_limit_type) */ enum nft_limit_attributes { NFTA_LIMIT_UNSPEC, NFTA_LIMIT_RATE, NFTA_LIMIT_UNIT, NFTA_LIMIT_BURST, + NFTA_LIMIT_TYPE, __NFTA_LIMIT_MAX }; #define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) -- cgit v1.2.3 From da8b43c0e1dcea3bcac5f37ea59934ddaa137aed Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 4 Aug 2015 22:51:07 -0700 Subject: vxlan: combine VXLAN_FLOWBASED into VXLAN_COLLECT_METADATA IFLA_VXLAN_FLOWBASED is useless without IFLA_VXLAN_COLLECT_METADATA, so combine them into single IFLA_VXLAN_COLLECT_METADATA flag. 'flowbased' doesn't convey real meaning of the vxlan tunnel mode. This mode can be used by routing, tc+bpf and ovs. Only ovs is strictly flow based, so 'collect metadata' is a better name for this tunnel mode. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ea047480a1f0..f24ec99a2262 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -382,7 +382,6 @@ enum { IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, - IFLA_VXLAN_FLOWBASED, IFLA_VXLAN_COLLECT_METADATA, __IFLA_VXLAN_MAX }; -- cgit v1.2.3 From ea317b267e9d03a8241893aa176fba7661d07579 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:34 +0000 Subject: bpf: Add new bpf map type to store the pointer to struct perf_event Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'. This map only stores the pointer to struct perf_event. The user space event FDs from perf_event_open() syscall are converted to the pointer to struct perf_event and stored in map. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ce13c109b00..a1814e8e53a7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -114,6 +114,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; enum bpf_prog_type { -- cgit v1.2.3 From 35578d7984003097af2b1e34502bc943d40c1804 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:35 +0000 Subject: bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter According to the perf_event_map_fd and index, the function bpf_perf_event_read() can convert the corresponding map value to the pointer to struct perf_event and return the Hardware PMU counter value. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1814e8e53a7..92a48e2d5461 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -271,6 +271,7 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_set_tunnel_key, + BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From a7854037da006a7472c48773e3190db55217ec9b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Aug 2015 19:40:45 +0300 Subject: bridge: netlink: add support for vlan_filtering attribute This patch adds the ability to toggle the vlan filtering support via netlink. Since we're already running with rtnl in .changelink() we don't need to take any additional locks. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f24ec99a2262..d450be36add2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -230,6 +230,7 @@ enum { IFLA_BR_AGEING_TIME, IFLA_BR_STP_STATE, IFLA_BR_PRIORITY, + IFLA_BR_VLAN_FILTERING, __IFLA_BR_MAX, }; -- cgit v1.2.3 From fb811395cd5a71b9e94a068f524a6f4a21b67bdb Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Fri, 7 Aug 2015 11:10:37 -0700 Subject: net: add explicit logging and stat for neighbour table overflow Add an explicit neighbour table overflow message (ratelimited) and statistic to make diagnosing neighbour table overflows tractable in the wild. Diagnosing a neighbour table overflow can be quite difficult in the wild because there is no explicit dmesg logged. Callers to neighbour code seem to use net_dbg_ratelimit when the neighbour call fails which means the "base message" is not emitted and the callback suppressed messages from the ratelimiting can end-up juxtaposed with unrelated messages. Further, a forced garbage collection will increment a stat on each call whether it was successful in freeing-up a table entry or not, so that statistic is only a hint. So, add a net_info_ratelimited message and explicit statistic to the neighbour code. Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 2e35c61bbdd1..788655bfa0f3 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -106,6 +106,7 @@ struct ndt_stats { __u64 ndts_rcv_probes_ucast; __u64 ndts_periodic_gc_runs; __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; }; enum { -- cgit v1.2.3 From 2e15ea390e6f4466655066d97e22ec66870a042c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 7 Aug 2015 23:51:42 -0700 Subject: ip_gre: Add support to collect tunnel metadata. Following patch create new tunnel flag which enable tunnel metadata collection on given device. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index bd3cc11a431f..af4de90ba27d 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -112,6 +112,7 @@ enum { IFLA_GRE_ENCAP_FLAGS, IFLA_GRE_ENCAP_SPORT, IFLA_GRE_ENCAP_DPORT, + IFLA_GRE_COLLECT_METADATA, __IFLA_GRE_MAX, }; -- cgit v1.2.3 From d80efd5cb3dec16a8d1aea9b8a4a7921972dba65 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 10 Aug 2015 10:39:35 -0700 Subject: drm/vmwgfx: Initial DX support Initial DX support. Co-authored with Sinclair Yeh, Charmaine Lee and Jakob Bornecrantz. Signed-off-by: Thomas Hellstrom Signed-off-by: Sinclair Yeh Signed-off-by: Charmaine Lee --- include/uapi/drm/vmwgfx_drm.h | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index c8a863180174..c5bcddd9f58c 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -64,6 +64,7 @@ #define DRM_VMW_GB_SURFACE_CREATE 23 #define DRM_VMW_GB_SURFACE_REF 24 #define DRM_VMW_SYNCCPU 25 +#define DRM_VMW_CREATE_EXTENDED_CONTEXT 26 /*************************************************************************/ /** @@ -89,6 +90,7 @@ #define DRM_VMW_PARAM_MAX_MOB_MEMORY 9 #define DRM_VMW_PARAM_MAX_MOB_SIZE 10 #define DRM_VMW_PARAM_SCREEN_TARGET 11 +#define DRM_VMW_PARAM_DX 12 /** * enum drm_vmw_handle_type - handle type for ref ioctls @@ -297,7 +299,7 @@ union drm_vmw_surface_reference_arg { * Argument to the DRM_VMW_EXECBUF Ioctl. */ -#define DRM_VMW_EXECBUF_VERSION 1 +#define DRM_VMW_EXECBUF_VERSION 2 struct drm_vmw_execbuf_arg { uint64_t commands; @@ -306,6 +308,8 @@ struct drm_vmw_execbuf_arg { uint64_t fence_rep; uint32_t version; uint32_t flags; + uint32_t context_handle; + uint32_t pad64; }; /** @@ -826,7 +830,6 @@ struct drm_vmw_update_layout_arg { enum drm_vmw_shader_type { drm_vmw_shader_type_vs = 0, drm_vmw_shader_type_ps, - drm_vmw_shader_type_gs }; @@ -908,6 +911,8 @@ enum drm_vmw_surface_flags { * @buffer_handle Buffer handle of backup buffer. SVGA3D_INVALID_ID * if none. * @base_size Size of the base mip level for all faces. + * @array_size Must be zero for non-DX hardware, and if non-zero + * svga3d_flags must have proper bind flags setup. * * Input argument to the DRM_VMW_GB_SURFACE_CREATE Ioctl. * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl. @@ -920,7 +925,7 @@ struct drm_vmw_gb_surface_create_req { uint32_t multisample_count; uint32_t autogen_filter; uint32_t buffer_handle; - uint32_t pad64; + uint32_t array_size; struct drm_vmw_size base_size; }; @@ -1060,4 +1065,28 @@ struct drm_vmw_synccpu_arg { uint32_t pad64; }; +/*************************************************************************/ +/** + * DRM_VMW_CREATE_EXTENDED_CONTEXT - Create a host context. + * + * Allocates a device unique context id, and queues a create context command + * for the host. Does not wait for host completion. + */ +enum drm_vmw_extended_context { + drm_vmw_context_legacy, + drm_vmw_context_dx +}; + +/** + * union drm_vmw_extended_context_arg + * + * @req: Context type. + * @rep: Context identifier. + * + * Argument to the DRM_VMW_CREATE_EXTENDED_CONTEXT Ioctl. + */ +union drm_vmw_extended_context_arg { + enum drm_vmw_extended_context req; + struct drm_vmw_context_arg rep; +}; #endif -- cgit v1.2.3 From 54fbde8a94a8a78547597215c9e4be590d075ee0 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 29 Jul 2015 12:38:02 -0700 Subject: drm/vmwgfx: Fix copyright headers Updating and fixing copyright headers. Bump version minor to signal vgpu10 support. Signed-off-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- include/uapi/drm/vmwgfx_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index c5bcddd9f58c..05b204954d16 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA + * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a -- cgit v1.2.3 From 35103d11173b8fea874183f8aa508ae71234d299 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Thu, 13 Aug 2015 10:39:01 -0400 Subject: net: ipv6 sysctl option to ignore routes when nexthop link is down Like the ipv4 patch with a similar title, this adds a sysctl to allow the user to change routing behavior based on whether or not the interface associated with the nexthop was an up or down link. The default setting preserves the current behavior, but anyone that enables it will notice that nexthops on down interfaces will no longer be selected: net.ipv6.conf.all.ignore_routes_with_linkdown = 0 net.ipv6.conf.default.ignore_routes_with_linkdown = 0 net.ipv6.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, not only will link status be reported to userspace, but an indication that a nexthop is dead and will not be used is also reported. 1000::/8 via 7000::2 dev p7p1 metric 1024 dead linkdown pref medium 1000::/8 via 8000::2 dev p8p1 metric 1024 pref medium 7000::/8 dev p7p1 proto kernel metric 256 dead linkdown pref medium 8000::/8 dev p8p1 proto kernel metric 256 pref medium 9000::/8 via 8000::2 dev p8p1 metric 2048 pref medium 9000::/8 via 7000::2 dev p7p1 metric 1024 dead linkdown pref medium fe80::/64 dev p7p1 proto kernel metric 256 dead linkdown pref medium fe80::/64 dev p8p1 proto kernel metric 256 pref medium This also adds devconf support and notification when sysctl values change. v2: drop use of rt6i_nhflags since it is not needed right now Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 80f3b74446a1..38b4fef20219 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -173,6 +173,7 @@ enum { DEVCONF_STABLE_SECRET, DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, + DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_MAX }; -- cgit v1.2.3 From 4e3c89920cd3a6cfce22c6f537690747c26128dd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Aug 2015 14:59:00 -0600 Subject: net: Introduce VRF related flags and helpers Add a VRF_MASTER flag for interfaces and helper functions for determining if a device is a VRF_MASTER. Add link attribute for passing VRF_TABLE id. Add vrf_ptr to netdevice. Add various macros for determining if a device is a VRF device, the index of the master VRF device and table associated with VRF device. Signed-off-by: Shrijeet Mukherjee Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d450be36add2..313c305fd1ad 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,15 @@ enum macvlan_macaddr_mode { #define MACVLAN_FLAG_NOPROMISC 1 +/* VRF section */ +enum { + IFLA_VRF_UNSPEC, + IFLA_VRF_TABLE, + __IFLA_VRF_MAX +}; + +#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, -- cgit v1.2.3 From a1c234f95cae2d293047bb6c36e7a4840dbac815 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 14 Aug 2015 16:40:40 +0200 Subject: lwtunnel: rename ip lwtunnel attributes We already have IFLA_IPTUN_ netlink attributes. The IP_TUN_ attributes look very similar, yet they serve very different purpose. This is confusing for anyone trying to implement a user space tool supporting lwt. As the IP_TUN_ attributes are used only for the lightweight tunnels, prefix them with LWTUNNEL_IP_ instead to make their purpose clear. Also, it's more logical to have them in lwtunnel.h together with the encap enum. Fixes: 3093fbe7ff4b ("route: Per route IP tunnel metadata via lightweight tunnel") Signed-off-by: Jiri Benc Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 14 ++++++++++++++ include/uapi/linux/rtnetlink.h | 15 --------------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 31377bbea3f8..3bf223bc2367 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -12,5 +12,19 @@ enum lwtunnel_encap_types { #define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1) +enum lwtunnel_ip_t { + LWTUNNEL_IP_UNSPEC, + LWTUNNEL_IP_ID, + LWTUNNEL_IP_DST, + LWTUNNEL_IP_SRC, + LWTUNNEL_IP_TTL, + LWTUNNEL_IP_TOS, + LWTUNNEL_IP_SPORT, + LWTUNNEL_IP_DPORT, + LWTUNNEL_IP_FLAGS, + __LWTUNNEL_IP_MAX, +}; + +#define LWTUNNEL_IP_MAX (__LWTUNNEL_IP_MAX - 1) #endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 47d24cb3fbc1..0d3d3cc43356 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -286,21 +286,6 @@ enum rt_class_t { /* Routing message attributes */ -enum ip_tunnel_t { - IP_TUN_UNSPEC, - IP_TUN_ID, - IP_TUN_DST, - IP_TUN_SRC, - IP_TUN_TTL, - IP_TUN_TOS, - IP_TUN_SPORT, - IP_TUN_DPORT, - IP_TUN_FLAGS, - __IP_TUN_MAX, -}; - -#define IP_TUN_MAX (__IP_TUN_MAX - 1) - enum rtattr_type_t { RTA_UNSPEC, RTA_DST, -- cgit v1.2.3 From 47dceb8ecdc1c3ad1818dfea3d659a05b74c3fc2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 14 Aug 2015 22:31:34 -0400 Subject: packet: add classic BPF fanout mode Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program to select a socket. This avoids having to keep adding special case fanout modes. One example use case is application layer load balancing. The QUIC protocol, for instance, encodes a connection ID in UDP payload. Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the only user so far. Signed-off-by: Willem de Bruijn Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index d3d715f8c88f..a4bb16fa822e 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -55,6 +55,7 @@ struct sockaddr_ll { #define PACKET_TX_HAS_OFF 19 #define PACKET_QDISC_BYPASS 20 #define PACKET_ROLLOVER_STATS 21 +#define PACKET_FANOUT_DATA 22 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 @@ -62,6 +63,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_ROLLOVER 3 #define PACKET_FANOUT_RND 4 #define PACKET_FANOUT_QM 5 +#define PACKET_FANOUT_CBPF 6 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 -- cgit v1.2.3 From f2e520956a1ab636698f8160194c9b8ac0989aab Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 14 Aug 2015 22:31:35 -0400 Subject: packet: add extended BPF fanout mode Add fanout mode PACKET_FANOUT_EBPF that accepts an en extended BPF program to select a socket. Update the internal eBPF program by passing to socket option SOL_PACKET/PACKET_FANOUT_DATA a file descriptor returned by bpf(). Signed-off-by: Willem de Bruijn Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index a4bb16fa822e..9e7edfd8141e 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -64,6 +64,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_RND 4 #define PACKET_FANOUT_QM 5 #define PACKET_FANOUT_CBPF 6 +#define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 -- cgit v1.2.3 From deedb59039f111c41aa5a54ee384c8e7c08bc78a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Aug 2015 16:03:39 +0200 Subject: netfilter: nf_conntrack: add direction support for zones This work adds a direction parameter to netfilter zones, so identity separation can be performed only in original/reply or both directions (default). This basically opens up the possibility of doing NAT with conflicting IP address/port tuples from multiple, isolated tenants on a host (e.g. from a netns) without requiring each tenant to NAT twice resp. to use its own dedicated IP address to SNAT to, meaning overlapping tuples can be made unique with the zone identifier in original direction, where the NAT engine will then allocate a unique tuple in the commonly shared default zone for the reply direction. In some restricted, local DNAT cases, also port redirection could be used for making the reply traffic unique w/o requiring SNAT. The consensus we've reached and discussed at NFWS and since the initial implementation [1] was to directly integrate the direction meta data into the existing zones infrastructure, as opposed to the ct->mark approach we proposed initially. As we pass the nf_conntrack_zone object directly around, we don't have to touch all call-sites, but only those, that contain equality checks of zones. Thus, based on the current direction (original or reply), we either return the actual id, or the default NF_CT_DEFAULT_ZONE_ID. CT expectations are direction-agnostic entities when expectations are being compared among themselves, so we can only use the identifier in this case. Note that zone identifiers can not be included into the hash mix anymore as they don't contain a "stable" value that would be equal for both directions at all times, f.e. if only zone->id would unconditionally be xor'ed into the table slot hash, then replies won't find the corresponding conntracking entry anymore. If no particular direction is specified when configuring zones, the behaviour is exactly as we expect currently (both directions). Support has been added for the CT netlink interface as well as the x_tables raw CT target, which both already offer existing interfaces to user space for the configuration of zones. Below a minimal, simplified collision example (script in [2]) with netperf sessions: +--- tenant-1 ---+ mark := 1 | netperf |--+ +----------------+ | CT zone := mark [ORIGINAL] [ip,sport] := X +--------------+ +--- gateway ---+ | mark routing |--| SNAT |-- ... + +--------------+ +---------------+ | +--- tenant-2 ---+ | ~~~|~~~ | netperf |--+ +-----------+ | +----------------+ mark := 2 | netserver |------ ... + [ip,sport] := X +-----------+ [ip,port] := Y On the gateway netns, example: iptables -t raw -A PREROUTING -j CT --zone mark --zone-dir ORIGINAL iptables -t nat -A POSTROUTING -o -j SNAT --to-source --random-fully iptables -t mangle -A PREROUTING -m conntrack --ctdir ORIGINAL -j CONNMARK --save-mark iptables -t mangle -A POSTROUTING -m conntrack --ctdir REPLY -j CONNMARK --restore-mark conntrack dump from gateway netns: netperf -H 10.1.1.2 -t TCP_STREAM -l60 -p12865,5555 from each tenant netns tcp 6 431995 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=1 src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=1024 [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 431994 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=2 src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=5555 [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 299 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=39438 dport=33768 zone-orig=1 src=10.1.1.2 dst=10.1.1.1 sport=33768 dport=39438 [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 300 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=32889 dport=40206 zone-orig=2 src=10.1.1.2 dst=10.1.1.1 sport=40206 dport=32889 [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=2 Taking this further, test script in [2] creates 200 tenants and runs original-tuple colliding netperf sessions each. A conntrack -L dump in the gateway netns also confirms 200 overlapping entries, all in ESTABLISHED state as expected. I also did run various other tests with some permutations of the script, to mention some: SNAT in random/random-fully/persistent mode, no zones (no overlaps), static zones (original, reply, both directions), etc. [1] http://thread.gmane.org/gmane.comp.security.firewalls.netfilter.devel/57412/ [2] https://paste.fedoraproject.org/242835/65657871/ Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + include/uapi/linux/netfilter/xt_CT.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index acad6c52a652..c1a4e1441a25 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -61,6 +61,7 @@ enum ctattr_tuple { CTA_TUPLE_UNSPEC, CTA_TUPLE_IP, CTA_TUPLE_PROTO, + CTA_TUPLE_ZONE, __CTA_TUPLE_MAX }; #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) diff --git a/include/uapi/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h index 5a688c1ca4d7..452005ff0e9e 100644 --- a/include/uapi/linux/netfilter/xt_CT.h +++ b/include/uapi/linux/netfilter/xt_CT.h @@ -6,7 +6,11 @@ enum { XT_CT_NOTRACK = 1 << 0, XT_CT_NOTRACK_ALIAS = 1 << 1, - XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS, + XT_CT_ZONE_DIR_ORIG = 1 << 2, + XT_CT_ZONE_DIR_REPL = 1 << 3, + + XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS | + XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL, }; struct xt_ct_target_info { -- cgit v1.2.3 From 5e8018fc61423e677398d4ad4d72df70b9788e77 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Aug 2015 16:03:40 +0200 Subject: netfilter: nf_conntrack: add efficient mark to zone mapping This work adds the possibility of deriving the zone id from the skb->mark field in a scalable manner. This allows for having only a single template serving hundreds/thousands of different zones, for example, instead of the need to have one match for each zone as an extra CT jump target. Note that we'd need to have this information attached to the template as at the time when we're trying to lookup a possible ct object, we already need to know zone information for a possible match when going into __nf_conntrack_find_get(). This work provides a minimal implementation for a possible mapping. In order to not add/expose an extra ct->status bit, the zone structure has been extended to carry a flag for deriving the mark. Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_CT.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h index 452005ff0e9e..9e520418b858 100644 --- a/include/uapi/linux/netfilter/xt_CT.h +++ b/include/uapi/linux/netfilter/xt_CT.h @@ -8,9 +8,11 @@ enum { XT_CT_NOTRACK_ALIAS = 1 << 1, XT_CT_ZONE_DIR_ORIG = 1 << 2, XT_CT_ZONE_DIR_REPL = 1 << 3, + XT_CT_ZONE_MARK = 1 << 4, XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS | - XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL, + XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL | + XT_CT_ZONE_MARK, }; struct xt_ct_target_info { -- cgit v1.2.3 From 65d7ab8de582bc668e3dabb6ff48f750098a6e78 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 17 Aug 2015 13:42:27 -0700 Subject: net: Identifier Locator Addressing module Adding new module name ila. This implements ILA translation. Light weight tunnel redirection is used to perform the translation in the data path. This is configured by the "ip -6 route" command using the "encap ila " option, where is the value to set in destination locator of the packet. e.g. ip -6 route add 3333:0:0:1:5555:0:1:0/128 \ encap ila 2001:0:0:1 via 2401:db00:20:911a:face:0:25:0 Sets a route where 3333:0:0:1 will be overwritten by 2001:0:0:1 on output. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/ila.h | 15 +++++++++++++++ include/uapi/linux/lwtunnel.h | 1 + 2 files changed, 16 insertions(+) create mode 100644 include/uapi/linux/ila.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h new file mode 100644 index 000000000000..7ed9e670814e --- /dev/null +++ b/include/uapi/linux/ila.h @@ -0,0 +1,15 @@ +/* ila.h - ILA Interface */ + +#ifndef _UAPI_LINUX_ILA_H +#define _UAPI_LINUX_ILA_H + +enum { + ILA_ATTR_UNSPEC, + ILA_ATTR_LOCATOR, /* u64 */ + + __ILA_ATTR_MAX, +}; + +#define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) + +#endif /* _UAPI_LINUX_ILA_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 3bf223bc2367..aa84ca396bcb 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -7,6 +7,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_NONE, LWTUNNEL_ENCAP_MPLS, LWTUNNEL_ENCAP_IP, + LWTUNNEL_ENCAP_ILA, __LWTUNNEL_ENCAP_MAX, }; -- cgit v1.2.3 From d9232a3da8683cd9c9854a858bcca968fe5f3bca Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 23 Jul 2015 16:43:56 +1000 Subject: cxl: Add alternate MMIO error handling userspace programs using cxl currently have to use two strategies for dealing with MMIO errors simultaneously. They have to check every read for a return of all Fs in case the adapter has gone away and the kernel has not yet noticed, and they have to deal with SIGBUS in case the kernel has already noticed, invalidated the mapping and marked the context as failed. In order to simplify things, this patch adds an alternative approach where the kernel will return a page filled with Fs instead of delivering a SIGBUS. This allows userspace to only need to deal with one of these two error paths, and is intended for use in libraries that use cxl transparently and may not be able to safely install a signal handler. This approach will only work if certain constraints are met. Namely, if the application is both reading and writing to an address in the problem state area it cannot assume that a non-FF read is OK, as it may just be reading out a value it has previously written. Further - since only one page is used per context a write to a given offset would be visible when reading the same offset from a different page in the mapping (this only applies within a single context, not between contexts). An application could deal with this by e.g. making sure it also reads from a read-only offset after any reads to a read/write offset. Due to these constraints, this functionality must be explicitly requested by userspace when starting the context by passing in the CXL_START_WORK_ERR_FF flag. Signed-off-by: Ian Munsie Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- include/uapi/misc/cxl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 99a8ca15fe64..1e889aa8a36e 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -29,8 +29,10 @@ struct cxl_ioctl_start_work { #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL +#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ - CXL_START_WORK_NUM_IRQS) + CXL_START_WORK_NUM_IRQS |\ + CXL_START_WORK_ERR_FF) /* Possible modes that an afu can be in */ -- cgit v1.2.3 From 81f03fedcce7ee7e83c37237ecaa2f68aad236fd Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 10 Aug 2015 15:20:41 -0600 Subject: NVMe: Add nvme subsystem reset IOCTL Controllers can perform optional subsystem resets as introduced in NVMe 1.1. This patch adds an IOCTL to trigger the subsystem reset by writing "NVMe" to the NSSR register. Signed-off-by: Jon Derrick Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/uapi/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 732b32e92b02..8864194a4151 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -584,5 +584,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) #define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) #endif /* _UAPI_LINUX_NVME_H */ -- cgit v1.2.3 From bd49784fd1e8f42c7600fbfa206361324857f373 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Aug 2015 16:26:16 -0400 Subject: dm stats: report precise_timestamps and histogram in @stats_list output If the user selected the precise_timestamps or histogram options, report it in the @stats_list message output. If the user didn't select these options, no extra tokens are reported, thus it is backward compatible with old software that doesn't know about precise timestamps and histogram. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.2 --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 061aca3a962d..d34611e35a30 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 32 +#define DM_VERSION_MINOR 33 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2015-6-26)" +#define DM_VERSION_EXTRA "-ioctl (2015-8-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 32a2b002ce615eadd3bfaddabde290f70a1dd17b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 20 Aug 2015 13:56:32 +0200 Subject: ipv6: route: per route IP tunnel metadata via lightweight tunnel Allow specification of per route IP tunnel instructions also for IPv6. This complements commit 3093fbe7ff4b ("route: Per route IP tunnel metadata via lightweight tunnel"). Signed-off-by: Jiri Benc CC: YOSHIFUJI Hideaki Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/lwtunnel.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index aa84ca396bcb..34141a5dfe74 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -8,6 +8,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_MPLS, LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, + LWTUNNEL_ENCAP_IP6, __LWTUNNEL_ENCAP_MAX, }; @@ -28,4 +29,19 @@ enum lwtunnel_ip_t { #define LWTUNNEL_IP_MAX (__LWTUNNEL_IP_MAX - 1) +enum lwtunnel_ip6_t { + LWTUNNEL_IP6_UNSPEC, + LWTUNNEL_IP6_ID, + LWTUNNEL_IP6_DST, + LWTUNNEL_IP6_SRC, + LWTUNNEL_IP6_HOPLIMIT, + LWTUNNEL_IP6_TC, + LWTUNNEL_IP6_SPORT, + LWTUNNEL_IP6_DPORT, + LWTUNNEL_IP6_FLAGS, + __LWTUNNEL_IP6_MAX, +}; + +#define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1) + #endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From e4ff67513096e6e196ca58043fce04d0f87babbe Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 26 Jul 2015 15:03:27 +0300 Subject: ipvs: add sync_maxlen parameter for the sync daemon Allow setups with large MTU to send large sync packets by adding sync_maxlen parameter. The default value is now based on MTU but no more than 1500 for compatibility reasons. To avoid problems if MTU changes allow fragmentation by sending packets with DF=0. Problem reported by Dan Carpenter. Reported-by: Dan Carpenter Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/uapi/linux/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 3199243f2028..68377d8c8870 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -406,6 +406,7 @@ enum { IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ + IPVS_DAEMON_ATTR_SYNC_MAXLEN, /* UDP Payload Size */ __IPVS_DAEMON_ATTR_MAX, }; -- cgit v1.2.3 From d33288172e72c4729e8b9f2243fb40601afabc8f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 26 Jul 2015 15:03:28 +0300 Subject: ipvs: add more mcast parameters for the sync daemon - mcast_group: configure the multicast address, now IPv6 is supported too - mcast_port: configure the multicast port - mcast_ttl: configure the multicast TTL/HOP_LIMIT Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/uapi/linux/ip_vs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 68377d8c8870..391395c06c7e 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -407,6 +407,10 @@ enum { IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ IPVS_DAEMON_ATTR_SYNC_MAXLEN, /* UDP Payload Size */ + IPVS_DAEMON_ATTR_MCAST_GROUP, /* IPv4 Multicast Address */ + IPVS_DAEMON_ATTR_MCAST_GROUP6, /* IPv6 Multicast Address */ + IPVS_DAEMON_ATTR_MCAST_PORT, /* Multicast Port (base) */ + IPVS_DAEMON_ATTR_MCAST_TTL, /* Multicast TTL */ __IPVS_DAEMON_ATTR_MAX, }; -- cgit v1.2.3 From b96f465035f9fae83c1d8de3e80eecfe6877608c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 25 Aug 2015 12:51:44 -0500 Subject: dlm: fix lvb copy for user locks For a userland lock request, the previous and current lock modes are used to decide when the lvb should be copied back to the user. The wrong previous value was used, so that it always matched the current value. This caused the lvb to be copied back to the user in the wrong cases. Signed-off-by: David Teigland --- include/uapi/linux/dlm_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h index 3060783c4191..df56c8ff0769 100644 --- a/include/uapi/linux/dlm_device.h +++ b/include/uapi/linux/dlm_device.h @@ -26,7 +26,7 @@ /* Version of the device interface */ #define DLM_DEVICE_VERSION_MAJOR 6 #define DLM_DEVICE_VERSION_MINOR 0 -#define DLM_DEVICE_VERSION_PATCH 1 +#define DLM_DEVICE_VERSION_PATCH 2 /* struct passed to the lock write */ struct dlm_lock_params { -- cgit v1.2.3 From 1afe839e6b31a85fc53adbf8757d6373908d414d Mon Sep 17 00:00:00 2001 From: Andreas Herz Date: Fri, 21 Aug 2015 11:31:32 +0200 Subject: netfilter: ip6t_REJECT: added missing icmpv6 codes RFC 4443 added two new codes values for ICMPv6 type 1: 5 - Source address failed ingress/egress policy 6 - Reject route to destination And RFC 7084 states in L-14 that IPv6 Router MUST send ICMPv6 Destination Unreachable with code 5 for packets forwarded to it that use an address from a prefix that has been invalidated. Codes 5 and 6 are more informative subsets of code 1. Signed-off-by: Andreas Herz Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h b/include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h index 205ed62e4605..cd2e940c8bf5 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h @@ -10,7 +10,9 @@ enum ip6t_reject_with { IP6T_ICMP6_ADDR_UNREACH, IP6T_ICMP6_PORT_UNREACH, IP6T_ICMP6_ECHOREPLY, - IP6T_TCP_RESET + IP6T_TCP_RESET, + IP6T_ICMP6_POLICY_FAIL, + IP6T_ICMP6_REJECT_ROUTE }; struct ip6t_reject_info { -- cgit v1.2.3 From 65be2c79acc3aa0f9c0e8d4871f5a451d854465a Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Thu, 13 Aug 2015 21:47:43 -0500 Subject: cxlflash: Superpipe support Add superpipe supporting infrastructure to device driver for the IBM CXL Flash adapter. This patch allows userspace applications to take advantage of the accelerated I/O features that this adapter provides and bypass the traditional filesystem stack. Signed-off-by: Matthew R. Ochs Signed-off-by: Manoj N. Kumar Reviewed-by: Michael Neuling Reviewed-by: Wen Xiong Reviewed-by: Brian King Signed-off-by: James Bottomley --- include/uapi/scsi/Kbuild | 1 + include/uapi/scsi/cxlflash_ioctl.h | 140 +++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 include/uapi/scsi/cxlflash_ioctl.h (limited to 'include/uapi') diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild index 75746d52f208..d791e0ad509d 100644 --- a/include/uapi/scsi/Kbuild +++ b/include/uapi/scsi/Kbuild @@ -3,3 +3,4 @@ header-y += fc/ header-y += scsi_bsg_fc.h header-y += scsi_netlink.h header-y += scsi_netlink_fc.h +header-y += cxlflash_ioctl.h diff --git a/include/uapi/scsi/cxlflash_ioctl.h b/include/uapi/scsi/cxlflash_ioctl.h new file mode 100644 index 000000000000..570773406531 --- /dev/null +++ b/include/uapi/scsi/cxlflash_ioctl.h @@ -0,0 +1,140 @@ +/* + * CXL Flash Device Driver + * + * Written by: Manoj N. Kumar , IBM Corporation + * Matthew R. Ochs , IBM Corporation + * + * Copyright (C) 2015 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _CXLFLASH_IOCTL_H +#define _CXLFLASH_IOCTL_H + +#include + +/* + * Structure and flag definitions CXL Flash superpipe ioctls + */ + +#define DK_CXLFLASH_VERSION_0 0 + +struct dk_cxlflash_hdr { + __u16 version; /* Version data */ + __u16 rsvd[3]; /* Reserved for future use */ + __u64 flags; /* Input flags */ + __u64 return_flags; /* Returned flags */ +}; + +/* + * Notes: + * ----- + * The 'context_id' field of all ioctl structures contains the context + * identifier for a context in the lower 32-bits (upper 32-bits are not + * to be used when identifying a context to the AFU). That said, the value + * in its entirety (all 64-bits) is to be treated as an opaque cookie and + * should be presented as such when issuing ioctls. + * + * For DK_CXLFLASH_ATTACH ioctl, user specifies read/write access + * permissions via the O_RDONLY, O_WRONLY, and O_RDWR flags defined in + * the fcntl.h header file. + */ +#define DK_CXLFLASH_ATTACH_REUSE_CONTEXT 0x8000000000000000ULL + +struct dk_cxlflash_attach { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 num_interrupts; /* Requested number of interrupts */ + __u64 context_id; /* Returned context */ + __u64 mmio_size; /* Returned size of MMIO area */ + __u64 block_size; /* Returned block size, in bytes */ + __u64 adap_fd; /* Returned adapter file descriptor */ + __u64 last_lba; /* Returned last LBA on the device */ + __u64 max_xfer; /* Returned max transfer size, blocks */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +struct dk_cxlflash_detach { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context to detach */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +struct dk_cxlflash_udirect { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context to own physical resources */ + __u64 rsrc_handle; /* Returned resource handle */ + __u64 last_lba; /* Returned last LBA on the device */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +struct dk_cxlflash_release { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context owning resources */ + __u64 rsrc_handle; /* Resource handle to release */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +#define DK_CXLFLASH_VERIFY_SENSE_LEN 18 +#define DK_CXLFLASH_VERIFY_HINT_SENSE 0x8000000000000000ULL + +struct dk_cxlflash_verify { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context owning resources to verify */ + __u64 rsrc_handle; /* Resource handle of LUN */ + __u64 hint; /* Reasons for verify */ + __u64 last_lba; /* Returned last LBA of device */ + __u8 sense_data[DK_CXLFLASH_VERIFY_SENSE_LEN]; /* SCSI sense data */ + __u8 pad[6]; /* Pad to next 8-byte boundary */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +#define DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET 0x8000000000000000ULL + +struct dk_cxlflash_recover_afu { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 reason; /* Reason for recovery request */ + __u64 context_id; /* Context to recover / updated ID */ + __u64 mmio_size; /* Returned size of MMIO area */ + __u64 adap_fd; /* Returned adapter file descriptor */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +#define DK_CXLFLASH_MANAGE_LUN_WWID_LEN 16 +#define DK_CXLFLASH_MANAGE_LUN_ENABLE_SUPERPIPE 0x8000000000000000ULL +#define DK_CXLFLASH_MANAGE_LUN_DISABLE_SUPERPIPE 0x4000000000000000ULL +#define DK_CXLFLASH_MANAGE_LUN_ALL_PORTS_ACCESSIBLE 0x2000000000000000ULL + +struct dk_cxlflash_manage_lun { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u8 wwid[DK_CXLFLASH_MANAGE_LUN_WWID_LEN]; /* Page83 WWID, NAA-6 */ + __u64 reserved[8]; /* Rsvd, future use */ +}; + +union cxlflash_ioctls { + struct dk_cxlflash_attach attach; + struct dk_cxlflash_detach detach; + struct dk_cxlflash_udirect udirect; + struct dk_cxlflash_release release; + struct dk_cxlflash_verify verify; + struct dk_cxlflash_recover_afu recover_afu; + struct dk_cxlflash_manage_lun manage_lun; +}; + +#define MAX_CXLFLASH_IOCTL_SZ (sizeof(union cxlflash_ioctls)) + +#define CXL_MAGIC 0xCA +#define CXL_IOWR(_n, _s) _IOWR(CXL_MAGIC, _n, struct _s) + +#define DK_CXLFLASH_ATTACH CXL_IOWR(0x80, dk_cxlflash_attach) +#define DK_CXLFLASH_USER_DIRECT CXL_IOWR(0x81, dk_cxlflash_udirect) +#define DK_CXLFLASH_RELEASE CXL_IOWR(0x82, dk_cxlflash_release) +#define DK_CXLFLASH_DETACH CXL_IOWR(0x83, dk_cxlflash_detach) +#define DK_CXLFLASH_VERIFY CXL_IOWR(0x84, dk_cxlflash_verify) +#define DK_CXLFLASH_RECOVER_AFU CXL_IOWR(0x85, dk_cxlflash_recover_afu) +#define DK_CXLFLASH_MANAGE_LUN CXL_IOWR(0x86, dk_cxlflash_manage_lun) + +#endif /* ifndef _CXLFLASH_IOCTL_H */ -- cgit v1.2.3 From 2cb79266d6b229dbebd31fe114af1bdab25c8076 Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Thu, 13 Aug 2015 21:47:53 -0500 Subject: cxlflash: Virtual LUN support Add support for physical LUN segmentation (virtual LUNs) to device driver supporting the IBM CXL Flash adapter. This patch allows user space applications to virtually segment a physical LUN into N virtual LUNs, taking advantage of the translation features provided by this adapter. Signed-off-by: Matthew R. Ochs Signed-off-by: Manoj N. Kumar Reviewed-by: Michael Neuling Reviewed-by: Wen Xiong Signed-off-by: James Bottomley --- include/uapi/scsi/cxlflash_ioctl.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/scsi/cxlflash_ioctl.h b/include/uapi/scsi/cxlflash_ioctl.h index 570773406531..831351b2e660 100644 --- a/include/uapi/scsi/cxlflash_ioctl.h +++ b/include/uapi/scsi/cxlflash_ioctl.h @@ -71,6 +71,17 @@ struct dk_cxlflash_udirect { __u64 reserved[8]; /* Reserved for future use */ }; +#define DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME 0x8000000000000000ULL + +struct dk_cxlflash_uvirtual { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context to own virtual resources */ + __u64 lun_size; /* Requested size, in 4K blocks */ + __u64 rsrc_handle; /* Returned resource handle */ + __u64 last_lba; /* Returned last LBA of LUN */ + __u64 reserved[8]; /* Reserved for future use */ +}; + struct dk_cxlflash_release { struct dk_cxlflash_hdr hdr; /* Common fields */ __u64 context_id; /* Context owning resources */ @@ -78,6 +89,23 @@ struct dk_cxlflash_release { __u64 reserved[8]; /* Reserved for future use */ }; +struct dk_cxlflash_resize { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id; /* Context owning resources */ + __u64 rsrc_handle; /* Resource handle of LUN to resize */ + __u64 req_size; /* New requested size, in 4K blocks */ + __u64 last_lba; /* Returned last LBA of LUN */ + __u64 reserved[8]; /* Reserved for future use */ +}; + +struct dk_cxlflash_clone { + struct dk_cxlflash_hdr hdr; /* Common fields */ + __u64 context_id_src; /* Context to clone from */ + __u64 context_id_dst; /* Context to clone to */ + __u64 adap_fd_src; /* Source context adapter fd */ + __u64 reserved[8]; /* Reserved for future use */ +}; + #define DK_CXLFLASH_VERIFY_SENSE_LEN 18 #define DK_CXLFLASH_VERIFY_HINT_SENSE 0x8000000000000000ULL @@ -118,7 +146,10 @@ union cxlflash_ioctls { struct dk_cxlflash_attach attach; struct dk_cxlflash_detach detach; struct dk_cxlflash_udirect udirect; + struct dk_cxlflash_uvirtual uvirtual; struct dk_cxlflash_release release; + struct dk_cxlflash_resize resize; + struct dk_cxlflash_clone clone; struct dk_cxlflash_verify verify; struct dk_cxlflash_recover_afu recover_afu; struct dk_cxlflash_manage_lun manage_lun; @@ -136,5 +167,8 @@ union cxlflash_ioctls { #define DK_CXLFLASH_VERIFY CXL_IOWR(0x84, dk_cxlflash_verify) #define DK_CXLFLASH_RECOVER_AFU CXL_IOWR(0x85, dk_cxlflash_recover_afu) #define DK_CXLFLASH_MANAGE_LUN CXL_IOWR(0x86, dk_cxlflash_manage_lun) +#define DK_CXLFLASH_USER_VIRTUAL CXL_IOWR(0x87, dk_cxlflash_uvirtual) +#define DK_CXLFLASH_VLUN_RESIZE CXL_IOWR(0x88, dk_cxlflash_resize) +#define DK_CXLFLASH_VLUN_CLONE CXL_IOWR(0x89, dk_cxlflash_clone) #endif /* ifndef _CXLFLASH_IOCTL_H */ -- cgit v1.2.3 From 7f8a436eaa2c3ddd8e1ff2fbca267e6275085536 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:48 -0700 Subject: openvswitch: Add conntrack action Expose the kernel connection tracker via OVS. Userspace components can make use of the CT action to populate the connection state (ct_state) field for a flow. This state can be subsequently matched. Exposed connection states are OVS_CS_F_*: - NEW (0x01) - Beginning of a new connection. - ESTABLISHED (0x02) - Part of an existing connection. - RELATED (0x04) - Related to an established connection. - INVALID (0x20) - Could not track the connection for this packet. - REPLY_DIR (0x40) - This packet is in the reply direction for the flow. - TRACKED (0x80) - This packet has been sent through conntrack. When the CT action is executed by itself, it will send the packet through the connection tracker and populate the ct_state field with one or more of the connection state flags above. The CT action will always set the TRACKED bit. When the COMMIT flag is passed to the conntrack action, this specifies that information about the connection should be stored. This allows subsequent packets for the same (or related) connections to be correlated with this connection. Sending subsequent packets for the connection through conntrack allows the connection tracker to consider the packets as ESTABLISHED, RELATED, and/or REPLY_DIR. The CT action may optionally take a zone to track the flow within. This allows connections with the same 5-tuple to be kept logically separate from connections in other zones. If the zone is specified, then the "ct_zone" match field will be subsequently populated with the zone id. IP fragments are handled by transparently assembling them as part of the CT action. The maximum received unit (MRU) size is tracked so that refragmentation can occur during output. IP frag handling contributed by Andy Zhou. Based on original design by Justin Pettit. Signed-off-by: Joe Stringer Signed-off-by: Justin Pettit Signed-off-by: Andy Zhou Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d6b885460187..55f599792673 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -164,6 +164,9 @@ enum ovs_packet_cmd { * %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the * output port is actually a tunnel port. Contains the output tunnel key * extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes. + * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and + * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment + * size. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -180,6 +183,7 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_UNUSED2, OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, error logging should be suppressed. */ + OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ __OVS_PACKET_ATTR_MAX }; @@ -319,6 +323,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ + OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ + OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -431,6 +437,15 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; +/* OVS_KEY_ATTR_CT_STATE flags */ +#define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */ +#define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ +#define OVS_CS_F_RELATED 0x04 /* Related to an established + * connection. */ +#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */ +#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */ +#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */ + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow @@ -594,6 +609,28 @@ struct ovs_action_hash { uint32_t hash_basis; }; +/** + * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. + * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. + * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. + */ +enum ovs_ct_attr { + OVS_CT_ATTR_UNSPEC, + OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ + OVS_CT_ATTR_ZONE, /* u16 zone id. */ + __OVS_CT_ATTR_MAX +}; + +#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) + +/* + * OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_* + * @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows + * future packets for the same connection to be identified as 'established' + * or 'related'. + */ +#define OVS_CT_F_COMMIT 0x01 + /** * enum ovs_action_attr - Action types. * @@ -623,6 +660,8 @@ struct ovs_action_hash { * indicate the new packet contents. This could potentially still be * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related + * entries in the flow key. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -648,6 +687,7 @@ enum ovs_action_attr { * data immediately followed by a mask. * The data must be zero for the unmasked * bits. */ + OVS_ACTION_ATTR_CT, /* One nested OVS_CT_ATTR_* . */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit v1.2.3 From 182e3042e15de759e81618d11fe4f62f5259d982 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:49 -0700 Subject: openvswitch: Allow matching on conntrack mark Allow matching and setting the ct_mark field. As with ct_state and ct_zone, these fields are populated when the CT action is executed. To write to this field, a value and mask can be specified as a nested attribute under the CT action. This data is stored with the conntrack entry, and is executed after the lookup occurs for the CT action. The conntrack entry itself must be committed using the COMMIT flag in the CT action flags for this change to persist. Signed-off-by: Justin Pettit Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 55f599792673..7a185b554343 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -325,6 +325,7 @@ enum ovs_key_attr { * the accepted length of the array. */ OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ + OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -613,11 +614,15 @@ struct ovs_action_hash { * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. + * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the + * mask, the corresponding bit in the value is copied to the connection + * tracking mark field in the connection. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ + OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ __OVS_CT_ATTR_MAX }; -- cgit v1.2.3 From c2ac667358708d7cce64c78f58af6adf4c1e848b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:52 -0700 Subject: openvswitch: Allow matching on conntrack label Allow matching and setting the ct_label field. As with ct_mark, this is populated by executing the CT action. The label field may be modified by specifying a label and mask nested under the CT action. It is stored as metadata attached to the connection. Label modification occurs after lookup, and will only persist when the conntrack entry is committed by providing the COMMIT flag to the CT action. Labels are currently fixed to 128 bits in size. Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7a185b554343..9d52058a9330 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -326,6 +326,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ + OVS_KEY_ATTR_CT_LABEL, /* 16-octet connection tracking label */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -438,6 +439,11 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; +#define OVS_CT_LABEL_LEN 16 +struct ovs_key_ct_label { + __u8 ct_label[OVS_CT_LABEL_LEN]; +}; + /* OVS_KEY_ATTR_CT_STATE flags */ #define OVS_CS_F_NEW 0x01 /* Beginning of a new connection. */ #define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ @@ -617,12 +623,16 @@ struct ovs_action_hash { * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection * tracking mark field in the connection. + * @OVS_CT_ATTR_LABEL: %OVS_CT_LABEL_LEN value followed by %OVS_CT_LABEL_LEN + * mask. For each bit set in the mask, the corresponding bit in the value is + * copied to the connection tracking label field in the connection. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ + OVS_CT_ATTR_LABEL, /* label to associate with this connection. */ __OVS_CT_ATTR_MAX }; -- cgit v1.2.3 From cae3a2627520c3795b54533c5328b77af3405dbe Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 26 Aug 2015 11:31:53 -0700 Subject: openvswitch: Allow attaching helpers to ct action Add support for using conntrack helpers to assist protocol detection. The new OVS_CT_ATTR_HELPER attribute of the CT action specifies a helper to be used for this connection. If no helper is specified, then helpers will be automatically applied as per the sysctl configuration of net.netfilter.nf_conntrack_helper. The helper may be specified as part of the conntrack action, eg: ct(helper=ftp). Initial packets for related connections should be committed to allow later packets for the flow to be considered established. Example ovs-ofctl flows allowing FTP connections from ports 1->2: in_port=1,tcp,action=ct(helper=ftp,commit),2 in_port=2,tcp,ct_state=-trk,action=ct(recirc) in_port=2,tcp,ct_state=+trk-new+est,action=1 in_port=2,tcp,ct_state=+trk+rel,action=1 Signed-off-by: Joe Stringer Acked-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9d52058a9330..32e07d8cbaf4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -626,6 +626,7 @@ struct ovs_action_hash { * @OVS_CT_ATTR_LABEL: %OVS_CT_LABEL_LEN value followed by %OVS_CT_LABEL_LEN * mask. For each bit set in the mask, the corresponding bit in the value is * copied to the connection tracking label field in the connection. + * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -633,6 +634,8 @@ enum ovs_ct_attr { OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ OVS_CT_ATTR_LABEL, /* label to associate with this connection. */ + OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of + related connections. */ __OVS_CT_ATTR_MAX }; -- cgit v1.2.3 From d2d427b3927bd7a0348fc7f323d0e291f79a2779 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Aug 2015 15:32:26 +0900 Subject: bridge: Add netlink support for vlan_protocol attribute This enables bridge vlan_protocol to be configured through netlink. When CONFIG_BRIDGE_VLAN_FILTERING is disabled, kernel behaves the same way as this feature is not implemented. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 313c305fd1ad..2d13dd44ecaa 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -231,6 +231,7 @@ enum { IFLA_BR_STP_STATE, IFLA_BR_PRIORITY, IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, __IFLA_BR_MAX, }; -- cgit v1.2.3 From cd7918b35f0ee0106bbe2ce4a14b5a8c9763deb8 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 26 Aug 2015 23:46:51 -0700 Subject: geneve: Make dst-port configurable. Add netlink interface to configure Geneve UDP port number. So that user can configure it for a Gevene device. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2d13dd44ecaa..9d73c31896d0 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -410,6 +410,7 @@ enum { IFLA_GENEVE_REMOTE, IFLA_GENEVE_TTL, IFLA_GENEVE_TOS, + IFLA_GENEVE_PORT, /* destination port */ __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From e305ac6cf5a1e1386aedce7ef9cb773635d5845c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 26 Aug 2015 23:46:52 -0700 Subject: geneve: Add support to collect tunnel metadata. Following patch create new tunnel flag which enable tunnel metadata collection on given device. These devices can be used by tunnel metadata based routing or by OVS. Geneve Consolidation patch get rid of collect_md_tun to simplify tunnel lookup further. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 9d73c31896d0..3a5f263cfc2f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -411,6 +411,7 @@ enum { IFLA_GENEVE_TTL, IFLA_GENEVE_TOS, IFLA_GENEVE_PORT, /* destination port */ + IFLA_GENEVE_COLLECT_METADATA, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From 5153aacfb8e2744af68e7b84ccd3f02aeefe4f48 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 26 Aug 2015 21:12:15 +0800 Subject: NFS: Update NFS4_BITMAP_SIZE v4.1/v4.2 have define attributes at word2, nfs client also support security label now. v3, same as v2. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 2119c7c274d7..2b871e0858d9 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -15,7 +15,7 @@ #include -#define NFS4_BITMAP_SIZE 2 +#define NFS4_BITMAP_SIZE 3 #define NFS4_VERIFIER_SIZE 8 #define NFS4_STATEID_SEQID_SIZE 4 #define NFS4_STATEID_OTHER_SIZE 12 -- cgit v1.2.3 From 0a6a3a23ea6efde079a5b77688541a98bf202721 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Fri, 28 Aug 2015 07:07:48 +0200 Subject: netlink: add NETLINK_CAP_ACK socket option Since commit c05cdb1b864f ("netlink: allow large data transfers from user-space"), the kernel may fail to allocate the necessary room for the acknowledgment message back to userspace. This patch introduces a new socket option that trims off the payload of the original netlink message. The netlink message header is still included, so the user can guess from the sequence number what is the message that has triggered the acknowledgment. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Christophe Ricard Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index cf6a65cccbdf..6f3fe16cd22a 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -110,6 +110,7 @@ struct nlmsgerr { #define NETLINK_TX_RING 7 #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 +#define NETLINK_CAP_ACK 10 struct nl_pktinfo { __u32 group; -- cgit v1.2.3 From b8d3e4163a3562d7cba486687904383e78e7dd6a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 31 Aug 2015 15:58:46 +0200 Subject: fib, fib6: reject invalid feature bits Feature bits that are invalid should not be accepted by the kernel, only the lower 4 bits may be configured, but not the remaining ones. Even from these 4, 2 of them are unused. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0d3d3cc43356..702024769c74 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -418,10 +418,13 @@ enum { #define RTAX_MAX (__RTAX_MAX - 1) -#define RTAX_FEATURE_ECN 0x00000001 -#define RTAX_FEATURE_SACK 0x00000002 -#define RTAX_FEATURE_TIMESTAMP 0x00000004 -#define RTAX_FEATURE_ALLFRAG 0x00000008 +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) +#define RTAX_FEATURE_TIMESTAMP (1 << 2) +#define RTAX_FEATURE_ALLFRAG (1 << 3) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG) struct rta_session { __u8 proto; -- cgit v1.2.3 From 16f7249ddf831f5ec0e1358222ce5db300446b84 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 2 Sep 2015 13:41:18 +0200 Subject: uapi/drm/i915_drm.h: fix userspace compilation. commit 346add7834557b5b9628b9bf2387106d42e631d4 Author: Daniel Vetter Date: Tue Jul 14 18:07:30 2015 +0200 drm/i915: Use expcitly fixed type in compat32 structs changed the type of param field in drm_i915_getparam from int to s32. This header is exported to userspace and needs to use userspace type __s32 instead. This fixes userspace compilation errors like the following: include/drm/i915_drm.h:361:2: error: unknown type name 's32' s32 param; Signed-off-by: Artem Savkov Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- include/uapi/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index dbd16a2d37db..fd5aa47bd689 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -358,7 +358,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { - s32 param; + __s32 param; /* * WARNING: Using pointers instead of fixed-size u64 means we need to write * compat32 code. Don't repeat this mistake. -- cgit v1.2.3 From 58319057b7847667f0c9585b9de0e8932b0fdb08 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 4 Sep 2015 15:42:45 -0700 Subject: capabilities: ambient capabilities Credit where credit is due: this idea comes from Christoph Lameter with a lot of valuable input from Serge Hallyn. This patch is heavily based on Christoph's patch. ===== The status quo ===== On Linux, there are a number of capabilities defined by the kernel. To perform various privileged tasks, processes can wield capabilities that they hold. Each task has four capability masks: effective (pE), permitted (pP), inheritable (pI), and a bounding set (X). When the kernel checks for a capability, it checks pE. The other capability masks serve to modify what capabilities can be in pE. Any task can remove capabilities from pE, pP, or pI at any time. If a task has a capability in pP, it can add that capability to pE and/or pI. If a task has CAP_SETPCAP, then it can add any capability to pI, and it can remove capabilities from X. Tasks are not the only things that can have capabilities; files can also have capabilities. A file can have no capabilty information at all [1]. If a file has capability information, then it has a permitted mask (fP) and an inheritable mask (fI) as well as a single effective bit (fE) [2]. File capabilities modify the capabilities of tasks that execve(2) them. A task that successfully calls execve has its capabilities modified for the file ultimately being excecuted (i.e. the binary itself if that binary is ELF or for the interpreter if the binary is a script.) [3] In the capability evolution rules, for each mask Z, pZ represents the old value and pZ' represents the new value. The rules are: pP' = (X & fP) | (pI & fI) pI' = pI pE' = (fE ? pP' : 0) X is unchanged For setuid binaries, fP, fI, and fE are modified by a moderately complicated set of rules that emulate POSIX behavior. Similarly, if euid == 0 or ruid == 0, then fP, fI, and fE are modified differently (primary, fP and fI usually end up being the full set). For nonroot users executing binaries with neither setuid nor file caps, fI and fP are empty and fE is false. As an extra complication, if you execute a process as nonroot and fE is set, then the "secure exec" rules are in effect: AT_SECURE gets set, LD_PRELOAD doesn't work, etc. This is rather messy. We've learned that making any changes is dangerous, though: if a new kernel version allows an unprivileged program to change its security state in a way that persists cross execution of a setuid program or a program with file caps, this persistent state is surprisingly likely to allow setuid or file-capped programs to be exploited for privilege escalation. ===== The problem ===== Capability inheritance is basically useless. If you aren't root and you execute an ordinary binary, fI is zero, so your capabilities have no effect whatsoever on pP'. This means that you can't usefully execute a helper process or a shell command with elevated capabilities if you aren't root. On current kernels, you can sort of work around this by setting fI to the full set for most or all non-setuid executable files. This causes pP' = pI for nonroot, and inheritance works. No one does this because it's a PITA and it isn't even supported on most filesystems. If you try this, you'll discover that every nonroot program ends up with secure exec rules, breaking many things. This is a problem that has bitten many people who have tried to use capabilities for anything useful. ===== The proposed change ===== This patch adds a fifth capability mask called the ambient mask (pA). pA does what most people expect pI to do. pA obeys the invariant that no bit can ever be set in pA if it is not set in both pP and pI. Dropping a bit from pP or pI drops that bit from pA. This ensures that existing programs that try to drop capabilities still do so, with a complication. Because capability inheritance is so broken, setting KEEPCAPS, using setresuid to switch to nonroot uids, and then calling execve effectively drops capabilities. Therefore, setresuid from root to nonroot conditionally clears pA unless SECBIT_NO_SETUID_FIXUP is set. Processes that don't like this can re-add bits to pA afterwards. The capability evolution rules are changed: pA' = (file caps or setuid or setgid ? 0 : pA) pP' = (X & fP) | (pI & fI) | pA' pI' = pI pE' = (fE ? pP' : pA') X is unchanged If you are nonroot but you have a capability, you can add it to pA. If you do so, your children get that capability in pA, pP, and pE. For example, you can set pA = CAP_NET_BIND_SERVICE, and your children can automatically bind low-numbered ports. Hallelujah! Unprivileged users can create user namespaces, map themselves to a nonzero uid, and create both privileged (relative to their namespace) and unprivileged process trees. This is currently more or less impossible. Hallelujah! You cannot use pA to try to subvert a setuid, setgid, or file-capped program: if you execute any such program, pA gets cleared and the resulting evolution rules are unchanged by this patch. Users with nonzero pA are unlikely to unintentionally leak that capability. If they run programs that try to drop privileges, dropping privileges will still work. It's worth noting that the degree of paranoia in this patch could possibly be reduced without causing serious problems. Specifically, if we allowed pA to persist across executing non-pA-aware setuid binaries and across setresuid, then, naively, the only capabilities that could leak as a result would be the capabilities in pA, and any attacker *already* has those capabilities. This would make me nervous, though -- setuid binaries that tried to privilege-separate might fail to do so, and putting CAP_DAC_READ_SEARCH or CAP_DAC_OVERRIDE into pA could have unexpected side effects. (Whether these unexpected side effects would be exploitable is an open question.) I've therefore taken the more paranoid route. We can revisit this later. An alternative would be to require PR_SET_NO_NEW_PRIVS before setting ambient capabilities. I think that this would be annoying and would make granting otherwise unprivileged users minor ambient capabilities (CAP_NET_BIND_SERVICE or CAP_NET_RAW for example) much less useful than it is with this patch. ===== Footnotes ===== [1] Files that are missing the "security.capability" xattr or that have unrecognized values for that xattr end up with has_cap set to false. The code that does that appears to be complicated for no good reason. [2] The libcap capability mask parsers and formatters are dangerously misleading and the documentation is flat-out wrong. fE is *not* a mask; it's a single bit. This has probably confused every single person who has tried to use file capabilities. [3] Linux very confusingly processes both the script and the interpreter if applicable, for reasons that elude me. The results from thinking about a script's file capabilities and/or setuid bits are mostly discarded. Preliminary userspace code is here, but it needs updating: https://git.kernel.org/cgit/linux/kernel/git/luto/util-linux-playground.git/commit/?h=cap_ambient&id=7f5afbd175d2 Here is a test program that can be used to verify the functionality (from Christoph): /* * Test program for the ambient capabilities. This program spawns a shell * that allows running processes with a defined set of capabilities. * * (C) 2015 Christoph Lameter * Released under: GPL v3 or later. * * * Compile using: * * gcc -o ambient_test ambient_test.o -lcap-ng * * This program must have the following capabilities to run properly: * Permissions for CAP_NET_RAW, CAP_NET_ADMIN, CAP_SYS_NICE * * A command to equip the binary with the right caps is: * * setcap cap_net_raw,cap_net_admin,cap_sys_nice+p ambient_test * * * To get a shell with additional caps that can be inherited by other processes: * * ./ambient_test /bin/bash * * * Verifying that it works: * * From the bash spawed by ambient_test run * * cat /proc/$$/status * * and have a look at the capabilities. */ #include #include #include #include #include #include /* * Definitions from the kernel header files. These are going to be removed * when the /usr/include files have these defined. */ #define PR_CAP_AMBIENT 47 #define PR_CAP_AMBIENT_IS_SET 1 #define PR_CAP_AMBIENT_RAISE 2 #define PR_CAP_AMBIENT_LOWER 3 #define PR_CAP_AMBIENT_CLEAR_ALL 4 static void set_ambient_cap(int cap) { int rc; capng_get_caps_process(); rc = capng_update(CAPNG_ADD, CAPNG_INHERITABLE, cap); if (rc) { printf("Cannot add inheritable cap\n"); exit(2); } capng_apply(CAPNG_SELECT_CAPS); /* Note the two 0s at the end. Kernel checks for these */ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0)) { perror("Cannot set cap"); exit(1); } } int main(int argc, char **argv) { int rc; set_ambient_cap(CAP_NET_RAW); set_ambient_cap(CAP_NET_ADMIN); set_ambient_cap(CAP_SYS_NICE); printf("Ambient_test forking shell\n"); if (execv(argv[1], argv + 1)) perror("Cannot exec"); return 0; } Signed-off-by: Christoph Lameter # Original author Signed-off-by: Andy Lutomirski Acked-by: Serge E. Hallyn Acked-by: Kees Cook Cc: Jonathan Corbet Cc: Aaron Jones Cc: Ted Ts'o Cc: Andrew G. Morgan Cc: Mimi Zohar Cc: Austin S Hemmelgarn Cc: Markku Savela Cc: Jarkko Sakkinen Cc: Michael Kerrisk Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/prctl.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 31891d9535e2..a8d0759a9e40 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -190,4 +190,11 @@ struct prctl_mm_map { # define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ # define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ +/* Control the ambient capability set */ +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 746bf6d64275be0c65b0631d8a72b16f1454cfa1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 4 Sep 2015 15:42:51 -0700 Subject: capabilities: add a securebit to disable PR_CAP_AMBIENT_RAISE Per Andrew Morgan's request, add a securebit to allow admins to disable PR_CAP_AMBIENT_RAISE. This securebit will prevent processes from adding capabilities to their ambient set. For simplicity, this disables PR_CAP_AMBIENT_RAISE entirely rather than just disabling setting previously cleared bits. Signed-off-by: Andy Lutomirski Acked-by: Andrew G. Morgan Acked-by: Serge Hallyn Cc: Kees Cook Cc: Christoph Lameter Cc: Serge Hallyn Cc: Jonathan Corbet Cc: Aaron Jones Cc: Ted Ts'o Cc: Andrew G. Morgan Cc: Mimi Zohar Cc: Austin S Hemmelgarn Cc: Markku Savela Cc: Jarkko Sakkinen Cc: Michael Kerrisk Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/securebits.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h index 985aac9e6bf8..35ac35cef217 100644 --- a/include/uapi/linux/securebits.h +++ b/include/uapi/linux/securebits.h @@ -43,9 +43,18 @@ #define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) #define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) +/* When set, a process cannot add new capabilities to its ambient set. */ +#define SECURE_NO_CAP_AMBIENT_RAISE 6 +#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED 7 /* make bit-6 immutable */ + +#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) +#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ + (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) + #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ - issecure_mask(SECURE_KEEP_CAPS)) + issecure_mask(SECURE_KEEP_CAPS) | \ + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) #endif /* _UAPI_LINUX_SECUREBITS_H */ -- cgit v1.2.3 From 1038628d80e96e3a086189172d9be8eb85ecfabf Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 4 Sep 2015 15:46:04 -0700 Subject: userfaultfd: uAPI Defines the uAPI of the userfaultfd, notably the ioctl numbers and protocol. Signed-off-by: Andrea Arcangeli Acked-by: Pavel Emelyanov Cc: Sanidhya Kashyap Cc: zhang.zhanghailiang@huawei.com Cc: "Kirill A. Shutemov" Cc: Andres Lagar-Cavilla Cc: Dave Hansen Cc: Paolo Bonzini Cc: Rik van Riel Cc: Mel Gorman Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Peter Feiner Cc: "Dr. David Alan Gilbert" Cc: Johannes Weiner Cc: "Huangpeng (Peter)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/userfaultfd.h | 83 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 include/uapi/linux/userfaultfd.h (limited to 'include/uapi') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index aafb9937b162..70ff1d9abf0d 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -456,3 +456,4 @@ header-y += xfrm.h header-y += xilinx-v4l2-controls.h header-y += zorro.h header-y += zorro_ids.h +header-y += userfaultfd.h diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h new file mode 100644 index 000000000000..09c2e2a8c9d6 --- /dev/null +++ b/include/uapi/linux/userfaultfd.h @@ -0,0 +1,83 @@ +/* + * include/linux/userfaultfd.h + * + * Copyright (C) 2007 Davide Libenzi + * Copyright (C) 2015 Red Hat, Inc. + * + */ + +#ifndef _LINUX_USERFAULTFD_H +#define _LINUX_USERFAULTFD_H + +#include + +#define UFFD_API ((__u64)0xAA) +/* FIXME: add "|UFFD_BIT_WP" to UFFD_API_BITS after implementing it */ +#define UFFD_API_BITS (UFFD_BIT_WRITE) +#define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ + (__u64)1 << _UFFDIO_API) +#define UFFD_API_RANGE_IOCTLS \ + ((__u64)1 << _UFFDIO_WAKE) + +/* + * Valid ioctl command number range with this API is from 0x00 to + * 0x3F. UFFDIO_API is the fixed number, everything else can be + * changed by implementing a different UFFD_API. If sticking to the + * same UFFD_API more ioctl can be added and userland will be aware of + * which ioctl the running kernel implements through the ioctl command + * bitmask written by the UFFDIO_API. + */ +#define _UFFDIO_REGISTER (0x00) +#define _UFFDIO_UNREGISTER (0x01) +#define _UFFDIO_WAKE (0x02) +#define _UFFDIO_API (0x3F) + +/* userfaultfd ioctl ids */ +#define UFFDIO 0xAA +#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ + struct uffdio_api) +#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ + struct uffdio_register) +#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ + struct uffdio_range) +#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ + struct uffdio_range) + +/* + * Valid bits below PAGE_SHIFT in the userfault address read through + * the read() syscall. + */ +#define UFFD_BIT_WRITE (1<<0) /* this was a write fault, MISSING or WP */ +#define UFFD_BIT_WP (1<<1) /* handle_userfault() reason VM_UFFD_WP */ +#define UFFD_BITS 2 /* two above bits used for UFFD_BIT_* mask */ + +struct uffdio_api { + /* userland asks for an API number */ + __u64 api; + + /* kernel answers below with the available features for the API */ + __u64 bits; + __u64 ioctls; +}; + +struct uffdio_range { + __u64 start; + __u64 len; +}; + +struct uffdio_register { + struct uffdio_range range; +#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) +#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) + __u64 mode; + + /* + * kernel answers which ioctl commands are available for the + * range, keep at the end as the last 8 bytes aren't read. + */ + __u64 ioctls; +}; + +#endif /* _LINUX_USERFAULTFD_H */ -- cgit v1.2.3 From 3f602d2724b1f7d2d27ddcd7963a040a5890fd16 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 4 Sep 2015 15:46:34 -0700 Subject: userfaultfd: Rename uffd_api.bits into .features This is (seems to be) the minimal thing that is required to unblock standard uffd usage from the non-cooperative one. Now more bits can be added to the features field indicating e.g. UFFD_FEATURE_FORK and others needed for the latter use-case. Signed-off-by: Pavel Emelyanov Signed-off-by: Andrea Arcangeli Cc: Sanidhya Kashyap Cc: zhang.zhanghailiang@huawei.com Cc: "Kirill A. Shutemov" Cc: Andres Lagar-Cavilla Cc: Dave Hansen Cc: Paolo Bonzini Cc: Rik van Riel Cc: Mel Gorman Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Peter Feiner Cc: "Dr. David Alan Gilbert" Cc: Johannes Weiner Cc: "Huangpeng (Peter)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 09c2e2a8c9d6..330206016249 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -12,8 +12,8 @@ #include #define UFFD_API ((__u64)0xAA) -/* FIXME: add "|UFFD_BIT_WP" to UFFD_API_BITS after implementing it */ -#define UFFD_API_BITS (UFFD_BIT_WRITE) +/* FIXME: add "|UFFD_FEATURE_WP" to UFFD_API_FEATURES after implementing it */ +#define UFFD_API_FEATURES (UFFD_FEATURE_WRITE_BIT) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -53,12 +53,18 @@ #define UFFD_BIT_WP (1<<1) /* handle_userfault() reason VM_UFFD_WP */ #define UFFD_BITS 2 /* two above bits used for UFFD_BIT_* mask */ +/* + * Features reported in uffdio_api.features field + */ +#define UFFD_FEATURE_WRITE_BIT (1<<0) /* Corresponds to UFFD_BIT_WRITE */ +#define UFFD_FEATURE_WP_BIT (1<<1) /* Corresponds to UFFD_BIT_WP */ + struct uffdio_api { /* userland asks for an API number */ __u64 api; /* kernel answers below with the available features for the API */ - __u64 bits; + __u64 features; __u64 ioctls; }; -- cgit v1.2.3 From a9b85f9415fd9e529d03299e5335433f614ec1fb Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 4 Sep 2015 15:46:37 -0700 Subject: userfaultfd: change the read API to return a uffd_msg I had requests to return the full address (not the page aligned one) to userland. It's not entirely clear how the page offset could be relevant because userfaults aren't like SIGBUS that can sigjump to a different place and it actually skip resolving the fault depending on a page offset. There's currently no real way to skip the fault especially because after a UFFDIO_COPY|ZEROPAGE, the fault is optimized to be retried within the kernel without having to return to userland first (not even self modifying code replacing the .text that touched the faulting address would prevent the fault to be repeated). Userland cannot skip repeating the fault even more so if the fault was triggered by a KVM secondary page fault or any get_user_pages or any copy-user inside some syscall which will return to kernel code. The second time FAULT_FLAG_RETRY_NOWAIT won't be set leading to a SIGBUS being raised because the userfault can't wait if it cannot release the mmap_map first (and FAULT_FLAG_RETRY_NOWAIT is required for that). Still returning userland a proper structure during the read() on the uffd, can allow to use the current UFFD_API for the future non-cooperative extensions too and it looks cleaner as well. Once we get additional fields there's no point to return the fault address page aligned anymore to reuse the bits below PAGE_SHIFT. The only downside is that the read() syscall will read 32bytes instead of 8bytes but that's not going to be measurable overhead. The total number of new events that can be extended or of new future bits for already shipped events, is limited to 64 by the features field of the uffdio_api structure. If more will be needed a bump of UFFD_API will be required. [akpm@linux-foundation.org: use __packed] Signed-off-by: Andrea Arcangeli Acked-by: Pavel Emelyanov Cc: Sanidhya Kashyap Cc: zhang.zhanghailiang@huawei.com Cc: "Kirill A. Shutemov" Cc: Andres Lagar-Cavilla Cc: Dave Hansen Cc: Paolo Bonzini Cc: Rik van Riel Cc: Mel Gorman Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Peter Feiner Cc: "Dr. David Alan Gilbert" Cc: Johannes Weiner Cc: "Huangpeng (Peter)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 70 +++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 330206016249..a5f8825381ef 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -11,9 +11,15 @@ #include +#include + #define UFFD_API ((__u64)0xAA) -/* FIXME: add "|UFFD_FEATURE_WP" to UFFD_API_FEATURES after implementing it */ -#define UFFD_API_FEATURES (UFFD_FEATURE_WRITE_BIT) +/* + * After implementing the respective features it will become: + * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + * UFFD_FEATURE_EVENT_FORK) + */ +#define UFFD_API_FEATURES (0) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -45,26 +51,60 @@ #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ struct uffdio_range) -/* - * Valid bits below PAGE_SHIFT in the userfault address read through - * the read() syscall. - */ -#define UFFD_BIT_WRITE (1<<0) /* this was a write fault, MISSING or WP */ -#define UFFD_BIT_WP (1<<1) /* handle_userfault() reason VM_UFFD_WP */ -#define UFFD_BITS 2 /* two above bits used for UFFD_BIT_* mask */ +/* read() structure */ +struct uffd_msg { + __u8 event; + + __u8 reserved1; + __u16 reserved2; + __u32 reserved3; + + union { + struct { + __u64 flags; + __u64 address; + } pagefault; + + struct { + /* unused reserved fields */ + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + } reserved; + } arg; +} __packed; /* - * Features reported in uffdio_api.features field + * Start at 0x12 and not at 0 to be more strict against bugs. */ -#define UFFD_FEATURE_WRITE_BIT (1<<0) /* Corresponds to UFFD_BIT_WRITE */ -#define UFFD_FEATURE_WP_BIT (1<<1) /* Corresponds to UFFD_BIT_WP */ +#define UFFD_EVENT_PAGEFAULT 0x12 +#if 0 /* not available yet */ +#define UFFD_EVENT_FORK 0x13 +#endif + +/* flags for UFFD_EVENT_PAGEFAULT */ +#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ +#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ struct uffdio_api { - /* userland asks for an API number */ + /* userland asks for an API number and the features to enable */ __u64 api; - - /* kernel answers below with the available features for the API */ + /* + * Kernel answers below with the all available features for + * the API, this notifies userland of which events and/or + * which flags for each event are enabled in the current + * kernel. + * + * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE + * are to be considered implicitly always enabled in all kernels as + * long as the uffdio_api.api requested matches UFFD_API. + */ +#if 0 /* not available yet */ +#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) +#define UFFD_FEATURE_EVENT_FORK (1<<1) +#endif __u64 features; + __u64 ioctls; }; -- cgit v1.2.3 From 1f1c6f075904c241f9e44eb37efa8777141fc938 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 4 Sep 2015 15:47:01 -0700 Subject: userfaultfd: UFFDIO_COPY|UFFDIO_ZEROPAGE uAPI This implements the uABI of UFFDIO_COPY and UFFDIO_ZEROPAGE. Signed-off-by: Andrea Arcangeli Acked-by: Pavel Emelyanov Cc: Sanidhya Kashyap Cc: zhang.zhanghailiang@huawei.com Cc: "Kirill A. Shutemov" Cc: Andres Lagar-Cavilla Cc: Dave Hansen Cc: Paolo Bonzini Cc: Rik van Riel Cc: Mel Gorman Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Peter Feiner Cc: "Dr. David Alan Gilbert" Cc: Johannes Weiner Cc: "Huangpeng (Peter)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index a5f8825381ef..df0e09bb7dd5 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -25,7 +25,9 @@ (__u64)1 << _UFFDIO_UNREGISTER | \ (__u64)1 << _UFFDIO_API) #define UFFD_API_RANGE_IOCTLS \ - ((__u64)1 << _UFFDIO_WAKE) + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE) /* * Valid ioctl command number range with this API is from 0x00 to @@ -38,6 +40,8 @@ #define _UFFDIO_REGISTER (0x00) #define _UFFDIO_UNREGISTER (0x01) #define _UFFDIO_WAKE (0x02) +#define _UFFDIO_COPY (0x03) +#define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -50,6 +54,10 @@ struct uffdio_range) #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ struct uffdio_range) +#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ + struct uffdio_copy) +#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ + struct uffdio_zeropage) /* read() structure */ struct uffd_msg { @@ -126,4 +134,36 @@ struct uffdio_register { __u64 ioctls; }; +struct uffdio_copy { + __u64 dst; + __u64 src; + __u64 len; + /* + * There will be a wrprotection flag later that allows to map + * pages wrprotected on the fly. And such a flag will be + * available if the wrprotection ioctl are implemented for the + * range according to the uffdio_register.ioctls. + */ +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * "copy" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 copy; +}; + +struct uffdio_zeropage { + struct uffdio_range range; +#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * "zeropage" is written by the ioctl and must be at the end: + * the copy_from_user will not read the last 8 bytes. + */ + __s64 zeropage; +}; + #endif /* _LINUX_USERFAULTFD_H */ -- cgit v1.2.3 From a13d7201d7deedcbb6ac6efa94a1a7d34d3d79ec Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 7 Aug 2015 17:34:41 +0100 Subject: xen/privcmd: Further s/MFN/GFN/ clean-up The privcmd code is mixing the usage of GFN and MFN within the same functions which make the code difficult to understand when you only work with auto-translated guests. The privcmd driver is only dealing with GFN so replace all the mention of MFN into GFN. The ioctl structure used to map foreign change has been left unchanged given that the userspace is using it. Nonetheless, add a comment to explain the expected value within the "mfn" field. Signed-off-by: Julien Grall Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- include/uapi/xen/privcmd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index a85316811d79..7ddeeda93809 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -44,6 +44,10 @@ struct privcmd_hypercall { struct privcmd_mmap_entry { __u64 va; + /* + * This should be a GFN. It's not possible to change the name because + * it's exposed to the user-space. + */ __u64 mfn; __u64 npages; }; -- cgit v1.2.3 From b14132797d8041a42e03f4ffa1e722da1425adfb Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 18 Aug 2015 03:28:01 -0400 Subject: elf-em.h: move EM_MICROBLAZE to the common header The linux/audit.h header uses EM_MICROBLAZE in order to define AUDIT_ARCH_MICROBLAZE, but it's only available in the microblaze asm headers. Move it to the common elf-em.h header so that the define can be used on non-microblaze systems. Otherwise we get build errors that EM_MICROBLAZE isn't defined when we try to use the AUDIT_ARCH_MICROBLAZE symbol. Signed-off-by: Mike Frysinger Signed-off-by: Michal Simek --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 3429a3ba382b..b56dfcfe922a 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -39,6 +39,7 @@ #define EM_TI_C6000 140 /* TI C6X DSPs */ #define EM_AARCH64 183 /* ARM 64 bit */ #define EM_TILEPRO 188 /* Tilera TILEPro */ +#define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ -- cgit v1.2.3 From 1ab1e895492d8084dfc1c854efacde219e56b8c1 Mon Sep 17 00:00:00 2001 From: Henrik Austad Date: Wed, 9 Sep 2015 12:25:17 +0200 Subject: ether: add IEEE 1722 ethertype - TSN IEEE 1722 describes AVB (later renamed to TSN - Time Sensitive Networking), a protocol, encapsualtion and synchronization to utilize standard networks for audio/video (and later other time-sensitive) streams. This standard uses ethertype 0x22F0. http://standards.ieee.org/develop/regauth/ethertype/eth.txt This is a respin of a previous patch ("ether: add AVB frame type ETH_P_AVB") CC: "David S. Miller" CC: netdev@vger.kernel.org CC: linux-api@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Henrik Austad Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index aa63ed023c2b..ea9221b0331a 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -42,6 +42,7 @@ #define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ +#define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ #define ETH_P_IP 0x0800 /* Internet Protocol packet */ #define ETH_P_X25 0x0805 /* CCITT X.25 */ #define ETH_P_ARP 0x0806 /* Address Resolution packet */ -- cgit v1.2.3 From f074a8f49eb87cde95ac9d040ad5e7ea4f029738 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 9 Sep 2015 15:35:48 -0700 Subject: proc: export idle flag via kpageflags As noted by Minchan, a benefit of reading idle flag from /proc/kpageflags is that one can easily filter dirty and/or unevictable pages while estimating the size of unused memory. Note that idle flag read from /proc/kpageflags may be stale in case the page was accessed via a PTE, because it would be too costly to iterate over all page mappings on each /proc/kpageflags read to provide an up-to-date value. To make sure the flag is up-to-date one has to read /sys/kernel/mm/page_idle/bitmap first. Signed-off-by: Vladimir Davydov Reviewed-by: Andres Lagar-Cavilla Cc: Minchan Kim Cc: Raghavendra K T Cc: Johannes Weiner Cc: Michal Hocko Cc: Greg Thelen Cc: Michel Lespinasse Cc: David Rientjes Cc: Pavel Emelyanov Cc: Cyrill Gorcunov Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/kernel-page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index a6c4962e5d46..5da5f8751ce7 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -33,6 +33,7 @@ #define KPF_THP 22 #define KPF_BALLOON 23 #define KPF_ZERO_PAGE 24 +#define KPF_IDLE 25 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ -- cgit v1.2.3 From ac64a2ce509104a746321a4f9646b6750cf281eb Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 4 Sep 2015 01:39:56 +0200 Subject: target: use stringify.h instead of own definition Signed-off-by: David Disseldorp Acked-by: Andy Grover Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index b67f99d3c520..95c6521d8a95 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -42,10 +42,6 @@ #define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ -/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */ -#define xstr(s) str(s) -#define str(s) #s - struct tcmu_mailbox { __u16 version; __u16 flags; -- cgit v1.2.3 From 5b25b13ab08f616efd566347d809b4ece54570d1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 11 Sep 2015 13:07:39 -0700 Subject: sys_membarrier(): system-wide memory barrier (generic, x86) Here is an implementation of a new system call, sys_membarrier(), which executes a memory barrier on all threads running on the system. It is implemented by calling synchronize_sched(). It can be used to distribute the cost of user-space memory barriers asymmetrically by transforming pairs of memory barriers into pairs consisting of sys_membarrier() and a compiler barrier. For synchronization primitives that distinguish between read-side and write-side (e.g. userspace RCU [1], rwlocks), the read-side can be accelerated significantly by moving the bulk of the memory barrier overhead to the write-side. The existing applications of which I am aware that would be improved by this system call are as follows: * Through Userspace RCU library (http://urcu.so) - DNS server (Knot DNS) https://www.knot-dns.cz/ - Network sniffer (http://netsniff-ng.org/) - Distributed object storage (https://sheepdog.github.io/sheepdog/) - User-space tracing (http://lttng.org) - Network storage system (https://www.gluster.org/) - Virtual routers (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf) - Financial software (https://lkml.org/lkml/2015/3/23/189) Those projects use RCU in userspace to increase read-side speed and scalability compared to locking. Especially in the case of RCU used by libraries, sys_membarrier can speed up the read-side by moving the bulk of the memory barrier cost to synchronize_rcu(). * Direct users of sys_membarrier - core dotnet garbage collector (https://github.com/dotnet/coreclr/issues/198) Microsoft core dotnet GC developers are planning to use the mprotect() side-effect of issuing memory barriers through IPIs as a way to implement Windows FlushProcessWriteBuffers() on Linux. They are referring to sys_membarrier in their github thread, specifically stating that sys_membarrier() is what they are looking for. To explain the benefit of this scheme, let's introduce two example threads: Thread A (non-frequent, e.g. executing liburcu synchronize_rcu()) Thread B (frequent, e.g. executing liburcu rcu_read_lock()/rcu_read_unlock()) In a scheme where all smp_mb() in thread A are ordering memory accesses with respect to smp_mb() present in Thread B, we can change each smp_mb() within Thread A into calls to sys_membarrier() and each smp_mb() within Thread B into compiler barriers "barrier()". Before the change, we had, for each smp_mb() pairs: Thread A Thread B previous mem accesses previous mem accesses smp_mb() smp_mb() following mem accesses following mem accesses After the change, these pairs become: Thread A Thread B prev mem accesses prev mem accesses sys_membarrier() barrier() follow mem accesses follow mem accesses As we can see, there are two possible scenarios: either Thread B memory accesses do not happen concurrently with Thread A accesses (1), or they do (2). 1) Non-concurrent Thread A vs Thread B accesses: Thread A Thread B prev mem accesses sys_membarrier() follow mem accesses prev mem accesses barrier() follow mem accesses In this case, thread B accesses will be weakly ordered. This is OK, because at that point, thread A is not particularly interested in ordering them with respect to its own accesses. 2) Concurrent Thread A vs Thread B accesses Thread A Thread B prev mem accesses prev mem accesses sys_membarrier() barrier() follow mem accesses follow mem accesses In this case, thread B accesses, which are ensured to be in program order thanks to the compiler barrier, will be "upgraded" to full smp_mb() by synchronize_sched(). * Benchmarks On Intel Xeon E5405 (8 cores) (one thread is calling sys_membarrier, the other 7 threads are busy looping) 1000 non-expedited sys_membarrier calls in 33s =3D 33 milliseconds/call. * User-space user of this system call: Userspace RCU library Both the signal-based and the sys_membarrier userspace RCU schemes permit us to remove the memory barrier from the userspace RCU rcu_read_lock() and rcu_read_unlock() primitives, thus significantly accelerating them. These memory barriers are replaced by compiler barriers on the read-side, and all matching memory barriers on the write-side are turned into an invocation of a memory barrier on all active threads in the process. By letting the kernel perform this synchronization rather than dumbly sending a signal to every process threads (as we currently do), we diminish the number of unnecessary wake ups and only issue the memory barriers on active threads. Non-running threads do not need to execute such barrier anyway, because these are implied by the scheduler context switches. Results in liburcu: Operations in 10s, 6 readers, 2 writers: memory barriers in reader: 1701557485 reads, 2202847 writes signal-based scheme: 9830061167 reads, 6700 writes sys_membarrier: 9952759104 reads, 425 writes sys_membarrier (dyn. check): 7970328887 reads, 425 writes The dynamic sys_membarrier availability check adds some overhead to the read-side compared to the signal-based scheme, but besides that, sys_membarrier slightly outperforms the signal-based scheme. However, this non-expedited sys_membarrier implementation has a much slower grace period than signal and memory barrier schemes. Besides diminishing the number of wake-ups, one major advantage of the membarrier system call over the signal-based scheme is that it does not need to reserve a signal. This plays much more nicely with libraries, and with processes injected into for tracing purposes, for which we cannot expect that signals will be unused by the application. An expedited version of this system call can be added later on to speed up the grace period. Its implementation will likely depend on reading the cpu_curr()->mm without holding each CPU's rq lock. This patch adds the system call to x86 and to asm-generic. [1] http://urcu.so membarrier(2) man page: MEMBARRIER(2) Linux Programmer's Manual MEMBARRIER(2) NAME membarrier - issue memory barriers on a set of threads SYNOPSIS #include int membarrier(int cmd, int flags); DESCRIPTION The cmd argument is one of the following: MEMBARRIER_CMD_QUERY Query the set of supported commands. It returns a bitmask of supported commands. MEMBARRIER_CMD_SHARED Execute a memory barrier on all threads running on the system. Upon return from system call, the caller thread is ensured that all running threads have passed through a state where all memory accesses to user-space addresses match program order between entry to and return from the system call (non-running threads are de facto in such a state). This covers threads from all pro=E2=80=90 cesses running on the system. This command returns 0. The flags argument needs to be 0. For future extensions. All memory accesses performed in program order from each targeted thread is guaranteed to be ordered with respect to sys_membarrier(). If we use the semantic "barrier()" to represent a compiler barrier forcing memory accesses to be performed in program order across the barrier, and smp_mb() to represent explicit memory barriers forcing full memory ordering across the barrier, we have the following ordering table for each pair of barrier(), sys_membarrier() and smp_mb(): The pair ordering is detailed as (O: ordered, X: not ordered): barrier() smp_mb() sys_membarrier() barrier() X X O smp_mb() X O O sys_membarrier() O O O RETURN VALUE On success, these system calls return zero. On error, -1 is returned, and errno is set appropriately. For a given command, with flags argument set to 0, this system call is guaranteed to always return the same value until reboot. ERRORS ENOSYS System call is not implemented. EINVAL Invalid arguments. Linux 2015-04-15 MEMBARRIER(2) Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Reviewed-by: Josh Triplett Cc: KOSAKI Motohiro Cc: Steven Rostedt Cc: Nicholas Miell Cc: Ingo Molnar Cc: Alan Cox Cc: Lai Jiangshan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: David Howells Cc: Pranith Kumar Cc: Michael Kerrisk Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/unistd.h | 4 ++- include/uapi/linux/Kbuild | 1 + include/uapi/linux/membarrier.h | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/membarrier.h (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index e016bd9b1a04..8da542a2874d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -709,9 +709,11 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 281 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat) +#define __NR_membarrier 282 +__SYSCALL(__NR_membarrier, sys_membarrier) #undef __NR_syscalls -#define __NR_syscalls 282 +#define __NR_syscalls 283 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 70ff1d9abf0d..f7b2db44eb4b 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -252,6 +252,7 @@ header-y += mdio.h header-y += media.h header-y += media-bus-format.h header-y += mei.h +header-y += membarrier.h header-y += memfd.h header-y += mempolicy.h header-y += meye.h diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h new file mode 100644 index 000000000000..e0b108bd2624 --- /dev/null +++ b/include/uapi/linux/membarrier.h @@ -0,0 +1,53 @@ +#ifndef _UAPI_LINUX_MEMBARRIER_H +#define _UAPI_LINUX_MEMBARRIER_H + +/* + * linux/membarrier.h + * + * membarrier system call API + * + * Copyright (c) 2010, 2015 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * enum membarrier_cmd - membarrier system call command + * @MEMBARRIER_CMD_QUERY: Query the set of supported commands. It returns + * a bitmask of valid commands. + * @MEMBARRIER_CMD_SHARED: Execute a memory barrier on all running threads. + * Upon return from system call, the caller thread + * is ensured that all running threads have passed + * through a state where all memory accesses to + * user-space addresses match program order between + * entry to and return from the system call + * (non-running threads are de facto in such a + * state). This covers threads from all processes + * running on the system. This command returns 0. + * + * Command to be passed to the membarrier system call. The commands need to + * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to + * the value 0. + */ +enum membarrier_cmd { + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), +}; + +#endif /* _UAPI_LINUX_MEMBARRIER_H */ -- cgit v1.2.3