From 6f6f467eaaa0ac4fb77714d0172d65c781dabb8c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 30 Aug 2013 06:12:40 -0700 Subject: Drop remaining references to H8/300 architecture With the architecture gone, any references to it are no longer needed. Cc: Yoshinori Sato Acked-by: Greg Kroah-Hartman Signed-off-by: Guenter Roeck --- include/uapi/linux/audit.h | 1 - include/uapi/linux/elf-em.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 75cef3fd97ad..db0b825b4810 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -329,7 +329,6 @@ enum { #define AUDIT_ARCH_ARMEB (EM_ARM) #define AUDIT_ARCH_CRIS (EM_CRIS|__AUDIT_ARCH_LE) #define AUDIT_ARCH_FRV (EM_FRV) -#define AUDIT_ARCH_H8300 (EM_H8_300) #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) #define AUDIT_ARCH_IA64 (EM_IA_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_M32R (EM_M32R) diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 59c17a2d38ad..01529bd96438 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -31,7 +31,6 @@ #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ #define EM_V850 87 /* NEC v850 */ #define EM_M32R 88 /* Renesas M32R */ -#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ -- cgit v1.2.3 From df62cdf348c91baac61b4cb19d19ea1ef87b271e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Sep 2013 09:10:20 -0700 Subject: net_sched: htb: support of 64bit rates HTB already can deal with 64bit rates, we only have to add two new attributes so that tc can use them to break the current 32bit ABI barrier. TCA_HTB_RATE64 : class rate (in bytes per second) TCA_HTB_CEIL64 : class ceil (in bytes per second) This allows us to setup HTB on 40Gbps links, as 32bit limit is actually ~34Gbps Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9b829134d422..f2624b549e61 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -357,6 +357,8 @@ enum { TCA_HTB_CTAB, TCA_HTB_RTAB, TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, __TCA_HTB_MAX, }; -- cgit v1.2.3 From 2485602f1af209aaef3f394ac8336a67cb8742aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 18 Aug 2013 22:41:37 +0200 Subject: can: add explicit copyrights to can headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These files are copied to the source code of user space applications (in this case can-utils) and so it makes sense to mention explicitly their copyright. I added the terms of C code that was introduced in the same commit as these headers. Signed-off-by: Uwe Kleine-König Acked-by: Oliver Hartkopp Acked-by: Urs Thuermann Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/bcm.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/can/error.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/can/gw.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/can/raw.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 3ebe387fea4d..382251a1d214 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -7,6 +7,38 @@ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. */ #ifndef CAN_BCM_H diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index 7b7148bded71..b63204545320 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -7,6 +7,38 @@ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. */ #ifndef CAN_ERROR_H diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 4e27c82b564a..844c8964bdfe 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -7,6 +7,38 @@ * Copyright (c) 2011 Volkswagen Group Electronic Research * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. */ #ifndef CAN_GW_H diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index a814062b0719..c7d8c334e0ce 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -8,6 +8,38 @@ * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. */ #ifndef CAN_RAW_H -- cgit v1.2.3 From 1c2da13c21a14e9db99c701412ac9069d5b91cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 7 Sep 2013 21:34:38 +0200 Subject: can: add explicit copyrights to can's netlink header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This file is copied to the source code of user space applications (in this case can-utils) and so it makes sense to mention explicitly their copyright. Signed-off-by: Uwe Kleine-König Acked-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 14966ddb7df1..df944ed206a8 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -5,6 +5,14 @@ * * Copyright (c) 2009 Wolfgang Grandegger * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ #ifndef CAN_NETLINK_H -- cgit v1.2.3 From ad4d35f865408a494f0a4c02b1c7ebd3f80f5dbf Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 5 Sep 2013 15:55:26 +0800 Subject: [SCSI] csiostor: Use pcie_capability_clear_and_set_word() to simplify code pci_is_pcie() and pcie_capability_clear_and_set_word() make it trivial to set the PCIe Completion Timeout, so just fold the csio_set_pcie_completion_timeout() function into its caller. [bhelgaas: changelog, fold csio_set_pcie_completion_timeout() into caller] Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas Cc: Jiang Liu Cc: "James E.J. Bottomley" Cc: Naresh Kumar Inna Cc: "David S. Miller" Cc: Jesper Juhl --- include/uapi/linux/pci_regs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index baa7852468ef..1a38377a0032 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -558,7 +558,8 @@ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ -#define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ +#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ -- cgit v1.2.3 From f36f8c75ae2e7d4da34f4c908cebdb4aa42c977e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Sep 2013 10:35:19 +0100 Subject: KEYS: Add per-user_namespace registers for persistent per-UID kerberos caches Add support for per-user_namespace registers of persistent per-UID kerberos caches held within the kernel. This allows the kerberos cache to be retained beyond the life of all a user's processes so that the user's cron jobs can work. The kerberos cache is envisioned as a keyring/key tree looking something like: struct user_namespace \___ .krb_cache keyring - The register \___ _krb.0 keyring - Root's Kerberos cache \___ _krb.5000 keyring - User 5000's Kerberos cache \___ _krb.5001 keyring - User 5001's Kerberos cache \___ tkt785 big_key - A ccache blob \___ tkt12345 big_key - Another ccache blob Or possibly: struct user_namespace \___ .krb_cache keyring - The register \___ _krb.0 keyring - Root's Kerberos cache \___ _krb.5000 keyring - User 5000's Kerberos cache \___ _krb.5001 keyring - User 5001's Kerberos cache \___ tkt785 keyring - A ccache \___ krbtgt/REDHAT.COM@REDHAT.COM big_key \___ http/REDHAT.COM@REDHAT.COM user \___ afs/REDHAT.COM@REDHAT.COM user \___ nfs/REDHAT.COM@REDHAT.COM user \___ krbtgt/KERNEL.ORG@KERNEL.ORG big_key \___ http/KERNEL.ORG@KERNEL.ORG big_key What goes into a particular Kerberos cache is entirely up to userspace. Kernel support is limited to giving you the Kerberos cache keyring that you want. The user asks for their Kerberos cache by: krb_cache = keyctl_get_krbcache(uid, dest_keyring); The uid is -1 or the user's own UID for the user's own cache or the uid of some other user's cache (requires CAP_SETUID). This permits rpc.gssd or whatever to mess with the cache. The cache returned is a keyring named "_krb." that the possessor can read, search, clear, invalidate, unlink from and add links to. Active LSMs get a chance to rule on whether the caller is permitted to make a link. Each uid's cache keyring is created when it first accessed and is given a timeout that is extended each time this function is called so that the keyring goes away after a while. The timeout is configurable by sysctl but defaults to three days. Each user_namespace struct gets a lazily-created keyring that serves as the register. The cache keyrings are added to it. This means that standard key search and garbage collection facilities are available. The user_namespace struct's register goes away when it does and anything left in it is then automatically gc'd. Signed-off-by: David Howells Tested-by: Simo Sorce cc: Serge E. Hallyn cc: Eric W. Biederman --- include/uapi/linux/keyctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index c9b7f4faf97a..840cb990abe2 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -56,5 +56,6 @@ #define KEYCTL_REJECT 19 /* reject a partially constructed key */ #define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ +#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #endif /* _LINUX_KEYCTL_H */ -- cgit v1.2.3 From 72b70b6ec4fa7da86a3ac0aacee699b18d94fc3b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 28 Aug 2013 00:39:48 +0200 Subject: NFC: Define secure element IO API and commands In order to send and receive ISO7816 APDUs to and from NFC embedded secure elements, we define a specific netlink command. On a typical SE use case, host applications will send very few APDUs (Less than 10) per transaction. This is why we decided to go for a simple netlink API. Defining another NFC socket protocol for such low traffic would have been overengineered. Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 29bed72a4ac4..6ad6cc03ccd3 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -85,6 +85,7 @@ * a specific SE notifies us about the end of a transaction. The parameter * for this event is the application ID (AID). * @NFC_CMD_GET_SE: Dump all discovered secure elements from an NFC controller. + * @NFC_CMD_SE_IO: Send/Receive APDUs to/from the selected secure element. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -114,6 +115,7 @@ enum nfc_commands { NFC_EVENT_SE_CONNECTIVITY, NFC_EVENT_SE_TRANSACTION, NFC_CMD_GET_SE, + NFC_CMD_SE_IO, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -147,6 +149,7 @@ enum nfc_commands { * @NFC_ATTR_SE_INDEX: Secure element index * @NFC_ATTR_SE_TYPE: Secure element type (UICC or EMBEDDED) * @NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS: Firmware download operation status + * @NFC_ATTR_APDU: Secure element APDU */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -174,6 +177,7 @@ enum nfc_attrs { NFC_ATTR_SE_TYPE, NFC_ATTR_SE_AID, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, + NFC_ATTR_SE_APDU, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; -- cgit v1.2.3 From 3a6a9201897c6482573ad07ee880574147761006 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Thu, 5 Sep 2013 16:41:55 -0700 Subject: Intel MIC Host Driver, card OS state management. This patch enables the following features: a) Boots and shuts down the card via sysfs entries. b) Allocates and maps a device page for communication with the card driver and updates the device page address via scratchpad registers. c) Provides sysfs entries for shutdown status, kernel command line, ramdisk and log buffer information. Co-author: Dasaratharaman Chandramouli Signed-off-by: Ashutosh Dixit Signed-off-by: Caz Yokoyama Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Harshavardhan R Kharche Signed-off-by: Nikhil Rao Signed-off-by: Sudeep Dutt Acked-by: Yaozu (Eddie) Dong Reviewed-by: Peter P Waskiewicz Jr Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/mic_common.h | 74 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/uapi/linux/mic_common.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 115add2515aa..d37263fbdca7 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -241,6 +241,7 @@ header-y += media.h header-y += mei.h header-y += mempolicy.h header-y += meye.h +header-y += mic_common.h header-y += mii.h header-y += minix_fs.h header-y += mman.h diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h new file mode 100644 index 000000000000..a9091e529183 --- /dev/null +++ b/include/uapi/linux/mic_common.h @@ -0,0 +1,74 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC driver. + * + */ +#ifndef __MIC_COMMON_H_ +#define __MIC_COMMON_H_ + +#include + +/** + * struct mic_bootparam: Virtio device independent information in device page + * + * @magic: A magic value used by the card to ensure it can see the host + * @c2h_shutdown_db: Card to Host shutdown doorbell set by host + * @h2c_shutdown_db: Host to Card shutdown doorbell set by card + * @h2c_config_db: Host to Card Virtio config doorbell set by card + * @shutdown_status: Card shutdown status set by card + * @shutdown_card: Set to 1 by the host when a card shutdown is initiated + */ +struct mic_bootparam { + __u32 magic; + __s8 c2h_shutdown_db; + __s8 h2c_shutdown_db; + __s8 h2c_config_db; + __u8 shutdown_status; + __u8 shutdown_card; +} __aligned(8); + +/* Device page size */ +#define MIC_DP_SIZE 4096 + +#define MIC_MAGIC 0xc0ffee00 + +/** + * enum mic_states - MIC states. + */ +enum mic_states { + MIC_OFFLINE = 0, + MIC_ONLINE, + MIC_SHUTTING_DOWN, + MIC_RESET_FAILED, + MIC_LAST +}; + +/** + * enum mic_status - MIC status reported by card after + * a host or card initiated shutdown or a card crash. + */ +enum mic_status { + MIC_NOP = 0, + MIC_CRASHED, + MIC_HALTED, + MIC_POWER_OFF, + MIC_RESTART, + MIC_STATUS_LAST +}; + +#endif -- cgit v1.2.3 From f69bcbf3b4c4b333dcd7a48eaf868bf0c88edab5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 5 Sep 2013 16:42:18 -0700 Subject: Intel MIC Host Driver Changes for Virtio Devices. This patch introduces the host "Virtio over PCIe" interface for Intel MIC. It allows creating user space backends on the host and instantiating virtio devices for them on the Intel MIC card. It uses the existing VRINGH infrastructure in the kernel to access virtio rings from the host. A character device per MIC is exposed with IOCTL, mmap and poll callbacks. This allows the user space backend to: (a) add/remove a virtio device via a device page. (b) map (R/O) virtio rings and device page to user space. (c) poll for availability of data. (d) copy a descriptor or entire descriptor chain to/from the card. (e) modify virtio configuration. (f) handle virtio device reset. The buffers are copied over using CPU copies for this initial patch and host initiated MIC DMA support is planned for future patches. The avail and desc virtio rings are in host memory and the used ring is in card memory to maximize writes across PCIe for performance. Co-author: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Caz Yokoyama Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Nikhil Rao Signed-off-by: Harshavardhan R Kharche Signed-off-by: Sudeep Dutt Acked-by: Yaozu (Eddie) Dong Reviewed-by: Peter P Waskiewicz Jr Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/mic_common.h | 166 +++++++++++++++++++++++++++++++++++++++- include/uapi/linux/mic_ioctl.h | 74 ++++++++++++++++++ 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/mic_ioctl.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index d37263fbdca7..33d2b8fe166d 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -242,6 +242,7 @@ header-y += mei.h header-y += mempolicy.h header-y += meye.h header-y += mic_common.h +header-y += mic_ioctl.h header-y += mii.h header-y += minix_fs.h header-y += mman.h diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h index a9091e529183..364ac751bd04 100644 --- a/include/uapi/linux/mic_common.h +++ b/include/uapi/linux/mic_common.h @@ -21,7 +21,60 @@ #ifndef __MIC_COMMON_H_ #define __MIC_COMMON_H_ -#include +#include + +#ifndef __KERNEL__ +#define ALIGN(a, x) (((a) + (x) - 1) & ~((x) - 1)) +#define __aligned(x) __attribute__ ((aligned(x))) +#endif + +#define mic_aligned_size(x) ALIGN(sizeof(x), 8) + +/** + * struct mic_device_desc: Virtio device information shared between the + * virtio driver and userspace backend + * + * @type: Device type: console/network/disk etc. Type 0/-1 terminates. + * @num_vq: Number of virtqueues. + * @feature_len: Number of bytes of feature bits. Multiply by 2: one for + host features and one for guest acknowledgements. + * @config_len: Number of bytes of the config array after virtqueues. + * @status: A status byte, written by the Guest. + * @config: Start of the following variable length config. + */ +struct mic_device_desc { + __s8 type; + __u8 num_vq; + __u8 feature_len; + __u8 config_len; + __u8 status; + __u64 config[0]; +} __aligned(8); + +/** + * struct mic_device_ctrl: Per virtio device information in the device page + * used internally by the host and card side drivers. + * + * @vdev: Used for storing MIC vdev information by the guest. + * @config_change: Set to 1 by host when a config change is requested. + * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset. + * @guest_ack: Set to 1 by guest to ack a command. + * @host_ack: Set to 1 by host to ack a command. + * @used_address_updated: Set to 1 by guest when the used address should be + * updated. + * @c2h_vdev_db: The doorbell number to be used by guest. Set by host. + * @h2c_vdev_db: The doorbell number to be used by host. Set by guest. + */ +struct mic_device_ctrl { + __u64 vdev; + __u8 config_change; + __u8 vdev_reset; + __u8 guest_ack; + __u8 host_ack; + __u8 used_address_updated; + __s8 c2h_vdev_db; + __s8 h2c_vdev_db; +} __aligned(8); /** * struct mic_bootparam: Virtio device independent information in device page @@ -42,6 +95,117 @@ struct mic_bootparam { __u8 shutdown_card; } __aligned(8); +/** + * struct mic_device_page: High level representation of the device page + * + * @bootparam: The bootparam structure is used for sharing information and + * status updates between MIC host and card drivers. + * @desc: Array of MIC virtio device descriptors. + */ +struct mic_device_page { + struct mic_bootparam bootparam; + struct mic_device_desc desc[0]; +}; +/** + * struct mic_vqconfig: This is how we expect the device configuration field + * for a virtqueue to be laid out in config space. + * + * @address: Guest/MIC physical address of the virtio ring + * (avail and desc rings) + * @used_address: Guest/MIC physical address of the used ring + * @num: The number of entries in the virtio_ring + */ +struct mic_vqconfig { + __u64 address; + __u64 used_address; + __u16 num; +} __aligned(8); + +/* + * The alignment to use between consumer and producer parts of vring. + * This is pagesize for historical reasons. + */ +#define MIC_VIRTIO_RING_ALIGN 4096 + +#define MIC_MAX_VRINGS 4 +#define MIC_VRING_ENTRIES 128 + +/* + * Max vring entries (power of 2) to ensure desc and avail rings + * fit in a single page + */ +#define MIC_MAX_VRING_ENTRIES 128 + +/** + * Max size of the desc block in bytes: includes: + * - struct mic_device_desc + * - struct mic_vqconfig (num_vq of these) + * - host and guest features + * - virtio device config space + */ +#define MIC_MAX_DESC_BLK_SIZE 256 + +/** + * struct _mic_vring_info - Host vring info exposed to userspace backend + * for the avail index and magic for the card. + * + * @avail_idx: host avail idx + * @magic: A magic debug cookie. + */ +struct _mic_vring_info { + __u16 avail_idx; + int magic; +}; + +/** + * struct mic_vring - Vring information. + * + * @vr: The virtio ring. + * @info: Host vring information exposed to the userspace backend for the + * avail index and magic for the card. + * @va: The va for the buffer allocated for vr and info. + * @len: The length of the buffer required for allocating vr and info. + */ +struct mic_vring { + struct vring vr; + struct _mic_vring_info *info; + void *va; + int len; +}; + +#define mic_aligned_desc_size(d) ALIGN(mic_desc_size(d), 8) + +#ifndef INTEL_MIC_CARD +static inline unsigned mic_desc_size(const struct mic_device_desc *desc) +{ + return mic_aligned_size(*desc) + + desc->num_vq * mic_aligned_size(struct mic_vqconfig) + + desc->feature_len * 2 + + desc->config_len; +} + +static inline struct mic_vqconfig * +mic_vq_config(const struct mic_device_desc *desc) +{ + return (struct mic_vqconfig *)(desc + 1); +} + +static inline __u8 *mic_vq_features(const struct mic_device_desc *desc) +{ + return (__u8 *)(mic_vq_config(desc) + desc->num_vq); +} + +static inline __u8 *mic_vq_configspace(const struct mic_device_desc *desc) +{ + return mic_vq_features(desc) + desc->feature_len * 2; +} +static inline unsigned mic_total_desc_size(struct mic_device_desc *desc) +{ + return mic_aligned_desc_size(desc) + + mic_aligned_size(struct mic_device_ctrl); +} +#endif + /* Device page size */ #define MIC_DP_SIZE 4096 diff --git a/include/uapi/linux/mic_ioctl.h b/include/uapi/linux/mic_ioctl.h new file mode 100644 index 000000000000..0e6cbf3e5292 --- /dev/null +++ b/include/uapi/linux/mic_ioctl.h @@ -0,0 +1,74 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef _MIC_IOCTL_H_ +#define _MIC_IOCTL_H_ + +/* + * mic_copy - MIC virtio descriptor copy. + * + * @iov: An array of IOVEC structures containing user space buffers. + * @iovcnt: Number of IOVEC structures in iov. + * @vr_idx: The vring index. + * @update_used: A non zero value results in used index being updated. + * @out_len: The aggregate of the total length written to or read from + * the virtio device. + */ +struct mic_copy_desc { +#ifdef __KERNEL__ + struct iovec __user *iov; +#else + struct iovec *iov; +#endif + int iovcnt; + __u8 vr_idx; + __u8 update_used; + __u32 out_len; +}; + +/* + * Add a new virtio device + * The (struct mic_device_desc *) pointer points to a device page entry + * for the virtio device consisting of: + * - struct mic_device_desc + * - struct mic_vqconfig (num_vq of these) + * - host and guest features + * - virtio device config space + * The total size referenced by the pointer should equal the size returned + * by desc_size() in mic_common.h + */ +#define MIC_VIRTIO_ADD_DEVICE _IOWR('s', 1, struct mic_device_desc *) + +/* + * Copy the number of entries in the iovec and update the used index + * if requested by the user. + */ +#define MIC_VIRTIO_COPY_DESC _IOWR('s', 2, struct mic_copy_desc *) + +/* + * Notify virtio device of a config change + * The (__u8 *) pointer points to config space values for the device + * as they should be written into the device page. The total size + * referenced by the pointer should equal the config_len field of struct + * mic_device_desc. + */ +#define MIC_VIRTIO_CONFIG_CHANGE _IOWR('s', 5, __u8 *) + +#endif -- cgit v1.2.3 From 09a2c73ddfc7f173237fc7209a65b34dd5bcb5ed Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 12 Sep 2013 16:20:00 +0800 Subject: PCI: Remove unused PCI_MSIX_FLAGS_BIRMASK definition PCI_MSIX_FLAGS_BIRMASK has been replaced by PCI_MSIX_TABLE_BIR for better readability. Now no one uses it, remove it. No functional change. Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index baa7852468ef..f0509148111d 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -319,7 +319,6 @@ #define PCI_MSIX_PBA 8 /* Pending Bit Array offset */ #define PCI_MSIX_PBA_BIR 0x00000007 /* BAR index */ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ -#define PCI_MSIX_FLAGS_BIRMASK (7 << 0) /* deprecated */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ /* MSI-X entry's format */ -- cgit v1.2.3 From b019ba959fc4600a646bb9d732149b1b0056f88a Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Fri, 27 Sep 2013 09:50:06 -0700 Subject: misc: mic: fix a warning in the IOCTL header file. The following warning from mic_ioctl.h is fixed via this patch: found __[us]{8,16,32,64} type without #include Reported-by: Greg Kroah-Hartman Signed-off-by: Ashutosh Dixit Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Nikhil Rao Signed-off-by: Harshavardhan R Kharche Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mic_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mic_ioctl.h b/include/uapi/linux/mic_ioctl.h index 0e6cbf3e5292..7fabba5059cf 100644 --- a/include/uapi/linux/mic_ioctl.h +++ b/include/uapi/linux/mic_ioctl.h @@ -21,6 +21,8 @@ #ifndef _MIC_IOCTL_H_ #define _MIC_IOCTL_H_ +#include + /* * mic_copy - MIC virtio descriptor copy. * -- cgit v1.2.3 From 5e04c0c38c90f1f11a0e87800e4c22d4aba1d733 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 07:57:18 +0200 Subject: netfilter: ipset: Introduce new operation to get both setname and family ip[6]tables set match and SET target need to know the family of the set in order to reject adding rules which refer to a set with a non-mathcing family. Currently such rules are silently accepted and then ignored instead of generating a clear error message to the user, which is not helpful. Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 8024cdf13b70..2b61ac44dcc1 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -250,6 +250,14 @@ struct ip_set_req_get_set { #define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ /* Uses ip_set_req_get_set */ +#define IP_SET_OP_GET_FNAME 0x00000008 /* Get set index and family */ +struct ip_set_req_get_set_family { + unsigned int op; + unsigned int version; + unsigned int family; + union ip_set_name_index set; +}; + #define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ struct ip_set_req_version { unsigned int op; -- cgit v1.2.3 From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:30 +0200 Subject: netfilter: ipset: Support comments for ipset entries in the core. This adds the core support for having comments on ipset entries. The comments are stored as standard null-terminated strings in dynamically allocated memory after being passed to the kernel. As a result of this, code has been added to the generic destroy function to iterate all extensions and call that extension's destroy task if the set has that extension activated, and if such a task is defined. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 2b61ac44dcc1..25d3b2f79c02 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -10,12 +10,14 @@ #ifndef _UAPI_IP_SET_H #define _UAPI_IP_SET_H - #include /* The protocol version */ #define IPSET_PROTOCOL 6 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 @@ -110,6 +112,7 @@ enum { IPSET_ATTR_IFACE, IPSET_ATTR_BYTES, IPSET_ATTR_PACKETS, + IPSET_ATTR_COMMENT, __IPSET_ATTR_ADT_MAX, }; #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) @@ -140,6 +143,7 @@ enum ipset_errno { IPSET_ERR_IPADDR_IPV4, IPSET_ERR_IPADDR_IPV6, IPSET_ERR_COUNTER, + IPSET_ERR_COMMENT, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -176,6 +180,8 @@ enum ipset_cadt_flags { IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), IPSET_FLAG_BIT_WITH_COUNTERS = 3, IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), + IPSET_FLAG_BIT_WITH_COMMENT = 4, + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), IPSET_FLAG_CADT_MAX = 15, }; -- cgit v1.2.3 From 91cb498e6a34b429a032f8cfbb57dde28cd20e0c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Sep 2013 20:57:48 +0200 Subject: netfilter: cttimeout: allow to set/get default protocol timeouts Default timeouts are currently set via proc/sysctl interface, the typical pattern is a file name like: /proc/sys/net/netfilter/nf_conntrack_PROTOCOL_timeout_STATE This results in one entry per default protocol state timeout. This patch simplifies this by allowing to set default protocol timeouts via cttimeout netlink interface. This should allow us to get rid of the existing proc/sysctl code in the midterm. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index a2810a7c5e30..1ab0b97b3a1e 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -6,6 +6,8 @@ enum ctnl_timeout_msg_types { IPCTNL_MSG_TIMEOUT_NEW, IPCTNL_MSG_TIMEOUT_GET, IPCTNL_MSG_TIMEOUT_DELETE, + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + IPCTNL_MSG_TIMEOUT_DEFAULT_GET, IPCTNL_MSG_TIMEOUT_MAX }; -- cgit v1.2.3 From 42c4e0c77ac91505ab94284b14025e3a0865c0a5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 99c25338ede8..e32e776f20c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 32819dc1834866cb9547cb75f81af9edd58d33cd Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 2 Oct 2013 13:39:25 +0200 Subject: bonding: modify the old and add new xmit hash policies This patch adds two new hash policy modes which use skb_flow_dissect: 3 - Encapsulated layer 2+3 4 - Encapsulated layer 3+4 There should be a good improvement for tunnel users in those modes. It also changes the old hash functions to: hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; hash ^= (hash >> 16); hash ^= (hash >> 8); Where hash will be initialized either to L2 hash, that is SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted from the upper layer. Flow's dst and src are also extracted based on the xmit policy either directly from the buffer or by using skb_flow_dissect, but in both cases if the protocol is IPv6 then dst and src are obtained by ipv6_addr_hash() on the real addresses. In case of a non-dissectable packet, the algorithms fall back to L2 hashing. The bond_set_mode_ops() function is now obsolete and thus deleted because it was used only to set the proper hash policy. Also we trim a pointer from struct bonding because we no longer need to keep the hash function, now there's only a single hash function - bond_xmit_hash that works based on bond->params.xmit_policy. The hash function and skb_flow_dissect were suggested by Eric Dumazet. The layer names were suggested by Andy Gospodarek, because I suck at semantics. Signed-off-by: Nikolay Aleksandrov Acked-by: Eric Dumazet Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/uapi/linux/if_bonding.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index a17edda8a781..9635a62f6f89 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -91,6 +91,8 @@ #define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */ #define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */ #define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ +#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ +#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ typedef struct ifbond { __s32 bond_mode; -- cgit v1.2.3 From fdfbbd07e91f8fe387140776f3fd94605f0c89e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:39 -0700 Subject: perf: Add generic transaction flags Add a generic qualifier for transaction events, as a new sample type that returns a flag word. This is particularly useful for qualifying aborts: to distinguish aborts which happen due to asynchronous events (like conflicts caused by another CPU) versus instructions that lead to an abort. The tuning strategies are very different for those cases, so it's important to distinguish them easily and early. Since it's inconvenient and inflexible to filter for this in the kernel we report all the events out and allow some post processing in user space. The flags are based on the Intel TSX events, but should be fairly generic and mostly applicable to other HTM architectures too. In addition to various flag words there's also reserved space to report an program supplied abort code. For TSX this is used to distinguish specific classes of aborts, like a lock busy abort when doing lock elision. Flags: Elision and generic transactions (ELISION vs TRANSACTION) (HLE vs RTM on TSX; IBM etc. would likely only use TRANSACTION) Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC) Retryable transaction (RETRY) Conflicts with other threads (CONFLICT) Transaction write capacity overflow (CAPACITY WRITE) Transaction read capacity overflow (CAPACITY READ) Transactions implicitely aborted can also return an abort code. This can be used to signal specific events to the profiler. A common case is abort on lock busy in a RTM eliding library (code 0xff) To handle this case we include the TSX abort code Common example aborts in TSX would be: - Data conflict with another thread on memory read. Flags: TRANSACTION|ASYNC|CONFLICT - executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC - HLE transaction in user space is too large Flags: ELISION|SYNC|CAPACITY-WRITE The only flag that is somewhat TSX specific is ELISION. This adds the perf core glue needed for reporting the new flag word out. v2: Add MEM/MISC v3: Move transaction to the end v4: Separate capacity-read/write and remove misc v5: Remove _SAMPLE. Move abort flags to 32bit. Rename transaction to txn Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655a5d35..da48837d617d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -136,8 +136,9 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT = 1U << 14, PERF_SAMPLE_DATA_SRC = 1U << 15, PERF_SAMPLE_IDENTIFIER = 1U << 16, + PERF_SAMPLE_TRANSACTION = 1U << 17, - PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */ }; /* @@ -180,6 +181,28 @@ enum perf_sample_regs_abi { PERF_SAMPLE_REGS_ABI_64 = 2, }; +/* + * Values for the memory transaction event qualifier, mostly for + * abort events. Multiple bits can be set. + */ +enum { + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + + PERF_TXN_MAX = (1 << 8), /* non-ABI */ + + /* bits 32..63 are reserved for the abort code */ + + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, +}; + /* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: -- cgit v1.2.3 From af190494f9b2e1fb6e1c039e9626c3c334717da1 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 3 Oct 2013 18:06:23 -0700 Subject: misc: mic: Enable OSPM suspend and resume support. This patch enables support for OSPM suspend and resume in the MIC driver. During a host suspend event, the driver performs an orderly shutdown of the cards if they are online. Upon resume, any cards that were previously online before suspend are rebooted. The driver performs an orderly shutdown of the card primarily to ensure that applications in the card are terminated and mounted devices are safely un-mounted before the card is powered down in the event of an OSPM suspend. The driver makes use of the MIC daemon to accomplish OSPM suspend and resume. The driver registers a PM notifier per MIC device. The devices get notified synchronously during PM_SUSPEND_PREPARE and PM_POST_SUSPEND phases. During the PM_SUSPEND_PREPARE phase, the driver performs one of the following three tasks. 1) If the card is 'offline', the driver sets the card to a 'suspended' state and returns. 2) If the card is 'online', the driver initiates card shutdown by setting the card state to suspending. This notifies the MIC daemon which invokes shutdown and sets card state to 'suspended'. The driver returns after the shutdown is complete. 3) If the card is already being shutdown, possibly by a host user space application, the driver sets the card state to 'suspended' and returns after the shutdown is complete. During the PM_POST_SUSPEND phase, the driver simply notifies the daemon and returns. The daemon boots those cards that were previously online during the suspend phase. Signed-off-by: Ashutosh Dixit Signed-off-by: Nikhil Rao Signed-off-by: Harshavardhan R Kharche Signed-off-by: Sudeep Dutt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mic_common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h index 364ac751bd04..17e7d95e4f53 100644 --- a/include/uapi/linux/mic_common.h +++ b/include/uapi/linux/mic_common.h @@ -219,6 +219,8 @@ enum mic_states { MIC_ONLINE, MIC_SHUTTING_DOWN, MIC_RESET_FAILED, + MIC_SUSPENDING, + MIC_SUSPENDED, MIC_LAST }; -- cgit v1.2.3 From fe1811438a2a229e93beaefa69481d72652795e5 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 7 Oct 2013 16:27:55 -0700 Subject: cfg80211: fix nl80211.h documentation for DFS enum states The names are prefixed incorrectly on the documentation. Signed-off-by: Luis R. Rodriguez [also remove spurious blank line] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index fde2c021b26d..a58ea652cc24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3860,13 +3860,12 @@ enum nl80211_radar_event { * * Channel states used by the DFS code. * - * @IEEE80211_DFS_USABLE: The channel can be used, but channel availability + * @NL80211_DFS_USABLE: The channel can be used, but channel availability * check (CAC) must be performed before using it for AP or IBSS. - * @IEEE80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * @NL80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it * is therefore marked as not available. - * @IEEE80211_DFS_AVAILABLE: The channel has been CAC checked and is available. + * @NL80211_DFS_AVAILABLE: The channel has been CAC checked and is available. */ - enum nl80211_dfs_state { NL80211_DFS_USABLE, NL80211_DFS_UNAVAILABLE, -- cgit v1.2.3 From 789fd03331aa1ec45cb58168e2d82525c97c7351 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 4 Oct 2013 18:07:24 -0700 Subject: cfg80211: rename regulatory_hint_11d() to regulatory_hint_country_ie() It is incorrect to refer to this as 11d as 802.11d was just a proposed amendment, 802.11d was merged to the standard so use proper terminology. Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a58ea652cc24..8c0417c222c6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -988,7 +988,7 @@ enum nl80211_commands { * to query the CRDA to retrieve one regulatory domain. This attribute can * also be used by userspace to query the kernel for the currently set * regulatory domain. We chose an alpha2 as that is also used by the - * IEEE-802.11d country information element to identify a country. + * IEEE-802.11 country information element to identify a country. * Users can also simply ask the wireless core to set regulatory domain * to a specific alpha2. * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory -- cgit v1.2.3 From c01fc9ada926aaad907989ca2eba40c2a2a73afe Mon Sep 17 00:00:00 2001 From: Sunil Dutt Date: Wed, 9 Oct 2013 20:45:21 +0530 Subject: cfg80211: pass station supported channel and oper class info The information of the peer's supported channels and supported operating classes are required for the driver to perform TDLS off channel operations. This commit enhances the function nl80211_(new)set_station to pass this information of the peer to the driver. Signed-off-by: Sunil Dutt [return errors for malformed tuples] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8c0417c222c6..f2aef2a7a570 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1496,6 +1496,11 @@ enum nl80211_commands { * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32. * As specified in the &enum nl80211_rxmgmt_flags. * + * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels. + * + * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported + * supported operating classes. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1806,6 +1811,10 @@ enum nl80211_attrs { NL80211_ATTR_RXMGMT_FLAGS, + NL80211_ATTR_STA_SUPPORTED_CHANNELS, + + NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Oct 2013 11:00:02 +0200 Subject: netfilter: add nftables This patch adds nftables which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianess. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, eg. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow up patch (netfilter: nf_tables: add netlink set API). This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes an skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain. This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_conntrack_common.h | 4 + include/uapi/linux/netfilter/nf_tables.h | 582 +++++++++++++++++++++ include/uapi/linux/netfilter/nfnetlink.h | 5 +- 4 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/netfilter/nf_tables.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 174915420d3f..6ce0b7f566a7 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_tables.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 8dd803818ebe..319f47128db8 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -25,6 +25,10 @@ enum ip_conntrack_info { IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 }; +#define NF_CT_STATE_INVALID_BIT (1 << 0) +#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) + /* Bitset representing status of connection. */ enum ip_conntrack_status { /* It's an expected connection: bit 0 set. This bit never changed */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h new file mode 100644 index 000000000000..ec6d84a8ed1e --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -0,0 +1,582 @@ +#ifndef _LINUX_NF_TABLES_H +#define _LINUX_NF_TABLES_H + +#define NFT_CHAIN_MAXNAMELEN 32 + +enum nft_registers { + NFT_REG_VERDICT, + NFT_REG_1, + NFT_REG_2, + NFT_REG_3, + NFT_REG_4, + __NFT_REG_MAX +}; +#define NFT_REG_MAX (__NFT_REG_MAX - 1) + +/** + * enum nft_verdicts - nf_tables internal verdicts + * + * @NFT_CONTINUE: continue evaluation of the current rule + * @NFT_BREAK: terminate evaluation of the current rule + * @NFT_JUMP: push the current chain on the jump stack and jump to a chain + * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack + * @NFT_RETURN: return to the topmost chain on the jump stack + * + * The nf_tables verdicts share their numeric space with the netfilter verdicts. + */ +enum nft_verdicts { + NFT_CONTINUE = -1, + NFT_BREAK = -2, + NFT_JUMP = -3, + NFT_GOTO = -4, + NFT_RETURN = -5, +}; + +/** + * enum nf_tables_msg_types - nf_tables netlink message types + * + * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes) + * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes) + * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes) + * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes) + * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes) + * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes) + * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) + * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) + * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + */ +enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, + NFT_MSG_GETTABLE, + NFT_MSG_DELTABLE, + NFT_MSG_NEWCHAIN, + NFT_MSG_GETCHAIN, + NFT_MSG_DELCHAIN, + NFT_MSG_NEWRULE, + NFT_MSG_GETRULE, + NFT_MSG_DELRULE, + NFT_MSG_MAX, +}; + +enum nft_list_attributes { + NFTA_LIST_UNPEC, + NFTA_LIST_ELEM, + __NFTA_LIST_MAX +}; +#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1) + +/** + * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes + * + * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) + * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + */ +enum nft_hook_attributes { + NFTA_HOOK_UNSPEC, + NFTA_HOOK_HOOKNUM, + NFTA_HOOK_PRIORITY, + __NFTA_HOOK_MAX +}; +#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) + +/** + * enum nft_table_attributes - nf_tables table netlink attributes + * + * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + */ +enum nft_table_attributes { + NFTA_TABLE_UNSPEC, + NFTA_TABLE_NAME, + __NFTA_TABLE_MAX +}; +#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) + +/** + * enum nft_chain_attributes - nf_tables chain netlink attributes + * + * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING) + * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) + * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) + * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + */ +enum nft_chain_attributes { + NFTA_CHAIN_UNSPEC, + NFTA_CHAIN_TABLE, + NFTA_CHAIN_HANDLE, + NFTA_CHAIN_NAME, + NFTA_CHAIN_HOOK, + __NFTA_CHAIN_MAX +}; +#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) + +/** + * enum nft_rule_attributes - nf_tables rule netlink attributes + * + * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING) + * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) + * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + */ +enum nft_rule_attributes { + NFTA_RULE_UNSPEC, + NFTA_RULE_TABLE, + NFTA_RULE_CHAIN, + NFTA_RULE_HANDLE, + NFTA_RULE_EXPRESSIONS, + __NFTA_RULE_MAX +}; +#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) + +enum nft_data_attributes { + NFTA_DATA_UNSPEC, + NFTA_DATA_VALUE, + NFTA_DATA_VERDICT, + __NFTA_DATA_MAX +}; +#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1) + +/** + * enum nft_verdict_attributes - nf_tables verdict netlink attributes + * + * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts) + * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING) + */ +enum nft_verdict_attributes { + NFTA_VERDICT_UNSPEC, + NFTA_VERDICT_CODE, + NFTA_VERDICT_CHAIN, + __NFTA_VERDICT_MAX +}; +#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1) + +/** + * enum nft_expr_attributes - nf_tables expression netlink attributes + * + * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING) + * @NFTA_EXPR_DATA: type specific data (NLA_NESTED) + */ +enum nft_expr_attributes { + NFTA_EXPR_UNSPEC, + NFTA_EXPR_NAME, + NFTA_EXPR_DATA, + __NFTA_EXPR_MAX +}; +#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1) + +/** + * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes + * + * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32) + * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes) + */ +enum nft_immediate_attributes { + NFTA_IMMEDIATE_UNSPEC, + NFTA_IMMEDIATE_DREG, + NFTA_IMMEDIATE_DATA, + __NFTA_IMMEDIATE_MAX +}; +#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1) + +/** + * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes + * + * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BITWISE_LEN: length of operands (NLA_U32) + * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes) + * + * The bitwise expression performs the following operation: + * + * dreg = (sreg & mask) ^ xor + * + * which allow to express all bitwise operations: + * + * mask xor + * NOT: 1 1 + * OR: 0 x + * XOR: 1 x + * AND: x 0 + */ +enum nft_bitwise_attributes { + NFTA_BITWISE_UNSPEC, + NFTA_BITWISE_SREG, + NFTA_BITWISE_DREG, + NFTA_BITWISE_LEN, + NFTA_BITWISE_MASK, + NFTA_BITWISE_XOR, + __NFTA_BITWISE_MAX +}; +#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) + +/** + * enum nft_byteorder_ops - nf_tables byteorder operators + * + * @NFT_BYTEORDER_NTOH: network to host operator + * @NFT_BYTEORDER_HTON: host to network opertaor + */ +enum nft_byteorder_ops { + NFT_BYTEORDER_NTOH, + NFT_BYTEORDER_HTON, +}; + +/** + * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes + * + * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops) + * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32) + * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4) + */ +enum nft_byteorder_attributes { + NFTA_BYTEORDER_UNSPEC, + NFTA_BYTEORDER_SREG, + NFTA_BYTEORDER_DREG, + NFTA_BYTEORDER_OP, + NFTA_BYTEORDER_LEN, + NFTA_BYTEORDER_SIZE, + __NFTA_BYTEORDER_MAX +}; +#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1) + +/** + * enum nft_cmp_ops - nf_tables relational operator + * + * @NFT_CMP_EQ: equal + * @NFT_CMP_NEQ: not equal + * @NFT_CMP_LT: less than + * @NFT_CMP_LTE: less than or equal to + * @NFT_CMP_GT: greater than + * @NFT_CMP_GTE: greater than or equal to + */ +enum nft_cmp_ops { + NFT_CMP_EQ, + NFT_CMP_NEQ, + NFT_CMP_LT, + NFT_CMP_LTE, + NFT_CMP_GT, + NFT_CMP_GTE, +}; + +/** + * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes + * + * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes) + */ +enum nft_cmp_attributes { + NFTA_CMP_UNSPEC, + NFTA_CMP_SREG, + NFTA_CMP_OP, + NFTA_CMP_DATA, + __NFTA_CMP_MAX +}; +#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) + +enum nft_set_elem_flags { + NFT_SE_INTERVAL_END = 0x1, +}; + +enum nft_set_elem_attributes { + NFTA_SE_UNSPEC, + NFTA_SE_KEY, + NFTA_SE_DATA, + NFTA_SE_FLAGS, + __NFTA_SE_MAX +}; +#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) + +enum nft_set_flags { + NFT_SET_INTERVAL = 0x1, + NFT_SET_MAP = 0x2, +}; + +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_FLAGS, + NFTA_SET_SREG, + NFTA_SET_DREG, + NFTA_SET_KLEN, + NFTA_SET_DLEN, + NFTA_SET_ELEMENTS, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +enum nft_hash_flags { + NFT_HASH_MAP = 0x1, +}; + +enum nft_hash_elem_attributes { + NFTA_HE_UNSPEC, + NFTA_HE_KEY, + NFTA_HE_DATA, + __NFTA_HE_MAX +}; +#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) + +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_FLAGS, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_KLEN, + NFTA_HASH_ELEMENTS, + __NFTA_HASH_MAX +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** + * enum nft_payload_bases - nf_tables payload expression offset bases + * + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header + */ +enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, +}; + +/** + * enum nft_payload_attributes - nf_tables payload expression netlink attributes + * + * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) + * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + */ +enum nft_payload_attributes { + NFTA_PAYLOAD_UNSPEC, + NFTA_PAYLOAD_DREG, + NFTA_PAYLOAD_BASE, + NFTA_PAYLOAD_OFFSET, + NFTA_PAYLOAD_LEN, + __NFTA_PAYLOAD_MAX +}; +#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) + +/** + * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes + * + * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8) + * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) + * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) + */ +enum nft_exthdr_attributes { + NFTA_EXTHDR_UNSPEC, + NFTA_EXTHDR_DREG, + NFTA_EXTHDR_TYPE, + NFTA_EXTHDR_OFFSET, + NFTA_EXTHDR_LEN, + __NFTA_EXTHDR_MAX +}; +#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) + +/** + * enum nft_meta_keys - nf_tables meta expression keys + * + * @NFT_META_LEN: packet length (skb->len) + * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT + * @NFT_META_PRIORITY: packet priority (skb->priority) + * @NFT_META_MARK: packet mark (skb->mark) + * @NFT_META_IIF: packet input interface index (dev->ifindex) + * @NFT_META_OIF: packet output interface index (dev->ifindex) + * @NFT_META_IIFNAME: packet input interface name (dev->name) + * @NFT_META_OIFNAME: packet output interface name (dev->name) + * @NFT_META_IIFTYPE: packet input interface type (dev->type) + * @NFT_META_OIFTYPE: packet output interface type (dev->type) + * @NFT_META_SKUID: originating socket UID (fsuid) + * @NFT_META_SKGID: originating socket GID (fsgid) + * @NFT_META_NFTRACE: packet nftrace bit + * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_META_SECMARK: packet secmark (skb->secmark) + */ +enum nft_meta_keys { + NFT_META_LEN, + NFT_META_PROTOCOL, + NFT_META_PRIORITY, + NFT_META_MARK, + NFT_META_IIF, + NFT_META_OIF, + NFT_META_IIFNAME, + NFT_META_OIFNAME, + NFT_META_IIFTYPE, + NFT_META_OIFTYPE, + NFT_META_SKUID, + NFT_META_SKGID, + NFT_META_NFTRACE, + NFT_META_RTCLASSID, + NFT_META_SECMARK, +}; + +/** + * enum nft_meta_attributes - nf_tables meta expression netlink attributes + * + * @NFTA_META_DREG: destination register (NLA_U32) + * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + */ +enum nft_meta_attributes { + NFTA_META_UNSPEC, + NFTA_META_DREG, + NFTA_META_KEY, + __NFTA_META_MAX +}; +#define NFTA_META_MAX (__NFTA_META_MAX - 1) + +/** + * enum nft_ct_keys - nf_tables ct expression keys + * + * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) + * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir) + * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status) + * @NFT_CT_MARK: conntrack mark value + * @NFT_CT_SECMARK: conntrack secmark value + * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms + * @NFT_CT_HELPER: connection tracking helper assigned to conntrack + * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_PROTOCOL: conntrack layer 4 protocol + * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source + * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination + */ +enum nft_ct_keys { + NFT_CT_STATE, + NFT_CT_DIRECTION, + NFT_CT_STATUS, + NFT_CT_MARK, + NFT_CT_SECMARK, + NFT_CT_EXPIRATION, + NFT_CT_HELPER, + NFT_CT_L3PROTOCOL, + NFT_CT_SRC, + NFT_CT_DST, + NFT_CT_PROTOCOL, + NFT_CT_PROTO_SRC, + NFT_CT_PROTO_DST, +}; + +/** + * enum nft_ct_attributes - nf_tables ct expression netlink attributes + * + * @NFTA_CT_DREG: destination register (NLA_U32) + * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) + * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + */ +enum nft_ct_attributes { + NFTA_CT_UNSPEC, + NFTA_CT_DREG, + NFTA_CT_KEY, + NFTA_CT_DIRECTION, + __NFTA_CT_MAX +}; +#define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + +/** + * enum nft_limit_attributes - nf_tables limit expression netlink attributes + * + * @NFTA_LIMIT_RATE: refill rate (NLA_U64) + * @NFTA_LIMIT_UNIT: refill unit (NLA_U64) + */ +enum nft_limit_attributes { + NFTA_LIMIT_UNSPEC, + NFTA_LIMIT_RATE, + NFTA_LIMIT_UNIT, + __NFTA_LIMIT_MAX +}; +#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1) + +/** + * enum nft_counter_attributes - nf_tables counter expression netlink attributes + * + * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64) + * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64) + */ +enum nft_counter_attributes { + NFTA_COUNTER_UNSPEC, + NFTA_COUNTER_BYTES, + NFTA_COUNTER_PACKETS, + __NFTA_COUNTER_MAX +}; +#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1) + +/** + * enum nft_log_attributes - nf_tables log expression netlink attributes + * + * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32) + * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) + * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) + * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + */ +enum nft_log_attributes { + NFTA_LOG_UNSPEC, + NFTA_LOG_GROUP, + NFTA_LOG_PREFIX, + NFTA_LOG_SNAPLEN, + NFTA_LOG_QTHRESHOLD, + __NFTA_LOG_MAX +}; +#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) + +/** + * enum nft_reject_types - nf_tables reject expression reject types + * + * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable + * @NFT_REJECT_TCP_RST: reject using TCP RST + */ +enum nft_reject_types { + NFT_REJECT_ICMP_UNREACH, + NFT_REJECT_TCP_RST, +}; + +/** + * enum nft_reject_attributes - nf_tables reject expression netlink attributes + * + * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types) + * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8) + */ +enum nft_reject_attributes { + NFTA_REJECT_UNSPEC, + NFTA_REJECT_TYPE, + NFTA_REJECT_ICMP_CODE, + __NFTA_REJECT_MAX +}; +#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1) + +/** + * enum nft_nat_types - nf_tables nat expression NAT types + * + * @NFT_NAT_SNAT: source NAT + * @NFT_NAT_DNAT: destination NAT + */ +enum nft_nat_types { + NFT_NAT_SNAT, + NFT_NAT_DNAT, +}; + +/** + * enum nft_nat_attributes - nf_tables nat expression netlink attributes + * + * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) + * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + */ +enum nft_nat_attributes { + NFTA_NAT_UNSPEC, + NFTA_NAT_TYPE, + NFTA_NAT_ADDR_MIN, + NFTA_NAT_ADDR_MAX, + NFTA_NAT_PROTO_MIN, + NFTA_NAT_PROTO_MAX, + __NFTA_NAT_MAX +}; +#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) + +#endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 4a4efafad5f4..d276c3bd55b8 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -18,6 +18,8 @@ enum nfnetlink_groups { #define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_DESTROY, #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + NFNLGRP_NFTABLES, +#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) @@ -51,6 +53,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 +#define NFNL_SUBSYS_NFTABLES 10 +#define NFNL_SUBSYS_COUNT 11 #endif /* _UAPI_NFNETLINK_H */ -- cgit v1.2.3 From 20a69341f2d00cd042e81c82289fba8a13c05a25 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 11 Oct 2013 12:06:22 +0200 Subject: netfilter: nf_tables: add netlink set API This patch adds the new netlink API for maintaining nf_tables sets independently of the ruleset. The API supports the following operations: - creation of sets - deletion of sets - querying of specific sets - dumping of all sets - addition of set elements - removal of set elements - dumping of all set elements Sets are identified by name, each table defines an individual namespace. The name of a set may be allocated automatically, this is mostly useful in combination with the NFT_SET_ANONYMOUS flag, which destroys a set automatically once the last reference has been released. Sets can be marked constant, meaning they're not allowed to change while linked to a rule. This allows to perform lockless operation for set types that would otherwise require locking. Additionally, if the implementation supports it, sets can (as before) be used as maps, associating a data value with each key (or range), by specifying the NFT_SET_MAP flag and can be used for interval queries by specifying the NFT_SET_INTERVAL flag. Set elements are added and removed incrementally. All element operations support batching, reducing netlink message and set lookup overhead. The old "set" and "hash" expressions are replaced by a generic "lookup" expression, which binds to the specified set. Userspace is not aware of the actual set implementation used by the kernel anymore, all configuration options are generic. Currently the implementation selection logic is largely missing and the kernel will simply use the first registered implementation supporting the requested operation. Eventually, the plan is to have userspace supply a description of the data characteristics and select the implementation based on expected performance and memory use. This patch includes the new 'lookup' expression to look up for element matching in the set. This patch includes kernel-doc descriptions for this set API and it also includes the following fixes. From Patrick McHardy: * netfilter: nf_tables: fix set element data type in dumps * netfilter: nf_tables: fix indentation of struct nft_set_elem comments * netfilter: nf_tables: fix oops in nft_validate_data_load() * netfilter: nf_tables: fix oops while listing sets of built-in tables * netfilter: nf_tables: destroy anonymous sets immediately if binding fails * netfilter: nf_tables: propagate context to set iter callback * netfilter: nf_tables: add loop detection From Pablo Neira Ayuso: * netfilter: nf_tables: allow to dump all existing sets * netfilter: nf_tables: fix wrong type for flags variable in newelem Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 191 ++++++++++++++++++++++--------- 1 file changed, 139 insertions(+), 52 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ec6d84a8ed1e..9e924014efe3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -44,6 +44,12 @@ enum nft_verdicts { * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes) * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes) * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes) + * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes) + * @NFT_MSG_GETSET: get a set (enum nft_set_attributes) + * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes) + * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes) + * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes) + * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -55,9 +61,20 @@ enum nf_tables_msg_types { NFT_MSG_NEWRULE, NFT_MSG_GETRULE, NFT_MSG_DELRULE, + NFT_MSG_NEWSET, + NFT_MSG_GETSET, + NFT_MSG_DELSET, + NFT_MSG_NEWSETELEM, + NFT_MSG_GETSETELEM, + NFT_MSG_DELSETELEM, NFT_MSG_MAX, }; +/** + * enum nft_list_attributes - nf_tables generic list netlink attributes + * + * @NFTA_LIST_ELEM: list element (NLA_NESTED) + */ enum nft_list_attributes { NFTA_LIST_UNPEC, NFTA_LIST_ELEM, @@ -127,6 +144,113 @@ enum nft_rule_attributes { }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_set_flags - nf_tables set flags + * + * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink + * @NFT_SET_CONSTANT: set contents may not change while bound + * @NFT_SET_INTERVAL: set contains intervals + * @NFT_SET_MAP: set is used as a dictionary + */ +enum nft_set_flags { + NFT_SET_ANONYMOUS = 0x1, + NFT_SET_CONSTANT = 0x2, + NFT_SET_INTERVAL = 0x4, + NFT_SET_MAP = 0x8, +}; + +/** + * enum nft_set_attributes - nf_tables set netlink attributes + * + * @NFTA_SET_TABLE: table name (NLA_STRING) + * @NFTA_SET_NAME: set name (NLA_STRING) + * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32) + * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32) + * @NFTA_SET_KEY_LEN: key data length (NLA_U32) + * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32) + * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32) + */ +enum nft_set_attributes { + NFTA_SET_UNSPEC, + NFTA_SET_TABLE, + NFTA_SET_NAME, + NFTA_SET_FLAGS, + NFTA_SET_KEY_TYPE, + NFTA_SET_KEY_LEN, + NFTA_SET_DATA_TYPE, + NFTA_SET_DATA_LEN, + __NFTA_SET_MAX +}; +#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) + +/** + * enum nft_set_elem_flags - nf_tables set element flags + * + * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval + */ +enum nft_set_elem_flags { + NFT_SET_ELEM_INTERVAL_END = 0x1, +}; + +/** + * enum nft_set_elem_attributes - nf_tables set element netlink attributes + * + * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) + * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) + * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + */ +enum nft_set_elem_attributes { + NFTA_SET_ELEM_UNSPEC, + NFTA_SET_ELEM_KEY, + NFTA_SET_ELEM_DATA, + NFTA_SET_ELEM_FLAGS, + __NFTA_SET_ELEM_MAX +}; +#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) + +/** + * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes + * + * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING) + * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes) + */ +enum nft_set_elem_list_attributes { + NFTA_SET_ELEM_LIST_UNSPEC, + NFTA_SET_ELEM_LIST_TABLE, + NFTA_SET_ELEM_LIST_SET, + NFTA_SET_ELEM_LIST_ELEMENTS, + __NFTA_SET_ELEM_LIST_MAX +}; +#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1) + +/** + * enum nft_data_types - nf_tables data types + * + * @NFT_DATA_VALUE: generic data + * @NFT_DATA_VERDICT: netfilter verdict + * + * The type of data is usually determined by the kernel directly and is not + * explicitly specified by userspace. The only difference are sets, where + * userspace specifies the key and mapping data types. + * + * The values 0xffffff00-0xffffffff are reserved for internally used types. + * The remaining range can be freely used by userspace to encode types, all + * values are equivalent to NFT_DATA_VALUE. + */ +enum nft_data_types { + NFT_DATA_VALUE, + NFT_DATA_VERDICT = 0xffffff00U, +}; + +#define NFT_DATA_RESERVED_MASK 0xffffff00U + +/** + * enum nft_data_attributes - nf_tables data netlink attributes + * + * @NFTA_DATA_VALUE: generic data (NLA_BINARY) + * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes) + */ enum nft_data_attributes { NFTA_DATA_UNSPEC, NFTA_DATA_VALUE, @@ -275,58 +399,21 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) -enum nft_set_elem_flags { - NFT_SE_INTERVAL_END = 0x1, -}; - -enum nft_set_elem_attributes { - NFTA_SE_UNSPEC, - NFTA_SE_KEY, - NFTA_SE_DATA, - NFTA_SE_FLAGS, - __NFTA_SE_MAX -}; -#define NFTA_SE_MAX (__NFTA_SE_MAX - 1) - -enum nft_set_flags { - NFT_SET_INTERVAL = 0x1, - NFT_SET_MAP = 0x2, -}; - -enum nft_set_attributes { - NFTA_SET_UNSPEC, - NFTA_SET_FLAGS, - NFTA_SET_SREG, - NFTA_SET_DREG, - NFTA_SET_KLEN, - NFTA_SET_DLEN, - NFTA_SET_ELEMENTS, - __NFTA_SET_MAX -}; -#define NFTA_SET_MAX (__NFTA_SET_MAX - 1) - -enum nft_hash_flags { - NFT_HASH_MAP = 0x1, -}; - -enum nft_hash_elem_attributes { - NFTA_HE_UNSPEC, - NFTA_HE_KEY, - NFTA_HE_DATA, - __NFTA_HE_MAX -}; -#define NFTA_HE_MAX (__NFTA_HE_MAX - 1) - -enum nft_hash_attributes { - NFTA_HASH_UNSPEC, - NFTA_HASH_FLAGS, - NFTA_HASH_SREG, - NFTA_HASH_DREG, - NFTA_HASH_KLEN, - NFTA_HASH_ELEMENTS, - __NFTA_HASH_MAX -}; -#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) +/** + * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes + * + * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING) + * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) + */ +enum nft_lookup_attributes { + NFTA_LOOKUP_UNSPEC, + NFTA_LOOKUP_SET, + NFTA_LOOKUP_SREG, + NFTA_LOOKUP_DREG, + __NFTA_LOOKUP_MAX +}; +#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) /** * enum nft_payload_bases - nf_tables payload expression offset bases -- cgit v1.2.3 From 9370761c56b66aa5c65e069a7b010111a025018d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:21:26 +0200 Subject: netfilter: nf_tables: convert built-in tables/chains to chain types This patch converts built-in tables/chains to chain types that allows you to deploy customized table and chain configurations from userspace. After this patch, you have to specify the chain type when creating a new chain: add chain ip filter output { type filter hook input priority 0; } ^^^^ ------ The existing chain types after this patch are: filter, route and nat. Note that tables are just containers of chains with no specific semantics, which is a significant change with regards to iptables. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9e924014efe3..779cf951c8de 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,6 +115,7 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -122,6 +123,7 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_TYPE, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -- cgit v1.2.3 From 0ca743a5599199152a31a7146b83213c786c2eb2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 00:06:06 +0200 Subject: netfilter: nf_tables: add compatibility layer for x_tables This patch adds the x_tables compatibility layer. This allows you to use existing x_tables matches and targets from nf_tables. This compatibility later allows us to use existing matches/targets for features that are still missing in nf_tables. We can progressively replace them with native nf_tables extensions. It also provides the userspace compatibility software that allows you to express the rule-set using the iptables syntax but using the nf_tables kernel components. In order to get this compatibility layer working, I've done the following things: * add NFNL_SUBSYS_NFT_COMPAT: this new nfnetlink subsystem is used to query the x_tables match/target revision, so we don't need to use the native x_table getsockopt interface. * emulate xt structures: this required extending the struct nft_pktinfo to include the fragment offset, which is already obtained from ip[6]_tables and that is used by some matches/targets. * add support for default policy to base chains, required to emulate x_tables. * add NFTA_CHAIN_USE attribute to obtain the number of references to chains, required by x_tables emulation. * add chain packet/byte counters using per-cpu. * support 32-64 bits compat. For historical reasons, this patch includes the following patches that were posted in the netfilter-devel mailing list. From Pablo Neira Ayuso: * nf_tables: add default policy to base chains * netfilter: nf_tables: add NFTA_CHAIN_USE attribute * nf_tables: nft_compat: private data of target and matches in contiguous area * nf_tables: validate hooks for compat match/target * nf_tables: nft_compat: release cached matches/targets * nf_tables: x_tables support as a compile time option * nf_tables: fix alias for xtables over nftables module * nf_tables: add packet and byte counters per chain * nf_tables: fix per-chain counter stats if no counters are passed * nf_tables: don't bump chain stats * nf_tables: add protocol and flags for xtables over nf_tables * nf_tables: add ip[6]t_entry emulation * nf_tables: move specific layer 3 compat code to nf_tables_ipv[4|6] * nf_tables: support 32bits-64bits x_tables compat * nf_tables: fix compilation if CONFIG_COMPAT is disabled From Patrick McHardy: * nf_tables: move policy to struct nft_base_chain * nf_tables: send notifications for base chain policy changes From Alexander Primak: * nf_tables: remove the duplicate NF_INET_LOCAL_OUT From Nicolas Dichtel: * nf_tables: fix compilation when nf-netlink is a module Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/nf_tables.h | 32 +++++++++++++++++++++ include/uapi/linux/netfilter/nf_tables_compat.h | 38 +++++++++++++++++++++++++ include/uapi/linux/netfilter/nfnetlink.h | 3 +- 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/netfilter/nf_tables_compat.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 6ce0b7f566a7..17c3af2c4bb9 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h header-y += nf_tables.h +header-y += nf_tables_compat.h header-y += nf_nat.h header-y += nfnetlink.h header-y += nfnetlink_acct.h diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 779cf951c8de..1563875e6942 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -115,7 +115,10 @@ enum nft_table_attributes { * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64) * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING) * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes) + * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32) + * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32) * @NFTA_CHAIN_TYPE: type name of the string (NLA_NUL_STRING) + * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -123,7 +126,10 @@ enum nft_chain_attributes { NFTA_CHAIN_HANDLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK, + NFTA_CHAIN_POLICY, + NFTA_CHAIN_USE, NFTA_CHAIN_TYPE, + NFTA_CHAIN_COUNTERS, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) @@ -135,6 +141,7 @@ enum nft_chain_attributes { * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING) * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) + * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -142,10 +149,35 @@ enum nft_rule_attributes { NFTA_RULE_CHAIN, NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, + NFTA_RULE_COMPAT, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) +/** + * enum nft_rule_compat_flags - nf_tables rule compat flags + * + * @NFT_RULE_COMPAT_F_INV: invert the check result + */ +enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_INV = (1 << 1), + NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, +}; + +/** + * enum nft_rule_compat_attributes - nf_tables rule compat attributes + * + * @NFTA_RULE_COMPAT_PROTO: numerice value of handled protocol (NLA_U32) + * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32) + */ +enum nft_rule_compat_attributes { + NFTA_RULE_COMPAT_UNSPEC, + NFTA_RULE_COMPAT_PROTO, + NFTA_RULE_COMPAT_FLAGS, + __NFTA_RULE_COMPAT_MAX +}; +#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1) + /** * enum nft_set_flags - nf_tables set flags * diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h new file mode 100644 index 000000000000..8310f5f76551 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_tables_compat.h @@ -0,0 +1,38 @@ +#ifndef _NFT_COMPAT_NFNETLINK_H_ +#define _NFT_COMPAT_NFNETLINK_H_ + +enum nft_target_attributes { + NFTA_TARGET_UNSPEC, + NFTA_TARGET_NAME, + NFTA_TARGET_REV, + NFTA_TARGET_INFO, + __NFTA_TARGET_MAX +}; +#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1) + +enum nft_match_attributes { + NFTA_MATCH_UNSPEC, + NFTA_MATCH_NAME, + NFTA_MATCH_REV, + NFTA_MATCH_INFO, + __NFTA_MATCH_MAX +}; +#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1) + +#define NFT_COMPAT_NAME_MAX 32 + +enum { + NFNL_MSG_COMPAT_GET, + NFNL_MSG_COMPAT_MAX +}; + +enum { + NFTA_COMPAT_UNSPEC = 0, + NFTA_COMPAT_NAME, + NFTA_COMPAT_REV, + NFTA_COMPAT_TYPE, + __NFTA_COMPAT_MAX, +}; +#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1) + +#endif diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index d276c3bd55b8..288959404d54 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -54,6 +54,7 @@ struct nfgenmsg { #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 #define NFNL_SUBSYS_CTHELPER 9 #define NFNL_SUBSYS_NFTABLES 10 -#define NFNL_SUBSYS_COUNT 11 +#define NFNL_SUBSYS_NFT_COMPAT 11 +#define NFNL_SUBSYS_COUNT 12 #endif /* _UAPI_NFNETLINK_H */ -- cgit v1.2.3 From 9ddf63235749a9efa1fad2eeb74be2ee9b580f8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 13:26:33 +0200 Subject: netfilter: nf_tables: add support for dormant tables This patch allows you to temporarily disable an entire table. You can change the state of a dormant table via NFT_MSG_NEWTABLE messages. Using this operation you can wake up a table, so their chains are registered. This provides atomicity at chain level. Thus, the rule-set of one chain is applied at once, avoiding any possible intermediate state in every chain. Still, the chains that belongs to a table are registered consecutively. This also allows you to have inactive tables in the kernel. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1563875e6942..a9c4bce1988f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -96,14 +96,25 @@ enum nft_hook_attributes { }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) +/** + * enum nft_table_flags - nf_tables table flags + * + * @NFT_TABLE_F_DORMANT: this table is not active + */ +enum nft_table_flags { + NFT_TABLE_F_DORMANT = 0x1, +}; + /** * enum nft_table_attributes - nf_tables table netlink attributes * * @NFTA_TABLE_NAME: name of the table (NLA_STRING) + * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) -- cgit v1.2.3 From eb31628e37a0a4e01fffd79dcc7f815d2357f53a Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 10 Oct 2013 13:39:19 +0200 Subject: netfilter: nf_tables: Add support for IPv6 NAT This patch generalizes the NAT expression to support both IPv4 and IPv6 using the existing IPv4/IPv6 NAT infrastructure. This also adds the NAT chain type for IPv6. This patch collapses the following patches that were posted to the netfilter-devel mailing list, from Tomasz: * nf_tables: Change NFTA_NAT_ attributes to better semantic significance * nf_tables: Split IPv4 NAT into NAT expression and IPv4 NAT chain * nf_tables: Add support for IPv6 NAT expression * nf_tables: Add support for IPv6 NAT chain * nf_tables: Fix up build issue on IPv6 NAT support And, from Pablo Neira Ayuso: * fix missing dependencies in nft_chain_nat Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index a9c4bce1988f..7d4a1992f89c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -695,18 +695,20 @@ enum nft_nat_types { * enum nft_nat_attributes - nf_tables nat expression netlink attributes * * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types) - * @NFTA_NAT_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) - * @NFTA_NAT_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) - * @NFTA_NAT_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_NAT_FAMILY: NAT family (NLA_U32) + * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) */ enum nft_nat_attributes { NFTA_NAT_UNSPEC, NFTA_NAT_TYPE, - NFTA_NAT_ADDR_MIN, - NFTA_NAT_ADDR_MAX, - NFTA_NAT_PROTO_MIN, - NFTA_NAT_PROTO_MAX, + NFTA_NAT_FAMILY, + NFTA_NAT_REG_ADDR_MIN, + NFTA_NAT_REG_ADDR_MAX, + NFTA_NAT_REG_PROTO_MIN, + NFTA_NAT_REG_PROTO_MAX, __NFTA_NAT_MAX }; #define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1) -- cgit v1.2.3 From 5e94846686d027a4c8ecc5d9d52b18036d3e8f7a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 10 Oct 2013 13:41:44 +0200 Subject: netfilter: nf_tables: add insert operation This patch adds a new rule attribute NFTA_RULE_POSITION which is used to store the position of a rule relatively to the others. By providing the create command and specifying the position, the rule is inserted after the rule with the handle equal to the provided position. Regarding notification, the position attribute specifies the handle of the previous rule to make sure we don't point to any stale rule in notifications coming from the commit path. This patch includes the following fix from Pablo: * nf_tables: fix rule deletion event reporting Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7d4a1992f89c..fbfd229a8e99 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -153,6 +153,7 @@ enum nft_chain_attributes { * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64) * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) + * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -161,6 +162,7 @@ enum nft_rule_attributes { NFTA_RULE_HANDLE, NFTA_RULE_EXPRESSIONS, NFTA_RULE_COMPAT, + NFTA_RULE_POSITION, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) -- cgit v1.2.3 From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 11:05:33 +0200 Subject: netfilter: nfnetlink: add batch support and use it from nf_tables This patch adds a batch support to nfnetlink. Basically, it adds two new control messages: * NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID. * NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error ocurred in the batch, the ss->abort function is invoked instead. The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receival path. This patch adds atomic rule updates and dumps based on bitmask generations. This allows to atomically commit a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets. The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as: 00 active in the present, will be active in the next generation. 01 inactive in the present, will be active in the next generation. 10 active in the present, will be deleted in the next generation. ^ gencursor Once you invoke the transition to the next generation, the global gencursor is updated: 00 active in the present, will be active in the next generation. 01 active in the present, needs to zero its future, it becomes 00. 10 inactive in the present, delete now. ^ gencursor If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry again. In order to invalidate dumps, a global genctr counter is increased everytime nf_tables enters a new generation. This new operation can be used from the user-space utility that controls the firewall, eg. nft -f restore The rule updates contained in `file' will be applied atomically. cat file ----- add filter INPUT ip saddr 1.1.1.1 counter accept #1 del filter INPUT ip daddr 2.2.2.2 counter drop #2 -EOF- Note that the rule 1 will be inactive until the transition to the next generation, the rule 2 will be evicted in the next generation. There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contain rules that require updates is finished. Event notification happens once the rule-set update has been committed. So we skip notifications is case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly. This patch squashed the following patches from Pablo: * nf_tables: atomic rule updates and dumps * nf_tables: get rid of per rule list_head for commits * nf_tables: use per netns commit list * nfnetlink: add batch support and use it from nf_tables * nf_tables: all rule updates are transactional * nf_tables: attach replacement rule after stale one * nf_tables: do not allow deletion/replacement of stale rules * nf_tables: remove unused NFTA_RULE_FLAGS Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 288959404d54..596ddd45253c 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -57,4 +57,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_NFT_COMPAT 11 #define NFNL_SUBSYS_COUNT 12 +/* Reserved control nfnetlink messages */ +#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE +#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 + #endif /* _UAPI_NFNETLINK_H */ -- cgit v1.2.3 From 120c9794a3ee2f9b1548a1b0b252652e3c134f59 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 28 Aug 2013 19:31:22 +0300 Subject: ipvs: fix the IPVS_CMD_ATTR_MAX definition It was wrong (bigger) but problem is harmless. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/uapi/linux/ip_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 29458223d044..fbcffe8041f7 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -334,7 +334,7 @@ enum { __IPVS_CMD_ATTR_MAX, }; -#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) +#define IPVS_CMD_ATTR_MAX (__IPVS_CMD_ATTR_MAX - 1) /* * Attributes used to describe a service -- cgit v1.2.3 From cbbc58d4fdfab1a39a6ac1b41fcb17885952157a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:01 +0530 Subject: kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine This moves the kvmppc_ops callbacks to be a per VM entity. This enables us to select HV and PR mode when creating a VM. We also allow both kvm-hv and kvm-pr kernel module to be loaded. To achieve this we move /dev/kvm ownership to kvm.ko module. Depending on which KVM mode we select during VM creation we take a reference count on respective module Signed-off-by: Aneesh Kumar K.V [agraf: fix coding style] Signed-off-by: Alexander Graf --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..5b5341f78368 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* -- cgit v1.2.3 From 90af231106c0b8d223c27d35464af95cb3d9cacf Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 18 Oct 2013 17:43:38 +0200 Subject: bonding: add Netlink support mode option Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 80394e8dc3a3..06fd3fe10f3b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -325,6 +325,16 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* Bonding section */ + +enum { + IFLA_BOND_UNSPEC, + IFLA_BOND_MODE, + __IFLA_BOND_MAX, +}; + +#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) + /* SR-IOV virtual function management section */ enum { -- cgit v1.2.3 From ec76aa49855f6d6fea5e01de179fb57dd47c619d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 18 Oct 2013 17:43:39 +0200 Subject: bonding: add Netlink support active_slave option Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 06fd3fe10f3b..8a1e346243b7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -330,6 +330,7 @@ struct ifla_vxlan_port_range { enum { IFLA_BOND_UNSPEC, IFLA_BOND_MODE, + IFLA_BOND_ACTIVE_SLAVE, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From 1bd7116f1cb833c998cddb6b188df463342069d8 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 22 Oct 2013 10:42:46 -0700 Subject: openvswitch: collect mega flow mask stats Collect mega flow mask stats. ovs-dpctl show command can be used to display them for debugging and performance tuning. Signed-off-by: Andy Zhou Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a74d375b439b..2cc4644f68ef 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -63,15 +63,18 @@ enum ovs_datapath_cmd { * not be sent. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. + * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the + * datapath. Always present in notifications. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. */ enum ovs_datapath_attr { OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ __OVS_DP_ATTR_MAX }; @@ -84,6 +87,14 @@ struct ovs_dp_stats { __u64 n_flows; /* Number of flows present */ }; +struct ovs_dp_megaflow_stats { + __u64 n_mask_hit; /* Number of masks used for flow lookups. */ + __u32 n_masks; /* Number of masks for the datapath. */ + __u32 pad0; /* Pad for future expension. */ + __u64 pad1; /* Pad for future expension. */ + __u64 pad2; /* Pad for future expension. */ +}; + struct ovs_vport_stats { __u64 rx_packets; /* total packets received */ __u64 tx_packets; /* total packets transmitted */ -- cgit v1.2.3 From ee08997fee16f10be23c9748d609dbdf3baab8e4 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Mon, 6 May 2013 15:40:01 +0300 Subject: crypto: provide single place for hash algo information This patch provides a single place for information about hash algorithms, such as hash sizes and kernel driver names, which will be used by IMA and the public key code. Changelog: - Fix sparse and checkpatch warnings - Move hash algo enums to uapi for userspace signing functions. Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Acked-by: Herbert Xu --- include/uapi/linux/hash_info.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/uapi/linux/hash_info.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h new file mode 100644 index 000000000000..ca18c45f8304 --- /dev/null +++ b/include/uapi/linux/hash_info.h @@ -0,0 +1,37 @@ +/* + * Hash Info: Hash algorithms information + * + * Copyright (c) 2013 Dmitry Kasatkin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _UAPI_LINUX_HASH_INFO_H +#define _UAPI_LINUX_HASH_INFO_H + +enum hash_algo { + HASH_ALGO_MD4, + HASH_ALGO_MD5, + HASH_ALGO_SHA1, + HASH_ALGO_RIPE_MD_160, + HASH_ALGO_SHA256, + HASH_ALGO_SHA384, + HASH_ALGO_SHA512, + HASH_ALGO_SHA224, + HASH_ALGO_RIPE_MD_128, + HASH_ALGO_RIPE_MD_256, + HASH_ALGO_RIPE_MD_320, + HASH_ALGO_WP_256, + HASH_ALGO_WP_384, + HASH_ALGO_WP_512, + HASH_ALGO_TGR_128, + HASH_ALGO_TGR_160, + HASH_ALGO_TGR_192, + HASH_ALGO__LAST +}; + +#endif /* _UAPI_LINUX_HASH_INFO_H */ -- cgit v1.2.3 From 5336fa88e8ac6b666a3db9902a4797d94d86a702 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 7 Oct 2013 18:41:05 +0200 Subject: nl80211/cfg80211: enable DFS for IBSS mode To use DFS in IBSS mode, userspace is required to react to radar events. It can inform nl80211 that it is capable of doing so by adding a NL80211_ATTR_HANDLE_DFS attribute when joining the IBSS. This attribute is supplied to let the kernelspace know that the userspace application can and will handle radar events, e.g. by intiating channel switches to a valid channel. DFS channels may only be used if this attribute is supplied and the driver supports it. Driver support will be checked even if a channel without DFS will be initially joined, as a DFS channel may be chosen later. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer [fix attribute name in commit message] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f2aef2a7a570..f752e9821e71 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1501,6 +1501,13 @@ enum nl80211_commands { * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported * supported operating classes. * + * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space + * controls DFS operation in IBSS mode. If the flag is included in + * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS + * channels and reports radar events to userspace. Userspace is required + * to react to radar events, e.g. initiate a channel switch or leave the + * IBSS network. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1815,6 +1822,8 @@ enum nl80211_attrs { NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES, + NL80211_ATTR_HANDLE_DFS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 4571912743ac6a04a6644e5a292bb9876bb5329b Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Wed, 16 Oct 2013 02:36:47 -0300 Subject: [media] v4l: ti-vpe: Add VPE mem to mem driver VPE is a block which consists of a single memory to memory path which can perform chrominance up/down sampling, de-interlacing, scaling, and color space conversion of raster or tiled YUV420 coplanar, YUV422 coplanar or YUV422 interleaved video formats. We create a mem2mem driver based primarily on the mem2mem-testdev example. The de-interlacer, scaler and color space converter are all bypassed for now to keep the driver simple. Chroma up/down sampler blocks are implemented, so conversion beteen different YUV formats is possible. Each mem2mem context allocates a buffer for VPE MMR values which it will use when it gets access to the VPE HW via the mem2mem queue, it also allocates a VPDMA descriptor list to which configuration and data descriptors are added. Based on the information received via v4l2 ioctls for the source and destination queues, the driver configures the values for the MMRs, and stores them in the buffer. There are also some VPDMA parameters like frame start and line mode which needs to be configured, these are configured by direct register writes via the VPDMA helper functions. The driver's device_run() mem2mem op will add each descriptor based on how the source and destination queues are set up for the given ctx, once the list is prepared, it's submitted to VPDMA, these descriptors when parsed by VPDMA will upload MMR registers, start DMA of video buffers on the various input and output clients/ports. When the list is parsed completely(and the DMAs on all the output ports done), an interrupt is generated which we use to notify that the source and destination buffers are done. The rest of the driver is quite similar to other mem2mem drivers, we use the multiplane v4l2 ioctls as the HW support coplanar formats. Signed-off-by: Archit Taneja Acked-by: Hans Verkuil Signed-off-by: Kamil Debski Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 083bb5a5aae2..1666aabbbb86 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -160,6 +160,10 @@ enum v4l2_colorfx { * of controls. Total of 16 controls is reserved for this driver */ #define V4L2_CID_USER_SI476X_BASE (V4L2_CID_USER_BASE + 0x1040) +/* The base for the TI VPE driver controls. Total of 16 controls is reserved for + * this driver */ +#define V4L2_CID_USER_TI_VPE_BASE (V4L2_CID_USER_BASE + 0x1050) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From 5837f6dfcb00f764976ddc178933e612702cbf54 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 18 Oct 2013 15:15:18 -0400 Subject: NFS: stop using NFS_MOUNT_SECFLAVOUR server flag Since the parsed sec= flavor is now stored in nfs_server->auth_info, we no longer need an nfs_server flag to determine if a sec= option was used. This flag has not been completely removed because it is still needed for the (old but still supported) non-text parsed mount options ABI compatability. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs_mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs_mount.h b/include/uapi/linux/nfs_mount.h index 576bddd72e04..64b0f22f5c4c 100644 --- a/include/uapi/linux/nfs_mount.h +++ b/include/uapi/linux/nfs_mount.h @@ -60,7 +60,7 @@ struct nfs_mount_data { #define NFS_MOUNT_BROKEN_SUID 0x0400 /* 4 */ #define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ -#define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ +#define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF -- cgit v1.2.3 From 7d1d65cb84e1cfacba3f54c5934194785259e0d8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Oct 2013 16:43:02 +0100 Subject: net: sched: cls_bpf: add BPF-based classifier This work contains a lightweight BPF-based traffic classifier that can serve as a flexible alternative to ematch-based tree classification, i.e. now that BPF filter engine can also be JITed in the kernel. Naturally, tc actions and policies are supported as well with cls_bpf. Multiple BPF programs/filter can be attached for a class, or they can just as well be written within a single BPF program, that's really up to the user how he wishes to run/optimize the code, e.g. also for inversion of verdicts etc. The notion of a BPF program's return/exit codes is being kept as follows: 0: No match -1: Select classid given in "tc filter ..." command else: flowid, overwrite the default one As a minimal usage example with iproute2, we use a 3 band prio root qdisc on a router with sfq each as leave, and assign ssh and icmp bpf-based filters to band 1, http traffic to band 2 and the rest to band 3. For the first two bands we load the bytecode from a file, in the 2nd we load it inline as an example: echo 1 > /proc/sys/net/core/bpf_jit_enable tc qdisc del dev em1 root tc qdisc add dev em1 root handle 1: prio bands 3 priomap 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 tc qdisc add dev em1 parent 1:1 sfq perturb 16 tc qdisc add dev em1 parent 1:2 sfq perturb 16 tc qdisc add dev em1 parent 1:3 sfq perturb 16 tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/ssh.bpf flowid 1:1 tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/icmp.bpf flowid 1:1 tc filter add dev em1 parent 1: bpf run bytecode-file /etc/tc/http.bpf flowid 1:2 tc filter add dev em1 parent 1: bpf run bytecode "`bpfc -f tc -i misc.ops`" flowid 1:3 BPF programs can be easily created and passed to tc, either as inline 'bytecode' or 'bytecode-file'. There are a couple of front-ends that can compile opcodes, for example: 1) People familiar with tcpdump-like filters: tcpdump -iem1 -ddd port 22 | tr '\n' ',' > /etc/tc/ssh.bpf 2) People that want to low-level program their filters or use BPF extensions that lack support by libpcap's compiler: bpfc -f tc -i ssh.ops > /etc/tc/ssh.bpf ssh.ops example code: ldh [12] jne #0x800, drop ldb [23] jneq #6, drop ldh [20] jset #0x1fff, drop ldxb 4 * ([14] & 0xf) ldh [%x + 14] jeq #0x16, pass ldh [%x + 16] jne #0x16, drop pass: ret #-1 drop: ret #0 It was chosen to load bytecode into tc, since the reverse operation, tc filter list dev em1, is then able to show the exact commands again. Possible follow-up work could also include a small expression compiler for iproute2. Tested with the help of bmon. This idea came up during the Netfilter Workshop 2013 in Copenhagen. Also thanks to feedback from Eric Dumazet! Signed-off-by: Daniel Borkmann Cc: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 082eafaf026b..25731dfb3fcc 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -388,6 +388,20 @@ enum { #define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) +/* BPF classifier */ + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { -- cgit v1.2.3 From 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..32c60b98ddf8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -668,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 32c60b98ddf8..509cfbfe9658 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -845,6 +845,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds -- cgit v1.2.3 From 5eb26b156e29eadcc21f73fb5d14497f0db24b86 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 23 Oct 2013 01:44:59 -0700 Subject: openvswitch: TCP flags matching support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_flags=flags/mask Bitwise match on TCP flags. The flags and mask are 16-bit num‐ bers written in decimal or in hexadecimal prefixed by 0x. Each 1-bit in mask requires that the corresponding bit in port must match. Each 0-bit in mask causes the corresponding bit to be ignored. TCP protocol currently defines 9 flag bits, and additional 3 bits are reserved (must be transmitted as zero), see RFCs 793, 3168, and 3540. The flag bits are, numbering from the least significant bit: 0: FIN No more data from sender. 1: SYN Synchronize sequence numbers. 2: RST Reset the connection. 3: PSH Push function. 4: ACK Acknowledgement field significant. 5: URG Urgent pointer field significant. 6: ECE ECN Echo. 7: CWR Congestion Windows Reduced. 8: NS Nonce Sum. 9-11: Reserved. 12-15: Not matchable, must be zero. Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 2cc4644f68ef..d120f9fe0017 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -271,6 +271,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ + OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ -- cgit v1.2.3 From f421436a591d34fa5279b54a96ac07d70250cc8d Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Wed, 30 Oct 2013 21:10:47 +0100 Subject: net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0) High-availability Seamless Redundancy ("HSR") provides instant failover redundancy for Ethernet networks. It requires a special network topology where all nodes are connected in a ring (each node having two physical network interfaces). It is suited for applications that demand high availability and very short reaction time. HSR acts on the Ethernet layer, using a registered Ethernet protocol type to send special HSR frames in both directions over the ring. The driver creates virtual network interfaces that can be used just like any ordinary Linux network interface, for IP/TCP/UDP traffic etc. All nodes in the network ring must be HSR capable. This code is a "best effort" to comply with the HSR standard as described in IEC 62439-3:2010 (HSRv0). Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- include/uapi/linux/hsr_netlink.h | 50 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_link.h | 13 +++++++++++ 3 files changed, 64 insertions(+) create mode 100644 include/uapi/linux/hsr_netlink.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hsr_netlink.h b/include/uapi/linux/hsr_netlink.h new file mode 100644 index 000000000000..2475cb8a53af --- /dev/null +++ b/include/uapi/linux/hsr_netlink.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2013 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + */ + +#ifndef __UAPI_HSR_NETLINK_H +#define __UAPI_HSR_NETLINK_H + +/* Generic Netlink HSR family definition + */ + +/* attributes */ +enum { + HSR_A_UNSPEC, + HSR_A_NODE_ADDR, + HSR_A_IFINDEX, + HSR_A_IF1_AGE, + HSR_A_IF2_AGE, + HSR_A_NODE_ADDR_B, + HSR_A_IF1_SEQ, + HSR_A_IF2_SEQ, + HSR_A_IF1_IFINDEX, + HSR_A_IF2_IFINDEX, + HSR_A_ADDR_B_IFINDEX, + __HSR_A_MAX, +}; +#define HSR_A_MAX (__HSR_A_MAX - 1) + + +/* commands */ +enum { + HSR_C_UNSPEC, + HSR_C_RING_ERROR, + HSR_C_NODE_DOWN, + HSR_C_GET_NODE_STATUS, + HSR_C_SET_NODE_STATUS, + HSR_C_GET_NODE_LIST, + HSR_C_SET_NODE_LIST, + __HSR_C_MAX, +}; +#define HSR_C_MAX (__HSR_C_MAX - 1) + +#endif /* __UAPI_HSR_NETLINK_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index ade07f1c491a..2ce0f6a78fa5 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -85,6 +85,7 @@ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8a1e346243b7..b78566f59aba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -481,4 +481,17 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) + +/* HSR section */ + +enum { + IFLA_HSR_UNSPEC, + IFLA_HSR_SLAVE1, + IFLA_HSR_SLAVE2, + IFLA_HSR_MULTICAST_SPEC, + __IFLA_HSR_MAX, +}; + +#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From b50eba7e2d534762a19a7207dda012f09302a8d2 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 16 Sep 2013 18:20:42 -0400 Subject: audit: format user messages to size of MAX_AUDIT_MESSAGE_LENGTH Messages of type AUDIT_USER_TTY were being formatted to 1024 octets, truncating messages approaching MAX_AUDIT_MESSAGE_LENGTH (8970 octets). Set the formatting to 8560 characters, given maximum estimates for prefix and suffix budgets. See the problem discussion: https://www.redhat.com/archives/linux-audit/2009-January/msg00030.html And the new size rationale: https://www.redhat.com/archives/linux-audit/2013-September/msg00016.html Test ~8k messages with: auditctl -m "$(for i in $(seq -w 001 820);do echo -n "${i}0______";done)" Reported-by: LC Bruzenak Reported-by: Justin Stephenson Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/uapi/linux/audit.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 75cef3fd97ad..5dfcd85037e2 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -358,6 +358,12 @@ enum { #define AUDIT_PERM_READ 4 #define AUDIT_PERM_ATTR 8 +/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as: + * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1 + * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad + */ +#define AUDIT_MESSAGE_TEXT_MAX 8560 + struct audit_status { __u32 mask; /* Bit mask for valid entries */ __u32 enabled; /* 1 = enabled, 0 = disabled */ -- cgit v1.2.3 From 42f74461a5b60cf6b42887e6d2ff5b7be4abf1ca Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 20 May 2013 15:08:18 -0400 Subject: audit: change decimal constant to macro for invalid uid SFR reported this 2013-05-15: > After merging the final tree, today's linux-next build (i386 defconfig) > produced this warning: > > kernel/auditfilter.c: In function 'audit_data_to_entry': > kernel/auditfilter.c:426:3: warning: this decimal constant is unsigned only > in ISO C90 [enabled by default] > > Introduced by commit 780a7654cee8 ("audit: Make testing for a valid > loginuid explicit") from Linus' tree. Replace this decimal constant in the code with a macro to make it more readable (add to the unsigned cast to quiet the warning). Cc: Stephen Rothwell Cc: "Eric W. Biederman" Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/uapi/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 5dfcd85037e2..c1f0fced3ede 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -380,6 +380,8 @@ struct audit_tty_status { __u32 log_passwd; /* 1 = enabled, 0 = disabled */ }; +#define AUDIT_UID_UNSET (unsigned int)-1 + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. -- cgit v1.2.3 From b0fed40214ce79ef70d97584ebdf13f89786da0e Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 22 May 2013 12:54:49 -0400 Subject: audit: implement generic feature setting and retrieving The audit_status structure was not designed with extensibility in mind. Define a new AUDIT_SET_FEATURE message type which takes a new structure of bits where things can be enabled/disabled/locked one at a time. This structure should be able to grow in the future while maintaining forward and backward compatibility (based loosly on the ideas from capabilities and prctl) This does not actually add any features, but is just infrastructure to allow new on/off types of audit system features. Signed-off-by: Eric Paris Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/uapi/linux/audit.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index c1f0fced3ede..9eddf2ca614f 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -68,6 +68,9 @@ #define AUDIT_MAKE_EQUIV 1015 /* Append to watched tree */ #define AUDIT_TTY_GET 1016 /* Get TTY auditing status */ #define AUDIT_TTY_SET 1017 /* Set TTY auditing status */ +#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */ +#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */ +#define AUDIT_FEATURE_CHANGE 1020 /* audit log listing feature changes */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ @@ -375,6 +378,19 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +struct audit_features { +#define AUDIT_FEATURE_VERSION 1 + __u32 vers; + __u32 mask; /* which bits we are dealing with */ + __u32 features; /* which feature to enable/disable */ + __u32 lock; /* which features to lock */ +}; + +#define AUDIT_LAST_FEATURE -1 + +#define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE) +#define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */ + struct audit_tty_status { __u32 enabled; /* 1 = enabled, 0 = disabled */ __u32 log_passwd; /* 1 = enabled, 0 = disabled */ -- cgit v1.2.3 From d040e5af380554c23ffe0a034ae5f3e53da93a1d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 24 May 2013 09:18:04 -0400 Subject: audit: audit feature to only allow unsetting the loginuid This is a new audit feature which only grants processes with CAP_AUDIT_CONTROL the ability to unset their loginuid. They cannot directly set it from a valid uid to another valid uid. The ability to unset the loginuid is nice because a priviledged task, like that of container creation, can unset the loginuid and then priv is not needed inside the container when a login daemon needs to set the loginuid. Signed-off-by: Eric Paris Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/uapi/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9eddf2ca614f..05e5e8fc2ac4 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -386,7 +386,8 @@ struct audit_features { __u32 lock; /* which features to lock */ }; -#define AUDIT_LAST_FEATURE -1 +#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID 0 +#define AUDIT_LAST_FEATURE AUDIT_FEATURE_ONLY_UNSET_LOGINUID #define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE) #define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */ -- cgit v1.2.3 From 21b85c31d23f2047d47e1f74bfa5caa8b75c1c77 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 23 May 2013 14:26:00 -0400 Subject: audit: audit feature to set loginuid immutable This adds a new 'audit_feature' bit which allows userspace to set it such that the loginuid is absolutely immutable, even if you have CAP_AUDIT_CONTROL. Signed-off-by: Eric Paris Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/uapi/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 05e5e8fc2ac4..e2f0d9977131 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -387,7 +387,8 @@ struct audit_features { }; #define AUDIT_FEATURE_ONLY_UNSET_LOGINUID 0 -#define AUDIT_LAST_FEATURE AUDIT_FEATURE_ONLY_UNSET_LOGINUID +#define AUDIT_FEATURE_LOGINUID_IMMUTABLE 1 +#define AUDIT_LAST_FEATURE AUDIT_FEATURE_LOGINUID_IMMUTABLE #define audit_feature_valid(x) ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE) #define AUDIT_FEATURE_TO_MASK(x) (1 << ((x) & 31)) /* mask for __u32 */ -- cgit v1.2.3 From 482fc6094afad572a4ea1fd722e7b11ca72022a0 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 5 Nov 2013 02:24:17 +0100 Subject: ipv4: introduce new IP_MTU_DISCOVER mode IP_PMTUDISC_INTERFACE Sockets marked with IP_PMTUDISC_INTERFACE won't do path mtu discovery, their sockets won't accept and install new path mtu information and they will always use the interface mtu for outgoing packets. It is guaranteed that the packet is not fragmented locally. But we won't set the DF-Flag on the outgoing frames. Florian Weimer had the idea to use this flag to ensure DNS servers are never generating outgoing fragments. They may well be fragmented on the path, but the server never stores or usees path mtu values, which could well be forged in an attack. (The root of the problem with path MTU discovery is that there is no reliable way to authenticate ICMP Fragmentation Needed But DF Set messages because they are sent from intermediate routers with their source addresses, and the IMCP payload will not always contain sufficient information to identify a flow.) Recent research in the DNS community showed that it is possible to implement an attack where DNS cache poisoning is feasible by spoofing fragments. This work was done by Amir Herzberg and Haya Shulman: This issue was previously discussed among the DNS community, e.g. , without leading to fixes. This patch depends on the patch "ipv4: fix DO and PROBE pmtu mode regarding local fragmentation with UFO/CORK" for the enforcement of the non-fragmentable checks. If other users than ip_append_page/data should use this semantic too, we have to add a new flag to IPCB(skb)->flags to suppress local fragmentation and check for this in ip_finish_output. Many thanks to Florian Weimer for the idea and feedback while implementing this patch. Cc: David S. Miller Suggested-by: Florian Weimer Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f9e8e496ae5d..393c5de09d42 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -115,6 +115,11 @@ struct in_addr { #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF */ #define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ +/* Always use interface mtu (ignores dst pmtu) but don't set DF flag. + * Also incoming ICMP frag_needed notifications will be ignored on + * this socket to prevent accepting spoofed ones. + */ +#define IP_PMTUDISC_INTERFACE 4 #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 -- cgit v1.2.3 From f83c3838b9146b891d0405d3a83660e8f6aed02f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Sun, 13 Oct 2013 18:05:23 -0300 Subject: mtd: Move major number definitions to major.h This patch moves the char and block major number definitions to major.h to be with the rest of the major numbers. While doing this, include major.h in the files that need it. Signed-off-by: Ezequiel Garcia Signed-off-by: Brian Norris --- include/uapi/linux/major.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h index 6a8ca98c9a96..620252e69b44 100644 --- a/include/uapi/linux/major.h +++ b/include/uapi/linux/major.h @@ -54,6 +54,7 @@ #define ACSI_MAJOR 28 #define AZTECH_CDROM_MAJOR 29 #define FB_MAJOR 29 /* /dev/fb* framebuffers */ +#define MTD_BLOCK_MAJOR 31 #define CM206_CDROM_MAJOR 32 #define IDE2_MAJOR 33 #define IDE3_MAJOR 34 @@ -105,6 +106,7 @@ #define IDE6_MAJOR 88 #define IDE7_MAJOR 89 #define IDE8_MAJOR 90 +#define MTD_CHAR_MAJOR 90 #define IDE9_MAJOR 91 #define DASD_MAJOR 94 -- cgit v1.2.3 From a6cc0cfa72e0b6d9f2c8fd858aacc32313c4f272 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Nov 2013 09:54:46 -0800 Subject: net: Add layer 2 hardware acceleration operations for macvlan devices Add a operations structure that allows a network interface to export the fact that it supports package forwarding in hardware between physical interfaces and other mac layer devices assigned to it (such as macvlans). This operaions structure can be used by virtual mac devices to bypass software switching so that forwarding can be done in hardware more efficiently. Signed-off-by: John Fastabend Signed-off-by: Neil Horman CC: Andy Gospodarek CC: "David S. Miller" Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 1ec407b01e46..d758163b0e43 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -83,6 +83,7 @@ #define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ #define IFF_LIVE_ADDR_CHANGE 0x100000 /* device supports hardware address * change when it's running */ +#define IFF_MACVLAN 0x200000 /* Macvlan device */ #define IF_GET_IFACE 0x0001 /* for querying only */ -- cgit v1.2.3 From a33c4a2663c19ac01e557d6b78806271eec2a150 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 8 Nov 2013 10:23:34 +0800 Subject: net_sched: tbf: support of 64bit rates With psched_ratecfg_precompute(), tbf can deal with 64bit rates. Add two new attributes so that tc can use them to break the 32bit limit. Signed-off-by: Yang Yingliang Suggested-by: Sergei Shtylyov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f2624b549e61..307f293477e8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -171,6 +171,8 @@ enum { TCA_TBF_PARMS, TCA_TBF_RTAB, TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, __TCA_TBF_MAX, }; -- cgit v1.2.3 From 2c140a246dc0bc085b98eddde978060fcec1080c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Nov 2013 18:27:41 -0400 Subject: dm: allow remove to be deferred This patch allows the removal of an open device to be deferred until it is closed. (Previously such a removal attempt would fail.) The deferred remove functionality is enabled by setting the flag DM_DEFERRED_REMOVE in the ioctl structure on DM_DEV_REMOVE or DM_REMOVE_ALL ioctl. On return from DM_DEV_REMOVE, the flag DM_DEFERRED_REMOVE indicates if the device was removed immediately or flagged to be removed on close - if the flag is clear, the device was removed. On return from DM_DEV_STATUS and other ioctls, the flag DM_DEFERRED_REMOVE is set if the device is scheduled to be removed on closure. A device that is scheduled to be deleted can be revived using the message "@cancel_deferred_remove". This message clears the DMF_DEFERRED_REMOVE flag so that the device won't be deleted on close. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index f1e12bd40b3b..c8a4302093a3 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 26 +#define DM_VERSION_MINOR 27 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2013-08-15)" +#define DM_VERSION_EXTRA "-ioctl (2013-10-30)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -341,4 +341,15 @@ enum { */ #define DM_DATA_OUT_FLAG (1 << 16) /* Out */ +/* + * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if + * the device cannot be removed immediately because it is still in use + * it should instead be scheduled for removal when it gets closed. + * + * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this + * flag indicates that the device is scheduled to be removed when it + * gets closed. + */ +#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From 81ab4190ac17df41686a37c97f701623276b652a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 31 Oct 2013 15:46:42 -0700 Subject: bcache: Pull on disk data structures out into a separate header Now, the on disk data structures are in a header that can be exported to userspace - and having them all centralized is nice too. Signed-off-by: Kent Overstreet --- include/uapi/linux/bcache.h | 373 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) create mode 100644 include/uapi/linux/bcache.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h new file mode 100644 index 000000000000..164a7e263988 --- /dev/null +++ b/include/uapi/linux/bcache.h @@ -0,0 +1,373 @@ +#ifndef _LINUX_BCACHE_H +#define _LINUX_BCACHE_H + +/* + * Bcache on disk data structures + */ + +#include + +#define BITMASK(name, type, field, offset, size) \ +static inline __u64 name(const type *k) \ +{ return (k->field >> offset) & ~(~0ULL << size); } \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + k->field &= ~(~(~0ULL << size) << offset); \ + k->field |= (v & ~(~0ULL << size)) << offset; \ +} + +/* Btree keys - all units are in sectors */ + +struct bkey { + __u64 high; + __u64 low; + __u64 ptr[]; +}; + +#define KEY_FIELD(name, field, offset, size) \ + BITMASK(name, struct bkey, field, offset, size) + +#define PTR_FIELD(name, offset, size) \ +static inline __u64 name(const struct bkey *k, unsigned i) \ +{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ + \ +static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \ +{ \ + k->ptr[i] &= ~(~(~0ULL << size) << offset); \ + k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ +} + +#define KEY_SIZE_BITS 16 + +KEY_FIELD(KEY_PTRS, high, 60, 3) +KEY_FIELD(HEADER_SIZE, high, 58, 2) +KEY_FIELD(KEY_CSUM, high, 56, 2) +KEY_FIELD(KEY_PINNED, high, 55, 1) +KEY_FIELD(KEY_DIRTY, high, 36, 1) + +KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) +KEY_FIELD(KEY_INODE, high, 0, 20) + +/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ + +static inline __u64 KEY_OFFSET(const struct bkey *k) +{ + return k->low; +} + +static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) +{ + k->low = v; +} + +/* + * The high bit being set is a relic from when we used it to do binary + * searches - it told you where a key started. It's not used anymore, + * and can probably be safely dropped. + */ +#define KEY(inode, offset, size) \ +((struct bkey) { \ + .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ + .low = (offset) \ +}) + +#define ZERO_KEY KEY(0, 0, 0) + +#define MAX_KEY_INODE (~(~0 << 20)) +#define MAX_KEY_OFFSET (~0ULL >> 1) +#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) + +#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) +#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) + +#define PTR_DEV_BITS 12 + +PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) +PTR_FIELD(PTR_OFFSET, 8, 43) +PTR_FIELD(PTR_GEN, 0, 8) + +#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) + +#define PTR(gen, offset, dev) \ + ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) + +/* Bkey utility code */ + +static inline unsigned long bkey_u64s(const struct bkey *k) +{ + return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); +} + +static inline unsigned long bkey_bytes(const struct bkey *k) +{ + return bkey_u64s(k) * sizeof(__u64); +} + +#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) + +static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) +{ + SET_KEY_INODE(dest, KEY_INODE(src)); + SET_KEY_OFFSET(dest, KEY_OFFSET(src)); +} + +static inline struct bkey *bkey_next(const struct bkey *k) +{ + __u64 *d = (void *) k; + return (struct bkey *) (d + bkey_u64s(k)); +} + +static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys) +{ + __u64 *d = (void *) k; + return (struct bkey *) (d + nr_keys); +} +/* Enough for a key with 6 pointers */ +#define BKEY_PAD 8 + +#define BKEY_PADDED(key) \ + union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } + +/* Superblock */ + +/* Version 0: Cache device + * Version 1: Backing device + * Version 2: Seed pointer into btree node checksum + * Version 3: Cache device with new UUID format + * Version 4: Backing device with data offset + */ +#define BCACHE_SB_VERSION_CDEV 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_MAX_VERSION 4 + +#define SB_SECTOR 8 +#define SB_SIZE 4096 +#define SB_LABEL_SIZE 32 +#define SB_JOURNAL_BUCKETS 256U +/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ +#define MAX_CACHES_PER_SET 8 + +#define BDEV_DATA_START_DEFAULT 16 /* sectors */ + +struct cache_sb { + __u64 csum; + __u64 offset; /* sector where this sb was written */ + __u64 version; + + __u8 magic[16]; + + __u8 uuid[16]; + union { + __u8 set_uuid[16]; + __u64 set_magic; + }; + __u8 label[SB_LABEL_SIZE]; + + __u64 flags; + __u64 seq; + __u64 pad[8]; + + union { + struct { + /* Cache devices */ + __u64 nbuckets; /* device size */ + + __u16 block_size; /* sectors */ + __u16 bucket_size; /* sectors */ + + __u16 nr_in_set; + __u16 nr_this_dev; + }; + struct { + /* Backing devices */ + __u64 data_offset; + + /* + * block_size from the cache device section is still used by + * backing devices, so don't add anything here until we fix + * things to not need it for backing devices anymore + */ + }; + }; + + __u32 last_mount; /* time_t */ + + __u16 first_bucket; + union { + __u16 njournal_buckets; + __u16 keys; + }; + __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ +}; + +static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) +{ + return sb->version == BCACHE_SB_VERSION_BDEV + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; +} + +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U + +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +/* + * Magic numbers + * + * The various other data structures have their own magic numbers, which are + * xored with the first part of the cache set's UUID + */ + +#define JSET_MAGIC 0x245235c1a3625032ULL +#define PSET_MAGIC 0x6750e15f87337f91ULL +#define BSET_MAGIC 0x90135c78b99e07f5ULL + +static inline __u64 jset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ JSET_MAGIC; +} + +static inline __u64 pset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ PSET_MAGIC; +} + +static inline __u64 bset_magic(struct cache_sb *sb) +{ + return sb->set_magic ^ BSET_MAGIC; +} + +/* + * Journal + * + * On disk format for a journal entry: + * seq is monotonically increasing; every journal entry has its own unique + * sequence number. + * + * last_seq is the oldest journal entry that still has keys the btree hasn't + * flushed to disk yet. + * + * version is for on disk format changes. + */ + +#define BCACHE_JSET_VERSION_UUIDv1 1 +#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ +#define BCACHE_JSET_VERSION 1 + +struct jset { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 keys; + + __u64 last_seq; + + BKEY_PADDED(uuid_bucket); + BKEY_PADDED(btree_root); + __u16 btree_level; + __u16 pad[3]; + + __u64 prio_bucket[MAX_CACHES_PER_SET]; + + union { + struct bkey start[0]; + __u64 d[0]; + }; +}; + +/* Bucket prios/gens */ + +struct prio_set { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 pad; + + __u64 next_bucket; + + struct bucket_disk { + __u16 prio; + __u8 gen; + } __attribute((packed)) data[]; +}; + +/* UUIDS - per backing device/flash only volume metadata */ + +struct uuid_entry { + union { + struct { + __u8 uuid[16]; + __u8 label[32]; + __u32 first_reg; + __u32 last_reg; + __u32 invalidated; + + __u32 flags; + /* Size of flash only volumes */ + __u64 sectors; + }; + + __u8 pad[128]; + }; +}; + +BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); + +/* Btree nodes */ + +/* Version 1: Seed pointer into btree node checksum + */ +#define BCACHE_BSET_CSUM 1 +#define BCACHE_BSET_VERSION 1 + +/* + * Btree nodes + * + * On disk a btree node is a list/log of these; within each set the keys are + * sorted + */ +struct bset { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 keys; + + union { + struct bkey start[0]; + __u64 d[0]; + }; +}; + +/* OBSOLETE */ + +/* UUIDS - per backing device/flash only volume metadata */ + +struct uuid_entry_v0 { + __u8 uuid[16]; + __u8 label[32]; + __u32 first_reg; + __u32 last_reg; + __u32 invalidated; + __u32 pad; +}; + +#endif /* _LINUX_BCACHE_H */ -- cgit v1.2.3 From 38e9efcdb33270b4da72143d8e7ca4dcf7f0989b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Nov 2013 12:20:35 +0100 Subject: random32: move rnd_state to linux/random.h struct rnd_state got mistakenly pulled into uapi header. It is not used anywhere and does also not belong there! Commit 5960164fde ("lib/random32: export pseudo-random number generator for modules"), the last commit on rnd_state before it got moved to uapi, says: This patch moves the definition of struct rnd_state and the inline __seed() function to linux/random.h. It renames the static __random32() function to prandom32() and exports it for use in modules. Hence, the structure was moved from lib/random32.c to linux/random.h so that it can be used within modules (FCoE-related code in this case), but not from user space. However, it seems to have been mistakenly moved to uapi header through the uapi script. Since no-one should make use of it from the linux headers, move the structure back to the kernel for internal use, so that it can be modified on demand. Joint work with Hannes Frederic Sowa. Cc: Joe Eykholt Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/random.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index 7471b5b3b8ba..fff3528a078f 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -40,11 +40,4 @@ struct rand_pool_info { __u32 buf[0]; }; -struct rnd_state { - __u32 s1, s2, s3; -}; - -/* Exported functions */ - - #endif /* _UAPI_LINUX_RANDOM_H */ -- cgit v1.2.3 From 294e30fee35d3151d100cfe59e839c2dbc16a374 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 9 Oct 2013 12:00:56 -0400 Subject: Btrfs: add tests for find_lock_delalloc_range So both Liu and I made huge messes of find_lock_delalloc_range trying to fix stuff, me first by fixing extent size, then him by fixing something I broke and then me again telling him to fix it a different way. So this is obviously a candidate for some testing. This patch adds a pseudo fs so we can allocate fake inodes for tests that need an inode or pages. Then it addes a bunch of tests to make sure find_lock_delalloc_range is acting the way it is supposed to. With this patch and all of our previous patches to find_lock_delalloc_range I am sure it is working as expected now. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- include/uapi/linux/magic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 2944278a8ba7..77c60311a6c6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -71,6 +71,6 @@ #define USBDEVICE_SUPER_MAGIC 0x9fa2 #define MTD_INODE_FS_MAGIC 0x11307854 #define ANON_INODE_FS_MAGIC 0x09041934 - +#define BTRFS_TEST_MAGIC 0x73727279 #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From f7625980f5820edd1a73536e1a03bcbc1f889fec Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 14 Nov 2013 11:28:18 -0700 Subject: PCI: Fix whitespace, capitalization, and spelling errors Fix whitespace, capitalization, and spelling errors. No functional change. I know "busses" is not an error, but "buses" was more common, so I used it consistently. Signed-off-by: Marta Rybczynska (pci_reset_bridge_secondary_bus()) Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- include/uapi/linux/pci_regs.h | 72 +++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 0890556f779e..4a98e85438a7 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -13,10 +13,10 @@ * PCI to PCI Bridge Specification * PCI System Design Guide * - * For hypertransport information, please consult the following manuals - * from http://www.hypertransport.org + * For HyperTransport information, please consult the following manuals + * from http://www.hypertransport.org * - * The Hypertransport I/O Link Specification + * The HyperTransport I/O Link Specification */ #ifndef LINUX_PCI_REGS_H @@ -37,7 +37,7 @@ #define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ #define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ #define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ #define PCI_COMMAND_SERR 0x100 /* Enable SERR */ #define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ #define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ @@ -45,7 +45,7 @@ #define PCI_STATUS 0x06 /* 16 bits */ #define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ #define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 MHz PCI 2.1 bus */ #define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ #define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ #define PCI_STATUS_PARITY 0x100 /* Detected parity error */ @@ -205,14 +205,14 @@ #define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ #define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ #define PCI_CAP_ID_HT 0x08 /* HyperTransport */ -#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ +#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */ #define PCI_CAP_ID_DBG 0x0A /* Debug port */ #define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */ -#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ #define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ #define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */ #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ @@ -268,8 +268,8 @@ #define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ #define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ #define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ #define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ #define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ #define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ @@ -321,7 +321,7 @@ #define PCI_MSIX_PBA_OFFSET 0xfffffff8 /* Offset into specified BAR */ #define PCI_CAP_MSIX_SIZEOF 12 /* size of MSIX registers */ -/* MSI-X entry's format */ +/* MSI-X Table entry format */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 @@ -372,7 +372,7 @@ #define PCI_X_CMD_SPLIT_16 0x0060 /* Max 16 */ #define PCI_X_CMD_SPLIT_32 0x0070 /* Max 32 */ #define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ #define PCI_X_STATUS 4 /* PCI-X capabilities */ #define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ #define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ @@ -407,8 +407,8 @@ /* PCI Bridge Subsystem ID registers */ -#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -#define PCI_SSVID_DEVICE_ID 6 /* PCI-Bridge subsystem device id register */ +#define PCI_SSVID_VENDOR_ID 4 /* PCI Bridge subsystem vendor ID */ +#define PCI_SSVID_DEVICE_ID 6 /* PCI Bridge subsystem device ID */ /* PCI Express capability registers */ @@ -484,12 +484,12 @@ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x0100 /* Enable clkreq */ #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ -#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ #define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ #define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ @@ -593,7 +593,7 @@ #define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */ #define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */ #define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? */ -#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor Specific */ +#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */ #define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */ #define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */ #define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */ @@ -602,12 +602,12 @@ #define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */ #define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */ #define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */ -#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* reserved for AMD */ -#define PCI_EXT_CAP_ID_REBAR 0x15 /* resizable BAR */ -#define PCI_EXT_CAP_ID_DPA 0x16 /* dynamic power alloc */ -#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH request */ -#define PCI_EXT_CAP_ID_LTR 0x18 /* latency tolerance reporting */ -#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe */ +#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */ +#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */ +#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */ +#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */ +#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */ +#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PASID @@ -667,9 +667,9 @@ #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ /* Multi ERR_COR Received */ #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Recevied */ +/* ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Recevied */ +/* Multi ERR_FATAL/NONFATAL Received */ #define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 #define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ @@ -678,7 +678,7 @@ /* Virtual Channel */ #define PCI_VC_PORT_REG1 4 -#define PCI_VC_REG1_EVCC 0x7 /* extended vc count */ +#define PCI_VC_REG1_EVCC 0x7 /* extended VC count */ #define PCI_VC_PORT_REG2 8 #define PCI_VC_REG2_32_PHASE 0x2 #define PCI_VC_REG2_64_PHASE 0x4 @@ -711,7 +711,7 @@ #define PCI_VNDR_HEADER_LEN(x) (((x) >> 20) & 0xfff) /* - * Hypertransport sub capability types + * HyperTransport sub capability types * * Unfortunately there are both 3 bit and 5 bit capability types defined * in the HT spec, catering for that is a little messy. You probably don't @@ -739,8 +739,8 @@ #define HT_CAPTYPE_DIRECT_ROUTE 0xB0 /* Direct routing configuration */ #define HT_CAPTYPE_VCSET 0xB8 /* Virtual Channel configuration */ #define HT_CAPTYPE_ERROR_RETRY 0xC0 /* Retry on error configuration */ -#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 hypertransport configuration */ -#define HT_CAPTYPE_PM 0xE0 /* Hypertransport powermanagement configuration */ +#define HT_CAPTYPE_GEN3 0xD0 /* Generation 3 HyperTransport configuration */ +#define HT_CAPTYPE_PM 0xE0 /* HyperTransport power management configuration */ #define HT_CAP_SIZEOF_LONG 28 /* slave & primary */ #define HT_CAP_SIZEOF_SHORT 24 /* host & secondary */ @@ -777,14 +777,14 @@ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 -/* PASID capability */ +/* Process Address Space ID */ #define PCI_PASID_CAP 0x04 /* PASID feature register */ #define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ -#define PCI_PASID_CAP_PRIV 0x04 /* Priviledge Mode Supported */ +#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ #define PCI_PASID_CTRL 0x06 /* PASID control register */ #define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ #define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ -#define PCI_PASID_CTRL_PRIV 0x04 /* Priviledge Mode Enable */ +#define PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ #define PCI_EXT_CAP_PASID_SIZEOF 8 /* Single Root I/O Virtualization */ @@ -839,22 +839,22 @@ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ -#define PCI_VSEC_HDR 4 /* extended cap - vendor specific */ +#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */ #define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */ -/* sata capability */ +/* SATA capability */ #define PCI_SATA_REGS 4 /* SATA REGs specifier */ #define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */ #define PCI_SATA_REGS_INLINE 0xF /* REGS in config space */ #define PCI_SATA_SIZEOF_SHORT 8 #define PCI_SATA_SIZEOF_LONG 16 -/* resizable BARs */ +/* Resizable BARs */ #define PCI_REBAR_CTRL 8 /* control register */ #define PCI_REBAR_CTRL_NBAR_MASK (7 << 5) /* mask for # bars */ #define PCI_REBAR_CTRL_NBAR_SHIFT 5 /* shift for # bars */ -/* dynamic power allocation */ +/* Dynamic Power Allocation */ #define PCI_DPA_CAP 4 /* capability register */ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ -- cgit v1.2.3 From 65c5189a2b57b9aa1d89e4b79da39928257c9505 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Nov 2013 08:57:26 -0800 Subject: pkt_sched: fq: warn users using defrate Commit 7eec4174ff29 ("pkt_sched: fq: fix non TCP flows pacing") obsoleted TCA_FQ_FLOW_DEFAULT_RATE without notice for the users. Suggested by David Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 307f293477e8..885001b62c83 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -763,9 +763,7 @@ enum { TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ - TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate, - * use the following rate - */ + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ -- cgit v1.2.3 From f52ed89971adbe79b6438c459814034707b8ab91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Nov 2013 08:58:14 -0800 Subject: pkt_sched: fq: fix pacing for small frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For performance reasons, sch_fq tried hard to not setup timers for every sent packet, using a quantum based heuristic : A delay is setup only if the flow exhausted its credit. Problem is that application limited flows can refill their credit for every queued packet, and they can evade pacing. This problem can also be triggered when TCP flows use small MSS values, as TSO auto sizing builds packets that are smaller than the default fq quantum (3028 bytes) This patch adds a 40 ms delay to guard flow credit refill. Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Cc: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 885001b62c83..a806687ad98f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -768,6 +768,9 @@ enum { TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + __TCA_FQ_MAX }; -- cgit v1.2.3 From c0f8bd146a8b3e630798561c605f5669823107af Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 14 Nov 2013 15:16:19 +1100 Subject: UAPI: include in linux/raid/md_p.h linux/raid/md_p.h is using conditionals depending on endianess and fails with an error if neither of __BIG_ENDIAN, __LITTLE_ENDIAN or __BYTE_ORDER are defined, but it doesn't include any header which can define these constants. This make this header unusable alone. This patch adds a #include at the beginning of this header to make it usable alone. This is needed to compile klibc on MIPS. Signed-off-by: Aurelien Jarno Signed-off-by: NeilBrown --- include/uapi/linux/raid/md_p.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index fe1a5406d4d9..f7cf7f351144 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -16,6 +16,7 @@ #define _MD_P_H #include +#include /* * RAID superblock. -- cgit v1.2.3 From 2ecf7536b2787580616d23b6507005d930975ca0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Nov 2013 15:19:33 +0100 Subject: quota/genetlink: use proper genetlink multicast APIs The quota code is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make the quota code use the correct API, but since this is already used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. Acked-by: Jan Kara Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/genetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index c880a417d8a9..1af72d8228e0 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -27,6 +27,7 @@ struct genlmsghdr { */ #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) /************************************************************************** * Controller -- cgit v1.2.3