From bae918ac280f01a4fa89b570643def7bb276f597 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 11 Aug 2010 11:11:06 +0900
Subject: tile: remove unused ISA_DMA_THRESHOLD define

No need to define ISA_DMA_THRESHOLD

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[cmetcalf@tilera.com: converted to a single-line #include file]
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/scatterlist.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
index c5604242c0d5..35d786fe93ae 100644
--- a/arch/tile/include/asm/scatterlist.h
+++ b/arch/tile/include/asm/scatterlist.h
@@ -1,22 +1 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_SCATTERLIST_H
-#define _ASM_TILE_SCATTERLIST_H
-
-#define ISA_DMA_THRESHOLD	(~0UL)
-
 #include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_TILE_SCATTERLIST_H */
-- 
cgit v1.2.3


From b77c49ab6d9bfe4d8207e1df72a1978fdd0a96b8 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 11 Aug 2010 10:54:13 -0400
Subject: arch/tile: support new kunmap_atomic() naming convention.

See commit 597781f3e51f48ef8e67be772196d9e9673752c4.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/highmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index efdd12e91020..d155db6fa9bd 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -60,7 +60,7 @@ void *kmap_fix_kpte(struct page *page, int finished);
 /* This macro is used only in map_new_virtual() to map "page". */
 #define kmap_prot page_to_kpgprot(page)
 
-void kunmap_atomic(void *kvaddr, enum km_type type);
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
-- 
cgit v1.2.3


From 4565f0170dfc849b3629c27d769db800467baa62 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Aug 2010 18:03:22 -0700
Subject: dma-mapping: unify dma_get_cache_alignment implementations

dma_get_cache_alignment returns the minimum DMA alignment.  Architectures
define it as ARCH_DMA_MINALIGN (formerly ARCH_KMALLOC_MINALIGN).  So we
can unify dma_get_cache_alignment implementations.

Note that some architectures implement dma_get_cache_alignment wrongly.
dma_get_cache_alignment() should return the minimum DMA alignment.  So
fully-coherent architectures should return 1.  This patch also fixes this
issue.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/include/asm/dma-mapping.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index cf466b39aa13..1326b910fec6 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -90,13 +90,6 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
-static inline int
-dma_get_cache_alignment(void)
-{
-	return L2_CACHE_BYTES;
-}
-
 #define dma_is_consistent(d, h)	(1)
 
-
 #endif /* _ASM_TILE_DMA_MAPPING_H */
-- 
cgit v1.2.3


From 3b9c6c11f519718d618f5d7c9508daf78b207f6f Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 10 Aug 2010 18:03:25 -0700
Subject: dma-mapping: remove dma_is_consistent API

Architectures implement dma_is_consistent() in different ways (some
misinterpret the definition of the API in DMA-API.txt).  So it hasn't been
very useful for drivers.  We have only one user of the API in tree.  It is
unlikely that out-of-tree drivers use the API.

Even if we fix dma_is_consistent() in some architectures, it doesn't look
useful at all.  It was invented long ago for some old systems that can't
allocate coherent memory at all.  It's better to export only APIs that are
definitely necessary for drivers.

Let's remove this API.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/include/asm/dma-mapping.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index 1326b910fec6..15e1dceecc64 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -90,6 +90,4 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
-#define dma_is_consistent(d, h)	(1)
-
 #endif /* _ASM_TILE_DMA_MAPPING_H */
-- 
cgit v1.2.3


From 32020effaf713c0c669864301bcd5dac6b9bb9e0 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:32:21 -0400
Subject: arch/tile: Fix a couple of issues with the COMPAT code for TILE-Gx.

First, the siginfo preamble wasn't quite right; we need to indicate
that we are padding up to 4 ints of preamble for 64-bit code, and
then for compat mode we need to pad differently, using only 3 ints.

Second, the C ABI requires a save area of two registers, not two
pointers, since in compat mode we have 64-bit registers all of which
we need to save, even though we only have 32-bit VAs.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/abi.h    | 4 +---
 arch/tile/include/asm/siginfo.h | 4 ++++
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h
index da8df5b9d914..8affc76f771a 100644
--- a/arch/tile/include/arch/abi.h
+++ b/arch/tile/include/arch/abi.h
@@ -59,9 +59,7 @@
  * The ABI requires callers to allocate a caller state save area of
  * this many bytes at the bottom of each stack frame.
  */
-#ifdef __tile__
-#define C_ABI_SAVE_AREA_SIZE (2 * __SIZEOF_POINTER__)
-#endif
+#define C_ABI_SAVE_AREA_SIZE (2 * (CHIP_WORD_SIZE() / 8))
 
 /**
  * The operand to an 'info' opcode directing the backtracer to not
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h
index 0c12d1b9ddf2..56d661bb010b 100644
--- a/arch/tile/include/asm/siginfo.h
+++ b/arch/tile/include/asm/siginfo.h
@@ -17,6 +17,10 @@
 
 #define __ARCH_SI_TRAPNO
 
+#ifdef __LP64__
+# define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+#endif
+
 #include <asm-generic/siginfo.h>
 
 /*
-- 
cgit v1.2.3


From 1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:40:57 -0400
Subject: arch/tile: support backtracing on TILE-Gx

This functionality was stubbed out until recently.  Now we support our
normal backtracing API on TILE-Gx as well as on TILE64/TILEPro.
This change includes a tweak to the instruction encoding caused by
adding addxli for compat mode.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/backtrace.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
index 6970bfcad549..758ca4619d50 100644
--- a/arch/tile/include/asm/backtrace.h
+++ b/arch/tile/include/asm/backtrace.h
@@ -21,7 +21,9 @@
 
 #include <arch/chip.h>
 
-#if CHIP_VA_WIDTH() > 32
+#if defined(__tile__)
+typedef unsigned long VirtualAddress;
+#elif CHIP_VA_WIDTH() > 32
 typedef unsigned long long VirtualAddress;
 #else
 typedef unsigned int VirtualAddress;
-- 
cgit v1.2.3


From c745a8a11fa1df6078bfc61fc29492ed43f71c2b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 08:52:19 -0400
Subject: arch/tile: Various cleanups.

This change rolls up random cleanups not representing any actual bugs.

- Remove a stale CONFIG_ value from the default tile_defconfig
- Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h>
- Optimize get_order() using Tile's "clz" instruction
- Fix a bad hypervisor upcall name (not currently used in Linux anyway)
- Use __copy_in_user_inatomic() name for consistency, and export it
- Export some additional hypervisor driver I/O upcalls and some homecache calls
- Remove the obfuscating MEMCPY_TEST_WH64 support code
- Other stray comment cleanups, #if 0 removal, etc.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/atomic_32.h | 37 -------------------------------------
 arch/tile/include/asm/page.h      |  6 +++++-
 arch/tile/include/asm/uaccess.h   |  4 ++--
 arch/tile/include/hv/hypervisor.h |  8 ++++----
 4 files changed, 11 insertions(+), 44 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 40a5a3a876d9..ed359aee8837 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -255,43 +255,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
 #define smp_mb__after_atomic_dec()	do { } while (0)
 #define smp_mb__after_atomic_inc()	do { } while (0)
 
-
-/*
- * Support "tns" atomic integers.  These are atomic integers that can
- * hold any value but "1".  They are more efficient than regular atomic
- * operations because the "lock" (aka acquire) step is a single "tns"
- * in the uncontended case, and the "unlock" (aka release) step is a
- * single "store" without an mf.  (However, note that on tilepro the
- * "tns" will evict the local cache line, so it's not all upside.)
- *
- * Note that you can ONLY observe the value stored in the pointer
- * using these operations; a direct read of the value may confusingly
- * return the special value "1".
- */
-
-int __tns_atomic_acquire(atomic_t *);
-void __tns_atomic_release(atomic_t *p, int v);
-
-static inline void tns_atomic_set(atomic_t *v, int i)
-{
-	__tns_atomic_acquire(v);
-	__tns_atomic_release(v, i);
-}
-
-static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, (ret == o) ? n : ret);
-	return ret;
-}
-
-static inline int tns_atomic_xchg(atomic_t *v, int n)
-{
-	int ret = __tns_atomic_acquire(v);
-	__tns_atomic_release(v, n);
-	return ret;
-}
-
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index f894a9016da6..7d90641cf18d 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -129,6 +129,11 @@ static inline u64 pmd_val(pmd_t pmd)
 
 #endif
 
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -332,7 +337,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 	(VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/getorder.h>
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ed17a80ec0ed..ef34d2caa5b1 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -389,14 +389,14 @@ static inline unsigned long __must_check copy_from_user(void *to,
  * Returns number of bytes that could not be copied.
  * On success, this will be zero.
  */
-extern unsigned long __copy_in_user_asm(
+extern unsigned long __copy_in_user_inatomic(
 	void __user *to, const void __user *from, unsigned long n);
 
 static inline unsigned long __must_check
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
-	return __copy_in_user_asm(to, from, n);
+	return __copy_in_user_inatomic(to, from, n);
 }
 
 static inline unsigned long __must_check
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 59b46dc53994..9bd303a141b2 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -532,11 +532,11 @@ void hv_disable_intr(HV_IntrMask disab_mask);
  */
 void hv_clear_intr(HV_IntrMask clear_mask);
 
-/** Assert a set of device interrupts.
+/** Raise a set of device interrupts.
  *
- * @param assert_mask Bitmap of interrupts to clear.
+ * @param raise_mask Bitmap of interrupts to raise.
  */
-void hv_assert_intr(HV_IntrMask assert_mask);
+void hv_raise_intr(HV_IntrMask raise_mask);
 
 /** Trigger a one-shot interrupt on some tile
  *
@@ -1712,7 +1712,7 @@ typedef struct
  * @param cache_control This argument allows you to specify a length of
  *        physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN).
  *        You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache.
- *        You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache.
+ *        You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache.
  *        HV_FLUSH_ALL flushes all caches.
  * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of
  *        tile indices to perform cache flush on.  The low bit of the first
-- 
cgit v1.2.3


From 947e7dc1aed0532478e10988328bfd7426e0c2bd Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 20:32:41 -0400
Subject: arch/tile: Rename the hweight() implementations to __arch_hweight()

See commit 1527bc8b928dd1399c3d3467dd47d9ede210978a.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/bitops.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 84600f3514da..6832b4be8990 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -98,26 +98,27 @@ static inline int fls64(__u64 w)
 	return (sizeof(__u64) * 8) - __builtin_clzll(w);
 }
 
-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	return __builtin_popcount(w);
 }
 
-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
 {
 	return __builtin_popcount(w & 0xffff);
 }
 
-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
 {
 	return __builtin_popcount(w & 0xff);
 }
 
-static inline unsigned long hweight64(__u64 w)
+static inline unsigned long __arch_hweight64(__u64 w)
 {
 	return __builtin_popcountll(w);
 }
 
+#include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
-- 
cgit v1.2.3


From b3ae98ab8217a8621859e1d9cbf3ee6c4c19533b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 13 Aug 2010 20:43:39 -0400
Subject: arch/tile: rename ARCH_KMALLOC_MINALIGN to ARCH_DMA_MINALIGN

See commit a6eb9fe105d5de0053b261148cee56c94b4720ca.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 arch/tile/include/asm/cache.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index f6101840c9e7..08a2815b5e4e 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,10 @@
 #define L2_CACHE_ALIGN(x)	(((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
 
 /*
- * TILE-Gx is fully coherents so we don't need to define
- * ARCH_KMALLOC_MINALIGN.
+ * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
  */
 #ifndef __tilegx__
-#define ARCH_KMALLOC_MINALIGN	L2_CACHE_BYTES
+#define ARCH_DMA_MINALIGN	L2_CACHE_BYTES
 #endif
 
 /* use the cache line size for the L2, which is where it counts */
-- 
cgit v1.2.3


From 3b3c1b9d04db2ac925818c3cff677f5353c0b559 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Sun, 15 Aug 2010 12:14:41 -0400
Subject: arch/tile: export only COMMAND_LINE_SIZE to userspace.

This fixes a failure in "make headers_check" for tile.
I hadn't realized this file was exported to userspace by default.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/setup.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index 823ddd47ff6e..7caf0f36b030 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -15,6 +15,10 @@
 #ifndef _ASM_TILE_SETUP_H
 #define _ASM_TILE_SETUP_H
 
+#define COMMAND_LINE_SIZE	2048
+
+#ifdef __KERNEL__
+
 #include <linux/pfn.h>
 #include <linux/init.h>
 
@@ -23,10 +27,10 @@
  */
 #define MAXMEM_PFN	PFN_DOWN(MAXMEM)
 
-#define COMMAND_LINE_SIZE	2048
-
 void early_panic(const char *fmt, ...);
 void warn_early_printk(void);
 void __init disable_early_printk(void);
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_TILE_SETUP_H */
-- 
cgit v1.2.3


From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Tue, 7 Sep 2010 16:16:18 -0700
Subject: compat: Make compat_alloc_user_space() incorporate the access_ok()

compat_alloc_user_space() expects the caller to independently call
access_ok() to verify the returned area.  A missing call could
introduce problems on some architectures.

This patch incorporates the access_ok() check into
compat_alloc_user_space() and also adds a sanity check on the length.
The existing compat_alloc_user_space() implementations are renamed
arch_compat_alloc_user_space() and are used as part of the
implementation of the new global function.

This patch assumes NULL will cause __get_user()/__put_user() to either
fail or access userspace on all architectures.  This should be
followed by checking the return value of compat_alloc_user_space()
for NULL in the callers, at which time the access_ok() in the callers
can also be removed.

Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <jejb@parisc-linux.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: <stable@kernel.org>
---
 arch/tile/include/asm/compat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 5a34da6cdd79..345d81ce44bb 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -195,7 +195,7 @@ static inline unsigned long ptr_to_compat_reg(void __user *uptr)
 	return (long)(int)(long __force)uptr;
 }
 
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = task_pt_regs(current);
 	return (void __user *)regs->sp - len;
-- 
cgit v1.2.3


From e6e6c46d759cd013cb57eba112a4129a3a353c4b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 15 Sep 2010 11:16:05 -0400
Subject: arch/tile: finish const-ifying sys_execve()

The sys_execve() implementation was properly const-ified but not
the declaration, the syscall wrappers, or the compat version.
This change completes the constification process.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/compat.h   |  5 +++--
 arch/tile/include/asm/syscalls.h | 21 +++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 5a34da6cdd79..070ad0a5ef1c 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -214,8 +214,9 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
 struct compat_sigaction;
 struct compat_siginfo;
 struct compat_sigaltstack;
-long compat_sys_execve(char __user *path, compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp);
+long compat_sys_execve(const char __user *path,
+		       const compat_uptr_t __user *argv,
+		       const compat_uptr_t __user *envp);
 long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
 			     struct compat_sigaction __user *oact,
 			     size_t sigsetsize);
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index af165a74537f..ce99ffefeacf 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -62,10 +62,12 @@ long sys_fork(void);
 long _sys_fork(struct pt_regs *regs);
 long sys_vfork(void);
 long _sys_vfork(struct pt_regs *regs);
-long sys_execve(char __user *filename, char __user * __user *argv,
-		char __user * __user *envp);
-long _sys_execve(char __user *filename, char __user * __user *argv,
-		 char __user * __user *envp, struct pt_regs *regs);
+long sys_execve(const char __user *filename,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp);
+long _sys_execve(const char __user *filename,
+		 const char __user *const __user *argv,
+		 const char __user *const __user *envp, struct pt_regs *regs);
 
 /* kernel/signal.c */
 long sys_sigaltstack(const stack_t __user *, stack_t __user *);
@@ -86,10 +88,13 @@ int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
 #endif
 
 #ifdef CONFIG_COMPAT
-long compat_sys_execve(char __user *path, compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp);
-long _compat_sys_execve(char __user *path, compat_uptr_t __user *argv,
-			compat_uptr_t __user *envp, struct pt_regs *regs);
+long compat_sys_execve(const char __user *path,
+		       const compat_uptr_t __user *argv,
+		       const compat_uptr_t __user *envp);
+long _compat_sys_execve(const char __user *path,
+			const compat_uptr_t __user *argv,
+			const compat_uptr_t __user *envp,
+			struct pt_regs *regs);
 long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
 			    struct compat_sigaltstack __user *uoss_ptr);
 long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-- 
cgit v1.2.3


From 74fca9da097b74117ae2cef9e5f0d9b0e28ccbb7 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 15 Sep 2010 11:16:08 -0400
Subject: arch/tile: Change struct sigcontext to be more useful

Rather than just using pt_regs, it now contains the actual saved
state explicitly, similar to pt_regs.  By doing it this way, we
provide a cleaner API for userspace (or equivalently, we avoid the
need for libc to provide its own definition of sigcontext).

While we're at it, move PT_FLAGS_xxx to where they are not visible
from userspace.  And always pass siginfo and mcontext to signal
handlers, even if they claim they don't need it, since sometimes
they actually try to use it anyway in practice.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/ptrace.h     | 15 ++++++---------
 arch/tile/include/asm/sigcontext.h | 18 +++++++++++++-----
 arch/tile/include/asm/signal.h     |  1 +
 3 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index acdae814e016..4a02bb073979 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -51,10 +51,7 @@ typedef uint_reg_t pt_reg_t;
 
 /*
  * This struct defines the way the registers are stored on the stack during a
- * system call/exception.  It should be a multiple of 8 bytes to preserve
- * normal stack alignment rules.
- *
- * Must track <sys/ucontext.h> and <sys/procfs.h>
+ * system call or exception.  "struct sigcontext" has the same shape.
  */
 struct pt_regs {
 	/* Saved main processor registers; 56..63 are special. */
@@ -80,11 +77,6 @@ struct pt_regs {
 
 #endif /* __ASSEMBLY__ */
 
-/* Flag bits in pt_regs.flags */
-#define PT_FLAGS_DISABLE_IRQ    1  /* on return to kernel, disable irqs */
-#define PT_FLAGS_CALLER_SAVES   2  /* caller-save registers are valid */
-#define PT_FLAGS_RESTORE_REGS   4  /* restore callee-save regs on return */
-
 #define PTRACE_GETREGS		12
 #define PTRACE_SETREGS		13
 #define PTRACE_GETFPREGS	14
@@ -101,6 +93,11 @@ struct pt_regs {
 
 #ifdef __KERNEL__
 
+/* Flag bits in pt_regs.flags */
+#define PT_FLAGS_DISABLE_IRQ    1  /* on return to kernel, disable irqs */
+#define PT_FLAGS_CALLER_SAVES   2  /* caller-save registers are valid */
+#define PT_FLAGS_RESTORE_REGS   4  /* restore callee-save regs on return */
+
 #ifndef __ASSEMBLY__
 
 #define instruction_pointer(regs) ((regs)->pc)
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h
index 7cd7672e3ad4..5e2d03336f53 100644
--- a/arch/tile/include/asm/sigcontext.h
+++ b/arch/tile/include/asm/sigcontext.h
@@ -15,13 +15,21 @@
 #ifndef _ASM_TILE_SIGCONTEXT_H
 #define _ASM_TILE_SIGCONTEXT_H
 
-/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
-#include <asm/ptrace.h>
-
-/* Must track <sys/ucontext.h> */
+#include <arch/abi.h>
 
+/*
+ * struct sigcontext has the same shape as struct pt_regs,
+ * but is simplified since we know the fault is from userspace.
+ */
 struct sigcontext {
-	struct pt_regs regs;
+	uint_reg_t gregs[53];	/* General-purpose registers.  */
+	uint_reg_t tp;		/* Aliases gregs[TREG_TP].  */
+	uint_reg_t sp;		/* Aliases gregs[TREG_SP].  */
+	uint_reg_t lr;		/* Aliases gregs[TREG_LR].  */
+	uint_reg_t pc;		/* Program counter.  */
+	uint_reg_t ics;		/* In Interrupt Critical Section?  */
+	uint_reg_t faultnum;	/* Fault number.  */
+	uint_reg_t pad[5];
 };
 
 #endif /* _ASM_TILE_SIGCONTEXT_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index eb0253f32202..c1ee1d61d44c 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -24,6 +24,7 @@
 #include <asm-generic/signal.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+struct pt_regs;
 int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *);
 int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
 void do_signal(struct pt_regs *regs);
-- 
cgit v1.2.3


From a802fc685426303ab627b7ad3fd5c97b5dea7e00 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 15 Sep 2010 11:16:10 -0400
Subject: arch/tile: Save and restore extra user state for tilegx

During context switch, save and restore a couple of additional bits of
tilegx user state that can be persistently modified by userspace.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/chip_tile64.h  |  3 +++
 arch/tile/include/arch/chip_tilepro.h |  3 +++
 arch/tile/include/asm/processor.h     | 12 ++++++++++++
 3 files changed, 18 insertions(+)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h
index 1246573be59e..261aaba092d4 100644
--- a/arch/tile/include/arch/chip_tile64.h
+++ b/arch/tile/include/arch/chip_tile64.h
@@ -150,6 +150,9 @@
 /** Is the PROC_STATUS SPR supported? */
 #define CHIP_HAS_PROC_STATUS_SPR() 0
 
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
 /** Log of the number of mshims we have. */
 #define CHIP_LOG_NUM_MSHIMS() 2
 
diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h
index e864c47fc89c..70017699a74c 100644
--- a/arch/tile/include/arch/chip_tilepro.h
+++ b/arch/tile/include/arch/chip_tilepro.h
@@ -150,6 +150,9 @@
 /** Is the PROC_STATUS SPR supported? */
 #define CHIP_HAS_PROC_STATUS_SPR() 1
 
+/** Is the DSTREAM_PF SPR supported? */
+#define CHIP_HAS_DSTREAM_PF() 0
+
 /** Log of the number of mshims we have. */
 #define CHIP_LOG_NUM_MSHIMS() 2
 
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index d942d09b252e..ccd5f8425688 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -103,6 +103,18 @@ struct thread_struct {
 	/* Any other miscellaneous processor state bits */
 	unsigned long proc_status;
 #endif
+#if !CHIP_HAS_FIXED_INTVEC_BASE()
+	/* Interrupt base for PL0 interrupts */
+	unsigned long interrupt_vector_base;
+#endif
+#if CHIP_HAS_TILE_RTF_HWM()
+	/* Tile cache retry fifo high-water mark */
+	unsigned long tile_rtf_hwm;
+#endif
+#if CHIP_HAS_DSTREAM_PF()
+	/* Data stream prefetch control */
+	unsigned long dstream_pf;
+#endif
 #ifdef CONFIG_HARDWALL
 	/* Is this task tied to an activated hardwall? */
 	struct hardwall_info *hardwall;
-- 
cgit v1.2.3


From 0fab59e5ddb4e0557825e9ab8e41b66b5f336941 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Wed, 15 Sep 2010 11:17:04 -0400
Subject: arch/tile: fix memcpy_fromio()/memcpy_toio() signatures

This tripped up a driver (not yet committed to git).  Fix it now.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/io.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index 8c95bef3fa45..ee43328713ab 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -164,22 +164,22 @@ static inline void _tile_writeq(u64 val, unsigned long addr)
 #define iowrite32 writel
 #define iowrite64 writeq
 
-static inline void *memcpy_fromio(void *dst, void *src, int len)
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+				 size_t len)
 {
 	int x;
 	BUG_ON((unsigned long)src & 0x3);
 	for (x = 0; x < len; x += 4)
 		*(u32 *)(dst + x) = readl(src + x);
-	return dst;
 }
 
-static inline void *memcpy_toio(void *dst, void *src, int len)
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+				size_t len)
 {
 	int x;
 	BUG_ON((unsigned long)dst & 0x3);
 	for (x = 0; x < len; x += 4)
 		writel(*(u32 *)(src + x), dst + x);
-	return dst;
 }
 
 /*
-- 
cgit v1.2.3


From df9ee29270c11dba7d0fe0b83ce47a4d8e8d2101 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Oct 2010 14:08:55 +0100
Subject: Fix IRQ flag handling naming

Fix the IRQ flag handling naming.  In linux/irqflags.h under one configuration,
it maps:

	local_irq_enable() -> raw_local_irq_enable()
	local_irq_disable() -> raw_local_irq_disable()
	local_irq_save() -> raw_local_irq_save()
	...

and under the other configuration, it maps:

	raw_local_irq_enable() -> local_irq_enable()
	raw_local_irq_disable() -> local_irq_disable()
	raw_local_irq_save() -> local_irq_save()
	...

This is quite confusing.  There should be one set of names expected of the
arch, and this should be wrapped to give another set of names that are expected
by users of this facility.

Change this to have the arch provide:

	flags = arch_local_save_flags()
	flags = arch_local_irq_save()
	arch_local_irq_restore(flags)
	arch_local_irq_disable()
	arch_local_irq_enable()
	arch_irqs_disabled_flags(flags)
	arch_irqs_disabled()
	arch_safe_halt()

Then linux/irqflags.h wraps these to provide:

	raw_local_save_flags(flags)
	raw_local_irq_save(flags)
	raw_local_irq_restore(flags)
	raw_local_irq_disable()
	raw_local_irq_enable()
	raw_irqs_disabled_flags(flags)
	raw_irqs_disabled()
	raw_safe_halt()

with type checking on the flags 'arguments', and then wraps those to provide:

	local_save_flags(flags)
	local_irq_save(flags)
	local_irq_restore(flags)
	local_irq_disable()
	local_irq_enable()
	irqs_disabled_flags(flags)
	irqs_disabled()
	safe_halt()

with tracing included if enabled.

The arch functions can now all be inline functions rather than some of them
having to be macros.

Signed-off-by: David Howells <dhowells@redhat.com> [X86, FRV, MN10300]
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> [Tile]
Signed-off-by: Michal Simek <monstr@monstr.eu> [Microblaze]
Tested-by: Catalin Marinas <catalin.marinas@arm.com> [ARM]
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com> [AVR]
Acked-by: Tony Luck <tony.luck@intel.com> [IA-64]
Acked-by: Hirokazu Takata <takata@linux-m32r.org> [M32R]
Acked-by: Greg Ungerer <gerg@uclinux.org> [M68K/M68KNOMMU]
Acked-by: Ralf Baechle <ralf@linux-mips.org> [MIPS]
Acked-by: Kyle McMartin <kyle@mcmartin.ca> [PA-RISC]
Acked-by: Paul Mackerras <paulus@samba.org> [PowerPC]
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> [S390]
Acked-by: Chen Liqin <liqin.chen@sunplusct.com> [Score]
Acked-by: Matt Fleming <matt@console-pimps.org> [SH]
Acked-by: David S. Miller <davem@davemloft.net> [Sparc]
Acked-by: Chris Zankel <chris@zankel.net> [Xtensa]
Reviewed-by: Richard Henderson <rth@twiddle.net> [Alpha]
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp> [H8300]
Cc: starvik@axis.com [CRIS]
Cc: jesper.nilsson@axis.com [CRIS]
Cc: linux-cris-kernel@axis.com
---
 arch/tile/include/asm/irqflags.h | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 45cf67c2f286..a11d4837ee4d 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -103,55 +103,57 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 #define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
 
 /* Disable interrupts. */
-#define raw_local_irq_disable() \
+#define arch_local_irq_disable() \
 	interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
 
 /* Disable all interrupts, including NMIs. */
-#define raw_local_irq_disable_all() \
+#define arch_local_irq_disable_all() \
 	interrupt_mask_set_mask(-1UL)
 
 /* Re-enable all maskable interrupts. */
-#define raw_local_irq_enable() \
+#define arch_local_irq_enable() \
 	interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
 
 /* Disable or enable interrupts based on flag argument. */
-#define raw_local_irq_restore(disabled) do { \
+#define arch_local_irq_restore(disabled) do { \
 	if (disabled) \
-		raw_local_irq_disable(); \
+		arch_local_irq_disable(); \
 	else \
-		raw_local_irq_enable(); \
+		arch_local_irq_enable(); \
 } while (0)
 
 /* Return true if "flags" argument means interrupts are disabled. */
-#define raw_irqs_disabled_flags(flags) ((flags) != 0)
+#define arch_irqs_disabled_flags(flags) ((flags) != 0)
 
 /* Return true if interrupts are currently disabled. */
-#define raw_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
+#define arch_irqs_disabled() interrupt_mask_check(INT_MEM_ERROR)
 
 /* Save whether interrupts are currently disabled. */
-#define raw_local_save_flags(flags) ((flags) = raw_irqs_disabled())
+#define arch_local_save_flags() arch_irqs_disabled()
 
 /* Save whether interrupts are currently disabled, then disable them. */
-#define raw_local_irq_save(flags) \
-	do { raw_local_save_flags(flags); raw_local_irq_disable(); } while (0)
+#define arch_local_irq_save() ({ \
+	unsigned long __flags = arch_local_save_flags(); \
+	arch_local_irq_disable(); \
+	__flags; })
 
 /* Prevent the given interrupt from being enabled next time we enable irqs. */
-#define raw_local_irq_mask(interrupt) \
+#define arch_local_irq_mask(interrupt) \
 	(__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
 
 /* Prevent the given interrupt from being enabled immediately. */
-#define raw_local_irq_mask_now(interrupt) do { \
-	raw_local_irq_mask(interrupt); \
+#define arch_local_irq_mask_now(interrupt) do { \
+	arch_local_irq_mask(interrupt); \
 	interrupt_mask_set(interrupt); \
 } while (0)
 
 /* Allow the given interrupt to be enabled next time we enable irqs. */
-#define raw_local_irq_unmask(interrupt) \
+#define arch_local_irq_unmask(interrupt) \
 	(__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
 
 /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
-#define raw_local_irq_unmask_now(interrupt) do { \
-	raw_local_irq_unmask(interrupt); \
+#define arch_local_irq_unmask_now(interrupt) do { \
+	arch_local_irq_unmask(interrupt); \
 	if (!irqs_disabled()) \
 		interrupt_mask_reset(interrupt); \
 } while (0)
-- 
cgit v1.2.3


From 708ff2a0097b02d32d375b66996661f36cd4d6d1 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 29 Sep 2010 18:08:50 +0900
Subject: bitops: make asm-generic/bitops/find.h more generic

asm-generic/bitops/find.h has the extern declarations of find_next_bit()
and find_next_zero_bit() and the macro definitions of find_first_bit()
and find_first_zero_bit(). It is only usable by architectures that
enable CONFIG_GENERIC_FIND_NEXT_BIT and disable
CONFIG_GENERIC_FIND_FIRST_BIT.

x86 and tile enable both CONFIG_GENERIC_FIND_NEXT_BIT and
CONFIG_GENERIC_FIND_FIRST_BIT. These architectures cannot include
asm-generic/bitops/find.h in their asm/bitops.h. So #ifdef'ed extern
declarations of find_first_bit() and find_first_zero_bit() are put in
linux/bitops.h.

This change makes asm-generic/bitops/find.h usable by these architectures
and makes them use it. It is also needed for the forthcoming cleanup of
duplicated extern declarations.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/bitops.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 6832b4be8990..6d4f0ff2c68c 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -120,6 +120,7 @@ static inline unsigned long __arch_hweight64(__u64 w)
 
 #include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
 #include <asm-generic/bitops/minix.h>
-- 
cgit v1.2.3


From d929b6aeaacbe78cbfef4a80e3eed1bf0464d984 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 14:34:33 -0400
Subject: arch/tile: Use <asm-generic/syscalls.h>

With this change we now include <asm-generic/syscalls.h> into the "tile"
version of the header.  To take full advantage of the prototypes there,
we also change our naming convention for "struct pt_regs *" syscalls so
that, e.g., _sys_execve() is the "true" syscall entry, which sets the
appropriate register to point to the pt_regs before calling sys_execve().

While doing this I realized I no longer needed the fork and vfork
entry point stubs, since those functions aren't in the generic
syscall ABI, so I removed them as well.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/compat.h   | 15 +++++++--
 arch/tile/include/asm/syscalls.h | 73 ++++++++++------------------------------
 2 files changed, 30 insertions(+), 58 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 8b60ec8b2d19..c3ae570c0a5d 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -216,15 +216,16 @@ struct compat_siginfo;
 struct compat_sigaltstack;
 long compat_sys_execve(const char __user *path,
 		       const compat_uptr_t __user *argv,
-		       const compat_uptr_t __user *envp);
+		       const compat_uptr_t __user *envp, struct pt_regs *);
 long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
 			     struct compat_sigaction __user *oact,
 			     size_t sigsetsize);
 long compat_sys_rt_sigqueueinfo(int pid, int sig,
 				struct compat_siginfo __user *uinfo);
-long compat_sys_rt_sigreturn(void);
+long compat_sys_rt_sigreturn(struct pt_regs *);
 long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			    struct compat_sigaltstack __user *uoss_ptr);
+			    struct compat_sigaltstack __user *uoss_ptr,
+			    struct pt_regs *);
 long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
 long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
 long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
@@ -255,4 +256,12 @@ long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 /* Tilera Linux syscalls that don't have "compat" versions. */
 #define compat_sys_flush_cache sys_flush_cache
 
+/* These are the intvec_64.S trampolines. */
+long _compat_sys_execve(const char __user *path,
+			const compat_uptr_t __user *argv,
+			const compat_uptr_t __user *envp);
+long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
+			    struct compat_sigaltstack __user *uoss_ptr);
+long _compat_sys_rt_sigreturn(void);
+
 #endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index ce99ffefeacf..3b5507c31eae 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -32,8 +32,9 @@ extern void *compat_sys_call_table[];
 
 /*
  * Note that by convention, any syscall which requires the current
- * register set takes an additional "struct pt_regs *" pointer; the
- * sys_xxx() function just adds the pointer and tail-calls to _sys_xxx().
+ * register set takes an additional "struct pt_regs *" pointer; a
+ * _sys_xxx() trampoline in intvec*.S just sets up the pointer and
+ * jumps to sys_xxx().
  */
 
 /* kernel/sys.c */
@@ -43,66 +44,17 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
 int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
 		       u32 len_lo, u32 len_hi, int advice);
 long sys_flush_cache(void);
-long sys_mmap2(unsigned long addr, unsigned long len,
-	       unsigned long prot, unsigned long flags,
-	       unsigned long fd, unsigned long pgoff);
-#ifdef __tilegx__
-long sys_mmap(unsigned long addr, unsigned long len,
-	      unsigned long prot, unsigned long flags,
-	      unsigned long fd, off_t pgoff);
+#ifndef __tilegx__  /* No mmap() in the 32-bit kernel. */
+#define sys_mmap sys_mmap
 #endif
 
-/* kernel/process.c */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid);
-long _sys_clone(unsigned long clone_flags, unsigned long newsp,
-		void __user *parent_tid, void __user *child_tid,
-		struct pt_regs *regs);
-long sys_fork(void);
-long _sys_fork(struct pt_regs *regs);
-long sys_vfork(void);
-long _sys_vfork(struct pt_regs *regs);
-long sys_execve(const char __user *filename,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp);
-long _sys_execve(const char __user *filename,
-		 const char __user *const __user *argv,
-		 const char __user *const __user *envp, struct pt_regs *regs);
-
-/* kernel/signal.c */
-long sys_sigaltstack(const stack_t __user *, stack_t __user *);
-long _sys_sigaltstack(const stack_t __user *, stack_t __user *,
-		      struct pt_regs *);
-long sys_rt_sigreturn(void);
-long _sys_rt_sigreturn(struct pt_regs *regs);
-
-/* platform-independent functions */
-long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
-long sys_rt_sigaction(int sig, const struct sigaction __user *act,
-		      struct sigaction __user *oact, size_t sigsetsize);
-
 #ifndef __tilegx__
 /* mm/fault.c */
-int sys_cmpxchg_badaddr(unsigned long address);
-int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
+long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
+long _sys_cmpxchg_badaddr(unsigned long address);
 #endif
 
 #ifdef CONFIG_COMPAT
-long compat_sys_execve(const char __user *path,
-		       const compat_uptr_t __user *argv,
-		       const compat_uptr_t __user *envp);
-long _compat_sys_execve(const char __user *path,
-			const compat_uptr_t __user *argv,
-			const compat_uptr_t __user *envp,
-			struct pt_regs *regs);
-long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			    struct compat_sigaltstack __user *uoss_ptr);
-long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			     struct compat_sigaltstack __user *uoss_ptr,
-			     struct pt_regs *regs);
-long compat_sys_rt_sigreturn(void);
-long _compat_sys_rt_sigreturn(struct pt_regs *regs);
-
 /* These four are not defined for 64-bit, but serve as "compat" syscalls. */
 long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg);
 long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
@@ -110,4 +62,15 @@ long sys_truncate64(const char __user *path, loff_t length);
 long sys_ftruncate64(unsigned int fd, loff_t length);
 #endif
 
+/* These are the intvec*.S trampolines. */
+long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
+long _sys_rt_sigreturn(void);
+long _sys_clone(unsigned long clone_flags, unsigned long newsp,
+		void __user *parent_tid, void __user *child_tid);
+long _sys_execve(const char __user *filename,
+		 const char __user *const __user *argv,
+		 const char __user *const __user *envp);
+
+#include <asm-generic/syscalls.h>
+
 #endif /* _ASM_TILE_SYSCALLS_H */
-- 
cgit v1.2.3


From bbacff94d0c38163ef01361de006797c92e69e58 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 15:09:02 -0400
Subject: arch/tile: provide a definition of MAP_STACK

It's convenient for userspace (in particular, glibc) to find a
definition of MAP_STACK.  We use MAP_GROWSDOWN as an alias since
that's appropriate for the main stack, and since our current
allocation of mmap flags bits is running a bit short otherwise.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/mman.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h
index 4c6811e3e8dc..81b8fc348d63 100644
--- a/arch/tile/include/asm/mman.h
+++ b/arch/tile/include/asm/mman.h
@@ -23,6 +23,7 @@
 #define MAP_POPULATE	0x0040		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x0080		/* do not block on IO */
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_STACK	MAP_GROWSDOWN	/* provide convenience alias */
 #define MAP_LOCKED	0x0200		/* pages are locked */
 #define MAP_NORESERVE	0x0400		/* don't check for reservations */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
-- 
cgit v1.2.3


From 13c9d5a6309763a494f7c3ed5aa45fb473985fd7 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 15:12:55 -0400
Subject: arch/tile: properly export __mb_incoherent for modules

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/system.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index f749be327ce0..96779c805902 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -89,6 +89,10 @@
 #define get_cycles_low() __insn_mfspr(SPR_CYCLE)   /* just get all 64 bits */
 #endif
 
+#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
+int __mb_incoherent(void);  /* Helper routine for mb_incoherent(). */
+#endif
+
 /* Fence to guarantee visibility of stores to incoherent memory. */
 static inline void
 mb_incoherent(void)
@@ -97,7 +101,6 @@ mb_incoherent(void)
 
 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 	{
-		int __mb_incoherent(void);
 #if CHIP_HAS_TILE_WRITE_PENDING()
 		const unsigned long WRITE_TIMEOUT_CYCLES = 400;
 		unsigned long start = get_cycles_low();
-- 
cgit v1.2.3


From 4fe938c5134fce1f25e1261eef6252fb47634962 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 15:57:59 -0400
Subject: arch/tile: Bomb C99 comments to C89 comments in tile's
 <arch/sim_def.h>

Also, sync the file up with the upstream version (an additional #define).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/sim_def.h | 548 +++++++++++++++++++--------------------
 1 file changed, 269 insertions(+), 279 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
index 6418fbde063e..7a17082c3773 100644
--- a/arch/tile/include/arch/sim_def.h
+++ b/arch/tile/include/arch/sim_def.h
@@ -1,477 +1,461 @@
-// Copyright 2010 Tilera Corporation. All Rights Reserved.
-//
-//   This program is free software; you can redistribute it and/or
-//   modify it under the terms of the GNU General Public License
-//   as published by the Free Software Foundation, version 2.
-//
-//   This program is distributed in the hope that it will be useful, but
-//   WITHOUT ANY WARRANTY; without even the implied warranty of
-//   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
-//   NON INFRINGEMENT.  See the GNU General Public License for
-//   more details.
-
-//! @file
-//!
-//! Some low-level simulator definitions.
-//!
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * Some low-level simulator definitions.
+ */
 
 #ifndef __ARCH_SIM_DEF_H__
 #define __ARCH_SIM_DEF_H__
 
 
-//! Internal: the low bits of the SIM_CONTROL_* SPR values specify
-//! the operation to perform, and the remaining bits are
-//! an operation-specific parameter (often unused).
-//!
+/**
+ * Internal: the low bits of the SIM_CONTROL_* SPR values specify
+ * the operation to perform, and the remaining bits are
+ * an operation-specific parameter (often unused).
+ */
 #define _SIM_CONTROL_OPERATOR_BITS 8
 
 
-//== Values which can be written to SPR_SIM_CONTROL.
+/*
+ * Values which can be written to SPR_SIM_CONTROL.
+ */
 
-//! If written to SPR_SIM_CONTROL, stops profiling.
-//!
+/** If written to SPR_SIM_CONTROL, stops profiling. */
 #define SIM_CONTROL_PROFILER_DISABLE 0
 
-//! If written to SPR_SIM_CONTROL, starts profiling.
-//!
+/** If written to SPR_SIM_CONTROL, starts profiling. */
 #define SIM_CONTROL_PROFILER_ENABLE 1
 
-//! If written to SPR_SIM_CONTROL, clears profiling counters.
-//!
+/** If written to SPR_SIM_CONTROL, clears profiling counters. */
 #define SIM_CONTROL_PROFILER_CLEAR 2
 
-//! If written to SPR_SIM_CONTROL, checkpoints the simulator.
-//!
+/** If written to SPR_SIM_CONTROL, checkpoints the simulator. */
 #define SIM_CONTROL_CHECKPOINT 3
 
-//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
-//! sets the tracing mask to the given mask. See "sim_set_tracing()".
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * sets the tracing mask to the given mask. See "sim_set_tracing()".
+ */
 #define SIM_CONTROL_SET_TRACING 4
 
-//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
-//! dumps the requested items of machine state to the log.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8),
+ * dumps the requested items of machine state to the log.
+ */
 #define SIM_CONTROL_DUMP 5
 
-//! If written to SPR_SIM_CONTROL, clears chip-level profiling counters.
-//!
+/** If written to SPR_SIM_CONTROL, clears chip-level profiling counters. */
 #define SIM_CONTROL_PROFILER_CHIP_CLEAR 6
 
-//! If written to SPR_SIM_CONTROL, disables chip-level profiling.
-//!
+/** If written to SPR_SIM_CONTROL, disables chip-level profiling. */
 #define SIM_CONTROL_PROFILER_CHIP_DISABLE 7
 
-//! If written to SPR_SIM_CONTROL, enables chip-level profiling.
-//!
+/** If written to SPR_SIM_CONTROL, enables chip-level profiling. */
 #define SIM_CONTROL_PROFILER_CHIP_ENABLE 8
 
-//! If written to SPR_SIM_CONTROL, enables chip-level functional mode
-//!
+/** If written to SPR_SIM_CONTROL, enables chip-level functional mode. */
 #define SIM_CONTROL_ENABLE_FUNCTIONAL 9
 
-//! If written to SPR_SIM_CONTROL, disables chip-level functional mode.
-//!
+/** If written to SPR_SIM_CONTROL, disables chip-level functional mode. */
 #define SIM_CONTROL_DISABLE_FUNCTIONAL 10
 
-//! If written to SPR_SIM_CONTROL, enables chip-level functional mode.
-//! All tiles must perform this write for functional mode to be enabled.
-//! Ignored in naked boot mode unless --functional is specified.
-//! WARNING: Only the hypervisor startup code should use this!
-//!
+/**
+ * If written to SPR_SIM_CONTROL, enables chip-level functional mode.
+ * All tiles must perform this write for functional mode to be enabled.
+ * Ignored in naked boot mode unless --functional is specified.
+ * WARNING: Only the hypervisor startup code should use this!
+ */
 #define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11
 
-//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
-//! writes a string directly to the simulator output.  Written to once for
-//! each character in the string, plus a final NUL.  Instead of NUL,
-//! you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY".
-//!
-// ISSUE: Document the meaning of "newline", and the handling of NUL.
-//
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * writes a string directly to the simulator output.  Written to once for
+ * each character in the string, plus a final NUL.  Instead of NUL,
+ * you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY".
+ */
+/* ISSUE: Document the meaning of "newline", and the handling of NUL. */
 #define SIM_CONTROL_PUTC 12
 
-//! If written to SPR_SIM_CONTROL, clears the --grind-coherence state for
-//! this core.  This is intended to be used before a loop that will
-//! invalidate the cache by loading new data and evicting all current data.
-//! Generally speaking, this API should only be used by system code.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, clears the --grind-coherence state for
+ * this core.  This is intended to be used before a loop that will
+ * invalidate the cache by loading new data and evicting all current data.
+ * Generally speaking, this API should only be used by system code.
+ */
 #define SIM_CONTROL_GRINDER_CLEAR 13
 
-//! If written to SPR_SIM_CONTROL, shuts down the simulator.
-//!
+/** If written to SPR_SIM_CONTROL, shuts down the simulator. */
 #define SIM_CONTROL_SHUTDOWN 14
 
-//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
-//! indicates that a fork syscall just created the given process.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that a fork syscall just created the given process.
+ */
 #define SIM_CONTROL_OS_FORK 15
 
-//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
-//! indicates that an exit syscall was just executed by the given process.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that an exit syscall was just executed by the given process.
+ */
 #define SIM_CONTROL_OS_EXIT 16
 
-//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
-//! indicates that the OS just switched to the given process.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * indicates that the OS just switched to the given process.
+ */
 #define SIM_CONTROL_OS_SWITCH 17
 
-//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
-//! indicates that an exec syscall was just executed. Written to once for
-//! each character in the executable name, plus a final NUL.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an exec syscall was just executed. Written to once for
+ * each character in the executable name, plus a final NUL.
+ */
 #define SIM_CONTROL_OS_EXEC 18
 
-//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
-//! indicates that an interpreter (PT_INTERP) was loaded.  Written to once
-//! for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a
-//! hex load address starting with "0x", and "PATH" is the executable name.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that an interpreter (PT_INTERP) was loaded.  Written to once
+ * for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a
+ * hex load address starting with "0x", and "PATH" is the executable name.
+ */
 #define SIM_CONTROL_OS_INTERP 19
 
-//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
-//! indicates that a dll was loaded.  Written to once for each character
-//! in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load
-//! address starting with "0x", and "PATH" is the executable name.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was loaded.  Written to once for each character
+ * in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x", and "PATH" is the executable name.
+ */
 #define SIM_CONTROL_DLOPEN 20
 
-//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
-//! indicates that a dll was unloaded.  Written to once for each character
-//! in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load
-//! address starting with "0x".
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8),
+ * indicates that a dll was unloaded.  Written to once for each character
+ * in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load
+ * address starting with "0x".
+ */
 #define SIM_CONTROL_DLCLOSE 21
 
-//! If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8),
-//! indicates whether to allow data reads to remotely-cached
-//! dirty cache lines to be cached locally without grinder warnings or
-//! assertions (used by Linux kernel fast memcpy).
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8),
+ * indicates whether to allow data reads to remotely-cached
+ * dirty cache lines to be cached locally without grinder warnings or
+ * assertions (used by Linux kernel fast memcpy).
+ */
 #define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22
 
-//! If written to SPR_SIM_CONTROL, enables memory tracing.
-//!
+/** If written to SPR_SIM_CONTROL, enables memory tracing. */
 #define SIM_CONTROL_ENABLE_MEM_LOGGING 23
 
-//! If written to SPR_SIM_CONTROL, disables memory tracing.
-//!
+/** If written to SPR_SIM_CONTROL, disables memory tracing. */
 #define SIM_CONTROL_DISABLE_MEM_LOGGING 24
 
-//! If written to SPR_SIM_CONTROL, changes the shaping parameters of one of
-//! the gbe or xgbe shims. Must specify the shim id, the type, the units, and
-//! the rate, as defined in SIM_SHAPING_SPR_ARG.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, changes the shaping parameters of one of
+ * the gbe or xgbe shims. Must specify the shim id, the type, the units, and
+ * the rate, as defined in SIM_SHAPING_SPR_ARG.
+ */
 #define SIM_CONTROL_SHAPING 25
 
-//! If written to SPR_SIM_CONTROL, combined with character (shifted by 8),
-//! requests that a simulator command be executed.  Written to once for each
-//! character in the command, plus a final NUL.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with character (shifted by 8),
+ * requests that a simulator command be executed.  Written to once for each
+ * character in the command, plus a final NUL.
+ */
 #define SIM_CONTROL_COMMAND 26
 
-//! If written to SPR_SIM_CONTROL, indicates that the simulated system
-//! is panicking, to allow debugging via --debug-on-panic.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, indicates that the simulated system
+ * is panicking, to allow debugging via --debug-on-panic.
+ */
 #define SIM_CONTROL_PANIC 27
 
-//! If written to SPR_SIM_CONTROL, triggers a simulator syscall.
-//! See "sim_syscall()" for more info.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, triggers a simulator syscall.
+ * See "sim_syscall()" for more info.
+ */
 #define SIM_CONTROL_SYSCALL 32
 
-//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
-//! provides the pid that subsequent SIM_CONTROL_OS_FORK writes should
-//! use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH.
-//!
+/**
+ * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8),
+ * provides the pid that subsequent SIM_CONTROL_OS_FORK writes should
+ * use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH.
+ */
 #define SIM_CONTROL_OS_FORK_PARENT 33
 
-//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
-//! (shifted by 8), clears the pending magic data section.  The cleared
-//! pending magic data section and any subsequently appended magic bytes
-//! will only take effect when the classifier blast programmer is run.
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), clears the pending magic data section.  The cleared
+ * pending magic data section and any subsequently appended magic bytes
+ * will only take effect when the classifier blast programmer is run.
+ */
 #define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34
 
-//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
-//! (shifted by 8) and a byte of data (shifted by 16), appends that byte
-//! to the shim's pending magic data section.  The pending magic data
-//! section takes effect when the classifier blast programmer is run.
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8) and a byte of data (shifted by 16), appends that byte
+ * to the shim's pending magic data section.  The pending magic data
+ * section takes effect when the classifier blast programmer is run.
+ */
 #define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35
 
-//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
-//! (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a
-//! mask of links (shifted by 32), enable or disable the corresponding
-//! mPIPE links.
+/**
+ * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number
+ * (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a
+ * mask of links (shifted by 32), enable or disable the corresponding
+ * mPIPE links.
+ */
 #define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36
 
-//== Syscall numbers for use with "sim_syscall()".
 
-//! Syscall number for sim_add_watchpoint().
-//!
+/*
+ * Syscall numbers for use with "sim_syscall()".
+ */
+
+/** Syscall number for sim_add_watchpoint(). */
 #define SIM_SYSCALL_ADD_WATCHPOINT 2
 
-//! Syscall number for sim_remove_watchpoint().
-//!
+/** Syscall number for sim_remove_watchpoint(). */
 #define SIM_SYSCALL_REMOVE_WATCHPOINT 3
 
-//! Syscall number for sim_query_watchpoint().
-//!
+/** Syscall number for sim_query_watchpoint(). */
 #define SIM_SYSCALL_QUERY_WATCHPOINT 4
 
-//! Syscall number that asserts that the cache lines whose 64-bit PA
-//! is passed as the second argument to sim_syscall(), and over a
-//! range passed as the third argument, are no longer in cache.
-//! The simulator raises an error if this is not the case.
-//!
+/**
+ * Syscall number that asserts that the cache lines whose 64-bit PA
+ * is passed as the second argument to sim_syscall(), and over a
+ * range passed as the third argument, are no longer in cache.
+ * The simulator raises an error if this is not the case.
+ */
 #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5
 
 
-//== Bit masks which can be shifted by 8, combined with
-//== SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL.
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL.
+ */
 
-//! @addtogroup arch_sim
-//! @{
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
 
-//! Enable --trace-cycle when passed to simulator_set_tracing().
-//!
+/** Enable --trace-cycle when passed to simulator_set_tracing(). */
 #define SIM_TRACE_CYCLES          0x01
 
-//! Enable --trace-router when passed to simulator_set_tracing().
-//!
+/** Enable --trace-router when passed to simulator_set_tracing(). */
 #define SIM_TRACE_ROUTER          0x02
 
-//! Enable --trace-register-writes when passed to simulator_set_tracing().
-//!
+/** Enable --trace-register-writes when passed to simulator_set_tracing(). */
 #define SIM_TRACE_REGISTER_WRITES 0x04
 
-//! Enable --trace-disasm when passed to simulator_set_tracing().
-//!
+/** Enable --trace-disasm when passed to simulator_set_tracing(). */
 #define SIM_TRACE_DISASM          0x08
 
-//! Enable --trace-stall-info when passed to simulator_set_tracing().
-//!
+/** Enable --trace-stall-info when passed to simulator_set_tracing(). */
 #define SIM_TRACE_STALL_INFO      0x10
 
-//! Enable --trace-memory-controller when passed to simulator_set_tracing().
-//!
+/** Enable --trace-memory-controller when passed to simulator_set_tracing(). */
 #define SIM_TRACE_MEMORY_CONTROLLER 0x20
 
-//! Enable --trace-l2 when passed to simulator_set_tracing().
-//!
+/** Enable --trace-l2 when passed to simulator_set_tracing(). */
 #define SIM_TRACE_L2_CACHE 0x40
 
-//! Enable --trace-lines when passed to simulator_set_tracing().
-//!
+/** Enable --trace-lines when passed to simulator_set_tracing(). */
 #define SIM_TRACE_LINES 0x80
 
-//! Turn off all tracing when passed to simulator_set_tracing().
-//!
+/** Turn off all tracing when passed to simulator_set_tracing(). */
 #define SIM_TRACE_NONE 0
 
-//! Turn on all tracing when passed to simulator_set_tracing().
-//!
+/** Turn on all tracing when passed to simulator_set_tracing(). */
 #define SIM_TRACE_ALL (-1)
 
-//! @}
+/** @} */
 
-//! Computes the value to write to SPR_SIM_CONTROL to set tracing flags.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to set tracing flags. */
 #define SIM_TRACE_SPR_ARG(mask) \
   (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
 
 
-//== Bit masks which can be shifted by 8, combined with
-//== SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL.
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL.
+ */
 
-//! @addtogroup arch_sim
-//! @{
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
 
-//! Dump the general-purpose registers.
-//!
+/** Dump the general-purpose registers. */
 #define SIM_DUMP_REGS          0x001
 
-//! Dump the SPRs.
-//!
+/** Dump the SPRs. */
 #define SIM_DUMP_SPRS          0x002
 
-//! Dump the ITLB.
-//!
+/** Dump the ITLB. */
 #define SIM_DUMP_ITLB          0x004
 
-//! Dump the DTLB.
-//!
+/** Dump the DTLB. */
 #define SIM_DUMP_DTLB          0x008
 
-//! Dump the L1 I-cache.
-//!
+/** Dump the L1 I-cache. */
 #define SIM_DUMP_L1I           0x010
 
-//! Dump the L1 D-cache.
-//!
+/** Dump the L1 D-cache. */
 #define SIM_DUMP_L1D           0x020
 
-//! Dump the L2 cache.
-//!
+/** Dump the L2 cache. */
 #define SIM_DUMP_L2            0x040
 
-//! Dump the switch registers.
-//!
+/** Dump the switch registers. */
 #define SIM_DUMP_SNREGS        0x080
 
-//! Dump the switch ITLB.
-//!
+/** Dump the switch ITLB. */
 #define SIM_DUMP_SNITLB        0x100
 
-//! Dump the switch L1 I-cache.
-//!
+/** Dump the switch L1 I-cache. */
 #define SIM_DUMP_SNL1I         0x200
 
-//! Dump the current backtrace.
-//!
+/** Dump the current backtrace. */
 #define SIM_DUMP_BACKTRACE     0x400
 
-//! Only dump valid lines in caches.
-//!
+/** Only dump valid lines in caches. */
 #define SIM_DUMP_VALID_LINES   0x800
 
-//! Dump everything that is dumpable.
-//!
+/** Dump everything that is dumpable. */
 #define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES)
 
-// @}
+/** @} */
 
-//! Computes the value to write to SPR_SIM_CONTROL to dump machine state.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to dump machine state. */
 #define SIM_DUMP_SPR_ARG(mask) \
   (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
 
 
-//== Bit masks which can be shifted by 8, combined with
-//== SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL.
+/*
+ * Bit masks which can be shifted by 8, combined with
+ * SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL.
+ */
 
-//! @addtogroup arch_sim
-//! @{
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
 
-//! Use with with SIM_PROFILER_CHIP_xxx to control the memory controllers.
-//!
+/** Use with SIM_PROFILER_CHIP_xxx to control the memory controllers. */
 #define SIM_CHIP_MEMCTL        0x001
 
-//! Use with with SIM_PROFILER_CHIP_xxx to control the XAUI interface.
-//!
+/** Use with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */
 #define SIM_CHIP_XAUI          0x002
 
-//! Use with with SIM_PROFILER_CHIP_xxx to control the PCIe interface.
-//!
+/** Use with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */
 #define SIM_CHIP_PCIE          0x004
 
-//! Use with with SIM_PROFILER_CHIP_xxx to control the MPIPE interface.
-//!
+/** Use with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */
 #define SIM_CHIP_MPIPE         0x008
 
-//! Reference all chip devices.
-//!
+/** Use with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */
+#define SIM_CHIP_TRIO          0x010
+
+/** Reference all chip devices. */
 #define SIM_CHIP_ALL (-1)
 
-//! @}
+/** @} */
 
-//! Computes the value to write to SPR_SIM_CONTROL to clear chip statistics.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to clear chip statistics. */
 #define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \
   (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
 
-//! Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.*/
 #define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \
   (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
 
-//! Computes the value to write to SPR_SIM_CONTROL to enable chip statistics.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to enable chip statistics. */
 #define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \
   (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS))
 
 
-// Shim bitrate controls.
+/* Shim bitrate controls. */
 
-//! The number of bits used to store the shim id.
-//!
+/** The number of bits used to store the shim id. */
 #define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3
 
-//! @addtogroup arch_sim
-//! @{
+/**
+ * @addtogroup arch_sim
+ * @{
+ */
 
-//! Change the gbe 0 bitrate.
-//!
+/** Change the gbe 0 bitrate. */
 #define SIM_CONTROL_SHAPING_GBE_0 0x0
 
-//! Change the gbe 1 bitrate.
-//!
+/** Change the gbe 1 bitrate. */
 #define SIM_CONTROL_SHAPING_GBE_1 0x1
 
-//! Change the gbe 2 bitrate.
-//!
+/** Change the gbe 2 bitrate. */
 #define SIM_CONTROL_SHAPING_GBE_2 0x2
 
-//! Change the gbe 3 bitrate.
-//!
+/** Change the gbe 3 bitrate. */
 #define SIM_CONTROL_SHAPING_GBE_3 0x3
 
-//! Change the xgbe 0 bitrate.
-//!
+/** Change the xgbe 0 bitrate. */
 #define SIM_CONTROL_SHAPING_XGBE_0 0x4
 
-//! Change the xgbe 1 bitrate.
-//!
+/** Change the xgbe 1 bitrate. */
 #define SIM_CONTROL_SHAPING_XGBE_1 0x5
 
-//! The type of shaping to do.
-//!
+/** The type of shaping to do. */
 #define SIM_CONTROL_SHAPING_TYPE_BITS 2
 
-//! Control the multiplier.
-//!
+/** Control the multiplier. */
 #define SIM_CONTROL_SHAPING_MULTIPLIER 0
 
-//! Control the PPS.
-//!
+/** Control the PPS. */
 #define SIM_CONTROL_SHAPING_PPS 1
 
-//! Control the BPS.
-//!
+/** Control the BPS. */
 #define SIM_CONTROL_SHAPING_BPS 2
 
-//! The number of bits for the units for the shaping parameter.
-//!
+/** The number of bits for the units for the shaping parameter. */
 #define SIM_CONTROL_SHAPING_UNITS_BITS 2
 
-//! Provide a number in single units.
-//!
+/** Provide a number in single units. */
 #define SIM_CONTROL_SHAPING_UNITS_SINGLE 0
 
-//! Provide a number in kilo units.
-//!
+/** Provide a number in kilo units. */
 #define SIM_CONTROL_SHAPING_UNITS_KILO 1
 
-//! Provide a number in mega units.
-//!
+/** Provide a number in mega units. */
 #define SIM_CONTROL_SHAPING_UNITS_MEGA 2
 
-//! Provide a number in giga units.
-//!
+/** Provide a number in giga units. */
 #define SIM_CONTROL_SHAPING_UNITS_GIGA 3
 
-// @}
+/** @} */
 
-//! How many bits are available for the rate.
-//!
+/** How many bits are available for the rate. */
 #define SIM_CONTROL_SHAPING_RATE_BITS \
   (32 - (_SIM_CONTROL_OPERATOR_BITS + \
          SIM_CONTROL_SHAPING_SHIM_ID_BITS + \
          SIM_CONTROL_SHAPING_TYPE_BITS + \
          SIM_CONTROL_SHAPING_UNITS_BITS))
 
-//! Computes the value to write to SPR_SIM_CONTROL to change a bitrate.
-//!
+/** Computes the value to write to SPR_SIM_CONTROL to change a bitrate. */
 #define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \
   (SIM_CONTROL_SHAPING | \
    ((shim) | \
@@ -483,30 +467,36 @@
                SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS)
 
 
-//== Values returned when reading SPR_SIM_CONTROL.
-// ISSUE: These names should share a longer common prefix.
+/*
+ * Values returned when reading SPR_SIM_CONTROL.
+ * ISSUE: These names should share a longer common prefix.
+ */
 
-//! When reading SPR_SIM_CONTROL, the mask of simulator tracing bits
-//! (SIM_TRACE_xxx values).
-//!
+/**
+ * When reading SPR_SIM_CONTROL, the mask of simulator tracing bits
+ * (SIM_TRACE_xxx values).
+ */
 #define SIM_TRACE_FLAG_MASK 0xFFFF
 
-//! When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled.
-//!
+/** When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. */
 #define SIM_PROFILER_ENABLED_MASK 0x10000
 
 
-//== Special arguments for "SIM_CONTROL_PUTC".
+/*
+ * Special arguments for "SIM_CONTROL_PUTC".
+ */
 
-//! Flag value for forcing a PUTC string-flush, including
-//! coordinate/cycle prefix and newline.
-//!
+/**
+ * Flag value for forcing a PUTC string-flush, including
+ * coordinate/cycle prefix and newline.
+ */
 #define SIM_PUTC_FLUSH_STRING 0x100
 
-//! Flag value for forcing a PUTC binary-data-flush, which skips the
-//! prefix and does not append a newline.
-//!
+/**
+ * Flag value for forcing a PUTC binary-data-flush, which skips the
+ * prefix and does not append a newline.
+ */
 #define SIM_PUTC_FLUSH_BINARY 0x101
 
 
-#endif //__ARCH_SIM_DEF_H__
+#endif /* __ARCH_SIM_DEF_H__ */
-- 
cgit v1.2.3


From bf65e440e8248f22b2eacf8d47961bb9d52260f7 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 16:00:11 -0400
Subject: arch/tile: add Tilera's <arch/sim.h> header as an open-source header

This change adds one of the Tilera standard <arch> headers to the set
of headers shipped with Linux.  The <arch/sim.h> header provides
methods for programmatically interacting with the Tilera simulator.

The current <arch/sim.h> provides inline assembly for the _sim_syscall
function, so the declaration and definition previously provided
manually in Linux are no longer needed.  We now use the standard
sim_validate_lines_evicted() method from <arch/sim.h> rather than
rolling our own direct call to sim_syscall().

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/sim.h   | 619 +++++++++++++++++++++++++++++++++++++++++
 arch/tile/include/asm/system.h |   7 -
 2 files changed, 619 insertions(+), 7 deletions(-)
 create mode 100644 arch/tile/include/arch/sim.h

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
new file mode 100644
index 000000000000..74b7c1624d34
--- /dev/null
+++ b/arch/tile/include/arch/sim.h
@@ -0,0 +1,619 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file
+ *
+ * Provides an API for controlling the simulator at runtime.
+ */
+
+/**
+ * @addtogroup arch_sim
+ * @{
+ *
+ * An API for controlling the simulator at runtime.
+ *
+ * The simulator's behavior can be modified while it is running.
+ * For example, human-readable trace output can be enabled and disabled
+ * around code of interest.
+ *
+ * There are two ways to modify simulator behavior:
+ * programmatically, by calling various sim_* functions, and
+ * interactively, by entering commands like "sim set functional true"
+ * at the tile-monitor prompt.  Typing "sim help" at that prompt provides
+ * a list of interactive commands.
+ *
+ * All interactive commands can also be executed programmatically by
+ * passing a string to the sim_command function.
+ */
+
+#ifndef __ARCH_SIM_H__
+#define __ARCH_SIM_H__
+
+#include <arch/sim_def.h>
+#include <arch/abi.h>
+
+#ifndef __ASSEMBLER__
+
+#include <arch/spr_def.h>
+
+
+/**
+ * Return true if the current program is running under a simulator,
+ * rather than on real hardware.  If running on hardware, other "sim_xxx()"
+ * calls have no useful effect.
+ */
+static __inline int
+sim_is_simulator(void)
+{
+  return __insn_mfspr(SPR_SIM_CONTROL) != 0;
+}
+
+
+/**
+ * Checkpoint the simulator state to a checkpoint file.
+ *
+ * The checkpoint file name is either the default or the name specified
+ * on the command line with "--checkpoint-file".
+ */
+static __inline void
+sim_checkpoint(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_CHECKPOINT);
+}
+
+
+/**
+ * Report whether or not various kinds of simulator tracing are enabled.
+ *
+ * @return The bitwise OR of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info)
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller)
+ * SIM_TRACE_L2_CACHE (--trace-l2)
+ * SIM_TRACE_LINES (--trace-lines)
+ */
+static __inline unsigned int
+sim_get_tracing(void)
+{
+  return __insn_mfspr(SPR_SIM_CONTROL) & SIM_TRACE_FLAG_MASK;
+}
+
+
+/**
+ * Turn on or off different kinds of simulator tracing.
+ *
+ * @param mask Either one of these special values:
+ *
+ * SIM_TRACE_NONE (turns off tracing),
+ * SIM_TRACE_ALL (turns on all possible tracing).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_TRACE_CYCLES (--trace-cycles),
+ * SIM_TRACE_ROUTER (--trace-router),
+ * SIM_TRACE_REGISTER_WRITES (--trace-register-writes),
+ * SIM_TRACE_DISASM (--trace-disasm),
+ * SIM_TRACE_STALL_INFO (--trace-stall-info)
+ * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller)
+ * SIM_TRACE_L2_CACHE (--trace-l2)
+ * SIM_TRACE_LINES (--trace-lines)
+ */
+static __inline void
+sim_set_tracing(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_TRACE_SPR_ARG(mask));
+}
+
+
+/**
+ * Request dumping of different kinds of simulator state.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_DUMP_ALL (dump all known state)
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_DUMP_REGS (the register file),
+ * SIM_DUMP_SPRS (the SPRs),
+ * SIM_DUMP_ITLB (the iTLB),
+ * SIM_DUMP_DTLB (the dTLB),
+ * SIM_DUMP_L1I (the L1 I-cache),
+ * SIM_DUMP_L1D (the L1 D-cache),
+ * SIM_DUMP_L2 (the L2 cache),
+ * SIM_DUMP_SNREGS (the switch register file),
+ * SIM_DUMP_SNITLB (the switch iTLB),
+ * SIM_DUMP_SNL1I (the switch L1 I-cache),
+ * SIM_DUMP_BACKTRACE (the current backtrace)
+ */
+static __inline void
+sim_dump(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_DUMP_SPR_ARG(mask));
+}
+
+
+/**
+ * Print a string to the simulator stdout.  Each character is sent as an
+ * unsigned byte so a negative char cannot sign-extend past the character.
+ * @param str The string to be written; a newline is automatically added.
+ */
+static __inline void
+sim_print_string(const char* str)
+{
+  int i;
+  for (i = 0; str[i] != 0; i++)
+  {
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 ((unsigned char)str[i] << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Execute a simulator command string.
+ *
+ * Type 'sim help' at the tile-monitor prompt to learn what commands
+ * are available.  Note the use of the tile-monitor "sim" command to
+ * pass commands to the simulator.
+ *
+ * The argument to sim_command() does not include the leading "sim"
+ * prefix used at the tile-monitor prompt; for example, you might call
+ * sim_command("trace disasm").
+ */
+static __inline void
+sim_command(const char* str)
+{
+  int c;
+  do
+  {
+    c = (unsigned char)*str++;  /* Never shift a negative (signed) char. */
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_COMMAND |
+                 (c << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  while (c);  /* The terminating NUL is written too, then the loop ends. */
+}
+
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * The underlying implementation of "_sim_syscall()".
+ *
+ * We use extra "and" instructions to ensure that all the values
+ * we are passing to the simulator are actually valid in the registers
+ * (i.e. returned from memory) prior to the SIM_CONTROL spr.
+ */
+static __inline int _sim_syscall0(int val)
+{
+  long result;
+  __asm__ __volatile__ ("mtspr SIM_CONTROL, r0"
+                        : "=R00" (result) : "R00" (val));
+  return result;
+}
+
+static __inline int _sim_syscall1(int val, long arg1)
+{
+  long result;
+  __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result) : "R00" (val), "R01" (arg1));
+  return result;
+}
+
+static __inline int _sim_syscall2(int val, long arg1, long arg2)
+{
+  long result;
+  __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2));
+  return result;
+}
+
+/* Note that _sim_syscall3() and higher are technically at risk of
+   receiving an interrupt right before the mtspr bundle, in which case
+   the register values for arguments 3 and up may still be in flight
+   to the core from a stack frame reload. */
+
+static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3)
+{
+  long result;
+  __asm__ __volatile__ ("{ and zero, r3, r3 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3));
+  return result;
+}
+
+static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3,
+                                  long arg4)
+{
+  long result;
+  __asm__ __volatile__ ("{ and zero, r3, r4 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3), "R04" (arg4));
+  return result;
+}
+
+static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
+                                  long arg4, long arg5)
+{
+  long result;
+  __asm__ __volatile__ ("{ and zero, r3, r4; and zero, r5, r5 };"
+                        "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (result)
+                        : "R00" (val), "R01" (arg1), "R02" (arg2),
+                          "R03" (arg3), "R04" (arg4), "R05" (arg5));
+  return result;
+}
+
+
+/**
+ * Make a special syscall to the simulator itself, if running under
+ * simulation. This is used as the implementation of other functions
+ * and should not be used outside this file.
+ *
+ * @param syscall_num The simulator syscall number.
+ * @param nr The number of additional arguments provided (0 to 5).
+ * @param args The additional arguments; may be omitted when nr is 0.
+ * @return Varies by syscall.
+ */
+#define _sim_syscall(syscall_num, nr, args...) \
+  _sim_syscall##nr(SIM_CONTROL_SYSCALL | \
+    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS), ##args)
+
+
+/* Values for the "access_mask" parameters below. */
+#define SIM_WATCHPOINT_READ    1
+#define SIM_WATCHPOINT_WRITE   2
+#define SIM_WATCHPOINT_EXECUTE 4
+
+
+static __inline int
+sim_add_watchpoint(unsigned int process_id,
+                   unsigned long address,
+                   unsigned long size,
+                   unsigned int access_mask,
+                   unsigned long user_data)
+{
+  return _sim_syscall(SIM_SYSCALL_ADD_WATCHPOINT, 5, process_id,
+                     address, size, access_mask, user_data);
+}
+
+
+static __inline int
+sim_remove_watchpoint(unsigned int process_id,
+                      unsigned long address,
+                      unsigned long size,
+                      unsigned int access_mask,
+                      unsigned long user_data)
+{
+  return _sim_syscall(SIM_SYSCALL_REMOVE_WATCHPOINT, 5, process_id,
+                     address, size, access_mask, user_data);
+}
+
+
+/**
+ * Return value from sim_query_watchpoint.
+ */
+struct SimQueryWatchpointStatus
+{
+  /**
+   * 0 if a watchpoint fired, 1 if no watchpoint fired, or -1 for
+   * error (meaning a bad process_id).
+   */
+  int syscall_status;
+
+  /**
+   * The address of the watchpoint that fired (this is the address
+   * passed to sim_add_watchpoint, not an address within that range
+   * that actually triggered the watchpoint).
+   */
+  unsigned long address;
+
+  /** The arbitrary user_data installed by sim_add_watchpoint. */
+  unsigned long user_data;
+};
+
+
+static __inline struct SimQueryWatchpointStatus
+sim_query_watchpoint(unsigned int process_id)
+{
+  struct SimQueryWatchpointStatus status;
+  long val = SIM_CONTROL_SYSCALL |
+    (SIM_SYSCALL_QUERY_WATCHPOINT << _SIM_CONTROL_OPERATOR_BITS);
+  __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
+                        : "=R00" (status.syscall_status),
+                          "=R01" (status.address),
+                          "=R02" (status.user_data)
+                        : "R00" (val), "R01" (process_id));
+  return status;
+}
+
+
+/* On the simulator, confirm lines have been evicted everywhere. */
+static __inline void
+sim_validate_lines_evicted(unsigned long long pa, unsigned long length)
+{
+#ifdef __LP64__
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 2, pa, length);
+#else
+  _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 4,
+               0 /* dummy */, (long)(pa), (long)(pa >> 32), length);
+#endif
+}
+
+
+#endif /* !__DOXYGEN__ */
+
+
+
+
+/**
+ * Modify the shaping parameters of a shim.
+ *
+ * @param shim The shim to modify. One of:
+ *   SIM_CONTROL_SHAPING_GBE_0
+ *   SIM_CONTROL_SHAPING_GBE_1
+ *   SIM_CONTROL_SHAPING_GBE_2
+ *   SIM_CONTROL_SHAPING_GBE_3
+ *   SIM_CONTROL_SHAPING_XGBE_0
+ *   SIM_CONTROL_SHAPING_XGBE_1
+ *
+ * @param type The type of shaping. This should be the same type of
+ * shaping that is already in place on the shim. One of:
+ *   SIM_CONTROL_SHAPING_MULTIPLIER
+ *   SIM_CONTROL_SHAPING_PPS
+ *   SIM_CONTROL_SHAPING_BPS
+ *
+ * @param units The magnitude of the rate. One of:
+ *   SIM_CONTROL_SHAPING_UNITS_SINGLE
+ *   SIM_CONTROL_SHAPING_UNITS_KILO
+ *   SIM_CONTROL_SHAPING_UNITS_MEGA
+ *   SIM_CONTROL_SHAPING_UNITS_GIGA
+ *
+ * @param rate The rate to which to change it. This must fit in
+ * SIM_CONTROL_SHAPING_RATE_BITS bits or a warning is issued and
+ * the shaping is not changed.
+ *
+ * @return 0 if no problems were detected in the arguments to sim_set_shaping
+ * or 1 if problems were detected (for example, rate does not fit in 17 bits).
+ */
+static __inline int
+sim_set_shaping(unsigned shim,
+                unsigned type,
+                unsigned units,
+                unsigned rate)
+{
+  if ((rate & ~((1 << SIM_CONTROL_SHAPING_RATE_BITS) - 1)) != 0)
+    return 1;
+
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_SHAPING_SPR_ARG(shim, type, units, rate));
+  return 0;
+}
+
+#ifdef __tilegx__
+
+/** Enable a set of mPIPE links.  Pass a -1 link_mask to enable all links. */
+static __inline void
+sim_enable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL,
+               (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+                (mpipe << 8) | (1 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+/** Disable a set of mPIPE links.  Pass a -1 link_mask to disable all links. */
+static __inline void
+sim_disable_mpipe_links(unsigned mpipe, unsigned long link_mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL,
+               (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE |
+                (mpipe << 8) | (0 << 16) | ((uint_reg_t)link_mask << 32)));
+}
+
+#endif /* __tilegx__ */
+
+
+/*
+ * An API for changing "functional" mode.
+ */
+
+#ifndef __DOXYGEN__
+
+#define sim_enable_functional() \
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_ENABLE_FUNCTIONAL)
+
+#define sim_disable_functional() \
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_DISABLE_FUNCTIONAL)
+
+#endif /* !__DOXYGEN__ */
+
+
+/*
+ * Profiler support.
+ */
+
+/**
+ * Turn profiling on for the current task.
+ *
+ * Note that this has no effect if run in an environment without
+ * profiling support (thus, the proper flags to the simulator must
+ * be supplied).
+ */
+static __inline void
+sim_profiler_enable(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE);
+}
+
+
+/** Turn profiling off for the current task. */
+static __inline void
+sim_profiler_disable(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE);
+}
+
+
+/**
+ * Turn profiling on or off for the current task.
+ *
+ * @param enabled If true, turns on profiling. If false, turns it off.
+ *
+ * Note that this has no effect if run in an environment without
+ * profiling support (thus, the proper flags to the simulator must
+ * be supplied).
+ */
+static __inline void
+sim_profiler_set_enabled(int enabled)
+{
+  int val =
+    enabled ? SIM_CONTROL_PROFILER_ENABLE : SIM_CONTROL_PROFILER_DISABLE;
+  __insn_mtspr(SPR_SIM_CONTROL, val);
+}
+
+
+/**
+ * Return true if and only if profiling is currently enabled
+ * for the current task.
+ *
+ * This returns false even if sim_profiler_enable() was called
+ * if the current execution environment does not support profiling.
+ */
+static __inline int
+sim_profiler_is_enabled(void)
+{
+  return ((__insn_mfspr(SPR_SIM_CONTROL) & SIM_PROFILER_ENABLED_MASK) != 0);
+}
+
+
+/**
+ * Reset profiling counters to zero for the current task.
+ *
+ * Resetting can be done while profiling is enabled.  It does not affect
+ * the chip-wide profiling counters.
+ */
+static __inline void
+sim_profiler_clear(void)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_CLEAR);
+}
+
+
+/**
+ * Enable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (enables all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (enable all memory controllers)
+ * SIM_CHIP_XAUI (enable all XAUI controllers)
+ * SIM_CHIP_MPIPE (enable all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_enable(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Disable specified chip-level profiling counters.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (disables all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (disable all memory controllers)
+ * SIM_CHIP_XAUI (disable all XAUI controllers)
+ * SIM_CHIP_MPIPE (disable all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_disable(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask));
+}
+
+
+/**
+ * Reset specified chip-level profiling counters to zero.
+ *
+ * Does not affect the per-task profiling counters.
+ *
+ * @param mask Either this special value:
+ *
+ * SIM_CHIP_ALL (clears all chip-level components).
+ *
+ * or the bitwise OR of these values:
+ *
+ * SIM_CHIP_MEMCTL (clear all memory controllers)
+ * SIM_CHIP_XAUI (clear all XAUI controllers)
+ * SIM_CHIP_MPIPE (clear all MPIPE controllers)
+ */
+static __inline void
+sim_profiler_chip_clear(unsigned int mask)
+{
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask));
+}
+
+
+/*
+ * Event support.
+ */
+
+#ifndef __DOXYGEN__
+
+static __inline void
+sim_event_begin(unsigned int x)
+{
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+  __insn_mtspr(SPR_EVENT_BEGIN, x);
+#endif
+}
+
+static __inline void
+sim_event_end(unsigned int x)
+{
+#if defined(__tile__) && !defined(__NO_EVENT_SPR__)
+  __insn_mtspr(SPR_EVENT_END, x);
+#endif
+}
+
+#endif /* !__DOXYGEN__ */
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !__ARCH_SIM_H__ */
+
+/** @} */
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index 96779c805902..fb7ff9574d76 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -217,13 +217,6 @@ int hardwall_deactivate(struct task_struct *task);
 } while (0)
 #endif
 
-/* Invoke the simulator "syscall" mechanism (see arch/tile/kernel/entry.S). */
-extern int _sim_syscall(int syscall_num, ...);
-#define sim_syscall(syscall_num, ...) \
-	_sim_syscall(SIM_CONTROL_SYSCALL + \
-		((syscall_num) << _SIM_CONTROL_OPERATOR_BITS), \
-		## __VA_ARGS__)
-
 /*
  * Kernel threads can check to see if they need to migrate their
  * stack whenever they return from a context switch; for user
-- 
cgit v1.2.3


From a78c942df64ef4cf495fd4d8715e48501bd7f8a4 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 16:23:03 -0400
Subject: arch/tile: parameterize system PLs to support KVM port

While not a port to KVM (yet), this change modifies the kernel
to be able to build either at PL1 or at PL2 with a suitable
config switch.  Pushing up this change avoids handling branch
merge issues going forward with the KVM work.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/spr_def.h    | 85 +++++++++++++++++++++++++++++++++++++
 arch/tile/include/arch/spr_def_32.h | 39 +++++++++++++++++
 arch/tile/include/asm/irqflags.h    | 64 ++++++++++++++--------------
 arch/tile/include/asm/page.h        | 27 +++++++-----
 arch/tile/include/asm/processor.h   | 11 +++--
 arch/tile/include/asm/ptrace.h      |  4 +-
 arch/tile/include/asm/system.h      |  2 +-
 arch/tile/include/hv/hypervisor.h   | 30 ++++++-------
 8 files changed, 197 insertions(+), 65 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
index c8fdbd9a45e6..442fcba0d122 100644
--- a/arch/tile/include/arch/spr_def.h
+++ b/arch/tile/include/arch/spr_def.h
@@ -12,8 +12,93 @@
  *   more details.
  */
 
+/*
+ * In addition to including the proper base SPR definition file, depending
+ * on machine architecture, this file defines several macros which allow
+ * kernel code to use protection-level dependent SPRs without worrying
+ * about which PL it's running at.  In these macros, the PL that the SPR
+ * or interrupt number applies to is replaced by K.
+ */
+
+#if CONFIG_KERNEL_PL != 1 && CONFIG_KERNEL_PL != 2
+#error CONFIG_KERNEL_PL must be 1 or 2
+#endif
+
+/* Concatenate 4 strings. */
+#define __concat4(a, b, c, d) a ## b ## c ## d
+#define _concat4(a, b, c, d)  __concat4(a, b, c, d)
+
 #ifdef __tilegx__
 #include <arch/spr_def_64.h>
+
+/* TILE-Gx dependent, protection-level dependent SPRs. */
+
+#define SPR_INTERRUPT_MASK_K \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_SET_K \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_MASK_RESET_K \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_INTERRUPT_VECTOR_BASE_K \
+	_concat4(SPR_INTERRUPT_VECTOR_BASE_, CONFIG_KERNEL_PL,,)
+
+#define SPR_IPI_MASK_K \
+	_concat4(SPR_IPI_MASK_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_RESET_K \
+	_concat4(SPR_IPI_MASK_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_SET_K \
+	_concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_K \
+	_concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_RESET_K \
+	_concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_MASK_SET_K \
+	_concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define INT_IPI_K \
+	_concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
+
+#define SPR_SINGLE_STEP_CONTROL_K \
+	_concat4(SPR_SINGLE_STEP_CONTROL_, CONFIG_KERNEL_PL,,)
+#define SPR_SINGLE_STEP_EN_K_K \
+	_concat4(SPR_SINGLE_STEP_EN_, CONFIG_KERNEL_PL, _, CONFIG_KERNEL_PL)
+#define INT_SINGLE_STEP_K \
+	_concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,)
+
 #else
 #include <arch/spr_def_32.h>
+
+/* TILEPro dependent, protection-level dependent SPRs. */
+
+#define SPR_INTERRUPT_MASK_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_SET_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_SET_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTERRUPT_MASK_RESET_K_0 \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _0,)
+#define SPR_INTERRUPT_MASK_RESET_K_1 \
+	_concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _1,)
+
 #endif
+
+/* Generic protection-level dependent SPRs. */
+
+#define SPR_SYSTEM_SAVE_K_0 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _0,)
+#define SPR_SYSTEM_SAVE_K_1 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _1,)
+#define SPR_SYSTEM_SAVE_K_2 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _2,)
+#define SPR_SYSTEM_SAVE_K_3 \
+	_concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _3,)
+#define SPR_EX_CONTEXT_K_0 \
+	_concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _0,)
+#define SPR_EX_CONTEXT_K_1 \
+	_concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _1,)
+#define SPR_INTCTRL_K_STATUS \
+	_concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,)
+#define INT_INTCTRL_K \
+	_concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,)
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h
index b4fc06864df6..bbc1f4c924ee 100644
--- a/arch/tile/include/arch/spr_def_32.h
+++ b/arch/tile/include/arch/spr_def_32.h
@@ -56,58 +56,93 @@
 #define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2
 #define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1
 #define SPR_EX_CONTEXT_1_1__ICS_MASK  0x4
+#define SPR_EX_CONTEXT_2_0 0x4605
+#define SPR_EX_CONTEXT_2_1 0x4606
+#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0
+#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3
+#define SPR_EX_CONTEXT_2_1__PL_MASK  0x3
+#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2
+#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
+#define SPR_EX_CONTEXT_2_1__ICS_MASK  0x4
 #define SPR_FAIL 0x4e09
 #define SPR_INTCTRL_0_STATUS 0x4a07
 #define SPR_INTCTRL_1_STATUS 0x4807
+#define SPR_INTCTRL_2_STATUS 0x4607
 #define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a
 #define SPR_INTERRUPT_MASK_0_0 0x4a08
 #define SPR_INTERRUPT_MASK_0_1 0x4a09
 #define SPR_INTERRUPT_MASK_1_0 0x4809
 #define SPR_INTERRUPT_MASK_1_1 0x480a
+#define SPR_INTERRUPT_MASK_2_0 0x4608
+#define SPR_INTERRUPT_MASK_2_1 0x4609
 #define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a
 #define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b
 #define SPR_INTERRUPT_MASK_RESET_1_0 0x480b
 #define SPR_INTERRUPT_MASK_RESET_1_1 0x480c
+#define SPR_INTERRUPT_MASK_RESET_2_0 0x460a
+#define SPR_INTERRUPT_MASK_RESET_2_1 0x460b
 #define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c
 #define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d
 #define SPR_INTERRUPT_MASK_SET_1_0 0x480d
 #define SPR_INTERRUPT_MASK_SET_1_1 0x480e
+#define SPR_INTERRUPT_MASK_SET_2_0 0x460c
+#define SPR_INTERRUPT_MASK_SET_2_1 0x460d
 #define SPR_MPL_DMA_CPL_SET_0 0x5800
 #define SPR_MPL_DMA_CPL_SET_1 0x5801
+#define SPR_MPL_DMA_CPL_SET_2 0x5802
 #define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
 #define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
+#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802
 #define SPR_MPL_INTCTRL_0_SET_0 0x4a00
 #define SPR_MPL_INTCTRL_0_SET_1 0x4a01
+#define SPR_MPL_INTCTRL_0_SET_2 0x4a02
 #define SPR_MPL_INTCTRL_1_SET_0 0x4800
 #define SPR_MPL_INTCTRL_1_SET_1 0x4801
+#define SPR_MPL_INTCTRL_1_SET_2 0x4802
+#define SPR_MPL_INTCTRL_2_SET_0 0x4600
+#define SPR_MPL_INTCTRL_2_SET_1 0x4601
+#define SPR_MPL_INTCTRL_2_SET_2 0x4602
 #define SPR_MPL_SN_ACCESS_SET_0 0x0800
 #define SPR_MPL_SN_ACCESS_SET_1 0x0801
+#define SPR_MPL_SN_ACCESS_SET_2 0x0802
 #define SPR_MPL_SN_CPL_SET_0 0x5a00
 #define SPR_MPL_SN_CPL_SET_1 0x5a01
+#define SPR_MPL_SN_CPL_SET_2 0x5a02
 #define SPR_MPL_SN_FIREWALL_SET_0 0x2c00
 #define SPR_MPL_SN_FIREWALL_SET_1 0x2c01
+#define SPR_MPL_SN_FIREWALL_SET_2 0x2c02
 #define SPR_MPL_SN_NOTIFY_SET_0 0x2a00
 #define SPR_MPL_SN_NOTIFY_SET_1 0x2a01
+#define SPR_MPL_SN_NOTIFY_SET_2 0x2a02
 #define SPR_MPL_UDN_ACCESS_SET_0 0x0c00
 #define SPR_MPL_UDN_ACCESS_SET_1 0x0c01
+#define SPR_MPL_UDN_ACCESS_SET_2 0x0c02
 #define SPR_MPL_UDN_AVAIL_SET_0 0x4000
 #define SPR_MPL_UDN_AVAIL_SET_1 0x4001
+#define SPR_MPL_UDN_AVAIL_SET_2 0x4002
 #define SPR_MPL_UDN_CA_SET_0 0x3c00
 #define SPR_MPL_UDN_CA_SET_1 0x3c01
+#define SPR_MPL_UDN_CA_SET_2 0x3c02
 #define SPR_MPL_UDN_COMPLETE_SET_0 0x1400
 #define SPR_MPL_UDN_COMPLETE_SET_1 0x1401
+#define SPR_MPL_UDN_COMPLETE_SET_2 0x1402
 #define SPR_MPL_UDN_FIREWALL_SET_0 0x3000
 #define SPR_MPL_UDN_FIREWALL_SET_1 0x3001
+#define SPR_MPL_UDN_FIREWALL_SET_2 0x3002
 #define SPR_MPL_UDN_REFILL_SET_0 0x1000
 #define SPR_MPL_UDN_REFILL_SET_1 0x1001
+#define SPR_MPL_UDN_REFILL_SET_2 0x1002
 #define SPR_MPL_UDN_TIMER_SET_0 0x3600
 #define SPR_MPL_UDN_TIMER_SET_1 0x3601
+#define SPR_MPL_UDN_TIMER_SET_2 0x3602
 #define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00
 #define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01
+#define SPR_MPL_WORLD_ACCESS_SET_2 0x4e02
 #define SPR_PASS 0x4e0b
 #define SPR_PERF_COUNT_0 0x4205
 #define SPR_PERF_COUNT_1 0x4206
 #define SPR_PERF_COUNT_CTL 0x4207
+#define SPR_PERF_COUNT_DN_CTL 0x4210
 #define SPR_PERF_COUNT_STS 0x4208
 #define SPR_PROC_STATUS 0x4f00
 #define SPR_SIM_CONTROL 0x4e0c
@@ -124,6 +159,10 @@
 #define SPR_SYSTEM_SAVE_1_1 0x4901
 #define SPR_SYSTEM_SAVE_1_2 0x4902
 #define SPR_SYSTEM_SAVE_1_3 0x4903
+#define SPR_SYSTEM_SAVE_2_0 0x4700
+#define SPR_SYSTEM_SAVE_2_1 0x4701
+#define SPR_SYSTEM_SAVE_2_2 0x4702
+#define SPR_SYSTEM_SAVE_2_3 0x4703
 #define SPR_TILE_COORD 0x4c17
 #define SPR_TILE_RTF_HWM 0x4e10
 #define SPR_TILE_TIMER_CONTROL 0x3205
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 45cf67c2f286..6ebdd7d1e67a 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -47,53 +47,53 @@
 	int __n = (n); \
 	int __mask = 1 << (__n & 0x1f); \
 	if (__n < 32) \
-		__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, __mask); \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \
 	else \
-		__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, __mask); \
+		__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \
 } while (0)
 #define interrupt_mask_reset(n) do { \
 	int __n = (n); \
 	int __mask = 1 << (__n & 0x1f); \
 	if (__n < 32) \
-		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, __mask); \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \
 	else \
-		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, __mask); \
+		__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \
 } while (0)
 #define interrupt_mask_check(n) ({ \
 	int __n = (n); \
 	(((__n < 32) ? \
-	 __insn_mfspr(SPR_INTERRUPT_MASK_1_0) : \
-	 __insn_mfspr(SPR_INTERRUPT_MASK_1_1)) \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \
+	 __insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \
 	  >> (__n & 0x1f)) & 1; \
 })
 #define interrupt_mask_set_mask(mask) do { \
 	unsigned long long __m = (mask); \
-	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, (unsigned long)(__m)); \
-	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, (unsigned long)(__m>>32)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \
 } while (0)
 #define interrupt_mask_reset_mask(mask) do { \
 	unsigned long long __m = (mask); \
-	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, (unsigned long)(__m)); \
-	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, (unsigned long)(__m>>32)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
 } while (0)
 #else
 #define interrupt_mask_set(n) \
-	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (1UL << (n)))
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
 #define interrupt_mask_reset(n) \
-	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (1UL << (n)))
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n)))
 #define interrupt_mask_check(n) \
-	((__insn_mfspr(SPR_INTERRUPT_MASK_1) >> (n)) & 1)
+	((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1)
 #define interrupt_mask_set_mask(mask) \
-	__insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (mask))
+	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
 #define interrupt_mask_reset_mask(mask) \
-	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (mask))
+	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
 #endif
 
 /*
  * The set of interrupts we want active if irqs are enabled.
  * Note that in particular, the tile timer interrupt comes and goes
  * from this set, since we have no other way to turn off the timer.
- * Likewise, INTCTRL_1 is removed and re-added during device
+ * Likewise, INTCTRL_K is removed and re-added during device
  * interrupts, as is the the hardwall UDN_FIREWALL interrupt.
  * We use a low bit (MEM_ERROR) as our sentinel value and make sure it
  * is always claimed as an "active interrupt" so we can query that bit
@@ -168,14 +168,14 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Return 0 or 1 to indicate whether interrupts are currently disabled. */
 #define IRQS_DISABLED(tmp)					\
-	mfspr   tmp, INTERRUPT_MASK_1;				\
+	mfspr   tmp, SPR_INTERRUPT_MASK_K;			\
 	andi    tmp, tmp, 1
 
 /* Load up a pointer to &interrupts_enabled_mask. */
 #define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
-	moveli reg, hw2_last(interrupts_enabled_mask); \
-	shl16insli reg, reg, hw1(interrupts_enabled_mask); \
-	shl16insli reg, reg, hw0(interrupts_enabled_mask); \
+	moveli reg, hw2_last(interrupts_enabled_mask);		\
+	shl16insli reg, reg, hw1(interrupts_enabled_mask);	\
+	shl16insli reg, reg, hw0(interrupts_enabled_mask);	\
 	add     reg, reg, tp
 
 /* Disable interrupts. */
@@ -183,18 +183,18 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 	moveli  tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS);	\
 	shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS);	\
 	shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS);	\
-	mtspr   INTERRUPT_MASK_SET_1, tmp0
+	mtspr   SPR_INTERRUPT_MASK_SET_K, tmp0
 
 /* Disable ALL synchronous interrupts (used by NMI entry). */
 #define IRQ_DISABLE_ALL(tmp)					\
 	movei   tmp, -1;					\
-	mtspr   INTERRUPT_MASK_SET_1, tmp
+	mtspr   SPR_INTERRUPT_MASK_SET_K, tmp
 
 /* Enable interrupts. */
 #define IRQ_ENABLE(tmp0, tmp1)					\
 	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0);			\
 	ld      tmp0, tmp0;					\
-	mtspr   INTERRUPT_MASK_RESET_1, tmp0
+	mtspr   SPR_INTERRUPT_MASK_RESET_K, tmp0
 
 #else /* !__tilegx__ */
 
@@ -208,14 +208,14 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
  * (making the original code's write of the "high" mask word idempotent).
  */
 #define IRQS_DISABLED(tmp)					\
-	mfspr   tmp, INTERRUPT_MASK_1_0;			\
+	mfspr   tmp, SPR_INTERRUPT_MASK_K_0;			\
 	shri    tmp, tmp, INT_MEM_ERROR;			\
 	andi    tmp, tmp, 1
 
 /* Load up a pointer to &interrupts_enabled_mask. */
 #define GET_INTERRUPTS_ENABLED_MASK_PTR(reg)			\
-	moveli  reg, lo16(interrupts_enabled_mask);	\
-	auli    reg, reg, ha16(interrupts_enabled_mask);\
+	moveli  reg, lo16(interrupts_enabled_mask);		\
+	auli    reg, reg, ha16(interrupts_enabled_mask);	\
 	add     reg, reg, tp
 
 /* Disable interrupts. */
@@ -225,16 +225,16 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS)		\
 	};							\
 	{							\
-	 mtspr  INTERRUPT_MASK_SET_1_0, tmp0;			\
+	 mtspr  SPR_INTERRUPT_MASK_SET_K_0, tmp0;		\
 	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS)	\
 	};							\
-	mtspr   INTERRUPT_MASK_SET_1_1, tmp1
+	mtspr   SPR_INTERRUPT_MASK_SET_K_1, tmp1
 
 /* Disable ALL synchronous interrupts (used by NMI entry). */
 #define IRQ_DISABLE_ALL(tmp)					\
 	movei   tmp, -1;					\
-	mtspr   INTERRUPT_MASK_SET_1_0, tmp;			\
-	mtspr   INTERRUPT_MASK_SET_1_1, tmp
+	mtspr   SPR_INTERRUPT_MASK_SET_K_0, tmp;		\
+	mtspr   SPR_INTERRUPT_MASK_SET_K_1, tmp
 
 /* Enable interrupts. */
 #define IRQ_ENABLE(tmp0, tmp1)					\
@@ -244,8 +244,8 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 	 addi   tmp1, tmp0, 4					\
 	};							\
 	lw      tmp1, tmp1;					\
-	mtspr   INTERRUPT_MASK_RESET_1_0, tmp0;			\
-	mtspr   INTERRUPT_MASK_RESET_1_1, tmp1
+	mtspr   SPR_INTERRUPT_MASK_RESET_K_0, tmp0;		\
+	mtspr   SPR_INTERRUPT_MASK_RESET_K_1, tmp1
 #endif
 
 /*
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7d90641cf18d..7979a45430d3 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -199,17 +199,17 @@ static inline __attribute_const__ int get_order(unsigned long size)
  * If you want more physical memory than this then see the CONFIG_HIGHMEM
  * option in the kernel configuration.
  *
- * The top two 16MB chunks in the table below (VIRT and HV) are
- * unavailable to Linux.  Since the kernel interrupt vectors must live
- * at 0xfd000000, we map all of the bottom of RAM at this address with
- * a huge page table entry to minimize its ITLB footprint (as well as
- * at PAGE_OFFSET).  The last architected requirement is that user
- * interrupt vectors live at 0xfc000000, so we make that range of
- * memory available to user processes.  The remaining regions are sized
- * as shown; after the first four addresses, we show "typical" values,
- * since the actual addresses depend on kernel #defines.
+ * The top 16MB chunk in the table below is unavailable to Linux.  Since
+ * the kernel interrupt vectors must live at either 0xfe000000 or 0xfd000000
+ * (depending on whether the kernel is at PL2 or PL1), we map all of the
+ * bottom of RAM at this address with a huge page table entry to minimize
+ * its ITLB footprint (as well as at PAGE_OFFSET).  The last architected
+ * requirement is that user interrupt vectors live at 0xfc000000, so we
+ * make that range of memory available to user processes.  The remaining
+ * regions are sized as shown; the first four addresses use the PL1
+ * values, and after that, we show "typical" values, since the actual
+ * addresses depend on kernel #defines.
  *
- * MEM_VIRT_INTRPT                 0xff000000
  * MEM_HV_INTRPT                   0xfe000000
  * MEM_SV_INTRPT (kernel code)     0xfd000000
  * MEM_USER_INTRPT (user vector)   0xfc000000
@@ -221,9 +221,14 @@ static inline __attribute_const__ int get_order(unsigned long size)
  */
 
 #define MEM_USER_INTRPT		_AC(0xfc000000, UL)
+#if CONFIG_KERNEL_PL == 1
 #define MEM_SV_INTRPT		_AC(0xfd000000, UL)
 #define MEM_HV_INTRPT		_AC(0xfe000000, UL)
-#define MEM_VIRT_INTRPT		_AC(0xff000000, UL)
+#else
+#define MEM_GUEST_INTRPT	_AC(0xfd000000, UL)
+#define MEM_SV_INTRPT		_AC(0xfe000000, UL)
+#define MEM_HV_INTRPT		_AC(0xff000000, UL)
+#endif
 
 #define INTRPT_SIZE		0x4000
 
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index ccd5f8425688..1747ff3946b2 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -328,18 +328,21 @@ extern int kdata_huge;
  * Note that assembly code assumes that USER_PL is zero.
  */
 #define USER_PL 0
-#define KERNEL_PL 1
+#if CONFIG_KERNEL_PL == 2
+#define GUEST_PL 1
+#endif
+#define KERNEL_PL CONFIG_KERNEL_PL
 
-/* SYSTEM_SAVE_1_0 holds the current cpu number ORed with ksp0. */
+/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
 #define CPU_LOG_MASK_VALUE 12
 #define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
 #if CONFIG_NR_CPUS > CPU_MASK_VALUE
 # error Too many cpus!
 #endif
 #define raw_smp_processor_id() \
-	((int)__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & CPU_MASK_VALUE)
+	((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
 #define get_current_ksp0() \
-	(__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & ~CPU_MASK_VALUE)
+	(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
 #define next_current_ksp0(task) ({ \
 	unsigned long __ksp0 = task_ksp0(task); \
 	int __cpu = raw_smp_processor_id(); \
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index 4a02bb073979..ac6d343129d3 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -62,8 +62,8 @@ struct pt_regs {
 	pt_reg_t lr;		/* aliases regs[TREG_LR] */
 
 	/* Saved special registers. */
-	pt_reg_t pc;		/* stored in EX_CONTEXT_1_0 */
-	pt_reg_t ex1;		/* stored in EX_CONTEXT_1_1 (PL and ICS bit) */
+	pt_reg_t pc;		/* stored in EX_CONTEXT_K_0 */
+	pt_reg_t ex1;		/* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
 	pt_reg_t faultnum;	/* fault number (INT_SWINT_1 for syscall) */
 	pt_reg_t orig_r0;	/* r0 at syscall entry, else zero */
 	pt_reg_t flags;		/* flags (see below) */
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index fb7ff9574d76..5388850deeb2 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -164,7 +164,7 @@ extern struct task_struct *_switch_to(struct task_struct *prev,
 /* Helper function for _switch_to(). */
 extern struct task_struct *__switch_to(struct task_struct *prev,
 				       struct task_struct *next,
-				       unsigned long new_system_save_1_0);
+				       unsigned long new_system_save_k_0);
 
 /* Address that switched-away from tasks are at. */
 extern unsigned long get_switch_to_pc(void);
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 9bd303a141b2..f672544cd4f9 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -1003,37 +1003,37 @@ int hv_console_write(HV_VirtAddr bytes, int len);
  *  when these occur in a client's interrupt critical section, they must
  *  be delivered through the downcall mechanism.
  *
- *  A downcall is initially delivered to the client as an INTCTRL_1
- *  interrupt.  Upon entry to the INTCTRL_1 vector, the client must
- *  immediately invoke the hv_downcall_dispatch service.  This service
- *  will not return; instead it will cause one of the client's actual
- *  downcall-handling interrupt vectors to be entered.  The EX_CONTEXT
- *  registers in the client will be set so that when the client irets,
- *  it will return to the code which was interrupted by the INTCTRL_1
- *  interrupt.
- *
- *  Under some circumstances, the firing of INTCTRL_1 can race with
+ *  A downcall is initially delivered to the client as an INTCTRL_CL
+ *  interrupt, where CL is the client's PL.  Upon entry to the INTCTRL_CL
+ *  vector, the client must immediately invoke the hv_downcall_dispatch
+ *  service.  This service will not return; instead it will cause one of
+ *  the client's actual downcall-handling interrupt vectors to be entered.
+ *  The EX_CONTEXT registers in the client will be set so that when the
+ *  client irets, it will return to the code which was interrupted by the
+ *  INTCTRL_CL interrupt.
+ *
+ *  Under some circumstances, the firing of INTCTRL_CL can race with
  *  the lowering of a device interrupt.  In such a case, the
  *  hv_downcall_dispatch service may issue an iret instruction instead
  *  of entering one of the client's actual downcall-handling interrupt
  *  vectors.  This will return execution to the location that was
- *  interrupted by INTCTRL_1.
+ *  interrupted by INTCTRL_CL.
  *
  *  Any saving of registers should be done by the actual handling
- *  vectors; no registers should be changed by the INTCTRL_1 handler.
+ *  vectors; no registers should be changed by the INTCTRL_CL handler.
  *  In particular, the client should not use a jal instruction to invoke
  *  the hv_downcall_dispatch service, as that would overwrite the client's
  *  lr register.  Note that the hv_downcall_dispatch service may overwrite
  *  one or more of the client's system save registers.
  *
- *  The client must not modify the INTCTRL_1_STATUS SPR.  The hypervisor
+ *  The client must not modify the INTCTRL_CL_STATUS SPR.  The hypervisor
  *  will set this register to cause a downcall to happen, and will clear
  *  it when no further downcalls are pending.
  *
- *  When a downcall vector is entered, the INTCTRL_1 interrupt will be
+ *  When a downcall vector is entered, the INTCTRL_CL interrupt will be
  *  masked.  When the client is done processing a downcall, and is ready
  *  to accept another, it must unmask this interrupt; if more downcalls
- *  are pending, this will cause the INTCTRL_1 vector to be reentered.
+ *  are pending, this will cause the INTCTRL_CL vector to be reentered.
  *  Currently the following interrupt vectors can be entered through a
  *  downcall:
  *
-- 
cgit v1.2.3


From 233325b94999d4bb8df227bb39904a57509e4995 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 16:32:41 -0400
Subject: arch/tile: enable single-step support for TILE-Gx

This is not quite the complete support, since we're not yet shipping
intvec_64.S, but it is the support relevant to the set of files we are
currently shipping, and makes it easier to track changes between
our internal sources and our public GIT repository.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/traps.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 432a9c15c8a2..d06e35f57201 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -59,4 +59,8 @@ void do_hardwall_trap(struct pt_regs *, int fault_num);
 void do_breakpoint(struct pt_regs *, int fault_num);
 
 
+#ifdef __tilegx__
+void gx_singlestep_handle(struct pt_regs *, int fault_num);
+#endif
+
 #endif /* _ASM_TILE_SYSCALLS_H */
-- 
cgit v1.2.3


From c569cac8b69397d8bc80f95bc6edf13ed902e28b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 14 Oct 2010 16:46:22 -0400
Subject: arch/tile: support new info op generated by compiler

This just syncs the backtracing support in the kernel to the
upstream backtrace library.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/backtrace.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h
index 758ca4619d50..f18887d82399 100644
--- a/arch/tile/include/asm/backtrace.h
+++ b/arch/tile/include/asm/backtrace.h
@@ -146,7 +146,10 @@ enum {
 
 	CALLER_SP_IN_R52_BASE = 4,
 
-	CALLER_SP_OFFSET_BASE = 8
+	CALLER_SP_OFFSET_BASE = 8,
+
+	/* Marks the entry point of certain functions. */
+	ENTRY_POINT_INFO_OP = 16
 };
 
 
-- 
cgit v1.2.3


From 3e4d3af501cccdc8a8cca41bdbe57d54ad7e7e73 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Oct 2010 14:21:51 -0700
Subject: mm: stack based kmap_atomic()

Keep the current interface but ignore the KM_type and use a stack based
approach.

The advantage is that we get rid of crappy code like:

	#define __KM_PTE			\
		(in_nmi() ? KM_NMI_PTE : 	\
		 in_irq() ? KM_IRQ_PTE :	\
		 KM_PTE0)

and in general can stop worrying about what context we're in and what kmap
slots might be appropriate for that.

The downside is that FRV kmap_atomic() gets more expensive.

For now we use a CPP trick suggested by Andrew:

  #define kmap_atomic(page, args...) __kmap_atomic(page)

to avoid having to touch all kmap_atomic() users in a single patch.

[ not compiled on:
  - mn10300: the arch doesn't actually build with highmem to begin with ]

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix up drivers/gpu/drm/i915/intel_overlay.c]
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Airlie <airlied@linux.ie>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/include/asm/highmem.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index d155db6fa9bd..e0f7ee186721 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -60,12 +60,12 @@ void *kmap_fix_kpte(struct page *page, int finished);
 /* This macro is used only in map_new_virtual() to map "page". */
 #define kmap_prot page_to_kpgprot(page)
 
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+void *__kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
-void *kmap_atomic(struct page *page, enum km_type type);
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
 void kmap_atomic_fix_kpte(struct page *page, int finished);
 
 #define flush_cache_kmaps()	do { } while (0)
-- 
cgit v1.2.3


From ece0e2b6406a995c371e0311190631ea34ad851a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Oct 2010 14:21:52 -0700
Subject: mm: remove pte_*map_nested()

Since we no longer need to provide KM_type, the whole pte_*map_nested()
API is now redundant, remove it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/include/asm/pgtable.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index b3367379d537..dc4ccdd855bc 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -347,15 +347,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 extern pte_t *_pte_offset_map(pmd_t *, unsigned long address, enum km_type);
 #define pte_offset_map(dir, address) \
 	_pte_offset_map(dir, address, KM_PTE0)
-#define pte_offset_map_nested(dir, address) \
-	_pte_offset_map(dir, address, KM_PTE1)
 #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
-#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
 #else
 #define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
-#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
 #define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
 #endif
 
 /* Clear a non-executable kernel PTE and flush it from the TLB. */
-- 
cgit v1.2.3


From 38a6f4266989c4dae68eccb1a5cb4580a48003e4 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Mon, 1 Nov 2010 15:21:35 -0400
Subject: arch/tile: complete migration to new kmap_atomic scheme

This change makes KM_TYPE_NR independent of the actual deprecated
list of km_type values, which are no longer used in tile code anywhere.
For now we leave it set to 8, allowing that many nested mappings,
and thus reserving 32MB of address space.

A few remaining places using KM_* values were cleaned up as well.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/highmem.h    |  1 -
 arch/tile/include/asm/kmap_types.h | 34 ++++++++++++++++++++++++----------
 arch/tile/include/asm/pgtable.h    |  6 ++----
 3 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index e0f7ee186721..b2a6c5de79ab 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -23,7 +23,6 @@
 
 #include <linux/interrupt.h>
 #include <linux/threads.h>
-#include <asm/kmap_types.h>
 #include <asm/tlbflush.h>
 #include <asm/homecache.h>
 
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
index 1480106d1c05..3d0f20246260 100644
--- a/arch/tile/include/asm/kmap_types.h
+++ b/arch/tile/include/asm/kmap_types.h
@@ -16,28 +16,42 @@
 #define _ASM_TILE_KMAP_TYPES_H
 
 /*
- * In TILE Linux each set of four of these uses another 16MB chunk of
- * address space, given 64 tiles and 64KB pages, so we only enable
- * ones that are required by the kernel configuration.
+ * In 32-bit TILE Linux we have to balance the desire to have a lot of
+ * nested atomic mappings with the fact that large page sizes and many
+ * processors chew up address space quickly.  In a typical
+ * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger
+ * adds 4MB of required address-space.  For now we leave KM_TYPE_NR
+ * set to depth 8.
  */
 enum km_type {
+	KM_TYPE_NR = 8
+};
+
+/*
+ * We provide dummy definitions of all the stray values that used to be
+ * required for kmap_atomic() and no longer are.
+ */
+enum {
 	KM_BOUNCE_READ,
 	KM_SKB_SUNRPC_DATA,
 	KM_SKB_DATA_SOFTIRQ,
 	KM_USER0,
 	KM_USER1,
 	KM_BIO_SRC_IRQ,
+	KM_BIO_DST_IRQ,
+	KM_PTE0,
+	KM_PTE1,
 	KM_IRQ0,
 	KM_IRQ1,
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
-	KM_MEMCPY0,
-	KM_MEMCPY1,
-#if defined(CONFIG_HIGHPTE)
-	KM_PTE0,
-	KM_PTE1,
-#endif
-	KM_TYPE_NR
+	KM_SYNC_ICACHE,
+	KM_SYNC_DCACHE,
+	KM_UML_USERCOPY,
+	KM_IRQ_PTE,
+	KM_NMI,
+	KM_NMI_PTE,
+	KM_KDB
 };
 
 #endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index dc4ccdd855bc..a6604e9485da 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -344,10 +344,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 #if defined(CONFIG_HIGHPTE)
-extern pte_t *_pte_offset_map(pmd_t *, unsigned long address, enum km_type);
-#define pte_offset_map(dir, address) \
-	_pte_offset_map(dir, address, KM_PTE0)
-#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
+#define pte_unmap(pte) kunmap_atomic(pte)
 #else
 #define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
 #define pte_unmap(pte) do { } while (0)
-- 
cgit v1.2.3


From 2c7387ef9969bb073c25ecbdcc5be30770267b16 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 28 Oct 2010 16:07:07 -0400
Subject: asm-generic/stat.h: support 64-bit file time_t for stat()

The existing asm-generic/stat.h specifies st_mtime, etc., as a 32-bit value,
and works well for 32-bit architectures (currently microblaze, score,
and 32-bit tile).  However, for 64-bit architectures it isn't sufficient
to return 32 bits of time_t; this isn't good insurance against the 2038
rollover.  (It also makes glibc support less convenient, since we can't
use glibc's handy STAT_IS_KERNEL_STAT mode.)

This change extends the two "timespec" fields for each of the three atime,
mtime, and ctime fields from "int" to "long".  As a result, on 32-bit
platforms nothing changes, and 64-bit platforms will now work as expected.

The only wrinkle is 32-bit userspace under 64-bit kernels taking advantage
of COMPAT mode.  For these, we leave the "struct stat64" definitions with
the "int" versions of the time_t and nsec fields, so that architectures
can implement compat_sys_stat64() and friends with sys_stat64(), etc.,
and get the expected 32-bit structure layout.  This requires a
field-by-field copy in the kernel, implemented by the code guarded
under __ARCH_WANT_STAT64.

This does mean that the shape of the "struct stat" and "struct stat64"
structures is different on a 64-bit kernel, but only one of the two
structures should ever be used by any given process: "struct stat"
is meant for 64-bit userspace only, and "struct stat64" for 32-bit
userspace only.  (On a 32-bit kernel the two structures continue to have
the same shape, since "long" is 32 bits.)

The alternative is keeping the two structures the same shape on 64-bit
kernels, which means a 64-bit time_t in "struct stat64" for 32-bit
processes.  This is a little unnatural since 32-bit userspace can't
do anything with 64 bits of time_t information, since time_t is just
"long", not "int64_t"; and in any case 32-bit userspace might expect
to be running under a 32-bit kernel, which can't provide the high 32
bits anyway.  In the case of a 32-bit kernel we'd then be extending the
kernel's 32-bit time_t to 64 bits, then truncating it back to 32 bits
again in userspace, for no particular reason.  And, as mentioned above,
if we have 64-bit time_t for 32-bit processes we can't easily use glibc's
STAT_IS_KERNEL_STAT, since glibc's stat structure requires an embedded
"struct timespec", which is a pair of "long" (32-bit) values in a 32-bit
userspace.  "Inventive" solutions are possible, but are pretty hacky.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/tile/include/asm/stat.h   | 3 +++
 arch/tile/include/asm/unistd.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h
index 3dc90fa92c70..b16e5db8f0e7 100644
--- a/arch/tile/include/asm/stat.h
+++ b/arch/tile/include/asm/stat.h
@@ -1 +1,4 @@
+#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_STAT64	/* Used for compat_sys_stat64() etc. */
+#endif
 #include <asm-generic/stat.h>
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index f2e3ff485333..b35c2db71199 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -41,6 +41,7 @@ __SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
 #ifdef CONFIG_COMPAT
 #define __ARCH_WANT_SYS_LLSEEK
 #endif
+#define __ARCH_WANT_SYS_NEWFSTATAT
 #endif
 
 #endif /* _ASM_TILE_UNISTD_H */
-- 
cgit v1.2.3


From e5a06939736277c54a68ae275433db55b99d187c Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Mon, 1 Nov 2010 17:00:37 -0400
Subject: drivers/net/tile/: on-chip network drivers for the tile architecture

This change adds the first network driver for the tile architecture,
supporting the on-chip XGBE and GBE shims.

The infrastructure is present for the TILE-Gx networking drivers (another
three source files in the new directory) but for now the actual
tilegx sources are waiting on releasing hardware to initial customers.

Note that arch/tile/include/hv/* are "upstream" headers from the
Tilera hypervisor and will probably benefit less from LKML review.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/cacheflush.h   |   52 +
 arch/tile/include/asm/processor.h    |   10 +
 arch/tile/include/hv/drv_xgbe_impl.h |  300 ++++
 arch/tile/include/hv/drv_xgbe_intf.h |  615 +++++++
 arch/tile/include/hv/netio_errors.h  |  122 ++
 arch/tile/include/hv/netio_intf.h    | 2975 ++++++++++++++++++++++++++++++++++
 6 files changed, 4074 insertions(+)
 create mode 100644 arch/tile/include/hv/drv_xgbe_impl.h
 create mode 100644 arch/tile/include/hv/drv_xgbe_intf.h
 create mode 100644 arch/tile/include/hv/netio_errors.h
 create mode 100644 arch/tile/include/hv/netio_intf.h

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index c5741da4eeac..14a3f8556ace 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size)
 	mb_incoherent();
 }
 
+/*
+ * Flush & invalidate a VA range that is homed remotely on a single core,
+ * waiting until the memory controller holds the flushed values.
+ */
+static inline void finv_buffer_remote(void *buffer, size_t size)
+{
+	char *p;
+	int i;
+
+	/*
+	 * Flush and invalidate the buffer out of the local L1/L2
+	 * and request the home cache to flush and invalidate as well.
+	 */
+	__finv_buffer(buffer, size);
+
+	/*
+	 * Wait for the home cache to acknowledge that it has processed
+	 * all the flush-and-invalidate requests.  This does not mean
+	 * that the flushed data has reached the memory controller yet,
+	 * but it does mean the home cache is processing the flushes.
+	 */
+	__insn_mf();
+
+	/*
+	 * Issue a load to the last cache line, which can't complete
+	 * until all the previously-issued flushes to the same memory
+	 * controller have also completed.  If we weren't striping
+	 * memory, that one load would be sufficient, but since we may
+	 * be, we also need to back up to the last load issued to
+	 * another memory controller, which would be the point where
+	 * we crossed an 8KB boundary (the granularity of striping
+	 * across memory controllers).  Keep backing up and doing this
+	 * until we are before the beginning of the buffer, or have
+	 * hit all the controllers.
+	 */
+	for (i = 0, p = (char *)buffer + size - 1;
+	     i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
+	     ++i) {
+		const unsigned long STRIPE_WIDTH = 8192;
+
+		/* Force a load instruction to issue. */
+		*(volatile char *)p;
+
+		/* Jump to end of previous stripe. */
+		p -= STRIPE_WIDTH;
+		p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
+	}
+
+	/* Wait for the loads (and thus flushes) to have completed. */
+	__insn_mf();
+}
+
 #endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 1747ff3946b2..a9e7c8760334 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -292,8 +292,18 @@ extern int kstack_hash;
 /* Are we using huge pages in the TLB for kernel data? */
 extern int kdata_huge;
 
+/* Support standard Linux prefetching. */
+#define ARCH_HAS_PREFETCH
+#define prefetch(x) __builtin_prefetch(x)
 #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
 
+/* Bring a value into the L1D, faulting the TLB if necessary. */
+#ifdef __tilegx__
+#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
+#else
+#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
+#endif
+
 #else /* __ASSEMBLY__ */
 
 /* Do some slow action (e.g. read a slow SPR). */
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h
new file mode 100644
index 000000000000..3a73b2b44913
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_impl.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drivers/xgbe/impl.h
+ * Implementation details for the NetIO library.
+ */
+
+#ifndef __DRV_XGBE_IMPL_H__
+#define __DRV_XGBE_IMPL_H__
+
+#include <hv/netio_errors.h>
+#include <hv/netio_intf.h>
+#include <hv/drv_xgbe_intf.h>
+
+
+/** How many groups we have (log2). */
+#define LOG2_NUM_GROUPS (12)
+/** How many groups we have. */
+#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
+
+/** Number of output requests we'll buffer per tile. */
+#define EPP_REQS_PER_TILE (32)
+
+/** Words used in an eDMA command without checksum acceleration. */
+#define EDMA_WDS_NO_CSUM      8
+/** Words used in an eDMA command with checksum acceleration. */
+#define EDMA_WDS_CSUM        10
+/** Total available words in the eDMA command FIFO. */
+#define EDMA_WDS_TOTAL      128
+
+
+/*
+ * FIXME: These definitions are internal and should have underscores!
+ * NOTE: The actual numeric values here are intentional and allow us to
+ * optimize the concept "if small ... else if large ... else ...", by
+ * checking for the low bit being set, and then for non-zero.
+ * These are used as array indices, so they must have the values (0, 1, 2)
+ * in some order.
+ */
+#define SIZE_SMALL (1)       /**< Small packet queue. */
+#define SIZE_LARGE (2)       /**< Large packet queue. */
+#define SIZE_JUMBO (0)       /**< Jumbo packet queue. */
+
+/** The number of "SIZE_xxx" values. */
+#define NETIO_NUM_SIZES 3
+
+
+/*
+ * Default numbers of packets for IPP drivers.  These values are chosen
+ * such that CIPP1 will not overflow its L2 cache.
+ */
+
+/** The default number of small packets. */
+#define NETIO_DEFAULT_SMALL_PACKETS 2750
+/** The default number of large packets. */
+#define NETIO_DEFAULT_LARGE_PACKETS 2500
+/** The default number of jumbo packets. */
+#define NETIO_DEFAULT_JUMBO_PACKETS 250
+
+
+/** Log2 of the size of a memory arena. */
+#define NETIO_ARENA_SHIFT      24      /* 16 MB */
+/** Size of a memory arena. */
+#define NETIO_ARENA_SIZE       (1 << NETIO_ARENA_SHIFT)
+
+
+/** A queue of packets.
+ *
+ * This structure partially defines a queue of packets waiting to be
+ * processed.  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other part of the queue state, the read offset, is
+ * kept in user space, not in hypervisor space, so it is in a separate data
+ * structure.
+ *
+ * The read offset (__packet_receive_read in the user part of the queue
+ * structure) points to the next packet to be read. When the read offset is
+ * equal to the write offset, the queue is empty; therefore the queue must
+ * contain one more slot than the required maximum queue size.
+ *
+ * Here's an example of all 3 state variables and what they mean.  All
+ * pointers move left to right.
+ *
+ * @code
+ *   I   I   V   V   V   V   I   I   I   I
+ *   0   1   2   3   4   5   6   7   8   9  10
+ *           ^               ^               ^
+ *           |               |               |
+ *           |               |               __last_packet_plus_one
+ *           |               __packet_write
+ *           __packet_receive_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 packets (__last_packet_plus_one
+ * = 10).  The read pointer is at 2, and the write pointer is at 6; thus,
+ * there are valid, unread packets in slots 2, 3, 4, and 5.  The remaining
+ * slots are invalid (do not contain a packet).
+ */
+typedef struct {
+  /** Byte offset of the next notify packet to be written: zero for the first
+   *  packet on the queue, sizeof (netio_pkt_t) for the second packet on the
+   *  queue, etc. */
+  volatile uint32_t __packet_write;
+
+  /** Offset of the packet after the last valid packet (i.e., when any
+   *  pointer is incremented to this value, it wraps back to zero). */
+  uint32_t __last_packet_plus_one;
+}
+__netio_packet_queue_t;
+
+
+/** A queue of buffers.
+ *
+ * This structure partially defines a queue of empty buffers which have been
+ * obtained via requests to the IPP.  (The elements of the queue are packet
+ * handles, which are transformed into a full netio_pkt_t when the buffer is
+ * retrieved.)  The queue as a whole is written to by an interrupt handler and
+ * read by non-interrupt code; this data structure is what's touched by the
+ * interrupt handler.  The other parts of the queue state, the read offset and
+ * requested write offset, are kept in user space, not in hypervisor space, so
+ * they are in a separate data structure.
+ *
+ * The read offset (__buffer_read in the user part of the queue structure)
+ * points to the next buffer to be read. When the read offset is equal to the
+ * write offset, the queue is empty; therefore the queue must contain one more
+ * slot than the required maximum queue size.
+ *
+ * The requested write offset (__buffer_requested_write in the user part of
+ * the queue structure) points to the slot which will hold the next buffer we
+ * request from the IPP, once we get around to sending such a request.  When
+ * the requested write offset is equal to the write offset, no requests for
+ * new buffers are outstanding; when the requested write offset is one greater
+ * than the read offset, no more requests may be sent.
+ *
+ * Note that, unlike the packet_queue, the buffer_queue places incoming
+ * buffers at decreasing addresses.  This makes the check for "is it time to
+ * wrap the buffer pointer" cheaper in the assembly code which receives new
+ * buffers, and means that the value which defines the queue size,
+ * __last_buffer, is different than in the packet queue.  Also, the offset
+ * used in the packet_queue is already scaled by the size of a packet; here we
+ * use unscaled slot indices for the offsets.  (These differences are
+ * historical, and in the future it's possible that the packet_queue will look
+ * more like this queue.)
+ *
+ * @code
+ * Here's an example of all 4 state variables and what they mean.  Remember:
+ * all pointers move right to left.
+ *
+ *   V   V   V   I   I   R   R   V   V   V
+ *   0   1   2   3   4   5   6   7   8   9
+ *           ^       ^       ^           ^
+ *           |       |       |           |
+ *           |       |       |           __last_buffer
+ *           |       |       __buffer_write
+ *           |       __buffer_requested_write
+ *           __buffer_read
+ * @endcode
+ *
+ * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
+ * The read pointer is at 2, and the write pointer is at 6; thus, there are
+ * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7.  The requested write
+ * pointer is at 4; thus, requests have been made to the IPP for buffers which
+ * will be placed in slots 6 and 5 when they arrive.  Finally, the remaining
+ * slots are invalid (do not contain a buffer).
+ */
+typedef struct
+{
+  /** Ordinal number of the next buffer to be written: 0 for the first slot in
+   *  the queue, 1 for the second slot in the queue, etc. */
+  volatile uint32_t __buffer_write;
+
+  /** Ordinal number of the last buffer (i.e., when any pointer is decremented
+   *  below zero, it is reloaded with this value). */
+  uint32_t __last_buffer;
+}
+__netio_buffer_queue_t;
+
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+typedef struct __netio_queue_impl_t
+{
+  /** The queue of packets waiting to be received. */
+  __netio_packet_queue_t __packet_receive_queue;
+  /** The intr bit mask that IDs this device. */
+  unsigned int __intr_id;
+  /** Offset to queues of empty buffers, one per size. */
+  uint32_t __buffer_queue[NETIO_NUM_SIZES];
+  /** The address of the first EPP tile, or -1 if no EPP. */
+  /* ISSUE: Actually this is always "0" or "~0". */
+  uint32_t __epp_location;
+  /** The queue ID that this queue represents. */
+  unsigned int __queue_id;
+  /** Number of acknowledgements received. */
+  volatile uint32_t __acks_received;
+  /** Last completion number received for packet_sendv. */
+  volatile uint32_t __last_completion_rcv;
+  /** Number of packets allowed to be outstanding. */
+  uint32_t __max_outstanding;
+  /** First VA available for packets. */
+  void* __va_0;
+  /** First VA in second range available for packets. */
+  void* __va_1;
+  /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
+  uint32_t __padding[3];
+  /** The packets themselves. */
+  netio_pkt_t __packets[0];
+}
+netio_queue_impl_t;
+
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+typedef struct __netio_queue_user_impl_t
+{
+  /** The next incoming packet to be read. */
+  uint32_t __packet_receive_read;
+  /** The next empty buffers to be read, one index per size. */
+  uint8_t __buffer_read[NETIO_NUM_SIZES];
+  /** Where the empty buffer we next request from the IPP will go, one index
+   * per size. */
+  uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
+  /** PCIe interface flag. */
+  uint8_t __pcie;
+  /** Number of packets left to be received before we send a credit update. */
+  uint32_t __receive_credit_remaining;
+  /** Value placed in __receive_credit_remaining when it reaches zero. */
+  uint32_t __receive_credit_interval;
+  /** First fast I/O routine index. */
+  uint32_t __fastio_index;
+  /** Number of acknowledgements expected. */
+  uint32_t __acks_outstanding;
+  /** Last completion number requested. */
+  uint32_t __last_completion_req;
+  /** File descriptor for driver. */
+  int __fd;
+}
+netio_queue_user_impl_t;
+
+
+#define NETIO_GROUP_CHUNK_SIZE   64   /**< Max # groups in one IPP request */
+#define NETIO_BUCKET_CHUNK_SIZE  64   /**< Max # buckets in one IPP request */
+
+
+/** Internal structure used to convey packet send information to the
+ * hypervisor.  FIXME: Actually, it's not used for that anymore, but
+ * netio_packet_send() still uses it internally.
+ */
+typedef struct
+{
+  uint16_t flags;              /**< Packet flags (__NETIO_SEND_FLG_xxx) */
+  uint16_t transfer_size;      /**< Size of packet */
+  uint32_t va;                 /**< VA of start of packet */
+  __netio_pkt_handle_t handle; /**< Packet handle */
+  uint32_t csum0;              /**< First checksum word */
+  uint32_t csum1;              /**< Second checksum word */
+}
+__netio_send_cmd_t;
+
+
+/** Flags used in two contexts:
+ *  - As the "flags" member in the __netio_send_cmd_t, above; used only
+ *    for netio_pkt_send_{prepare,commit}.
+ *  - As part of the flags passed to the various send packet fast I/O calls.
+ */
+
+/** Need acknowledgement on this packet.  Note that some code in the
+ *  normal send_pkt fast I/O handler assumes that this is equal to 1. */
+#define __NETIO_SEND_FLG_ACK    0x1
+
+/** Do checksum on this packet.  (Only used with the __netio_send_cmd_t;
+ *  normal packet sends use a special fast I/O index to denote checksumming,
+ *  and multi-segment sends test the checksum descriptor.) */
+#define __NETIO_SEND_FLG_CSUM   0x2
+
+/** Get a completion on this packet.  Only used with multi-segment sends.  */
+#define __NETIO_SEND_FLG_COMPLETION 0x4
+
+/** Position of the number-of-extra-segments value in the flags word.
+    Only used with multi-segment sends. */
+#define __NETIO_SEND_FLG_XSEG_SHIFT 3
+
+/** Width of the number-of-extra-segments value in the flags word. */
+#define __NETIO_SEND_FLG_XSEG_WIDTH 2
+
+#endif /* __DRV_XGBE_IMPL_H__ */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
new file mode 100644
index 000000000000..146e47d5334b
--- /dev/null
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_xgbe_intf.h
+ * Interface to the hypervisor XGBE driver.
+ */
+
+#ifndef __DRV_XGBE_INTF_H__
+#define __DRV_XGBE_INTF_H__
+
+/**
+ * An object for forwarding VAs and PAs to the hypervisor.
+ * @ingroup types
+ *
+ * This allows the supervisor to specify a number of areas of memory to
+ * store packet buffers.
+ */
+typedef struct
+{
+  /** The physical address of the memory. */
+  HV_PhysAddr pa;
+  /** Page table entry for the memory.  This is only used to derive the
+   *  memory's caching mode; the PA bits are ignored. */
+  HV_PTE pte;
+  /** The virtual address of the memory. */
+  HV_VirtAddr va;
+  /** Size (in bytes) of the memory area. */
+  int size;
+
+}
+netio_ipp_address_t;
+
+/** The various pread/pwrite offsets into the hypervisor-level driver.
+ * @ingroup types
+ */
+typedef enum
+{
+  /** Inform the Linux driver of the address of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA address. */
+  NETIO_FIXED_ADDR               = 0x5000000000000000ULL,
+
+  /** Inform the Linux driver of the size of the NetIO arena memory.
+   *  This offset is actually only used to convey information from netio
+   *  to the Linux driver; it never makes it from there to the hypervisor.
+   *  Write-only; takes a uint32_t specifying the VA size. */
+  NETIO_FIXED_SIZE               = 0x5100000000000000ULL,
+
+  /** Register current tile with IPP.  Write then read: write, takes a
+   *  netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
+  NETIO_IPP_INPUT_REGISTER_OFF   = 0x6000000000000000ULL,
+
+  /** Unregister current tile from IPP.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
+
+  /** Start packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_INIT_OFF       = 0x6200000000000000ULL,
+
+  /** Stop packets flowing.  Write-only, takes a dummy argument. */
+  NETIO_IPP_INPUT_UNINIT_OFF     = 0x6300000000000000ULL,
+
+  /** Configure group (typically we group on VLAN).  Write-only: takes an
+   *  array of netio_group_t's, low 24 bits of the offset is the base group
+   *  number times the size of a netio_group_t. */
+  NETIO_IPP_INPUT_GROUP_CFG_OFF  = 0x6400000000000000ULL,
+
+  /** Configure bucket.  Write-only: takes an array of netio_bucket_t's, low
+   *  24 bits of the offset is the base bucket number times the size of a
+   *  netio_bucket_t. */
+  NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
+
+  /** Get/set a parameter.  Read or write: read or write data is the parameter
+   *  value, low 32 bits of the offset is a __netio_getset_offset_t. */
+  NETIO_IPP_PARAM_OFF            = 0x6600000000000000ULL,
+
+  /** Get fast I/O index.  Read-only; returns a 4-byte base index value. */
+  NETIO_IPP_GET_FASTIO_OFF       = 0x6700000000000000ULL,
+
+  /** Configure hijack IP address.  Packets with this IPv4 dest address
+   *  go to bucket NETIO_NUM_BUCKETS - 1.  Write-only: takes an IP address
+   *  in some standard form.  FIXME: Define the form! */
+  NETIO_IPP_INPUT_HIJACK_CFG_OFF  = 0x6800000000000000ULL,
+
+  /**
+   * Offsets beyond this point are reserved for the supervisor (although that
+   * enforcement must be done by the supervisor driver itself).
+   */
+  NETIO_IPP_USER_MAX_OFF         = 0x6FFFFFFFFFFFFFFFULL,
+
+  /** Register I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_REGISTER_OFF   = 0x7000000000000000ULL,
+
+  /** Unregister I/O memory.  Write-only, takes a netio_ipp_address_t. */
+  NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
+
+  /* Offsets greater than 0x7FFFFFFFFFFFFFFF can't be used directly from Linux
+   * userspace code due to limitations in the pread/pwrite syscalls. */
+
+  /** Drain LIPP buffers. */
+  NETIO_IPP_DRAIN_OFF              = 0xFA00000000000000ULL,
+
+  /** Supply a netio_ipp_address_t to be used as shared memory for the
+   *  LEPP command queue. */
+  NETIO_EPP_SHM_OFF              = 0xFB00000000000000ULL,
+
+  /* 0xFC... is currently unused. */
+
+  /** Stop IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_STOP_SHIM_OFF        = 0xFD00000000000000ULL,
+
+  /** Start IPP/EPP tiles.  Write-only, takes a dummy argument.  */
+  NETIO_IPP_START_SHIM_OFF       = 0xFE00000000000000ULL,
+
+  /** Supply packet arena.  Write-only, takes an array of
+    * netio_ipp_address_t values. */
+  NETIO_IPP_ADDRESS_OFF          = 0xFF00000000000000ULL,
+} netio_hv_offset_t;
+
+/** Extract the base offset from an offset */
+#define NETIO_BASE_OFFSET(off)    ((off) & 0xFF00000000000000ULL)
+/** Extract the local offset from an offset */
+#define NETIO_LOCAL_OFFSET(off)   ((off) & 0x00FFFFFFFFFFFFFFULL)
+
+
+/**
+ * Get/set offset.
+ */
+typedef union
+{
+  struct
+  {
+    uint64_t addr:48;        /**< Class-specific address */
+    unsigned int class:8;    /**< Class (e.g., NETIO_PARAM) */
+    unsigned int opcode:8;   /**< High 8 bits of NETIO_IPP_PARAM_OFF */
+  }
+  bits;                      /**< Bitfields */
+  uint64_t word;             /**< Aggregated value to use as the offset */
+}
+__netio_getset_offset_t;
+
+/**
+ * Fast I/O index offsets (must be contiguous).
+ */
+typedef enum
+{
+  NETIO_FASTIO_ALLOCATE         = 0, /**< Get empty packet buffer */
+  NETIO_FASTIO_FREE_BUFFER      = 1, /**< Give buffer back to IPP */
+  NETIO_FASTIO_RETURN_CREDITS   = 2, /**< Give credits to IPP */
+  NETIO_FASTIO_SEND_PKT_NOCK    = 3, /**< Send a packet, no checksum */
+  NETIO_FASTIO_SEND_PKT_CK      = 4, /**< Send a packet, with checksum */
+  NETIO_FASTIO_SEND_PKT_VEC     = 5, /**< Send a vector of packets */
+  NETIO_FASTIO_SENDV_PKT        = 6, /**< Sendv one packet */
+  NETIO_FASTIO_NUM_INDEX        = 7, /**< Total number of fast I/O indices */
+} netio_fastio_index_t;
+
+/** 3-word return type for Fast I/O call. */
+typedef struct
+{
+  int err;            /**< Error code. */
+  uint32_t val0;      /**< Value.  Meaning depends upon the specific call. */
+  uint32_t val1;      /**< Value.  Meaning depends upon the specific call. */
+} netio_fastio_rv3_t;
+
+/** 0-argument fast I/O call */
+int __netio_fastio0(uint32_t fastio_index);
+/** 1-argument fast I/O call */
+int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
+/** 3-argument fast I/O call, 3-word return value */
+netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
+                                       uint32_t arg1, uint32_t arg2);
+/** 4-argument fast I/O call */
+int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3);
+/** 6-argument fast I/O call */
+int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
+/** 9-argument fast I/O call */
+int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
+                    uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
+                    uint32_t arg6, uint32_t arg7, uint32_t arg8);
+
+/** Allocate an empty packet.
+ * @param fastio_index Fast I/O index.
+ * @param size Size of the packet to allocate.
+ */
+#define __netio_fastio_allocate(fastio_index, size) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size)
+
+/** Free a buffer.
+ * @param fastio_index Fast I/O index.
+ * @param handle Handle for the packet to free.
+ */
+#define __netio_fastio_free_buffer(fastio_index, handle) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle)
+
+/** Increment our receive credits.
+ * @param fastio_index Fast I/O index.
+ * @param credits Number of credits to add.
+ */
+#define __netio_fastio_return_credits(fastio_index, credits) \
+  __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits)
+
+/** Send packet, no checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ */
+#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
+  __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \
+                  size, va, handle)
+
+/** Send packet, calculate checksum.
+ * @param fastio_index Fast I/O index.
+ * @param ackflag Nonzero if we want an ack.
+ * @param size Size of the packet.
+ * @param va Virtual address of start of packet.
+ * @param handle Packet handle.
+ * @param csum0 Shim checksum header.
+ * @param csum1 Checksum seed.
+ */
+#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
+                                   csum0, csum1) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \
+                  size, va, handle, csum0, csum1)
+
+
+/** Format for the "csum0" argument to the __netio_fastio_send routines
+ * and LEPP.  Note that this is currently exactly identical to the
+ * ShimProtocolOffloadHeader.
+ */
+typedef union
+{
+  struct
+  {
+    unsigned int start_byte:7;       /**< The first byte to be checksummed */
+    unsigned int count:14;           /**< Number of bytes to be checksummed. */
+    unsigned int destination_byte:7; /**< The byte to write the checksum to. */
+    unsigned int reserved:4;         /**< Reserved. */
+  } bits;                            /**< Decomposed method of access. */
+  unsigned int word;                 /**< To send out the IDN. */
+} __netio_checksum_header_t;
+
+
+/** Sendv packet with 1 or 2 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment, if 2 segments.
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment, if 2 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Send packet on PCIe interface.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
+ * @param va_F Virtual address of the packet buffer.
+ * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
+ * @param len_F_L Length of the packet buffer in low 16 bits.
+ */
+#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
+                                     va_F, va_L, len_F_L) \
+  __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L)
+
+/** Sendv packet with 3 or 4 segments.
+ * @param fastio_index Fast I/O index.
+ * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
+ *        1 in next 2 bits; expected checksum in high 16 bits.
+ * @param confno Confirmation number to request, if notify flag set.
+ * @param csum0 Checksum descriptor; if zero, no checksum.
+ * @param va_F Virtual address of first segment.
+ * @param va_L Virtual address of last segment (third segment if 3 segments,
+ *        fourth segment if 4 segments).
+ * @param len_F_L Length of first segment in low 16 bits; length of last
+ *        segment in high 16 bits.
+ * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
+ *        second when there are three segments, and third if there are four.
+ * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
+ *        second when there are four segments.
+ * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle
+ *        1 segment, if 4 segments, in high 16 bits.
+ */
+#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \
+                                     va_L, len_F_L, va_M0, va_M1, len_M0_M1) \
+  __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \
+                  csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1)
+
+/** Send vector of packets.
+ * @param fastio_index Fast I/O index.
+ * @param seqno Number of packets transmitted so far on this interface;
+ *        used to decide which packets should be acknowledged.
+ * @param nentries Number of entries in vector.
+ * @param va Virtual address of start of vector entry array.
+ * @return 3-word netio_fastio_rv3_t structure.  The structure's err member
+ *         is an error code, or zero if no error.  The val0 member is the
+ *         updated value of seqno; it has been incremented by 1 for each
+ *         packet sent.  That increment may be less than nentries if an
+ *         error occurred, or if some of the entries in the vector contain
+ *         handles equal to NETIO_PKT_HANDLE_NONE.  The val1 member is the
+ *         updated value of nentries; it has been decremented by 1 for each
+ *         vector entry processed.  Again, that decrement may be less than
+ *         nentries (leaving the returned value positive) if an error
+ *         occurred.
+ */
+#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
+  __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \
+                      nentries, va)
+
+
+/** An egress DMA command for LEPP. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   * NOTE: This field is always 0, to distinguish it from
+   * lepp_tso_cmd_t.  It must come first!
+   */
+  uint8_t tso               : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused           : 3;
+
+  /** Should this packet be sent directly from caches instead of DRAM,
+   * using hash-for-home to locate the packet data?
+   */
+  uint8_t hash_for_home     : 1;
+
+  /** Should we compute a checksum? */
+  uint8_t compute_checksum  : 1;
+
+  /** Is this the final buffer for this packet?
+   *
+   * A single packet can be split over several input buffers (a "gather"
+   * operation).  This flag indicates that this is the last buffer
+   * in a packet.
+   */
+  uint8_t end_of_packet     : 1;
+
+  /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
+  uint8_t send_completion   : 1;
+
+  /** High bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   *
+   *  NOTE: Only 6 bits are actually needed here, as CPAs are
+   *  currently 38 bits.  So two bits could be scavenged from this.
+   */
+  uint8_t cpa_hi;
+
+  /** The number of bytes to be egressed. */
+  uint16_t length;
+
+  /** Low 32 bits of Client Physical Address of the start of the buffer
+   *  to be egressed.
+   */
+  uint32_t cpa_lo;
+
+  /** Checksum information (only used if 'compute_checksum'). */
+  __netio_checksum_header_t checksum_data;
+
+} lepp_cmd_t;
+
+
+/** A chunk of physical memory for a TSO egress. */
+typedef struct
+{
+  /** The low bits of the CPA. */
+  uint32_t cpa_lo;
+  /** The high bits of the CPA. */
+  uint16_t cpa_hi		: 15;
+  /** Should this packet be sent directly from caches instead of DRAM,
+   *  using hash-for-home to locate the packet data?
+   */
+  uint16_t hash_for_home	: 1;
+  /** The length in bytes. */
+  uint16_t length;
+} lepp_frag_t;
+
+
+/** An LEPP command that handles TSO. */
+typedef struct
+{
+  /** Is this a TSO transfer?
+   *
+   *  NOTE: This field is always 1, to distinguish it from
+   *  lepp_cmd_t.  It must come first!
+   */
+  uint8_t tso             : 1;
+
+  /** Unused padding bits. */
+  uint8_t _unused         : 7;
+
+  /** Size of the header[] array in bytes.  It must be in the range
+   *  [40, 127], which are the smallest header for a TCP packet over
+   *  Ethernet and the maximum possible prepend size supported by
+   *  hardware, respectively.  Note that the array storage must be
+   *  padded out to a multiple of four bytes so that the following
+   *  LEPP command is aligned properly.
+   */
+  uint8_t header_size;
+
+  /** Byte offset of the IP header in header[]. */
+  uint8_t ip_offset;
+
+  /** Byte offset of the TCP header in header[]. */
+  uint8_t tcp_offset;
+
+  /** The number of bytes to use for the payload of each packet,
+   *  except of course the last one, which may not have enough bytes.
+   *  This means that each Ethernet packet except the last will have a
+   *  size of header_size + payload_size.
+   */
+  uint16_t payload_size;
+
+  /** The length of the 'frags' array that follows this struct. */
+  uint16_t num_frags;
+
+  /** The actual frags. */
+  lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
+
+  /*
+   * The packet header template logically follows frags[],
+   * but you can't declare that in C.
+   *
+   * uint32_t header[header_size_in_words_rounded_up];
+   */
+
+} lepp_tso_cmd_t;
+
+
+/** An LEPP completion ring entry. */
+typedef void* lepp_comp_t;
+
+
+/** Maximum number of frags for one TSO command.  This is adapted from
+ *  linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
+ *  our page size of exactly 65536.  We add one for a "body" fragment.
+ */
+#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
+
+/** Total number of bytes needed for an lepp_tso_cmd_t. */
+#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
+  (sizeof(lepp_tso_cmd_t) + \
+   (num_frags) * sizeof(lepp_frag_t) + \
+   (((header_size) + 3) & -4))
+
+/** The size of the lepp "cmd" queue. */
+#define LEPP_CMD_QUEUE_BYTES \
+ (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
+  (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t))
+
+/** The largest possible command that can go in lepp_queue_t::cmds[]. */
+#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)
+
+/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive).
+ */
+#define LEPP_CMD_LIMIT \
+  (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)
+
+/** The maximum number of completions in an LEPP queue. */
+#define LEPP_COMP_QUEUE_SIZE \
+  ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
+
+/** Increment an index modulo the queue size. */
+#define LEPP_QINC(var) \
+  (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1))
+
+/** A queue used to convey egress commands from the client to LEPP. */
+typedef struct
+{
+  /** Index of first completion not yet processed by user code.
+   *  If this is equal to comp_busy, there are no such completions.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_head;
+
+  /** Index of first completion record not yet completed.
+   *  If this is equal to comp_tail, there are no such completions.
+   *  This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever
+   *  a command with the 'send_completion' bit set is finished.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int comp_busy;
+
+  /** Index of the first empty slot in the completion ring.
+   *  Entries from this up to but not including comp_head (in ring order)
+   *  can be filled in with completion data.
+   *
+   *  NOTE: This is only read/written by the user.
+   */
+  unsigned int comp_tail;
+
+  /** Byte index of first command enqueued for LEPP but not yet processed.
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: LEPP advances this counter as soon as it no longer needs
+   *  the cmds[] storage for this entry, but the transfer is not actually
+   *  complete (i.e. the buffer pointed to by the command is no longer
+   *  needed) until comp_busy advances.
+   *
+   *  If this is equal to cmd_tail, the ring is empty.
+   *
+   *  NOTE: This is only written by LEPP, only read by the user.
+   */
+  volatile unsigned int cmd_head;
+
+  /** Byte index of first empty slot in the command ring.  This field can
+   *  be incremented up to but not equal to cmd_head (because that would
+   *  mean the ring is empty).
+   *
+   *  This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
+   *
+   *  NOTE: This is read/written by the user, only read by LEPP.
+   */
+  volatile unsigned int cmd_tail;
+
+  /** A ring of variable-sized egress DMA commands.
+   *
+   *  NOTE: Only written by the user, only read by LEPP.
+   */
+  char cmds[LEPP_CMD_QUEUE_BYTES]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+
+  /** A ring of user completion data.
+   *  NOTE: Only read/written by the user.
+   */
+  lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
+    __attribute__((aligned(CHIP_L2_LINE_SIZE())));
+} lepp_queue_t;
+
+
+/** An internal helper function for determining the number of entries
+ *  available in a ring buffer, given that there is one sentinel.
+ */
+static inline unsigned int
+_lepp_num_free_slots(unsigned int head, unsigned int tail)
+{
+  /*
+   * One entry is reserved for use as a sentinel, to distinguish "empty"
+   * from "full".  So we compute (head - tail - 1) % LEPP_COMP_QUEUE_SIZE,
+   * but without using a slow % operation.
+   */
+  return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0);
+}
+
+
+/** Returns how many new comp entries can be enqueued. */
+static inline unsigned int
+lepp_num_free_comp_slots(const lepp_queue_t* q)
+{
+  return _lepp_num_free_slots(q->comp_head, q->comp_tail);
+}
+
+static inline int
+lepp_qsub(int v1, int v2)
+{
+  int delta = v1 - v2;
+  return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE);
+}
+
+
+/** FIXME: Check this from linux, via a new "pwrite()" call. */
+#define LIPP_VERSION 1
+
+
+/** We use exactly two bytes of alignment padding. */
+#define LIPP_PACKET_PADDING 2
+
+/** The minimum size of a "small" buffer (including the padding). */
+#define LIPP_SMALL_PACKET_SIZE 128
+
+/*
+ * NOTE: The following two values should total to less than around
+ * 13582, to keep the total size used for "lipp_state_t" below 64K.
+ */
+
+/** The maximum number of "small" buffers.
+ *  This is enough for 53 network cpus with 128 credits.  Note that
+ *  if these are exhausted, we will fall back to using large buffers.
+ */
+#define LIPP_SMALL_BUFFERS 6785
+
+/** The maximum number of "large" buffers.
+ *  This is enough for 53 network cpus with 128 credits.
+ */
+#define LIPP_LARGE_BUFFERS 6785
+
+#endif /* __DRV_XGBE_INTF_H__ */
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h
new file mode 100644
index 000000000000..e1591bff61b5
--- /dev/null
+++ b/arch/tile/include/hv/netio_errors.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * Error codes returned from NetIO routines.
+ */
+
+#ifndef __NETIO_ERRORS_H__
+#define __NETIO_ERRORS_H__
+
+/**
+ * @addtogroup error
+ *
+ * @brief The error codes returned by NetIO functions.
+ *
+ * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
+ * a negative value if an error occurs.
+ *
+ * In cases where a NetIO function failed due to an error reported by
+ * system libraries, the error code will be the negation of the
+ * system errno at the time of failure.  The @ref netio_strerror()
+ * function will deliver error strings for both NetIO and system error
+ * codes.
+ *
+ * @{
+ */
+
+/** The set of all NetIO errors. */
+typedef enum
+{
+  /** Operation successfully completed. */
+  NETIO_NO_ERROR        = 0,
+
+  /** A packet was successfully retrieved from an input queue. */
+  NETIO_PKT             = 0,
+
+  /** Largest NetIO error number. */
+  NETIO_ERR_MAX         = -701,
+
+  /** The tile is not registered with the IPP. */
+  NETIO_NOT_REGISTERED  = -701,
+
+  /** No packet was available to retrieve from the input queue. */
+  NETIO_NOPKT           = -702,
+
+  /** The requested function is not implemented. */
+  NETIO_NOT_IMPLEMENTED = -703,
+
+  /** On a registration operation, the target queue already has the maximum
+   *  number of tiles registered for it, and no more may be added.  On a
+   *  packet send operation, the output queue is full and nothing more can
+   *  be queued until some of the queued packets are actually transmitted. */
+  NETIO_QUEUE_FULL      = -704,
+
+  /** The calling process or thread is not bound to exactly one CPU. */
+  NETIO_BAD_AFFINITY    = -705,
+
+  /** Cannot allocate memory on requested controllers. */
+  NETIO_CANNOT_HOME     = -706,
+
+  /** On a registration operation, the IPP specified is not configured
+   *  to support the options requested; for instance, the application
+   *  wants a specific type of tagged headers which the configured IPP
+   *  doesn't support.  Or, the supplied configuration information is
+   *  not self-consistent, or is out of range; for instance, specifying
+   *  both NETIO_RECV and NETIO_NO_RECV, or asking for more than
+   *  NETIO_MAX_SEND_BUFFERS to be preallocated.  On a VLAN or bucket
+   *  configure operation, the number of items, or the base item, was
+   *  out of range.
+   */
+  NETIO_BAD_CONFIG      = -707,
+
+  /** Too many tiles have registered to transmit packets. */
+  NETIO_TOOMANY_XMIT    = -708,
+
+  /** Packet transmission was attempted on a queue which was registered
+      with transmit disabled. */
+  NETIO_UNREG_XMIT      = -709,
+
+  /** This tile is already registered with the IPP. */
+  NETIO_ALREADY_REGISTERED = -710,
+
+  /** The Ethernet link is down. The application should try again later. */
+  NETIO_LINK_DOWN       = -711,
+
+  /** An invalid memory buffer has been specified.  This may be an unmapped
+   * virtual address, or one which does not meet alignment requirements.
+   * For netio_input_register(), this error may be returned when multiple
+   * processes specify different memory regions to be used for NetIO
+   * buffers.  That can happen if these processes specify explicit memory
+   * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
+   * has not been called by a common ancestor of the processes.
+   */
+  NETIO_FAULT           = -712,
+
+  /** Cannot combine user-managed shared memory and cache coherence. */
+  NETIO_BAD_CACHE_CONFIG = -713,
+
+  /** Smallest NetIO error number. */
+  NETIO_ERR_MIN         = -713,
+
+#ifndef __DOXYGEN__
+  /** Used internally to mean that no response is needed; never returned to
+   *  an application. */
+  NETIO_NO_RESPONSE     = 1
+#endif
+} netio_error_t;
+
+/** @} */
+
+#endif /* __NETIO_ERRORS_H__ */
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h
new file mode 100644
index 000000000000..8d20972aba2c
--- /dev/null
+++ b/arch/tile/include/hv/netio_intf.h
@@ -0,0 +1,2975 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * NetIO interface structures and macros.
+ */
+
+#ifndef __NETIO_INTF_H__
+#define __NETIO_INTF_H__
+
+#include <hv/netio_errors.h>
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__)
+#include <assert.h>
+#define netio_assert assert  /**< Enable assertions from macros */
+#else
+#define netio_assert(...) ((void)(0))  /**< Disable assertions from macros */
+#endif
+
+/*
+ * If none of these symbols are defined, we're building libnetio in an
+ * environment where we have pthreads, so we'll enable locking.
+ */
+#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \
+    !defined(__NEWLIB__)
+#define _NETIO_PTHREAD       /**< Include a mutex in netio_queue_t below */
+
+/*
+ * If NETIO_UNLOCKED is defined, we don't use per-cpu locks on
+ * per-packet NetIO operations.  We still do pthread locking on things
+ * like netio_input_register, though.  This is used for building
+ * libnetio_unlocked.
+ */
+#ifndef NETIO_UNLOCKED
+
+/* Avoid PLT overhead by using our own inlined per-cpu lock. */
+#include <sched.h>
+typedef int _netio_percpu_mutex_t;
+
+static __inline int
+_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;
+  return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock)
+{
+  while (__builtin_expect(__insn_tns(lock), 0))
+    sched_yield();
+  return 0;
+}
+
+static __inline int
+_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock)
+{
+  *lock = 0;
+  return 0;
+}
+
+#else /* NETIO_UNLOCKED */
+
+/* Don't do any locking for per-packet NetIO operations. */
+typedef int _netio_percpu_mutex_t;
+#define _netio_percpu_mutex_init(L)
+#define _netio_percpu_mutex_lock(L)
+#define _netio_percpu_mutex_unlock(L)
+
+#endif /* NETIO_UNLOCKED */
+#endif /* !__HV__, !__BOGUX__, !__KERNEL__, !__NEWLIB__ */
+
+/** How many tiles can register for a given queue.
+ *  @ingroup setup */
+#define NETIO_MAX_TILES_PER_QUEUE  64
+
+
+/** Largest permissible queue identifier.
+ *  @ingroup setup  */
+#define NETIO_MAX_QUEUE_ID        255
+
+
+#ifndef __DOXYGEN__
+
+/* Metadata packet checksum/ethertype flags. */
+
+/** The L4 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L4_CSUM_SHIFT           0
+#define _NETIO_PKT_NO_L4_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L4_CSUM_MASK \
+         (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT)
+
+/** The L3 checksum has not been calculated. */
+#define _NETIO_PKT_NO_L3_CSUM_SHIFT           1
+#define _NETIO_PKT_NO_L3_CSUM_RMASK           1
+#define _NETIO_PKT_NO_L3_CSUM_MASK \
+         (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT)
+
+/** The L3 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L3_CSUM_SHIFT          2
+#define _NETIO_PKT_BAD_L3_CSUM_RMASK          1
+#define _NETIO_PKT_BAD_L3_CSUM_MASK \
+         (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT)
+
+/** The Ethernet packet type is unrecognized. */
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT    3
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK    1
+#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \
+         (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \
+          _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT)
+
+/* Metadata packet type flags. */
+
+/** Where the packet type bits are; this field is the index into
+ *  _netio_pkt_info. */
+#define _NETIO_PKT_TYPE_SHIFT        4
+#define _NETIO_PKT_TYPE_RMASK        0x3F
+
+/** How many VLAN tags the packet has, and, if we have two, which one we
+ *  actually grouped on.  A VLAN within a proprietary (Marvell or Broadcom)
+ *  tag is counted here. */
+#define _NETIO_PKT_VLAN_SHIFT        4
+#define _NETIO_PKT_VLAN_RMASK        0x3
+#define _NETIO_PKT_VLAN_MASK \
+         (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT)
+#define _NETIO_PKT_VLAN_NONE         0   /* No VLAN tag. */
+#define _NETIO_PKT_VLAN_ONE          1   /* One VLAN tag. */
+#define _NETIO_PKT_VLAN_TWO_OUTER    2   /* Two VLAN tags, outer one used. */
+#define _NETIO_PKT_VLAN_TWO_INNER    3   /* Two VLAN tags, inner one used. */
+
+/** Which proprietary tags the packet has. */
+#define _NETIO_PKT_TAG_SHIFT         6
+#define _NETIO_PKT_TAG_RMASK         0x3
+#define _NETIO_PKT_TAG_MASK \
+          (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT)
+#define _NETIO_PKT_TAG_NONE          0   /* No proprietary tags. */
+#define _NETIO_PKT_TAG_MRVL          1   /* Marvell HyperG.Stack tags. */
+#define _NETIO_PKT_TAG_MRVL_EXT      2   /* HyperG.Stack extended tags. */
+#define _NETIO_PKT_TAG_BRCM          3   /* Broadcom HiGig tags. */
+
+/** Whether a packet has an LLC + SNAP header. */
+#define _NETIO_PKT_SNAP_SHIFT        8
+#define _NETIO_PKT_SNAP_RMASK        0x1
+#define _NETIO_PKT_SNAP_MASK \
+          (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT)
+
+/* NOTE: Bits 9 and 10 are unused. */
+
+/** Length of any custom data before the L2 header, in words. */
+#define _NETIO_PKT_CUSTOM_LEN_SHIFT  11
+#define _NETIO_PKT_CUSTOM_LEN_RMASK  0x1F
+#define _NETIO_PKT_CUSTOM_LEN_MASK \
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT)
+
+/** The L4 checksum is incorrect (or perhaps has not been calculated). */
+#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16
+#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1
+#define _NETIO_PKT_BAD_L4_CSUM_MASK \
+          (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT)
+
+/** Length of the L2 header, in words. */
+#define _NETIO_PKT_L2_LEN_SHIFT  17
+#define _NETIO_PKT_L2_LEN_RMASK  0x1F
+#define _NETIO_PKT_L2_LEN_MASK \
+          (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT)
+
+
+/* Flags in minimal packet metadata. */
+
+/** We need an eDMA checksum on this packet. */
+#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT            0
+#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK            1
+#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \
+         (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT)
+
+/* Data within the packet information table. */
+
+/* Note that, for efficiency, code which uses these fields assumes that none
+ * of the shift values below are zero.  See uses below for an explanation. */
+
+/** Offset within the L2 header of the innermost ethertype (in halfwords). */
+#define _NETIO_PKT_INFO_ETYPE_SHIFT       6
+#define _NETIO_PKT_INFO_ETYPE_RMASK    0x1F
+
+/** Offset within the L2 header of the VLAN tag (in halfwords). */
+#define _NETIO_PKT_INFO_VLAN_SHIFT       11
+#define _NETIO_PKT_INFO_VLAN_RMASK     0x1F
+
+#endif
+
+
+/** The size of a memory buffer representing a small packet.
+ *  @ingroup egress */
+#define SMALL_PACKET_SIZE 256
+
+/** The size of a memory buffer representing a large packet.
+ *  @ingroup egress */
+#define LARGE_PACKET_SIZE 2048
+
+/** The size of a memory buffer representing a jumbo packet.
+ *  @ingroup egress */
+#define JUMBO_PACKET_SIZE (12 * 1024)
+
+
+/* Common ethertypes.
+ * @ingroup ingress */
+/** @{ */
+/** The ethertype of IPv4. */
+#define ETHERTYPE_IPv4 (0x0800)
+/** The ethertype of ARP. */
+#define ETHERTYPE_ARP (0x0806)
+/** The ethertype of VLANs. */
+#define ETHERTYPE_VLAN (0x8100)
+/** The ethertype of a Q-in-Q header. */
+#define ETHERTYPE_Q_IN_Q (0x9100)
+/** The ethertype of IPv6. */
+#define ETHERTYPE_IPv6 (0x86DD)
+/** The ethertype of MPLS. */
+#define ETHERTYPE_MPLS (0x8847)
+/** @} */
+
+
+/** The possible return values of NETIO_PKT_STATUS.
+ * @ingroup ingress
+ */
+typedef enum
+{
+  /** No problems were detected with this packet. */
+  NETIO_PKT_STATUS_OK,
+  /** The packet is undersized; this is expected behavior if the packet's
+    * ethertype is unrecognized, but otherwise the packet is likely corrupt. */
+  NETIO_PKT_STATUS_UNDERSIZE,
+  /** The packet is oversized and some trailing bytes have been discarded.
+      This is expected behavior for short packets, since it's impossible to
+      precisely determine the amount of padding which may have been added to
+      them to make them meet the minimum Ethernet packet size. */
+  NETIO_PKT_STATUS_OVERSIZE,
+  /** The packet was judged to be corrupt by hardware (for instance, it had
+      a bad CRC, or part of it was discarded due to lack of buffer space in
+      the I/O shim) and should be discarded. */
+  NETIO_PKT_STATUS_BAD
+} netio_pkt_status_t;
+
+
+/** Log2 of how many buckets we have. */
+#define NETIO_LOG2_NUM_BUCKETS (10)
+
+/** How many buckets we have.
+ * @ingroup ingress */
+#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS)
+
+
+/**
+ * @brief A group-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given group.
+ */
+typedef union {
+  /** The header broken down into bits. */
+  struct {
+    /** Whether we should balance on L4, if available */
+    unsigned int __balance_on_l4:1;
+    /** Whether we should balance on L3, if available */
+    unsigned int __balance_on_l3:1;
+    /** Whether we should balance on L2, if available */
+    unsigned int __balance_on_l2:1;
+    /** Reserved for future use */
+    unsigned int __reserved:1;
+    /** The base bucket to use to send traffic */
+    unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS;
+    /** The mask to apply to the balancing value. This must be one less
+     * than a power of two, e.g. 0x3 or 0xFF.
+     */
+    unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS;
+    /** Pad to 32 bits */
+    unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS);
+  } bits;
+  /** To send out the IDN. */
+  unsigned int word;
+}
+netio_group_t;
+
+
+/**
+ * @brief A VLAN-to-bucket identifier.
+ *
+ * @ingroup setup
+ *
+ * This tells us what to do with a given VLAN.
+ */
+typedef netio_group_t netio_vlan_t;
+
+
+/**
+ * A bucket-to-queue mapping.
+ * @ingroup setup
+ */
+typedef unsigned char netio_bucket_t;
+
+
+/**
+ * A packet size can always fit in a netio_size_t.
+ * @ingroup setup
+ */
+typedef unsigned int netio_size_t;
+
+
+/**
+ * @brief Ethernet standard (ingress) packet metadata.
+ *
+ * @ingroup ingress
+ *
+ * This is additional data associated with each packet.
+ * This structure is opaque and accessed through the @ref ingress.
+ *
+ * Also, the buffer population operation currently assumes that standard
+ * metadata is at least as large as minimal metadata, and will need to be
+ * modified if that is no longer the case.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[24];
+#else
+  /** The overall ordinal of the packet */
+  unsigned int __packet_ordinal;
+  /** The ordinal of the packet within the group */
+  unsigned int __group_ordinal;
+  /** The best flow hash IPP could compute. */
+  unsigned int __flow_hash;
+  /** Flags pertaining to checksum calculation, packet type, etc. */
+  unsigned int __flags;
+  /** The first word of "user data". */
+  unsigned int __user_data_0;
+  /** The second word of "user data". */
+  unsigned int __user_data_1;
+#endif
+}
+netio_pkt_metadata_t;
+
+
+/** To ensure that the L3 header is aligned mod 4, the L2 header should be
+ * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes
+ * long.  The standard way to do this is to simply add 2 bytes of padding
+ * before the L2 header.
+ */
+#define NETIO_PACKET_PADDING 2
+
+
+
+/**
+ * @brief Ethernet minimal (egress) packet metadata.
+ *
+ * @ingroup egress
+ *
+ * This structure represents information about packets which have
+ * been processed by @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer().  This structure is opaque
+ * and accessed through the @ref egress.
+ *
+ * @internal This structure is actually copied into the memory used by
+ * standard metadata, which is assumed to be large enough.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[14];
+#else
+  /** The offset of the L2 header from the start of the packet data. */
+  unsigned short l2_offset;
+  /** The offset of the L3 header from the start of the packet data. */
+  unsigned short l3_offset;
+  /** Where to write the checksum. */
+  unsigned char csum_location;
+  /** Where to start checksumming from. */
+  unsigned char csum_start;
+  /** Flags pertaining to checksum calculation etc. */
+  unsigned short flags;
+  /** The L2 length of the packet. */
+  unsigned short l2_length;
+  /** The checksum with which to seed the checksum generator. */
+  unsigned short csum_seed;
+  /** How much to checksum. */
+  unsigned short csum_length;
+#endif
+}
+netio_pkt_minimal_metadata_t;
+
+
+#ifndef __DOXYGEN__
+
+/**
+ * @brief An I/O notification header.
+ *
+ * This is the first word of data received from an I/O shim in a notification
+ * packet. It contains framing and status information.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields (7+4+1+1+1+2+2+14 = 32 bits in all). */
+  struct
+  {
+    unsigned int __channel:7;    /**< Resource channel. */
+    unsigned int __type:4;       /**< Type. */
+    unsigned int __ack:1;        /**< Whether an acknowledgement is needed. */
+    unsigned int __reserved:1;   /**< Reserved. */
+    unsigned int __protocol:1;   /**< A protocol-specific word is added. */
+    unsigned int __status:2;     /**< Status (see NETIO_PKT_STATUS()). */
+    unsigned int __framing:2;    /**< Framing of the transfer. */
+    unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
+  } bits;
+}
+__netio_pkt_notif_t;
+
+
+/**
+ * Returns the base address of the packet: p.word with its low 6 bits cleared.
+ */
+#define _NETIO_PKT_HANDLE_BASE(p) \
+  ((unsigned char*)((p).word & 0xFFFFFFC0))
+
+/**
+ * Returns the base address of the packet; "p" is a pointer to a netio_pkt_t.
+ */
+#define _NETIO_PKT_BASE(p) \
+  _NETIO_PKT_HANDLE_BASE((p)->__packet)
+
+/**
+ * @brief An I/O notification packet (second word)
+ *
+ * This is the second word of data received from an I/O shim in a notification
+ * packet.  This is the virtual address of the packet buffer, plus some flag
+ * bits.  (The virtual address of the packet is always 256-byte aligned so we
+ * have room for 8 bits' worth of flags in the low 8 bits.)
+ *
+ * @internal
+ * NOTE: The low two bits must contain "__queue", so the "packet size"
+ * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
+ *
+ * If __addr or __offset are moved, the mask in _NETIO_PKT_HANDLE_BASE
+ * (defined above this, and used by _NETIO_PKT_BASE) must be changed.
+ */
+typedef union
+{
+  unsigned int word; /**< The whole word. */
+  /** The various fields (2+2+1+1+2+24 = 32 bits in all). */
+  struct
+  {
+    /** Which queue the packet will be returned to once it is sent back to
+        the IPP.  This is one of the SIZE_xxx values. */
+    unsigned int __queue:2;
+
+    /** The IPP handle of the sending IPP. */
+    unsigned int __ipp_handle:2;
+
+    /** Reserved for future use. */
+    unsigned int __reserved:1;
+
+    /** If 1, this packet has minimal (egress) metadata; otherwise, it
+        has standard (ingress) metadata. */
+    unsigned int __minimal:1;
+
+    /** Offset of the metadata within the packet.  This value is multiplied
+     *  by 64 and added to the base packet address to get the metadata
+     *  address.  Note that this field is aligned within the word such that
+     *  you can extract the metadata address with a 26-bit mask (0xFFFFFFC0). */
+    unsigned int __offset:2;
+
+    /** The top 24 bits of the packet's virtual address. */
+    unsigned int __addr:24;
+  } bits;
+}
+__netio_pkt_handle_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/**
+ * @brief A handle for an I/O packet's storage.
+ * @ingroup ingress
+ *
+ * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its
+ * packet metadata removed.  It is a much smaller type that exists to
+ * facilitate applications where the full ::netio_pkt_t type is too
+ * large, such as those that cache enormous numbers of packets or wish
+ * to transmit packet descriptors over the UDN.
+ *
+ * Because there is no metadata, most ::netio_pkt_t operations cannot be
+ * performed on a netio_pkt_handle_t.  It supports only
+ * netio_free_handle() (to free the buffer) and
+ * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents).
+ * The application must acquire any additional metadata it wants from the
+ * original ::netio_pkt_t and record it separately.
+ *
+ * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling
+ * NETIO_PKT_HANDLE().  An invalid handle (analogous to NULL) can be
+ * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can
+ * be tested for validity with NETIO_PKT_HANDLE_IS_VALID().
+ */
+typedef struct
+{
+  unsigned int word; /**< Opaque bits; zero means NETIO_PKT_HANDLE_NONE. */
+} netio_pkt_handle_t;
+
+/**
+ * @brief A packet descriptor.
+ *
+ * @ingroup ingress
+ * @ingroup egress
+ *
+ * This data structure represents a packet.  The structure is manipulated
+ * through the @ref ingress and the @ref egress.
+ *
+ * While the contents of a netio_pkt_t are opaque, the structure itself is
+ * portable.  This means that it may be shared between all tiles which have
+ * done a netio_input_register() call for the interface on which the pkt_t
+ * was initially received (via netio_get_packet()) or retrieved (via
+ * netio_get_buffer()).  The contents of a netio_pkt_t can be transmitted to
+ * another tile via shared memory, or via a UDN message, or by other means.
+ * The destination tile may then use the pkt_t as if it had originally been
+ * received locally; it may read or write the packet's data, read its
+ * metadata, free the packet, send the packet, transfer the netio_pkt_t to
+ * yet another tile, and so forth.
+ *
+ * Once a netio_pkt_t has been transferred to a second tile, the first tile
+ * should not reference the original copy; in particular, if more than one
+ * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will
+ * become corrupted.  Note also that each tile which reads or modifies
+ * packet data must obey the memory coherency rules outlined in @ref input.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  /** This structure is opaque. */
+  unsigned char opaque[32];
+#else
+  /** For an ingress packet (one with standard metadata), this is the
+   *  notification header we got from the I/O shim.  For an egress packet
+   *  (one with minimal metadata), this word is zero if the packet has not
+   *  been populated, and nonzero if it has. */
+  __netio_pkt_notif_t __notif_header;
+
+  /** Packet buffer address plus state flags (same word as a pkt handle). */
+  __netio_pkt_handle_t __packet;
+
+  /** Standard metadata; reused to hold minimal metadata on egress packets. */
+  netio_pkt_metadata_t __metadata;
+#endif /* __DOXYGEN__ */
+}
+netio_pkt_t;
+
+
+#ifndef __DOXYGEN__
+/* Raw field accessors; the "_M" forms ignore mda, except __NETIO_PKT_FLAGS_M. */
+#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
+#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
+#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
+#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
+#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
+
+/* Packet information table, indexed by packet type (see accessors below). */
+extern const uint16_t _netio_pkt_info[];
+
+#endif /* !__DOXYGEN__ */
+
+
+#ifndef __DOXYGEN__
+/* Deprecated aliases; these will disappear in a future MDE release. */
+#define NETIO_PKT_GOOD_CHECKSUM(pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT(pkt)
+#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
+  NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
+#endif /* !__DOXYGEN__ */
+
+
+/* Packet attribute access functions. */
+
+/** Return a pointer to the metadata for a packet.
+ * @ingroup ingress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_M" suffix usually improves performance.  This
+ * function must be called on an 'ingress' packet (i.e. one retrieved
+ * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer have not been called). Use of this
+ * function on an 'egress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata.
+ */
+static __inline netio_pkt_metadata_t*
+NETIO_PKT_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(!pkt->__packet.bits.__minimal);  /* ingress packets only */
+  return &pkt->__metadata;
+}
+
+
+/** Return a pointer to the minimal metadata for a packet.
+ * @ingroup egress
+ *
+ * Calling this function once and passing the result to other retrieval
+ * functions with a "_MM" suffix usually improves performance.  This
+ * function must be called on an 'egress' packet (i.e. one on which
+ * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
+ * have been called, or one retrieved by @ref netio_get_buffer()). Use of
+ * this function on an 'ingress' packet will cause an assertion failure.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's minimal metadata.
+ */
+static __inline netio_pkt_minimal_metadata_t*
+NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
+{
+  netio_assert(pkt->__packet.bits.__minimal);  /* egress packets only */
+  return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+}
+
+
+/** Determine whether a packet has 'minimal' metadata.
+ * @ingroup pktfuncs
+ *
+ * This function will return nonzero if the packet is an 'egress'
+ * packet (i.e. one on which @ref netio_populate_buffer() or
+ * @ref netio_populate_prepend_buffer() have been called, or one
+ * retrieved by @ref netio_get_buffer()), and zero if the packet
+ * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(),
+ * which has not been converted into an 'egress' packet).
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet has minimal metadata.
+ */
+static __inline unsigned int
+NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt)
+{
+  return pkt->__packet.bits.__minimal;  /* 1-bit field, so 0 or 1 */
+}
+
+
+/** Return a handle for a packet's storage.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A handle for the packet's storage.
+ */
+static __inline netio_pkt_handle_t
+NETIO_PKT_HANDLE(netio_pkt_t* pkt)
+{
+  /* A handle is just the packet word, without the surrounding metadata. */
+  netio_pkt_handle_t handle = { pkt->__packet.word };
+  return handle;
+}
+
+
+/** A special reserved value (word == 0) indicating the absence of a packet
+ * handle; NETIO_PKT_HANDLE_IS_VALID() returns zero for it.
+ * @ingroup pktfuncs
+ */
+#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 })
+
+
+/** Test whether a packet handle is valid.
+ *
+ * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE
+ * to indicate no packet at all.  This function tests to see if a packet
+ * handle is a real handle, not this special reserved value.
+ *
+ * @ingroup pktfuncs
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return One if the packet handle is valid, else zero.
+ */
+static __inline unsigned int
+NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle)
+{
+  return handle.word != 0;  /* a zero word is NETIO_PKT_HANDLE_NONE */
+}
+
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] handle Handle on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle)
+{
+  return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; /* skip pad */
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * The flags word holds this length in 4-byte words, but callers want
+   * bytes.  Instead of extracting the field and then shifting it left by
+   * 2, shift right by two bits fewer and move the mask up two bits; the
+   * result is the same field value, already multiplied by 4.
+   */
+  return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_CUSTOM_LEN_RMASK << 2));
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /* Total transfer size, less the padding bytes; mda is not consulted. */
+  return pkt->__notif_header.bits.__transfer_size - NETIO_PACKET_PADDING;
+}
+
+
+/** Return a pointer to the start of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt));  /* mda unused */
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  /*
+   * The flags word holds a quantity measured in 4-byte words.  We turn it
+   * into bytes without a separate left shift by 2: shift right two bits
+   * fewer and move the mask up two bits.  Two more bytes are then added
+   * (every supported L2 header is 4n + 2 bytes long).
+   */
+  return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) &
+          (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2;
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  const netio_size_t custom_len = NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt);
+  return custom_len - NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return a pointer to the start of the packet's L2 (Ethernet) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* custom_hdr = NETIO_PKT_CUSTOM_DATA_M(mda, pkt);
+  return custom_hdr + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally,
+ *  the IP) header.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  const netio_size_t l2_len = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+  return l2_len - NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup ingress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned char* l2_hdr = NETIO_PKT_L2_DATA_M(mda, pkt);
+  return l2_hdr + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__packet_ordinal;  /* pkt is not used */
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__group_ordinal;  /* pkt is not used */
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value will be zero if the packet does not have a VLAN tag, or if
+ * this value was not extracted from the packet.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK;
+  unsigned short* pkt_p;
+  int index;
+  unsigned short val;
+  /* The VLAN field of the flags word says there is nothing to read. */
+  if (vl == _NETIO_PKT_VLAN_NONE)
+    return 0;
+  /* View the L2 header as halfwords; the type field picks the table row. */
+  pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+  /* That row yields the halfword index to read within the L2 header. */
+  val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) &
+              _NETIO_PKT_INFO_VLAN_RMASK];
+  /* 16-bit byte swap of val (bytex presumably == bswap32); keep 12 bits. */
+#ifdef __TILECC__
+  return (__insn_bytex(val) >> 16) & 0xFFF;
+#else
+  return (__builtin_bswap32(val) >> 16) & 0xFFF;
+#endif
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is usually contained within the packet header.
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt);
+  int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK;
+  /* The table row yields the halfword index to read within the L2 header. */
+  unsigned short val =
+    pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) &
+          _NETIO_PKT_INFO_ETYPE_RMASK];
+  /* NOTE(review): no __TILECC__ variant here, unlike NETIO_PKT_VLAN_ID_M. */
+  return __builtin_bswap32(val) >> 16;
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__flow_hash;  /* pkt is not used */
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__user_data_0;  /* pkt is not used */
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return mda->__user_data_1;  /* pkt is not used */
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  const unsigned int flags = mda->__flags;
+  return !(flags & (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK));
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK) == 0;
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  const unsigned int flags = mda->__flags;
+  return !(flags & (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK));
+}
+
+
+/** Determine whether the ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK) == 0;
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (netio_pkt_status_t) pkt->__notif_header.bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  const netio_pkt_status_t status = NETIO_PKT_STATUS_M(mda, pkt);
+  return ((status & 1) && (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) ||
+                           status == NETIO_PKT_STATUS_BAD));
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return mmd->l2_length;  /* pkt is not used */
+}
+
+
+/** Return the length of the L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                              netio_pkt_t* pkt)
+{
+  return mmd->l3_offset - mmd->l2_offset;  /* gap between the two offsets */
+}
+
+
+/** Return the length of the packet, starting with the L3 (IP) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  const netio_size_t l2_len = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+  return l2_len - NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt);
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup egress
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return _NETIO_PKT_BASE(pkt) + mmd->l3_offset;  /* offset is in bytes */
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return _NETIO_PKT_BASE(pkt) + mmd->l2_offset;  /* offset is in bytes */
+}
+
+
+/** Retrieve the status of a packet and any errors that may have occurred
+ * during ingress processing (length mismatches, CRC errors, etc.).
+ * @ingroup ingress
+ *
+ * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns zero are always reported as underlength, as there is no a priori
+ * means to determine their length.  Normally, applications should use
+ * @ref NETIO_PKT_BAD() instead of explicitly checking status with this
+ * function.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's status.
+ */
+static __inline netio_pkt_status_t
+NETIO_PKT_STATUS(netio_pkt_t* pkt)
+{
+  netio_assert(!pkt->__packet.bits.__minimal);
+  /* The status is read from the notification header, not the metadata. */
+  return (netio_pkt_status_t) pkt->__notif_header.bits.__status;
+}
+
+
+/** Report whether a packet is bad (i.e., was shorter than expected based on
+ *  its headers, or had a bad CRC).
+ * @ingroup ingress
+ *
+ * Note that this function does not verify L3 or L4 checksums.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the packet is bad and should be discarded.
+ */
+static __inline unsigned int
+NETIO_PKT_BAD(netio_pkt_t* pkt)
+{
+  /* NETIO_PKT_METADATA() asserts that pkt carries standard metadata. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_BAD_M(std_md, pkt);
+}
+
+
+/** Return the length of the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's custom header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  /* NETIO_PKT_METADATA() asserts that pkt carries standard metadata. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(std_md, pkt);
+}
+
+
+/** Return the length of the packet, starting with the custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt)
+{
+  /* NETIO_PKT_METADATA() asserts that pkt carries standard metadata. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_CUSTOM_LENGTH_M(std_md, pkt);
+}
+
+
+/** Return a pointer to the packet's custom header.
+ *  A custom header may or may not be present, depending upon the IPP; its
+ *  contents and alignment are also IPP-dependent.  Currently, none of the
+ *  standard IPPs supplied by Tilera produce a custom header.  If present,
+ *  the custom header precedes the L2 header in the packet buffer.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_CUSTOM_DATA_M(std_md, pkt);
+}
+
+
+/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The length of the packet's L2 header, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_L2_HEADER_LENGTH_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L2_HEADER_LENGTH_M(std_md, pkt);
+}
+
+
+/** Return the length of the packet, starting with the L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return  The length of the packet, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_L2_LENGTH_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L2_LENGTH_M(std_md, pkt);
+}
+
+
+/** Return a pointer to the packet's L2 (Ethernet) header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the start of the packet.
+ */
+static __inline unsigned char*
+NETIO_PKT_L2_DATA(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_L2_DATA_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L2_DATA_M(std_md, pkt);
+}
+
+
+/** Retrieve the length of the packet, starting with the L3 (generally, the IP)
+ * header.
+ * @ingroup pktfuncs
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Length of the packet's L3 header and data, in bytes.
+ */
+static __inline netio_size_t
+NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_L3_LENGTH_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L3_LENGTH_M(std_md, pkt);
+}
+
+
+/** Return a pointer to the packet's L3 (generally, the IP) header.
+ * @ingroup pktfuncs
+ *
+ * Note that we guarantee word alignment of the L3 header.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's L3 header.
+ */
+static __inline unsigned char*
+NETIO_PKT_L3_DATA(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_L3_DATA_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L3_DATA_M(std_md, pkt);
+}
+
+
+/** Return the ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given an ordinal number when it is delivered by the IPP.
+ * In the medium term, the ordinal is unique and monotonically increasing,
+ * being incremented by 1 for each packet; the ordinal of the first packet
+ * delivered after the IPP starts is zero.  (Since the ordinal is of finite
+ * size, given enough input packets, it will eventually wrap around to zero;
+ * in the long term, therefore, ordinals are not unique.)  The ordinals
+ * handed out by different IPPs are not disjoint, so two packets from
+ * different IPPs may have identical ordinals.  Packets dropped by the
+ * IPP or by the I/O shim are not assigned ordinals.
+ *
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP packet ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_ORDINAL(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_ORDINAL_M(std_md, pkt);
+}
+
+
+/** Return the per-group ordinal of the packet.
+ * @ingroup ingress
+ *
+ * Each packet is given a per-group ordinal number when it is
+ * delivered by the IPP. By default, the group is the packet's VLAN,
+ * although the IPP can be recompiled to use different values.  In
+ * the medium term, the ordinal is unique and monotonically
+ * increasing, being incremented by 1 for each packet; the ordinal of
+ * the first packet distributed to a particular group is zero.
+ * (Since the ordinal is of finite size, given enough input packets,
+ * it will eventually wrap around to zero; in the long term,
+ * therefore, ordinals are not unique.)  The ordinals handed out by
+ * different IPPs are not disjoint, so two packets from different IPPs
+ * may have identical ordinals; similarly, packets distributed to
+ * different groups may have identical ordinals.  Packets dropped by
+ * the IPP or by the I/O shim are not assigned ordinals.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's per-IPP, per-group ordinal.
+ */
+static __inline unsigned int
+NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_GROUP_ORDINAL_M(std_md, pkt);
+}
+
+
+/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress
+ *
+ * This is usually also contained within the packet header.  If the packet
+ * does not have a VLAN tag, the VLAN ID returned by this function is zero.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's VLAN ID.
+ */
+static __inline unsigned short
+NETIO_PKT_VLAN_ID(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_VLAN_ID_M(std_md, pkt);
+}
+
+
+/** Return the ethertype of the packet.
+ * @ingroup ingress
+ *
+ * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED()
+ * returns true, and otherwise, may not be well defined.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's ethertype.
+ */
+static __inline unsigned short
+NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_ETHERTYPE_M(std_md, pkt);
+}
+
+
+/** Return the flow hash computed on the packet.
+ * @ingroup ingress
+ *
+ * For TCP and UDP packets, this hash is calculated by hashing together
+ * the "5-tuple" values, specifically the source IP address, destination
+ * IP address, protocol type, source port and destination port.
+ * The hash value is intended to be helpful for millions of distinct
+ * flows.
+ *
+ * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is
+ * derived by hashing together the source and destination IP addresses.
+ *
+ * For MPLS-encapsulated packets, the flow hash is derived by hashing
+ * the first MPLS label.
+ *
+ * For all other packets the flow hash is computed from the source
+ * and destination Ethernet addresses.
+ *
+ * The hash is symmetric, meaning it produces the same value if the
+ * source and destination are swapped. The only exceptions are
+ * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple
+ * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32
+ * (Encap Security Payload), which use only the destination address
+ * since the source address is not meaningful.
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's 32-bit flow hash.
+ */
+static __inline unsigned int
+NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_FLOW_HASH_M(std_md, pkt);
+}
+
+
+/** Return the first word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first
+ * word of user data contains the least significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_low()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's first word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_USER_DATA_0_M(std_md, pkt);
+}
+
+
+/** Return the second word of "user data" for the packet.
+ *
+ * The contents of the user data words depend on the IPP.
+ *
+ * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second
+ * word of user data contains the most significant bits of the 64-bit
+ * arrival cycle count (see @c get_cycle_count_high()).
+ *
+ * See the <em>System Programmer's Guide</em> for details.
+ *
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The packet's second word of "user data".
+ */
+static __inline unsigned int
+NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_USER_DATA_1_M(std_md, pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L4 checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L4_CSUM_CALCULATED_M(std_md, pkt);
+}
+
+
+/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to
+ *  be correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L4_CSUM_CORRECT_M(std_md, pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the L3 (IP) checksum was calculated.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L3_CSUM_CALCULATED_M(std_md, pkt);
+}
+
+
+/** Determine whether the L3 (IP) checksum was calculated and found to be
+ *  correct.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the checksum was calculated and is correct.
+ */
+static __inline unsigned int
+NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_L3_CSUM_CORRECT_M(std_md, pkt);
+}
+
+
+/** Determine whether the Ethertype was recognized and L3 packet data was
+ *  processed.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the Ethertype was recognized and L3 packet data was
+ *   processed.
+ */
+static __inline unsigned int
+NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt)
+{
+  /* Resolve the standard metadata once and hand off to the _M variant. */
+  netio_pkt_metadata_t* std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(std_md, pkt);
+}
+
+
+/** Set an egress packet's L2 length, using a metadata pointer to speed the
+ * computation.
+ * @ingroup egress
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt,
+                           int len)
+{
+  mmd->l2_length = len;  /* Only the metadata is written; pkt is unused. */
+}
+
+
+/** Set an egress packet's L2 length.
+ * @ingroup egress
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len)
+{
+  /* Resolve the minimal metadata, then let the _MM variant store len. */
+  netio_pkt_minimal_metadata_t* min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+  NETIO_PKT_SET_L2_LENGTH_MM(min_md, pkt, len);
+}
+
+
+/** Set an egress packet's L2 header length, using a metadata pointer to
+ *  speed the computation.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt, int len)
+{
+  mmd->l3_offset = mmd->l2_offset + len;  /* Stored as the L3 offset. */
+}
+
+
+/** Set an egress packet's L2 header length.
+ * @ingroup egress
+ *
+ * It is not normally necessary to call this routine; only the L2 length,
+ * not the header length, is needed to transmit a packet.  It may be useful if
+ * the egress packet will later be processed by code which expects to use
+ * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] len Packet L2 header length, in bytes.
+ */
+static __inline void
+NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
+{
+  /* Resolve the minimal metadata, then let the _MM variant apply len. */
+  netio_pkt_minimal_metadata_t* min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+  NETIO_PKT_SET_L2_HEADER_LENGTH_MM(min_md, pkt, len);
+}
+
+
+/** Set up an egress packet for hardware checksum computation, using a
+ *  metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes, beginning at @p start, to include in
+ *   the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
+                            netio_pkt_t* pkt, int start, int length,
+                            int location, uint16_t seed)
+{
+  mmd->csum_start = start;        /* pkt itself is not touched here. */
+  mmd->csum_length = length;
+  mmd->csum_location = location;
+  mmd->csum_seed = seed;
+  mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;  /* Other flags are kept. */
+}
+
+
+/** Set up an egress packet for hardware checksum computation.
+ * @ingroup egress
+ *
+ *  NetIO provides the ability to automatically calculate a standard
+ *  16-bit Internet checksum on transmitted packets.  The application
+ *  may specify the point in the packet where the checksum starts, the
+ *  number of bytes to be checksummed, and the two bytes in the packet
+ *  which will be replaced with the completed checksum.  (If the range
+ *  of bytes to be checksummed includes the bytes to be replaced, the
+ *  initial values of those bytes will be included in the checksum.)
+ *
+ *  For some protocols, the packet checksum covers data which is not present
+ *  in the packet, or is at least not contiguous to the main data payload.
+ *  For instance, the TCP checksum includes a "pseudo-header" which includes
+ *  the source and destination IP addresses of the packet.  To accommodate
+ *  this, the checksum engine may be "seeded" with an initial value, which
+ *  the application would need to compute based on the specific protocol's
+ *  requirements.  Note that the seed is given in host byte order (little-
+ *  endian), not network byte order (big-endian); code written to compute a
+ *  pseudo-header checksum in network byte order will need to byte-swap it
+ *  before use as the seed.
+ *
+ *  Note that the checksum is computed as part of the transmission process,
+ *  so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes, beginning at @p start, to include in
+ *   the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
+                         int location, uint16_t seed)
+{
+  /* Resolve the minimal metadata, then let the _MM variant fill it in. */
+  netio_pkt_minimal_metadata_t* min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+  NETIO_PKT_DO_EGRESS_CSUM_MM(min_md, pkt, start, length, location, seed);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ *
+ * @param[in,out] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) +  /* i.e. __offset * 64 */
+         NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ *  metadata pointer to speed the operation.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{ /* Result is (__offset * 64) plus the L2 offset from the metadata. */
+  return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
+}
+
+
+/** Return the number of bytes which could be prepended to a packet.
+ *  See @ref netio_populate_prepend_buffer() to get a full description of
+ *  prepending.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
+{
+  netio_pkt_minimal_metadata_t* min_md;
+  netio_pkt_metadata_t* std_md;
+
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    min_md = NETIO_PKT_MINIMAL_METADATA(pkt);
+    return NETIO_PKT_PREPEND_AVAIL_MM(min_md, pkt);
+  }
+
+  /* Not minimal: answer from the standard metadata instead. */
+  std_md = NETIO_PKT_METADATA(pkt);
+  return NETIO_PKT_PREPEND_AVAIL_M(std_md, pkt);
+}
+
+
+/** Flush a packet's minimal metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                    netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                        netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Flush a packet's metadata from the cache, using a metadata pointer
+ *  to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Invalidate a packet's metadata from the cache, using a metadata
+ *  pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache,
+ *  using a metadata pointer to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; neither argument is used. */
+}
+
+
+/** Flush a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+
+/** Flush a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+
+/** Invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+
+/** Flush and then invalidate a packet's metadata from the cache.
+ * @ingroup ingress
+ *
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt)
+{ /* Empty body: this is a no-op; pkt is not used. */
+}
+
+/** Number of NUMA nodes we can distribute buffers to.
+ * @ingroup setup */
+#define NETIO_NUM_NODE_WEIGHTS  16
+
+/**
+ * @brief An object for specifying the characteristics of a NetIO
+ * communication endpoint.
+ *
+ * @ingroup setup
+ *
+ * The @ref netio_input_register() function uses this structure to define
+ * how an application tile will communicate with an IPP.
+ *
+ *
+ * Future updates to NetIO may add new members to this structure,
+ * which can affect the success of the registration operation.  Thus,
+ * if dynamically initializing the structure, applications are urged to
+ * zero it out first, for example:
+ *
+ * @code
+ * netio_input_config_t config;
+ * memset(&config, 0, sizeof (config));
+ * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
+ * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
+ * config.queue_id = 0;
+ *     .
+ *     .
+ *     .
+ * @endcode
+ *
+ * since that guarantees that any unused structure members, including
+ * members which did not exist when the application was first developed,
+ * will not have unexpected values.
+ *
+ * If statically initializing the structure, we strongly recommend use of
+ * C99-style named initializers, for example:
+ *
+ * @code
+ * netio_input_config_t config = {
+ *    .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
+ *    .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
+ *    .queue_id = 0,
+ * };
+ * @endcode
+ *
+ * instead of the old-style structure initialization:
+ *
+ * @code
+ * // Bad example! Currently equivalent to the above, but don't do this.
+ * netio_input_config_t config = {
+ *    NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
+ * };
+ * @endcode
+ *
+ * since the C99 style requires no changes to the code if elements of the
+ * config structure are rearranged.  (It also makes the initialization much
+ * easier to understand.)
+ *
+ * Except for items which address a particular tile's transmit or receive
+ * characteristics, such as the ::NETIO_RECV flag, applications are advised
+ * to specify the same set of configuration data on all registrations.
+ * This prevents differing results if multiple tiles happen to do their
+ * registration operations in a different order on different invocations of
+ * the application.  This is particularly important for things like link
+ * management flags, and buffer size and homing specifications.
+ *
+ * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
+ * buffer pool is automatically created and mapped into the application's
+ * virtual address space at an address chosen by the operating system,
+ * using the common memory (cmem) facility in the Tilera Multicore
+ * Components library.  The cmem facility allows multiple processes to gain
+ * access to shared memory which is mapped into each process at an
+ * identical virtual address.  In order for this to work, the processes
+ * must have a common ancestor, which must create the common memory using
+ * tmc_cmem_init().
+ *
+ * In programs using the iLib process creation API, or in programs which use
+ * only one process (which include programs using the pthreads library),
+ * tmc_cmem_init() is called automatically.  All other applications
+ * must call it explicitly, before any child processes which might call
+ * netio_input_register() are created.
+ */
+typedef struct
+{
+  /** Registration characteristics.
+
+      This value determines several characteristics of the registration;
+      flags for different types of behavior are ORed together to make the
+      final flag value.  Generally applications should specify exactly
+      one flag from each of the following categories:
+
+      - Whether the application will be receiving packets on this queue
+        (::NETIO_RECV or ::NETIO_NO_RECV).
+
+      - Whether the application will be transmitting packets on this queue,
+        and if so, whether it will request egress checksum calculation
+        (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT).  It is
+        legal to call netio_get_buffer() without one of the XMIT flags,
+        as long as ::NETIO_RECV is specified; in this case, the retrieved
+        buffers must be passed to another tile for transmission.
+
+      - Whether the application expects any vendor-specific tags in
+        its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
+        or ::NETIO_TAG_MRVL).  This must match the configuration of the
+        target IPP.
+
+      To accommodate applications written to previous versions of the NetIO
+      interface, none of the flags above are currently required; if omitted,
+      NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
+      ::NETIO_TAG_NONE were used.  However, explicit specification of
+      the relevant flags allows NetIO to do a better job of resource
+      allocation, allows earlier detection of certain configuration errors,
+      and may enable advanced features or higher performance in the future,
+      so their use is strongly recommended.
+
+      Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT
+      is a special case, intended primarily for use by programs which
+      retrieve network statistics or do link management operations.
+      When these flags are both specified, the resulting queue may not
+      be used with NetIO routines other than netio_get(), netio_set(),
+      and netio_input_unregister().  See @ref link for more information
+      on link management.
+
+      Other flags are optional; their use is described below.
+  */
+  int flags;
+
+  /** Interface name.  This is a string which identifies the specific
+      Ethernet controller hardware to be used.  The format of the string
+      is a device type and a device index, separated by a slash; so,
+      the first 10 Gigabit Ethernet controller is named "xgbe/0", while
+      the second 10/100/1000 Megabit Ethernet controller is named "gbe/1".
+   */
+  const char* interface;
+
+  /** Receive packet queue size.  This specifies the maximum number
+      of ingress packets that can be received on this queue without
+      being retrieved by @ref netio_get_packet().  If the IPP's distribution
+      algorithm calls for a packet to be sent to this queue, and this
+      number of packets are already pending there, the new packet
+      will either be discarded, or sent to another tile registered
+      for the same queue_id (see @ref drops).  This value must
+      be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least
+      ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain
+      interfaces.
+   */
+  int num_receive_packets;
+
+  /** The queue ID being requested.  Legal values for this range from 0
+      to ::NETIO_MAX_QUEUE_ID, inclusive.  ::NETIO_MAX_QUEUE_ID is always
+      greater than or equal to the number of tiles; this allows one queue
+      for each tile, plus at least one additional queue.  Some applications
+      may wish to use the additional queue as a destination for unwanted
+      packets, since packets delivered to queues for which no tiles have
+      registered are discarded.
+   */
+  unsigned int queue_id;
+
+  /** Maximum number of small send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds
+      empty small egress buffers requested from the IPP but not yet
+      retrieved via @ref netio_get_buffer().  This value must be greater
+      than zero if the application will ever use @ref netio_get_buffer()
+      to allocate empty small egress buffers; it may be no larger than
+      ::NETIO_MAX_SEND_BUFFERS.  See @ref epp for more details on empty
+      buffer caching.
+   */
+  int num_send_buffers_small_total;
+
+  /** Number of small send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty small egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_small_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_small_prealloc;
+
+  /** Maximum number of large send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      large egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_total;
+
+  /** Number of large send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty large egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_large_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_large_prealloc;
+
+  /** Maximum number of jumbo send buffers to be held in the local empty
+      buffer cache.  This specifies the size of the area which holds empty
+      jumbo egress buffers requested from the IPP but not yet retrieved via
+      @ref netio_get_buffer().  This value must be greater than zero if the
+      application will ever use @ref netio_get_buffer() to allocate empty
+      jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS.
+      See @ref epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_total;
+
+  /** Number of jumbo send buffers to be preallocated at registration.
+      If this value is nonzero, the specified number of empty jumbo egress
+      buffers will be requested from the IPP during the netio_input_register
+      operation; this may speed the execution of @ref netio_get_buffer().
+      This may be no larger than @ref num_send_buffers_jumbo_total.  See @ref
+      epp for more details on empty buffer caching.
+   */
+  int num_send_buffers_jumbo_prealloc;
+
+  /** Total packet buffer size.  This determines the total size, in bytes,
+      of the NetIO buffer pool.  Note that the maximum number of available
+      buffers of each size is determined during hypervisor configuration
+      (see the <em>System Programmer's Guide</em> for details); this just
+      influences how much host memory is allocated for those buffers.
+
+      The buffer pool is allocated from common memory, which will be
+      automatically initialized if needed.  If your buffer pool is larger
+      than 240 MB, you might need to explicitly call @c tmc_cmem_init(),
+      as described in the Application Libraries Reference Manual (UG227).
+
+      Packet buffers are currently allocated in chunks of 16 MB; this
+      value will be rounded up to the next larger multiple of 16 MB.
+      If this value is zero, a default of 32 MB will be used; this was
+      the value used by previous versions of NetIO.  Note that taking this
+      default also affects the placement of buffers on Linux NUMA nodes.
+      See @ref buffer_node_weights for an explanation of buffer placement.
+
+      In order to successfully allocate packet buffers, Linux must have
+      available huge pages on the relevant Linux NUMA nodes.  See the
+      <em>System Programmer's Guide</em> for information on configuring
+      huge page support in Linux.
+   */
+  uint64_t total_buffer_size;
+
+  /** Buffer placement weighting factors.
+
+      This array specifies the relative amount of buffering to place
+      on each of the available Linux NUMA nodes.  This array is
+      indexed by the NUMA node, and the values in the array are
+      proportional to the amount of buffer space to allocate on that
+      node.
+
+      If memory striping is enabled in the Hypervisor, then there is
+      only one logical NUMA node (node 0). In that case, NetIO will by
+      default ignore the suggested buffer node weights, and buffers
+      will be striped across the physical memory controllers. See
+      UG209 System Programmer's Guide for a description of the
+      hypervisor option that controls memory striping.
+
+      If memory striping is disabled, then there are up to four NUMA
+      nodes, corresponding to the four DDRAM controllers in the TILE
+      processor architecture.  See UG100 Tile Processor Architecture
+      Overview for a diagram showing the location of each of the DDRAM
+      controllers relative to the tile array.
+
+      For instance, if memory striping is disabled, the following
+      configuration structure:
+
+      @code
+      netio_input_config_t config = {
+            .
+            .
+            .
+        .total_buffer_size = 4 * 16 * 1024 * 1024,
+        .buffer_node_weights = { 1, 0, 1, 0 },
+      };
+      @endcode
+
+      would result in 32 MB of buffers being placed on controller 0, and
+      32 MB on controller 2.  (Since buffers are allocated in units of
+      16 MB, some sets of weights will not be able to be matched exactly.)
+
+      For the weights to be effective, @ref total_buffer_size must be
+      nonzero.  If @ref total_buffer_size is zero, causing the default
+      32 MB of buffer space to be used, then any specified weights will
+      be ignored, and buffers will be positioned as they were in previous
+      versions of NetIO:
+
+      - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
+        and the other 16 MB will be placed on controller 2.
+
+      - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
+        and the other 16 MB will be placed on controller 3.
+
+      If @ref total_buffer_size is nonzero, but all weights are zero,
+      then all buffer space will be allocated on Linux NUMA node zero.
+
+      By default, the specified buffer placement is treated as a hint;
+      if sufficient free memory is not available on the specified
+      controllers, the buffers will be allocated elsewhere.  However,
+      if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
+      failure to allocate buffer space exactly as requested will cause the
+      registration operation to fail with an error of ::NETIO_CANNOT_HOME.
+
+      Note that maximal network performance cannot be achieved with
+      only one memory controller.
+   */
+  uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
+
+  /** Fixed virtual address for packet buffers.  Only valid when
+      ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
+      description of that flag for details.
+   */
+  void* fixed_buffer_va;
+
+  /**
+      Maximum number of outstanding send packet requests.  This value is
+      only relevant when an EPP is in use; it determines the number of
+      slots in the EPP's outgoing packet queue which this tile is allowed
+      to consume, and thus the number of packets which may be sent before
+      the sending tile must wait for an acknowledgment from the EPP.
+      Modifying this value is generally only helpful when using @ref
+      netio_send_packet_vector(), where it can help improve performance by
+      allowing a single vector send operation to process more packets.
+      Typically it is not specified, and the default, which divides the
+      outgoing packet slots evenly between all tiles on the chip, is used.
+
+      If a registration asks for more outgoing packet queue slots than are
+      available, ::NETIO_TOOMANY_XMIT will be returned.  The total number
+      of packet queue slots which are available for all tiles for each EPP
+      is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
+
+
+      This value is ignored if ::NETIO_XMIT is not specified in flags.
+      If you want to specify a large value here for a specific tile, you are
+      advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so
+      that they do not consume a default number of packet slots.  Any tile
+      transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING
+      slots allocated to it; values less than that will be silently
+      increased by the NetIO library.
+   */
+  int num_sends_outstanding;
+}
+netio_input_config_t;
+
+
+/** Registration flags; used in the @ref netio_input_config_t structure.
+ * @addtogroup setup
+ */
+/** @{ */
+
+/** Fail a registration request if we can't put packet buffers
+    on the specified memory controllers. */
+#define NETIO_STRICT_HOMING   0x00000002
+
+/** This application expects no tags on its L2 headers. */
+#define NETIO_TAG_NONE        0x00000004
+
+/** This application expects Marvell extended tags on its L2 headers. */
+#define NETIO_TAG_MRVL        0x00000008
+
+/** This application expects Broadcom tags on its L2 headers. */
+#define NETIO_TAG_BRCM        0x00000010
+
+/** This registration may call routines which receive packets. */
+#define NETIO_RECV            0x00000020
+
+/** This registration may not call routines which receive packets. */
+#define NETIO_NO_RECV         0x00000040
+
+/** This registration may call routines which transmit packets. */
+#define NETIO_XMIT            0x00000080
+
+/** This registration may call routines which transmit packets with
+    checksum acceleration. */
+#define NETIO_XMIT_CSUM       0x00000100
+
+/** This registration may not call routines which transmit packets. */
+#define NETIO_NO_XMIT         0x00000200
+
+/** This registration wants NetIO buffers mapped at an application-specified
+    virtual address.
+
+    NetIO buffers are by default created by the TMC common memory facility,
+    which must be configured by a common ancestor of all processes sharing
+    a network interface.  When this flag is specified, NetIO buffers are
+    instead mapped at an address chosen by the application (and specified
+    in @ref netio_input_config_t::fixed_buffer_va).  This allows multiple
+    unrelated but cooperating processes to share a NetIO interface.
+    All processes sharing the same interface must specify this flag,
+    and all must specify the same fixed virtual address.
+
+    @ref netio_input_config_t::fixed_buffer_va must be a
+    multiple of 16 MB, and the packet buffers will occupy @ref
+    netio_input_config_t::total_buffer_size bytes of virtual address
+    space, beginning at that address.  If any of those virtual addresses
+    are currently occupied by other memory objects, like application or
+    shared library code or data, @ref netio_input_register() will return
+    ::NETIO_FAULT.  While it is impossible to provide a fixed_buffer_va
+    which will work for all applications, a good first guess might be to
+    use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size.
+    If that fails, it might be helpful to consult the running application's
+    virtual address description file (/proc/<em>pid</em>/maps) to see
+    which regions of virtual address space are available.
+ */
+#define NETIO_FIXED_BUFFER_VA 0x00000400
+
+/** This registration call will not complete unless the network link
+    is up.  The process will wait several seconds for this to happen (the
+    precise interval is link-dependent), but if the link does not come up,
+    ::NETIO_LINK_DOWN will be returned.  This flag is the default if
+    ::NETIO_NOREQUIRE_LINK_UP is not specified.  Note that this flag by
+    itself does not request that the link be brought up; that can be done
+    with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the
+    latter is the default if no NETIO_AUTO_LINK_xxx flags are specified),
+    or by explicitly setting the link's desired state via netio_set().
+    If the link is not brought up by one of those methods, and this flag
+    is specified, the registration operation will return ::NETIO_LINK_DOWN.
+    This flag is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_REQUIRE_LINK_UP    0x00000800
+
+/** This registration call will complete even if the network link is not up.
+    Whenever the link is not up, packets will not be sent or received:
+    netio_get_packet() will return ::NETIO_NOPKT once all queued packets
+    have been drained, and netio_send_packet() and similar routines will
+    return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP
+    or the I/O shim is full.  See @ref link for more information on link
+    management.
+ */
+#define NETIO_NOREQUIRE_LINK_UP  0x00001000
+
+#ifndef __DOXYGEN__
+/*
+ * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags,
+ * but should not be used directly by applications, and are thus not
+ * documented.
+ */
+#define _NETIO_AUTO_UP        0x00002000
+#define _NETIO_AUTO_DN        0x00004000
+#define _NETIO_AUTO_PRESENT   0x00008000
+#endif
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Do not take down the link automatically.  This is the default if
+    no other NETIO_AUTO_LINK_xxx flags are specified.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UP     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP)
+
+/** Set the desired state of the link to up, allowing any speeds which are
+    supported by the link hardware, as part of this registration operation.
+    Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_UPDN   (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \
+                                _NETIO_AUTO_DN)
+
+/** Set the desired state of the link to down the next time no tiles are
+    registered for packet reception or transmission.  This flag is ignored
+    if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV.
+    See @ref link for more information on link management.
+ */
+#define NETIO_AUTO_LINK_DN     (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN)
+
+/** Do not bring up the link automatically as part of this registration
+    operation.  Do not take down the link automatically.  This flag
+    is ignored if it is specified along with ::NETIO_NO_XMIT and
+    ::NETIO_NO_RECV.  See @ref link for more information on link management.
+  */
+#define NETIO_AUTO_LINK_NONE   _NETIO_AUTO_PRESENT
+
+
+/** Minimum number of receive packets. */
+#define NETIO_MIN_RECEIVE_PKTS            16
+
+/** Lower bound on the maximum number of receive packets; may be higher
+    than this on some interfaces. */
+#define NETIO_MAX_RECEIVE_PKTS           128
+
+/** Maximum number of send buffers, per packet size. */
+#define NETIO_MAX_SEND_BUFFERS            16
+
+/** Number of EPP queue slots, and thus outstanding sends, per EPP. */
+#define NETIO_TOTAL_SENDS_OUTSTANDING   2015
+
+/** Minimum number of EPP queue slots, and thus outstanding sends, per
+ *  transmitting tile. */
+#define NETIO_MIN_SENDS_OUTSTANDING       16
+
+
+/**@}*/
+
+#ifndef __DOXYGEN__
+
+/**
+ * An object for providing Ethernet packets to a process.
+ */
+struct __netio_queue_impl_t;
+
+/**
+ * An object for managing the user end of a NetIO queue.
+ */
+struct __netio_queue_user_impl_t;
+
+#endif /* !__DOXYGEN__ */
+
+
+/** A netio_queue_t describes a NetIO communications endpoint.
+ * @ingroup setup
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[8];                 /**< This is an opaque structure. */
+#else
+  struct __netio_queue_impl_t* __system_part;    /**< The system part. */
+  struct __netio_queue_user_impl_t* __user_part; /**< The user part. */
+#ifdef _NETIO_PTHREAD
+  _netio_percpu_mutex_t lock;                    /**< Queue lock. */
+#endif
+#endif
+}
+netio_queue_t;
+
+
+/**
+ * @brief Packet send context.
+ *
+ * @ingroup egress
+ *
+ * Packet send context for use with netio_send_packet_prepare and _commit.
+ */
+typedef struct
+{
+#ifdef __DOXYGEN__
+  uint8_t opaque[44];   /**< This is an opaque structure. */
+#else
+  uint8_t flags;        /**< Defined below */
+  uint8_t datalen;      /**< Number of valid words pointed to by data. */
+  uint32_t request[9];  /**< Request to be sent to the EPP or shim.  Note
+                             that this is smaller than the 11-word maximum
+                             request size, since some constant values are
+                             not saved in the context. */
+  uint32_t *data;       /**< Data to be sent to the EPP or shim via IDN. */
+#endif
+}
+netio_send_pkt_context_t;
+
+
+#ifndef __DOXYGEN__
+#define SEND_PKT_CTX_USE_EPP   1  /**< We're sending to an EPP. */
+#define SEND_PKT_CTX_SEND_CSUM 2  /**< Request includes a checksum. */
+#endif
+
+/**
+ * @brief Packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * This data structure is used with netio_send_packet_vector() to send multiple
+ * packets with one NetIO call.  The structure should be initialized by
+ * calling netio_pkt_vector_set(), rather than by setting the fields
+ * directly.
+ *
+ * This structure is guaranteed to be a power of two in size, no
+ * bigger than one L2 cache line, and to be aligned modulo its size.
+ */
+typedef struct
+#ifndef __DOXYGEN__
+__attribute__((aligned(8)))
+#endif
+{
+  /** Reserved for use by the user application.  When initialized with
+   *  the netio_pkt_vector_set() function, this field is guaranteed
+   *  to be visible to readers only after all other fields are already
+   *  visible.  This way it can be used as a valid flag or generation
+   *  counter. */
+  uint8_t user_data;
+
+  /* Structure members below this point should not be accessed directly by
+   * applications, as they may change in the future. */
+
+  /** Low 8 bits of the packet address to send.  The high bits are
+   *  acquired from the 'handle' field. */
+  uint8_t buffer_address_low;
+
+  /** Number of bytes to transmit. */
+  uint16_t size;
+
+  /** The raw handle from a netio_pkt_t.  If this is NETIO_PKT_HANDLE_NONE,
+   *  this vector entry will be skipped and no packet will be transmitted. */
+  netio_pkt_handle_t handle;
+}
+netio_pkt_vector_entry_t;
+
+
+/**
+ * @brief Initialize fields in a packet vector entry.
+ *
+ * @ingroup egress
+ *
+ * @param[out] v Pointer to the vector entry to be initialized.
+ * @param[in] pkt Packet to be transmitted when the vector entry is passed to
+ *        netio_send_packet_vector().  Note that the packet's attributes
+ *        (e.g., its L2 offset and length) are captured at the time this
+ *        routine is called; subsequent changes in those attributes will not
+ *        be reflected in the packet which is actually transmitted.
+ *        Changes in the packet's contents, however, will be so reflected.
+ *        If this is NULL, no packet will be transmitted.
+ * @param[in] user_data User data to be set in the vector entry.
+ *        This function guarantees that the "user_data" field will become
+ *        visible to a reader only after all other fields have become visible.
+ *        This allows a structure in a ring buffer to be written and read
+ *        by a polling reader without any locks or other synchronization.
+ */
+static __inline void
+netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt,
+                     uint8_t user_data)
+{
+  if (pkt)
+  {
+    if (NETIO_PKT_IS_MINIMAL(pkt))  /* Read metadata in its minimal form. */
+    {
+      netio_pkt_minimal_metadata_t* mmd =
+        (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt);
+    }
+    else  /* Read metadata in its full form. */
+    {
+      netio_pkt_metadata_t* mda = &pkt->__metadata;
+      v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF;
+      v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt);
+    }
+    v->handle.word = pkt->__packet.word;  /* Copy the raw packet handle. */
+  }
+  else
+  {
+    v->handle.word = 0;   /* Set handle to NETIO_PKT_HANDLE_NONE. */
+  }
+
+  __asm__("" : : : "memory");  /* Compiler barrier before user_data store. */
+
+  v->user_data = user_data;  /* Stored last, after all other fields. */
+}
+
+
+/**
+ * Flags and structures for @ref netio_get() and @ref netio_set().
+ * @ingroup config
+ */
+
+/** @{ */
+/** Parameter class; addr is a NETIO_PARAM_xxx value. */
+#define NETIO_PARAM       0
+/** Interface MAC address. This address is only valid with @ref netio_get().
+ *  The value is a 6-byte MAC address.  Depending upon the overall system
+ *  design, a MAC address may or may not be available for each interface. */
+#define NETIO_PARAM_MAC        0
+
+/** Determine whether to suspend output on the receipt of pause frames.
+ *  If the value is nonzero, the I/O shim will suspend output when a pause
+ *  frame is received.  If the value is zero, pause frames will be ignored. */
+#define NETIO_PARAM_PAUSE_IN   1
+
+/** Determine whether to send pause frames if the I/O shim packet FIFOs are
+ *  nearly full.  If the value is zero, pause frames are not sent.  If
+ *  the value is nonzero, it is the delay value which will be sent in any
+ *  pause frames which are output, in units of 512 bit times. */
+#define NETIO_PARAM_PAUSE_OUT  2
+
+/** Jumbo frame support.  The value is a 4-byte integer.  If the value is
+ *  nonzero, the MAC will accept frames of up to 10240 bytes.  If the value
+ *  is zero, the MAC will only accept frames of up to 1544 bytes. */
+#define NETIO_PARAM_JUMBO      3
+
+/** I/O shim's overflow statistics register.  The value is two 16-bit integers.
+ *  The first 16-bit value (or the low 16 bits, if the value is treated as a
+ *  32-bit number) is the count of packets which were completely dropped and
+ *  not delivered by the shim.  The second 16-bit value (or the high 16 bits,
+ *  if the value is treated as a 32-bit number) is the count of packets
+ *  which were truncated and thus only partially delivered by the shim.  This
+ *  register is automatically reset to zero after it has been read.
+ */
+#define NETIO_PARAM_OVERFLOW   4
+
+/** IPP statistics.  This address is only valid with @ref netio_get().  The
+ *  value is a netio_stat_t structure.  Unlike the I/O shim statistics, the
+ *  IPP statistics are not all reset to zero on read; see the description
+ *  of the netio_stat_t for details. */
+#define NETIO_PARAM_STAT 5
+
+/** Possible link state.  The value is a combination of "NETIO_LINK_xxx"
+ *  flags.  With @ref netio_get(), this will indicate which flags are
+ *  actually supported by the hardware.
+ *
+ *  For historical reasons, specifying this value to netio_set() will have
+ *  the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
+ *  discouraged.
+ */
+#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
+
+/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
+ *  With @ref netio_set(), this will attempt to immediately bring up the
+ *  link using whichever of the requested flags are supported by the
+ *  hardware, or take down the link if the flags are zero; if this is
+ *  not possible, an error will be returned.  Many programs will want
+ *  to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
+ *
+ *  For historical reasons, specifying this value to netio_get() will
+ *  have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
+ *  but this usage is discouraged.
+ */
+#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
+
+/** Current link state. This address is only valid with @ref netio_get().
+ *  The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
+ *  If the link is down, the value ANDed with NETIO_LINK_SPEED will be
+ *  zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
+ *  result in exactly one of the NETIO_LINK_xxx values, indicating the
+ *  current speed. */
+#define NETIO_PARAM_LINK_CURRENT_STATE 7
+
+/** Variant symbol for current state, retained for compatibility with
+ *  pre-MDE-2.1 programs. */
+#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
+
+/** Packet Coherence protocol. This address is only valid with @ref netio_get().
+ *  The value is nonzero if the interface is configured for cache-coherent DMA.
+ */
+#define NETIO_PARAM_COHERENT 8
+
+/** Desired link state. The value is a combination of "NETIO_LINK_xxx"
+ *  flags, which specify the desired state for the link.  With @ref
+ *  netio_set(), this will, in the background, attempt to bring up the link
+ *  using whichever of the requested flags are reasonable, or take down the
+ *  link if the flags are zero.  The actual link up or down operation may
+ *  happen after this call completes.  If the link state changes in the
+ *  future, the system will continue to try to get back to the desired link
+ *  state; for instance, if the link is brought up successfully, and then
+ *  the network cable is disconnected, the link will go down.  However, the
+ *  desired state of the link is still up, so if the cable is reconnected,
+ *  the link will be brought up again.
+ *
+ *  With @ref netio_get(), this will indicate the desired state for the
+ *  link, as set with a previous netio_set() call, or implicitly by a
+ *  netio_input_register() or netio_input_unregister() operation.  This may
+ *  not reflect the current state of the link; to get that, use
+ *  ::NETIO_PARAM_LINK_CURRENT_STATE. */
+#define NETIO_PARAM_LINK_DESIRED_STATE 9
+
+/** NetIO statistics structure.  Retrieved using the ::NETIO_PARAM_STAT
+ *  address passed to @ref netio_get(). */
+typedef struct
+{
+  /** Number of packets which have been received by the IPP and forwarded
+   *  to a tile's receive queue for processing.  This value wraps at its
+   *  maximum, and is not cleared upon read. */
+  uint32_t packets_received;
+
+  /** Number of packets which have been dropped by the IPP, because they could
+   *  not be received, or could not be forwarded to a tile.  The former happens
+   *  when the IPP does not have a free packet buffer of suitable size for an
+   *  incoming frame.  The latter happens when all potential destination tiles
+   *  for a packet, as defined by the group, bucket, and queue configuration,
+   *  have full receive queues.   This value wraps at its maximum, and is not
+   *  cleared upon read. */
+  uint32_t packets_dropped;
+
+  /*
+   * Note: the #defines after each of the following four one-byte values
+   * denote their location within the third word of the netio_stat_t.  They
+   * are intended for use only by the IPP implementation and are thus omitted
+   * from the Doxygen output.
+   */
+
+  /** Number of packets dropped because no worker was able to accept a new
+   *  packet.  This value saturates at its maximum, and is cleared upon
+   *  read. */
+  uint8_t drops_no_worker;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_WORKER   0
+#endif
+
+  /** Number of packets dropped because no small buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_smallbuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_SMALLBUF 1
+#endif
+
+  /** Number of packets dropped because no large buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_largebuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_LARGEBUF 2
+#endif
+
+  /** Number of packets dropped because no jumbo buffers were available.
+   *  This value saturates at its maximum, and is cleared upon read. */
+  uint8_t drops_no_jumbobuf;
+#ifndef __DOXYGEN__
+#define NETIO_STAT_DROPS_NO_JUMBOBUF 3
+#endif
+}
+netio_stat_t;
+
+
+/** Link can run, should run, or is running at 10 Mbps. */
+#define NETIO_LINK_10M         0x01
+
+/** Link can run, should run, or is running at 100 Mbps. */
+#define NETIO_LINK_100M        0x02
+
+/** Link can run, should run, or is running at 1 Gbps. */
+#define NETIO_LINK_1G          0x04
+
+/** Link can run, should run, or is running at 10 Gbps. */
+#define NETIO_LINK_10G         0x08
+
+/** Link should run at the highest speed supported by the link and by
+ *  the device connected to the link.  Only usable as a value for
+ *  the link's desired state; never returned as a value for the current
+ *  or possible states. */
+#define NETIO_LINK_ANYSPEED    0x10
+
+/** All legal link speeds. */
+#define NETIO_LINK_SPEED  (NETIO_LINK_10M  | \
+                           NETIO_LINK_100M | \
+                           NETIO_LINK_1G   | \
+                           NETIO_LINK_10G  | \
+                           NETIO_LINK_ANYSPEED)
+
+
+/** MAC register class.  Addr is a register offset within the MAC.
+ *  Registers within the XGbE and GbE MACs are documented in the Tile
+ *  Processor I/O Device Guide (UG104). MAC registers start at address
+ *  0x4000, and do not include the MAC_INTERFACE registers. */
+#define NETIO_MAC             1
+
+/** MDIO register class (IEEE 802.3 clause 22 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO            2
+
+/** MDIO register class (IEEE 802.3 clause 45 format).  Addr is the "addr"
+ *  member of a netio_mdio_addr_t structure. */
+#define NETIO_MDIO_CLAUSE45   3
+
+/** NetIO MDIO address type.  Retrieved or provided using the ::NETIO_MDIO
+ *  address passed to @ref netio_get() or @ref netio_set(). */
+typedef union
+{
+  struct
+  {
+    unsigned int reg:16;  /**< MDIO register offset.  For clause 22 access,
+                               must be less than 32. */
+    unsigned int phy:5;   /**< Which MDIO PHY to access. */
+    unsigned int dev:5;   /**< Which MDIO device to access within that PHY.
+                               Applicable for clause 45 access only; ignored
+                               for clause 22 access. */
+  }
+  bits;                   /**< Container for bitfields. */
+  uint64_t addr;          /**< Value to pass to @ref netio_get() or
+                           *   @ref netio_set(). */
+}
+netio_mdio_addr_t;
+
+/** @} */
+
+#endif /* __NETIO_INTF_H__ */
-- 
cgit v1.2.3


From f02cbbe657939489347cbda598401a56913ffcbd Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 2 Nov 2010 12:05:10 -0400
Subject: pci root complex: support for tile architecture

This change enables PCI root complex support for TILEPro.  Unlike
TILE-Gx, TILEPro has no support for memory-mapped I/O, so the PCI
support consists of hypervisor upcalls for PIO, DMA, etc.  However,
the performance is fine for the devices we have tested with so far
(1Gb Ethernet, SATA, etc.).

The <asm/io.h> header was tweaked to be a little bit more aggressive
about disabling attempts to map/unmap IO port space.  The hacky
<asm/pci-bridge.h> header was rolled into the <asm/pci.h> header
and the result was simplified.  Both of the latter two headers were
preliminary versions not meant for release before now - oh well.

There is one quirk for our TILEmpower platform, which accidentally
negotiates up to 5GT and needs to be kicked down to 2.5GT.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/io.h         |  15 +++--
 arch/tile/include/asm/pci-bridge.h | 117 -------------------------------------
 arch/tile/include/asm/pci.h        | 107 +++++++++++++--------------------
 3 files changed, 52 insertions(+), 187 deletions(-)
 delete mode 100644 arch/tile/include/asm/pci-bridge.h

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index ee43328713ab..d3cbb9b14cbe 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr);
 #define ioremap_writethrough(physaddr, size)	ioremap(physaddr, size)
 #define ioremap_fullcache(physaddr, size)	ioremap(physaddr, size)
 
-void __iomem *ioport_map(unsigned long port, unsigned int len);
-extern inline void ioport_unmap(void __iomem *addr) {}
-
 #define mmiowb()
 
 /* Conversion between virtual and physical mappings.  */
@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
  * we never run, uses them unconditionally.
  */
 
-static inline int ioport_panic(void)
+static inline long ioport_panic(void)
 {
 	panic("inb/outb and friends do not exist on tile");
 	return 0;
 }
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	return (void __iomem *) ioport_panic();
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+	ioport_panic();
+}
+
 static inline u8 inb(unsigned long addr)
 {
 	return ioport_panic();
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h
deleted file mode 100644
index e853b0e2793b..000000000000
--- a/arch/tile/include/asm/pci-bridge.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_PCI_BRIDGE_H
-#define _ASM_TILE_PCI_BRIDGE_H
-
-#include <linux/ioport.h>
-#include <linux/pci.h>
-
-struct device_node;
-struct pci_controller;
-
-/*
- * pci_io_base returns the memory address at which you can access
- * the I/O space for PCI bus number `bus' (or NULL on error).
- */
-extern void __iomem *pci_bus_io_base(unsigned int bus);
-extern unsigned long pci_bus_io_base_phys(unsigned int bus);
-extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
-
-/* Allocate a new PCI host bridge structure */
-extern struct pci_controller *pcibios_alloc_controller(void);
-
-/* Helper function for setting up resources */
-extern void pci_init_resource(struct resource *res, unsigned long start,
-			      unsigned long end, int flags, char *name);
-
-/* Get the PCI host controller for a bus */
-extern struct pci_controller *pci_bus_to_hose(int bus);
-
-/*
- * Structure of a PCI controller (host bridge)
- */
-struct pci_controller {
-	int index;		/* PCI domain number */
-	struct pci_bus *root_bus;
-
-	int first_busno;
-	int last_busno;
-
-	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
-	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
-
-	struct pci_ops *ops;
-
-	int irq_base;		/* Base IRQ from the Hypervisor	*/
-	int plx_gen1;		/* flag for PLX Gen 1 configuration */
-
-	/* Address ranges that are routed to this controller/bridge. */
-	struct resource mem_resources[3];
-};
-
-static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
-{
-	return bus->sysdata;
-}
-
-extern void setup_indirect_pci_nomap(struct pci_controller *hose,
-			       void __iomem *cfg_addr, void __iomem *cfg_data);
-extern void setup_indirect_pci(struct pci_controller *hose,
-			       u32 cfg_addr, u32 cfg_data);
-extern void setup_grackle(struct pci_controller *hose);
-
-extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
-
-/*
- *   The following code swizzles for exactly one bridge.  The routine
- *   common_swizzle below handles multiple bridges.  But there are a
- *   some boards that don't follow the PCI spec's suggestion so we
- *   break this piece out separately.
- */
-static inline unsigned char bridge_swizzle(unsigned char pin,
-		unsigned char idsel)
-{
-	return (((pin-1) + idsel) % 4) + 1;
-}
-
-/*
- * The following macro is used to lookup irqs in a standard table
- * format for those PPC systems that do not already have PCI
- * interrupts properly routed.
- */
-/* FIXME - double check this */
-#define PCI_IRQ_TABLE_LOOKUP ({ \
-	long _ctl_ = -1; \
-	if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \
-		_ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
-	_ctl_; \
-})
-
-/*
- * Scan the buses below a given PCI host bridge and assign suitable
- * resources to all devices found.
- */
-extern int pciauto_bus_scan(struct pci_controller *, int);
-
-#ifdef CONFIG_PCI
-extern unsigned long pci_address_to_pio(phys_addr_t address);
-#else
-static inline unsigned long pci_address_to_pio(phys_addr_t address)
-{
-	return (unsigned long)-1;
-}
-#endif
-
-#endif /* _ASM_TILE_PCI_BRIDGE_H */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index b0c15da2d5d5..c3fc458a0d32 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,7 +15,29 @@
 #ifndef _ASM_TILE_PCI_H
 #define _ASM_TILE_PCI_H
 
-#include <asm/pci-bridge.h>
+#include <linux/pci.h>
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+	int index;		/* PCI domain number */
+	struct pci_bus *root_bus;
+
+	int first_busno;
+	int last_busno;
+
+	int hv_cfg_fd[2];	/* config{0,1} fds for this PCIe controller */
+	int hv_mem_fd;		/* fd to Hypervisor for MMIO operations */
+
+	struct pci_ops *ops;
+
+	int irq_base;		/* Base IRQ from the Hypervisor	*/
+	int plx_gen1;		/* flag for PLX Gen 1 configuration */
+
+	/* Address ranges that are routed to this controller/bridge. */
+	struct resource mem_resources[3];
+};
 
 /*
  * The hypervisor maps the entirety of CPA-space as bus addresses, so
@@ -24,56 +46,12 @@
  */
 #define PCI_DMA_BUS_IS_PHYS     1
 
-struct pci_controller *pci_bus_to_hose(int bus);
-unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
 int __init tile_pci_init(void);
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
-void __devinit pcibios_fixup_bus(struct pci_bus *bus);
 
-int __devinit _tile_cfg_read(struct pci_controller *hose,
-				    int bus,
-				    int slot,
-				    int function,
-				    int offset,
-				    int size,
-				    u32 *val);
-int __devinit _tile_cfg_write(struct pci_controller *hose,
-				     int bus,
-				     int slot,
-				     int function,
-				     int offset,
-				     int size,
-				     u32 val);
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 
-/*
- * These are used to to config reads and writes in the early stages of
- * setup before the driver infrastructure has been set up enough to be
- * able to do config reads and writes.
- */
-#define early_cfg_read(where, size, value) \
-	_tile_cfg_read(controller, \
-		       current_bus, \
-		       pci_slot, \
-		       pci_fn, \
-		       where, \
-		       size, \
-		       value)
-
-#define early_cfg_write(where, size, value) \
-	_tile_cfg_write(controller, \
-		       current_bus, \
-		       pci_slot, \
-		       pci_fn, \
-		       where, \
-		       size, \
-		       value)
-
-
-
-#define PCICFG_BYTE	1
-#define PCICFG_WORD	2
-#define PCICFG_DWORD	4
+void __devinit pcibios_fixup_bus(struct pci_bus *bus);
 
 #define	TILE_NUM_PCIE	2
 
@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 }
 
 /*
- * I/O space is currently not supported.
+ * pcibios_assign_all_busses() tells whether or not the bus numbers
+ * should be reassigned, in case the BIOS didn't do it correctly, or
+ * in case we don't have a BIOS and we want to let Linux do it.
  */
+static inline int pcibios_assign_all_busses(void)
+{
+	return 1;
+}
 
-#define TILE_PCIE_LOWER_IO		0x0
-#define TILE_PCIE_UPPER_IO		0x10000
-#define TILE_PCIE_PCIE_IO_SIZE		0x0000FFFF
-
-#define _PAGE_NO_CACHE		0
-#define _PAGE_GUARDED		0
-
-
-#define pcibios_assign_all_busses()    pci_assign_all_buses
-extern int pci_assign_all_buses;
-
+/*
+ * No special bus mastering setup handling.
+ */
 static inline void pcibios_set_master(struct pci_dev *dev)
 {
-	/* No special bus mastering setup handling */
 }
 
 #define PCIBIOS_MIN_MEM		0
-#define PCIBIOS_MIN_IO		TILE_PCIE_LOWER_IO
+#define PCIBIOS_MIN_IO		0
 
 /*
  * This flag tells if the platform is TILEmpower that needs
  * special configuration for the PLX switch chip.
  */
-extern int blade_pci;
+extern int tile_plx_gen1;
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask
 
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
@@ -122,7 +100,4 @@ extern int blade_pci;
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
-/* Use any cpu for PCI. */
-#define cpumask_of_pcibus(bus) cpu_online_mask
-
 #endif /* _ASM_TILE_PCI_H */
-- 
cgit v1.2.3


From 81711cee933599fa114abb0d258d8bbabef8adfb Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 14 Dec 2010 16:07:25 -0500
Subject: arch/tile: handle rt_sigreturn() more cleanly

The current tile rt_sigreturn() syscall pattern uses the common idiom
of loading up pt_regs with all the saved registers from the time of
the signal.  Because the syscall return path then clobbers the ABI
"return value" register (r0), it compensates by setting the
rt_sigreturn return value to whatever arbitrary value was in the
pt_regs for r0.

However, this breaks in our 64-bit kernel when running "compat" tasks,
since we always sign-extend the "return value" register to properly
handle returned pointers that are in the upper 2GB of the 32-bit compat
address space.  Doing this to the sigreturn path then causes occasional
random corruption of the 64-bit r0 register.

Instead, we stop doing the crazy "load the return-value register"
hack in sigreturn.  We already have some sigreturn-specific assembly
code that we use to pass the pt_regs pointer to C code.  We extend that
code to also set the link register to point to a spot a few instructions
after the usual syscall return address so we don't clobber the saved r0.
Now it no longer matters what the rt_sigreturn syscall returns, and the
pt_regs structure can be cleanly and completely reloaded.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/signal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/tile/include')

diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index c1ee1d61d44c..81d92a45cd4b 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -25,7 +25,7 @@
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 struct pt_regs;
-int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *);
+int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
 int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
 void do_signal(struct pt_regs *regs);
 #endif
-- 
cgit v1.2.3