| author | Raghuram Subramani <raghus2247@gmail.com> | 2022-06-19 19:47:51 +0530 |
|---|---|---|
| committer | Raghuram Subramani <raghus2247@gmail.com> | 2022-06-19 19:47:51 +0530 |
| commit | 4fd287655a72b9aea14cdac715ad5b90ed082ed2 (patch) | |
| tree | 65d393bc0e699dd12d05b29ba568e04cea666207 /circuitpython/lib/libffi/src/x86 | |
| parent | 0150f70ce9c39e9e6dd878766c0620c85e47bed0 (diff) | |
add circuitpython code
Diffstat (limited to 'circuitpython/lib/libffi/src/x86')
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/ffi.c | 729 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/ffi64.c | 824 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/ffitarget.h | 139 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/ffiw64.c | 281 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/internal.h | 29 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/internal64.h | 22 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/sysv.S | 1040 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/unix64.S | 546 |
| -rw-r--r-- | circuitpython/lib/libffi/src/x86/win64.S | 219 |
9 files changed, 3829 insertions, 0 deletions
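The files added below implement libffi's call and closure machinery for x86 and x86-64. For orientation, here is a minimal sketch of how the public API they back (`ffi_prep_cif`/`ffi_call`, declared in `ffi.h`) is typically driven; this example is not part of the commit, and the choice of `puts` as the callee is purely illustrative:

```c
#include <ffi.h>
#include <stdio.h>

int main(void)
{
  ffi_cif cif;
  ffi_type *arg_types[1] = { &ffi_type_pointer };
  const char *s = "hello from libffi";
  void *arg_values[1] = { &s };  /* avalue[] holds pointers to the arguments */
  ffi_arg rc;                    /* integral returns are widened to ffi_arg */

  /* Describe the callee's signature: int puts(const char *).
     ffi_prep_cif dispatches to the per-target ffi_prep_cif_machdep
     defined in the sources below. */
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, arg_types) != FFI_OK)
    return 1;

  /* ffi_call marshals the arguments and jumps through the
     target-specific entry point (e.g. ffi_call_i386 or ffi_call_unix64
     from this commit). */
  ffi_call (&cif, FFI_FN (puts), &rc, arg_values);
  printf ("puts returned %d\n", (int) rc);
  return 0;
}
```

The same `cif` can be reused for repeated calls with different argument values, which is why the machine-dependent classification work in `ffi_prep_cif_machdep` is done once up front rather than on every call.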
diff --git a/circuitpython/lib/libffi/src/x86/ffi.c b/circuitpython/lib/libffi/src/x86/ffi.c new file mode 100644 index 0000000..feb5cbb --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/ffi.c @@ -0,0 +1,729 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2002 Bo Thorsen + Copyright (c) 2002 Roger Sayle + Copyright (C) 2008, 2010 Free Software Foundation, Inc. + + x86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef __x86_64__ +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 80-bit type. */ +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 +#endif + +#if defined(__GNUC__) && !defined(__declspec) +# define __declspec(x) __attribute__((x)) +#endif + +/* Perform machine dependent cif processing. */ +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep(ffi_cif *cif) +{ + size_t bytes = 0; + int i, n, flags, cabi = cif->abi; + + switch (cabi) + { + case FFI_SYSV: + case FFI_STDCALL: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + case FFI_PASCAL: + case FFI_REGISTER: + break; + default: + return FFI_BAD_ABI; + } + + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + flags = X86_RET_VOID; + break; + case FFI_TYPE_FLOAT: + flags = X86_RET_FLOAT; + break; + case FFI_TYPE_DOUBLE: + flags = X86_RET_DOUBLE; + break; + case FFI_TYPE_LONGDOUBLE: + flags = X86_RET_LDOUBLE; + break; + case FFI_TYPE_UINT8: + flags = X86_RET_UINT8; + break; + case FFI_TYPE_UINT16: + flags = X86_RET_UINT16; + break; + case FFI_TYPE_SINT8: + flags = X86_RET_SINT8; + break; + case FFI_TYPE_SINT16: + flags = X86_RET_SINT16; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + flags = X86_RET_INT32; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + flags = X86_RET_INT64; + break; + case FFI_TYPE_STRUCT: +#ifndef X86 + /* ??? This should be a different ABI rather than an ifdef. 
*/ + if (cif->rtype->size == 1) + flags = X86_RET_STRUCT_1B; + else if (cif->rtype->size == 2) + flags = X86_RET_STRUCT_2B; + else if (cif->rtype->size == 4) + flags = X86_RET_INT32; + else if (cif->rtype->size == 8) + flags = X86_RET_INT64; + else +#endif + { + do_struct: + switch (cabi) + { + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_STDCALL: + case FFI_MS_CDECL: + flags = X86_RET_STRUCTARG; + break; + default: + flags = X86_RET_STRUCTPOP; + break; + } + /* Allocate space for return value pointer. */ + bytes += ALIGN (sizeof(void*), FFI_SIZEOF_ARG); + } + break; + case FFI_TYPE_COMPLEX: + switch (cif->rtype->elements[0]->type) + { + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + goto do_struct; + case FFI_TYPE_FLOAT: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + flags = X86_RET_INT64; + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + flags = X86_RET_INT32; + break; + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + flags = X86_RET_STRUCT_2B; + break; + default: + return FFI_BAD_TYPEDEF; + } + break; + default: + return FFI_BAD_TYPEDEF; + } + cif->flags = flags; + + for (i = 0, n = cif->nargs; i < n; i++) + { + ffi_type *t = cif->arg_types[i]; + + bytes = ALIGN (bytes, t->alignment); + bytes += ALIGN (t->size, FFI_SIZEOF_ARG); + } + cif->bytes = ALIGN (bytes, 16); + + return FFI_OK; +} + +static ffi_arg +extend_basic_type(void *arg, int type) +{ + switch (type) + { + case FFI_TYPE_SINT8: + return *(SINT8 *)arg; + case FFI_TYPE_UINT8: + return *(UINT8 *)arg; + case FFI_TYPE_SINT16: + return *(SINT16 *)arg; + case FFI_TYPE_UINT16: + return *(UINT16 *)arg; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + return *(UINT32 *)arg; + + default: + abort(); + } +} + +struct call_frame +{ + void *ebp; /* 0 */ + void *retaddr; /* 4 */ + void (*fn)(void); /* 8 */ + int flags; /* 12 */ + void *rvalue; /* 16 */ + unsigned regs[3]; /* 20-28 */ +}; + +struct abi_params +{ + int dir; /* parameter growth direction */ + int static_chain; /* the static chain register used by gcc */ + int nregs; /* number of register parameters */ + int regs[3]; +}; + +static const struct abi_params abi_params[FFI_LAST_ABI] = { + [FFI_SYSV] = { 1, R_ECX, 0 }, + [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } }, + [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } }, + [FFI_STDCALL] = { 1, R_ECX, 0 }, + [FFI_PASCAL] = { -1, R_ECX, 0 }, + /* ??? No defined static chain; gcc does not support REGISTER. */ + [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } }, + [FFI_MS_CDECL] = { 1, R_ECX, 0 } +}; + +extern void ffi_call_i386(struct call_frame *, char *) +#if HAVE_FASTCALL + __declspec(fastcall) +#endif + FFI_HIDDEN; + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, dir, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + dir = pabi->dir; + + rsize = 0; + if (rvalue == NULL) + { + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. 
*/ + flags = X86_RET_VOID; + break; + } + } + + bytes = cif->bytes; + stack = alloca(bytes + sizeof(*frame) + rsize); + argp = (dir < 0 ? stack + bytes : stack); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = fn; + frame->flags = flags; + frame->rvalue = rvalue; + frame->regs[pabi->static_chain] = (unsigned)closure; + + narg_reg = 0; + switch (flags) + { + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. */ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); + break; + } + + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; i < n; i++) + { + ffi_type *ty = arg_types[i]; + void *valp = avalue[i]; + size_t z = ty->size; + int t = ty->type; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + ffi_arg val = extend_basic_type (valp, t); + + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + frame->regs[pabi->regs[narg_reg++]] = val; + else if (dir < 0) + { + argp -= 4; + *(ffi_arg *)argp = val; + } + else + { + *(ffi_arg *)argp = val; + argp += 4; + } + } + else + { + size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* Alignment rules for arguments are quite complex. Vectors and + structures with 16 byte alignment get it. Note that long double + on Darwin does have 16 byte alignment, and does not get this + alignment if passed directly; a structure with a long double + inside, however, would get 16 byte alignment. Since libffi does + not support vectors, we need non concern ourselves with other + cases. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ + argp -= za; + memcpy (argp, valp, z); + } + else + { + argp = (char *)ALIGN (argp, align); + memcpy (argp, valp, z); + argp += za; + } + } + } + FFI_ASSERT (dir > 0 || argp == stack); + + ffi_call_i386 (frame, stack); +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + +/** private members **/ + +void FFI_HIDDEN ffi_closure_i386(void); +void FFI_HIDDEN ffi_closure_STDCALL(void); +void FFI_HIDDEN ffi_closure_REGISTER(void); + +struct closure_frame +{ + unsigned rettemp[4]; /* 0 */ + unsigned regs[3]; /* 16-24 */ + ffi_cif *cif; /* 28 */ + void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */ + void *user_data; /* 36 */ +}; + +int FFI_HIDDEN +#if HAVE_FASTCALL +__declspec(fastcall) +#endif +ffi_closure_inner (struct closure_frame *frame, char *stack) +{ + ffi_cif *cif = frame->cif; + int cabi, i, n, flags, dir, narg_reg; + const struct abi_params *pabi; + ffi_type **arg_types; + char *argp; + void *rvalue; + void **avalue; + + cabi = cif->abi; + flags = cif->flags; + narg_reg = 0; + rvalue = frame->rettemp; + pabi = &abi_params[cabi]; + dir = pabi->dir; + argp = (dir < 0 ? 
stack + cif->bytes : stack); + + switch (flags) + { + case X86_RET_STRUCTARG: + if (pabi->nregs > 0) + { + rvalue = (void *)frame->regs[pabi->regs[0]]; + narg_reg = 1; + frame->rettemp[0] = (unsigned)rvalue; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + rvalue = *(void **)argp; + argp += sizeof(void *); + frame->rettemp[0] = (unsigned)rvalue; + break; + } + + n = cif->nargs; + avalue = alloca(sizeof(void *) * n); + + arg_types = cif->arg_types; + for (i = 0; i < n; ++i) + { + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; + void *valp; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + valp = &frame->regs[pabi->regs[narg_reg++]]; + else if (dir < 0) + { + argp -= 4; + valp = argp; + } + else + { + valp = argp; + argp += 4; + } + } + else + { + size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* See the comment in ffi_call_int. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ + argp -= za; + valp = argp; + } + else + { + argp = (char *)ALIGN (argp, align); + valp = argp; + argp += za; + } + } + + avalue[i] = valp; + } + + frame->fun (cif, rvalue, avalue, frame->user_data); + + if (cabi == FFI_STDCALL) + return flags + (cif->bytes << X86_RET_POP_SHIFT); + else + return flags; +} + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + char *tramp = closure->tramp; + void (*dest)(void); + int op = 0xb8; /* movl imm, %eax */ + + switch (cif->abi) + { + case FFI_SYSV: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + dest = ffi_closure_i386; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_closure_STDCALL; + break; + case FFI_REGISTER: + dest = ffi_closure_REGISTER; + op = 0x68; /* pushl imm */ + default: + return FFI_BAD_ABI; + } + + /* movl or pushl immediate. 
*/ + tramp[0] = op; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +void FFI_HIDDEN ffi_go_closure_EAX(void); +void FFI_HIDDEN ffi_go_closure_ECX(void); +void FFI_HIDDEN ffi_go_closure_STDCALL(void); + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*)) +{ + void (*dest)(void); + + switch (cif->abi) + { + case FFI_SYSV: + case FFI_MS_CDECL: + dest = ffi_go_closure_ECX; + break; + case FFI_THISCALL: + case FFI_FASTCALL: + dest = ffi_go_closure_EAX; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_go_closure_STDCALL; + break; + case FFI_REGISTER: + default: + return FFI_BAD_ABI; + } + + closure->tramp = dest; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +/* ------- Native raw API support -------------------------------- */ + +#if !FFI_NO_RAW_API + +void FFI_HIDDEN ffi_closure_raw_SYSV(void); +void FFI_HIDDEN ffi_closure_raw_THISCALL(void); + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure *closure, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + char *tramp = closure->tramp; + void (*dest)(void); + int i; + + /* We currently don't support certain kinds of arguments for raw + closures. This should be implemented by a separate assembly + language routine, since it would require argument processing, + something we don't do now for performance. */ + for (i = cif->nargs-1; i >= 0; i--) + switch (cif->arg_types[i]->type) + { + case FFI_TYPE_STRUCT: + case FFI_TYPE_LONGDOUBLE: + return FFI_BAD_TYPEDEF; + } + + switch (cif->abi) + { + case FFI_THISCALL: + dest = ffi_closure_raw_THISCALL; + break; + case FFI_SYSV: + dest = ffi_closure_raw_SYSV; + break; + default: + return FFI_BAD_ABI; + } + + /* movl imm, %eax. */ + tramp[0] = 0xb8; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +void +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) +{ + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + + rsize = 0; + if (rvalue == NULL) + { + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. */ + flags = X86_RET_VOID; + break; + } + } + + bytes = cif->bytes; + argp = stack = alloca(bytes + sizeof(*frame) + rsize); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = fn; + frame->flags = flags; + frame->rvalue = rvalue; + + narg_reg = 0; + switch (flags) + { + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. 
*/ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); + bytes -= sizeof(void *); + break; + } + + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++) + { + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT) + { + ffi_arg val = extend_basic_type (avalue, t); + frame->regs[pabi->regs[narg_reg++]] = val; + z = FFI_SIZEOF_ARG; + } + else + { + memcpy (argp, avalue, z); + z = ALIGN (z, FFI_SIZEOF_ARG); + argp += z; + } + avalue += z; + bytes -= z; + } + if (i < n) + memcpy (argp, avalue, bytes); + + ffi_call_i386 (frame, stack); +} +#endif /* !FFI_NO_RAW_API */ +#endif /* !__x86_64__ */ diff --git a/circuitpython/lib/libffi/src/x86/ffi64.c b/circuitpython/lib/libffi/src/x86/ffi64.c new file mode 100644 index 0000000..131b5e3 --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/ffi64.c @@ -0,0 +1,824 @@ +/* ----------------------------------------------------------------------- + ffi64.c - Copyright (c) 2013 The Written Word, Inc. + Copyright (c) 2011 Anthony Green + Copyright (c) 2008, 2010 Red Hat, Inc. + Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdarg.h> +#include <stdint.h> +#include "internal64.h" + +#ifdef __x86_64__ + +#define MAX_GPR_REGS 6 +#define MAX_SSE_REGS 8 + +#if defined(__INTEL_COMPILER) +#include "xmmintrin.h" +#define UINT128 __m128 +#else +#if defined(__SUNPRO_C) +#include <sunmedia_types.h> +#define UINT128 __m128i +#else +#define UINT128 __int128_t +#endif +#endif + +union big_int_union +{ + UINT32 i32; + UINT64 i64; + UINT128 i128; +}; + +struct register_args +{ + /* Registers for argument passing. */ + UINT64 gpr[MAX_GPR_REGS]; + union big_int_union sse[MAX_SSE_REGS]; + UINT64 rax; /* ssecount */ + UINT64 r10; /* static chain */ +}; + +extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)) FFI_HIDDEN; + +/* All reference to register classes here is identical to the code in + gcc/config/i386/i386.c. Do *not* change one without the other. 
*/ + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the + exception of SSESF, SSEDF classes, that are basically SSE class, + just gcc will use SF or DFmode move instead of DImode to avoid + reformatting penalties. + + Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). */ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; + +#define MAX_CLASSES 4 + +#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + See the x86-64 PS ABI for details. 
+*/ +static size_t +classify_argument (ffi_type *type, enum x86_64_reg_class classes[], + size_t byte_offset) +{ + switch (type->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + do_integer: + { + size_t size = byte_offset + type->size; + + if (size <= 4) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size <= 8) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size <= 12) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size <= 16) + { + classes[0] = classes[1] = X86_64_INTEGER_CLASS; + return 2; + } + else + FFI_ASSERT (0); + } + case FFI_TYPE_FLOAT: + if (!(byte_offset % 8)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = X86_64_SSEDF_CLASS; + return 1; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; +#endif + case FFI_TYPE_STRUCT: + { + const size_t UNITS_PER_WORD = 8; + size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + int i; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* If the struct is larger than 32 bytes, pass it on the stack. */ + if (type->size > 32) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + case FFI_TYPE_VOID: + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Merge the fields of structure. */ + for (ptr = type->elements; *ptr != NULL; ptr++) + { + size_t num; + + byte_offset = ALIGN (byte_offset, (*ptr)->alignment); + + num = classify_argument (*ptr, subclasses, byte_offset % 8); + if (num == 0) + return 0; + for (i = 0; i < num; i++) + { + size_t pos = byte_offset / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + + byte_offset += (*ptr)->size; + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + FFI_ASSERT (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + /* The first one should never be X86_64_X87UP_CLASS. 
*/ + FFI_ASSERT (i != 0); + return 0; + } + } + return words; + } + case FFI_TYPE_COMPLEX: + { + ffi_type *inner = type->elements[0]; + switch (inner->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + goto do_integer; + + case FFI_TYPE_FLOAT: + classes[0] = X86_64_SSE_CLASS; + if (byte_offset % 8) + { + classes[1] = X86_64_SSESF_CLASS; + return 2; + } + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = classes[1] = X86_64_SSEDF_CLASS; + return 2; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; +#endif + } + } + } + abort(); +} + +/* Examine the argument and return set number of register required in each + class. Return zero iff parameter should be passed in memory, otherwise + the number of registers. */ + +static size_t +examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], + _Bool in_return, int *pngpr, int *pnsse) +{ + size_t n; + int i, ngpr, nsse; + + n = classify_argument (type, classes, 0); + if (n == 0) + return 0; + + ngpr = nsse = 0; + for (i = 0; i < n; ++i) + switch (classes[i]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + ngpr++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + nsse++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return in_return != 0; + default: + abort (); + } + + *pngpr = ngpr; + *pnsse = nsse; + + return n; +} + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int gprcount, ssecount, i, avn, ngpr, nsse, flags; + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t bytes, n, rtype_size; + ffi_type *rtype; + + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + gprcount = ssecount = 0; + + rtype = cif->rtype; + rtype_size = rtype->size; + switch (rtype->type) + { + case FFI_TYPE_VOID: + flags = UNIX64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = UNIX64_RET_UINT8; + break; + case FFI_TYPE_SINT8: + flags = UNIX64_RET_SINT8; + break; + case FFI_TYPE_UINT16: + flags = UNIX64_RET_UINT16; + break; + case FFI_TYPE_SINT16: + flags = UNIX64_RET_SINT16; + break; + case FFI_TYPE_UINT32: + flags = UNIX64_RET_UINT32; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = UNIX64_RET_SINT32; + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM32; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87; + break; + case FFI_TYPE_STRUCT: + n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value is passed in memory. A pointer to that + memory is the first argument. Allocate a register for it. */ + gprcount++; + /* We don't have to do anything in asm for the return. */ + flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; + } + else + { + _Bool sse0 = SSE_CLASS_P (classes[0]); + + if (rtype_size == 4 && sse0) + flags = UNIX64_RET_XMM32; + else if (rtype_size == 8) + flags = sse0 ? 
UNIX64_RET_XMM64 : UNIX64_RET_INT64; + else + { + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && sse1) + flags = UNIX64_RET_ST_XMM0_XMM1; + else if (sse0) + flags = UNIX64_RET_ST_XMM0_RAX; + else if (sse1) + flags = UNIX64_RET_ST_RAX_XMM0; + else + flags = UNIX64_RET_ST_RAX_RDX; + flags |= rtype_size << UNIX64_SIZE_SHIFT; + } + } + break; + case FFI_TYPE_COMPLEX: + switch (rtype->elements[0]->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87_2; + break; +#endif + default: + return FFI_BAD_TYPEDEF; + } + break; + default: + return FFI_BAD_TYPEDEF; + } + + /* Go over all arguments and determine the way they should be passed. + If it's in a register and there is space for it, let that be so. If + not, add it's size to the stack byte count. */ + for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) + { + if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = cif->arg_types[i]->alignment; + + if (align < 8) + align = 8; + + bytes = ALIGN (bytes, align); + bytes += cif->arg_types[i]->size; + } + else + { + gprcount += ngpr; + ssecount += nsse; + } + } + if (ssecount) + flags |= UNIX64_FLAG_XMM_ARGS; + + cif->flags = flags; + cif->bytes = ALIGN (bytes, 8); + + return FFI_OK; +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + enum x86_64_reg_class classes[MAX_CLASSES]; + char *stack, *argp; + ffi_type **arg_types; + int gprcount, ssecount, ngpr, nsse, i, avn, flags; + struct register_args *reg_args; + + /* Can't call 32-bit mode from 64-bit mode. */ + FFI_ASSERT (cif->abi == FFI_UNIX64); + + /* If the return value is a struct and we don't have a return value + address then we need to make one. Otherwise we can ignore it. */ + flags = cif->flags; + if (rvalue == NULL) + { + if (flags & UNIX64_FLAG_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + else + flags = UNIX64_RET_VOID; + } + + /* Allocate the space for the arguments, plus 4 words of temp space. */ + stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); + reg_args = (struct register_args *) stack; + argp = stack + sizeof (struct register_args); + + reg_args->r10 = (uintptr_t) closure; + + gprcount = ssecount = 0; + + /* If the return value is passed in memory, add the pointer as the + first integer argument. */ + if (flags & UNIX64_FLAG_RET_IN_MEM) + reg_args->gpr[gprcount++] = (unsigned long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + size_t n, size = arg_types[i]->size; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. 
*/ + argp = (void *) ALIGN (argp, align); + memcpy (argp, avalue[i], size); + argp += size; + } + else + { + /* The argument is passed entirely in registers. */ + char *a = (char *) avalue[i]; + int j; + + for (j = 0; j < n; j++, a += 8, size -= 8) + { + switch (classes[j]) + { + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + /* Sign-extend integer arguments passed in general + purpose registers, to cope with the fact that + LLVM incorrectly assumes that this will be done + (the x86-64 PS ABI does not specify this). */ + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + break; + case FFI_TYPE_SINT16: + reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + break; + case FFI_TYPE_SINT32: + reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + break; + default: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size); + } + gprcount++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: + reg_args->sse[ssecount++].i64 = *(UINT64 *) a; + break; + case X86_64_SSESF_CLASS: + reg_args->sse[ssecount++].i32 = *(UINT32 *) a; + break; + default: + abort(); + } + } + } + } + reg_args->rax = ssecount; + + ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), + flags, rvalue, fn); +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + +extern void ffi_closure_unix64(void) FFI_HIDDEN; +extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + void (*dest)(void); + char *tramp = closure->tramp; + + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + if (cif->flags & UNIX64_FLAG_XMM_ARGS) + dest = ffi_closure_unix64_sse; + else + dest = ffi_closure_unix64; + + memcpy (tramp, trampoline, sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)dest; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int FFI_HIDDEN +ffi_closure_unix64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *rvalue, + struct register_args *reg_args, + char *argp) +{ + void **avalue; + ffi_type **arg_types; + long i, avn; + int gprcount, ssecount, ngpr, nsse; + int flags; + + avn = cif->nargs; + flags = cif->flags; + avalue = alloca(avn * sizeof(void *)); + gprcount = ssecount = 0; + + if (flags & UNIX64_FLAG_RET_IN_MEM) + { + /* On return, %rax will contain the address that was passed + by the caller in %rdi. */ + void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++]; + *(void **)rvalue = r; + rvalue = r; + flags = (sizeof(void *) == 4 ? 
UNIX64_RET_UINT32 : UNIX64_RET_INT64); + } + + arg_types = cif->arg_types; + for (i = 0; i < avn; ++i) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + avalue[i] = argp; + argp += arg_types[i]->size; + } + /* If the argument is in a single register, or two consecutive + integer registers, then we can use that address directly. */ + else if (n == 1 + || (n == 2 && !(SSE_CLASS_P (classes[0]) + || SSE_CLASS_P (classes[1])))) + { + /* The argument is in a single register. */ + if (SSE_CLASS_P (classes[0])) + { + avalue[i] = ®_args->sse[ssecount]; + ssecount += n; + } + else + { + avalue[i] = ®_args->gpr[gprcount]; + gprcount += n; + } + } + /* Otherwise, allocate space to make them consecutive. */ + else + { + char *a = alloca (16); + int j; + + avalue[i] = a; + for (j = 0; j < n; j++, a += 8) + { + if (SSE_CLASS_P (classes[j])) + memcpy (a, ®_args->sse[ssecount++], 8); + else + memcpy (a, ®_args->gpr[gprcount++], 8); + } + } + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + + /* Tell assembly how to perform return type promotions. */ + return flags; +} + +extern void ffi_go_closure_unix64(void) FFI_HIDDEN; +extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN; + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS + ? ffi_go_closure_unix64_sse + : ffi_go_closure_unix64); + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +#endif /* __x86_64__ */ diff --git a/circuitpython/lib/libffi/src/x86/ffitarget.h b/circuitpython/lib/libffi/src/x86/ffitarget.h new file mode 100644 index 0000000..8c1dcac --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/ffitarget.h @@ -0,0 +1,139 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2012, 2014 Anthony Green + Copyright (c) 1996-2003, 2010 Red Hat, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. + + Target configuration macros for x86 and x86-64. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." +#endif + +/* ---- System specific configurations ----------------------------------- */ + +/* For code common to all platforms on x86 and x86_64. */ +#define X86_ANY + +#if defined (X86_64) && defined (__i386__) +#undef X86_64 +#define X86 +#endif + +#ifdef X86_WIN64 +#define FFI_SIZEOF_ARG 8 +#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */ +#endif + +#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION +#ifndef _MSC_VER +#define FFI_TARGET_HAS_COMPLEX_TYPE +#endif + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +#ifdef X86_WIN64 +#ifdef _MSC_VER +typedef unsigned __int64 ffi_arg; +typedef __int64 ffi_sarg; +#else +typedef unsigned long long ffi_arg; +typedef long long ffi_sarg; +#endif +#else +#if defined __x86_64__ && defined __ILP32__ +#define FFI_SIZEOF_ARG 8 +#define FFI_SIZEOF_JAVA_RAW 4 +typedef unsigned long long ffi_arg; +typedef long long ffi_sarg; +#else +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; +#endif +#endif + +typedef enum ffi_abi { +#if defined(X86_WIN64) + FFI_FIRST_ABI = 0, + FFI_WIN64, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_WIN64 + +#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) + FFI_FIRST_ABI = 1, + FFI_UNIX64, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_UNIX64 + +#elif defined(X86_WIN32) + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_STDCALL = 2, + FFI_THISCALL = 3, + FFI_FASTCALL = 4, + FFI_MS_CDECL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_MS_CDECL +#else + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_THISCALL = 3, + FFI_FASTCALL = 4, + FFI_STDCALL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_MS_CDECL = 8, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_SYSV +#endif +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_GO_CLOSURES 1 + +#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) +#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) +#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) +#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4) + +#if defined (X86_64) || defined(X86_WIN64) \ + || (defined (__x86_64__) && defined (X86_DARWIN)) +# define FFI_TRAMPOLINE_SIZE 24 +# define FFI_NATIVE_RAW_API 0 +#else +# define FFI_TRAMPOLINE_SIZE 12 +# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ +#endif + +#endif + diff --git a/circuitpython/lib/libffi/src/x86/ffiw64.c b/circuitpython/lib/libffi/src/x86/ffiw64.c new file mode 100644 index 0000000..8a33a6c --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/ffiw64.c @@ -0,0 +1,281 @@ +/* ----------------------------------------------------------------------- + ffiw64.c - Copyright (c) 2014 Red Hat, Inc. 
+ + x86 win64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include <stdint.h> + +#ifdef X86_WIN64 + +struct win64_call_frame +{ + UINT64 rbp; /* 0 */ + UINT64 retaddr; /* 8 */ + UINT64 fn; /* 16 */ + UINT64 flags; /* 24 */ + UINT64 rvalue; /* 32 */ +}; + +extern void ffi_call_win64 (void *stack, struct win64_call_frame *, + void *closure) FFI_HIDDEN; + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int flags, n; + + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + flags = cif->rtype->type; + switch (flags) + { + default: + break; + case FFI_TYPE_LONGDOUBLE: + flags = FFI_TYPE_STRUCT; + break; + case FFI_TYPE_COMPLEX: + flags = FFI_TYPE_STRUCT; + /* FALLTHRU */ + case FFI_TYPE_STRUCT: + switch (cif->rtype->size) + { + case 8: + flags = FFI_TYPE_UINT64; + break; + case 4: + flags = FFI_TYPE_SMALL_STRUCT_4B; + break; + case 2: + flags = FFI_TYPE_SMALL_STRUCT_2B; + break; + case 1: + flags = FFI_TYPE_SMALL_STRUCT_1B; + break; + } + break; + } + cif->flags = flags; + + /* Each argument either fits in a register, an 8 byte slot, or is + passed by reference with the pointer in the 8 byte slot. */ + n = cif->nargs; + n += (flags == FFI_TYPE_STRUCT); + if (n < 4) + n = 4; + cif->bytes = n * 8; + + return FFI_OK; +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + int i, j, n, flags; + UINT64 *stack; + size_t rsize; + struct win64_call_frame *frame; + + FFI_ASSERT(cif->abi == FFI_WIN64); + + flags = cif->flags; + rsize = 0; + + /* If we have no return value for a structure, we need to create one. + Otherwise we can ignore the return type entirely. 
*/ + if (rvalue == NULL) + { + if (flags == FFI_TYPE_STRUCT) + rsize = cif->rtype->size; + else + flags = FFI_TYPE_VOID; + } + + stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize); + frame = (struct win64_call_frame *)((char *)stack + cif->bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = (uintptr_t)fn; + frame->flags = flags; + frame->rvalue = (uintptr_t)rvalue; + + j = 0; + if (flags == FFI_TYPE_STRUCT) + { + stack[0] = (uintptr_t)rvalue; + j = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++j) + { + switch (cif->arg_types[i]->size) + { + case 8: + stack[j] = *(UINT64 *)avalue[i]; + break; + case 4: + stack[j] = *(UINT32 *)avalue[i]; + break; + case 2: + stack[j] = *(UINT16 *)avalue[i]; + break; + case 1: + stack[j] = *(UINT8 *)avalue[i]; + break; + default: + stack[j] = (uintptr_t)avalue[i]; + break; + } + } + + ffi_call_win64 (stack, frame, closure); +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + + +extern void ffi_closure_win64(void) FFI_HIDDEN; +extern void ffi_go_closure_win64(void) FFI_HIDDEN; + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + unsigned char *tramp = closure->tramp; + + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + memcpy (tramp, trampoline, sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ + if (cif->abi != FFI_WIN64) + return FFI_BAD_ABI; + + closure->tramp = ffi_go_closure_win64; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +struct win64_closure_frame +{ + UINT64 rvalue[2]; + UINT64 fargs[4]; + UINT64 retaddr; + UINT64 args[]; +}; + +int FFI_HIDDEN +ffi_closure_win64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + struct win64_closure_frame *frame) +{ + void **avalue; + void *rvalue; + int i, n, nreg, flags; + + avalue = alloca(cif->nargs * sizeof(void *)); + rvalue = frame->rvalue; + nreg = 0; + + /* When returning a structure, the address is in the first argument. + We must also be prepared to return the same address in eax, so + install that address in the frame and pretend we return a pointer. 
*/ + flags = cif->flags; + if (flags == FFI_TYPE_STRUCT) + { + rvalue = (void *)(uintptr_t)frame->args[0]; + frame->rvalue[0] = frame->args[0]; + nreg = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++nreg) + { + size_t size = cif->arg_types[i]->size; + size_t type = cif->arg_types[i]->type; + void *a; + + if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT) + { + if (nreg < 4) + a = &frame->fargs[nreg]; + else + a = &frame->args[nreg]; + } + else if (size == 1 || size == 2 || size == 4 || size == 8) + a = &frame->args[nreg]; + else + a = (void *)(uintptr_t)frame->args[nreg]; + + avalue[i] = a; + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + return flags; +} + +#endif /* X86_WIN64 */ diff --git a/circuitpython/lib/libffi/src/x86/internal.h b/circuitpython/lib/libffi/src/x86/internal.h new file mode 100644 index 0000000..09771ba --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/internal.h @@ -0,0 +1,29 @@ +#define X86_RET_FLOAT 0 +#define X86_RET_DOUBLE 1 +#define X86_RET_LDOUBLE 2 +#define X86_RET_SINT8 3 +#define X86_RET_SINT16 4 +#define X86_RET_UINT8 5 +#define X86_RET_UINT16 6 +#define X86_RET_INT64 7 +#define X86_RET_INT32 8 +#define X86_RET_VOID 9 +#define X86_RET_STRUCTPOP 10 +#define X86_RET_STRUCTARG 11 +#define X86_RET_STRUCT_1B 12 +#define X86_RET_STRUCT_2B 13 +#define X86_RET_UNUSED14 14 +#define X86_RET_UNUSED15 15 + +#define X86_RET_TYPE_MASK 15 +#define X86_RET_POP_SHIFT 4 + +#define R_EAX 0 +#define R_EDX 1 +#define R_ECX 2 + +#ifdef __PCC__ +# define HAVE_FASTCALL 0 +#else +# define HAVE_FASTCALL 1 +#endif diff --git a/circuitpython/lib/libffi/src/x86/internal64.h b/circuitpython/lib/libffi/src/x86/internal64.h new file mode 100644 index 0000000..512e955 --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/internal64.h @@ -0,0 +1,22 @@ +#define UNIX64_RET_VOID 0 +#define UNIX64_RET_UINT8 1 +#define UNIX64_RET_UINT16 2 +#define UNIX64_RET_UINT32 3 +#define UNIX64_RET_SINT8 4 +#define UNIX64_RET_SINT16 5 +#define UNIX64_RET_SINT32 6 +#define UNIX64_RET_INT64 7 +#define UNIX64_RET_XMM32 8 +#define UNIX64_RET_XMM64 9 +#define UNIX64_RET_X87 10 +#define UNIX64_RET_X87_2 11 +#define UNIX64_RET_ST_XMM0_RAX 12 +#define UNIX64_RET_ST_RAX_XMM0 13 +#define UNIX64_RET_ST_XMM0_XMM1 14 +#define UNIX64_RET_ST_RAX_RDX 15 + +#define UNIX64_RET_LAST 15 + +#define UNIX64_FLAG_RET_IN_MEM (1 << 10) +#define UNIX64_FLAG_XMM_ARGS (1 << 11) +#define UNIX64_SIZE_SHIFT 12 diff --git a/circuitpython/lib/libffi/src/x86/sysv.S b/circuitpython/lib/libffi/src/x86/sysv.S new file mode 100644 index 0000000..78f245b --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/sysv.S @@ -0,0 +1,1040 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include "internal.h" + +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + +#ifdef __ELF__ +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define ENDF(X) +#endif + +/* Handle win32 fastcall name mangling. */ +#ifdef X86_WIN32 +# define ffi_call_i386 @ffi_call_i386@8 +# define ffi_closure_inner @ffi_closure_inner@8 +#else +# define ffi_call_i386 C(ffi_call_i386) +# define ffi_closure_inner C(ffi_closure_inner) +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +#endif + + .text + .balign 16 + .globl ffi_call_i386 + FFI_HIDDEN(ffi_call_i386) + +/* This is declared as + + void ffi_call_i386(struct call_frame *frame, char *argp) + __attribute__((fastcall)); + + Thus the arguments are present in + + ecx: frame + edx: argp +*/ + +ffi_call_i386: +L(UW0): + # cfi_startproc +#if !HAVE_FASTCALL + movl 4(%esp), %ecx + movl 8(%esp), %edx +#endif + movl (%esp), %eax /* move the return address */ + movl %ebp, (%ecx) /* store %ebp into local frame */ + movl %eax, 4(%ecx) /* store retaddr into local frame */ + + /* New stack frame based off ebp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-4, so from the + perspective of the unwind info, it hasn't moved. 
*/ + movl %ecx, %ebp +L(UW1): + # cfi_def_cfa(%ebp, 8) + # cfi_rel_offset(%ebp, 0) + + movl %edx, %esp /* set outgoing argument stack */ + movl 20+R_EAX*4(%ebp), %eax /* set register arguments */ + movl 20+R_EDX*4(%ebp), %edx + movl 20+R_ECX*4(%ebp), %ecx + + call *8(%ebp) + + movl 12(%ebp), %ecx /* load return type code */ + movl %ebx, 8(%ebp) /* preserve %ebx */ +L(UW2): + # cfi_rel_offset(%ebx, 8) + + andl $X86_RET_TYPE_MASK, %ecx +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx +#else + leal L(store_table)(,%ecx, 8), %ebx +#endif + movl 16(%ebp), %ecx /* load result address */ + jmp *%ebx + + .balign 8 +L(store_table): +E(L(store_table), X86_RET_FLOAT) + fstps (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) + fstpl (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) + fstpt (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) + movsbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) + movswl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) + movzbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) + movzwl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_INT64) + movl %edx, 4(%ecx) + /* fallthru */ +E(L(store_table), X86_RET_INT32) + movl %eax, (%ecx) + /* fallthru */ +E(L(store_table), X86_RET_VOID) +L(e1): + movl 8(%ebp), %ebx + movl %ebp, %esp + popl %ebp +L(UW3): + # cfi_remember_state + # cfi_def_cfa(%esp, 4) + # cfi_restore(%ebx) + # cfi_restore(%ebp) + ret +L(UW4): + # cfi_restore_state + +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) + movb %al, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) + movw %ax, (%ecx) + jmp L(e1) + + /* Fill out the table so that bad values are predictable. */ +E(L(store_table), X86_RET_UNUSED14) + ud2 +E(L(store_table), X86_RET_UNUSED15) + ud2 + +L(UW5): + # cfi_endproc +ENDF(ffi_call_i386) + +/* The inner helper is declared as + + void ffi_closure_inner(struct closure_frame *frame, char *argp) + __attribute_((fastcall)) + + Thus the arguments are placed in + + ecx: frame + edx: argp +*/ + +/* Macros to help setting up the closure_data structure. 
+
+/* The inner helper is declared as
+
+   void ffi_closure_inner(struct closure_frame *frame, char *argp)
+	__attribute__((fastcall))
+
+   Thus the arguments are placed in
+
+	ecx: frame
+	edx: argp
+*/
+
+/* Macros to help setting up the closure_data structure. */
+
+#if HAVE_FASTCALL
+# define closure_FS (40 + 4)
+# define closure_CF 0
+#else
+# define closure_FS (8 + 40 + 12)
+# define closure_CF 8
+#endif
+
+#define FFI_CLOSURE_SAVE_REGS \
+ movl %eax, closure_CF+16+R_EAX*4(%esp); \
+ movl %edx, closure_CF+16+R_EDX*4(%esp); \
+ movl %ecx, closure_CF+16+R_ECX*4(%esp)
+
+#define FFI_CLOSURE_COPY_TRAMP_DATA \
+ movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \
+ movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \
+ movl %edx, closure_CF+28(%esp); \
+ movl %ecx, closure_CF+32(%esp); \
+ movl %eax, closure_CF+36(%esp)
+
+#if HAVE_FASTCALL
+# define FFI_CLOSURE_PREP_CALL \
+ movl %esp, %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */
+#else
+# define FFI_CLOSURE_PREP_CALL \
+ leal closure_CF(%esp), %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
+ movl %ecx, (%esp); \
+ movl %edx, 4(%esp)
+#endif
+
+#define FFI_CLOSURE_CALL_INNER(UWN) \
+ call ffi_closure_inner
+
+#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))(, %eax, 8), %edx; \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+
+#ifdef __PIC__
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ call C(__x86.get_pc_thunk.dx); \
+L(C1(pc,N)): \
+ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+# else
+# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
+# undef FFI_CLOSURE_CALL_INNER
+# define FFI_CLOSURE_CALL_INNER(UWN) \
+ movl %ebx, 40(%esp); /* save ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_rel_offset(%ebx, 40); \
+ call C(__x86.get_pc_thunk.bx); /* load got register */ \
+ addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
+ call ffi_closure_inner@PLT
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \
+ movl 40(%esp), %ebx; /* restore ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_restore(%ebx); \
+ movl closure_CF(%esp), %eax; /* optimistic load */ \
+ jmp *%edx
+# endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+ .balign 16
+ .globl C(ffi_go_closure_EAX)
+ FFI_HIDDEN(C(ffi_go_closure_EAX))
+C(ffi_go_closure_EAX):
+L(UW6):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW7):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%eax), %edx /* copy cif */
+ movl 8(%eax), %ecx /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %ecx, closure_CF+32(%esp)
+ movl %eax, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW8):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_EAX))
+
+ .balign 16
+ .globl C(ffi_go_closure_ECX)
+ FFI_HIDDEN(C(ffi_go_closure_ECX))
+C(ffi_go_closure_ECX):
+L(UW9):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW10):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW11):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_ECX))
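+
+/* FFI_CLOSURE_MASK_AND_JUMP above dispatches without a table in
+   memory: each E() entry is padded to eight bytes, so the masked
+   return-type code scales directly into an entry address,
+   load_table + (flags & X86_RET_TYPE_MASK) * 8.  A rough C analogue
+   of that dispatch (a sketch only; the handler names and flags value
+   are hypothetical, and 15 is consistent with the sixteen E() slots
+   here -- the real mask lives in internal.h, included above):
+
+	#include <stdio.h>
+
+	#define X86_RET_TYPE_MASK 15	/* low bits of cif->flags */
+
+	static void on_float(void)  { puts("float path");  }
+	static void on_double(void) { puts("double path"); }
+
+	int main(void)
+	{
+	  /* Stands in for the fixed-stride entries of a load_table. */
+	  void (*table[16])(void) = { on_float, on_double /* ... */ };
+	  unsigned flags = 1;	/* pretend the cif says "returns double" */
+
+	  table[flags & X86_RET_TYPE_MASK]();	/* same masking as above */
+	  return 0;
+	}
+*/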
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+   On entry, %eax contains the address of the ffi_closure. */
+
+ .balign 16
+ .globl C(ffi_closure_i386)
+ FFI_HIDDEN(C(ffi_closure_i386))
+
+C(ffi_closure_i386):
+L(UW12):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW13):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+ /* Entry point from preceding Go closures. */
+L(do_closure_i386):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(14)
+ FFI_CLOSURE_MASK_AND_JUMP(2, 15)
+
+ .balign 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+ addl $closure_FS, %esp
+L(UW16):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret
+L(UW17):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+ addl $closure_FS, %esp
+L(UW18):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret $4
+L(UW19):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e2)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table2), X86_RET_UNUSED14)
+ ud2
+E(L(load_table2), X86_RET_UNUSED15)
+ ud2
+
+L(UW20):
+ # cfi_endproc
+ENDF(C(ffi_closure_i386))
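+
+/* ffi_closure_i386 is the target of the trampoline that the generic
+   code installs with ffi_prep_closure_loc.  A minimal sketch of
+   building such a closure with the documented API (handler, bound,
+   and the addition it performs are hypothetical, for illustration):
+
+	#include <ffi.h>
+	#include <stdio.h>
+
+	static void handler(ffi_cif *cif, void *ret, void **args,
+			    void *user_data)
+	{
+	  *(ffi_arg *) ret = *(int *) args[0] + *(int *) user_data;
+	}
+
+	int main(void)
+	{
+	  ffi_cif cif;
+	  ffi_type *atypes[1] = { &ffi_type_sint };
+	  void *code;
+	  ffi_closure *closure =
+	    ffi_closure_alloc(sizeof (ffi_closure), &code);
+	  int bound = 40;
+
+	  if (closure
+	      && ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
+			      &ffi_type_sint, atypes) == FFI_OK
+	      && ffi_prep_closure_loc(closure, &cif, handler,
+				      &bound, code) == FFI_OK)
+	    {
+	      int (*fn) (int) = (int (*)(int)) code;
+	      printf("%d\n", fn(2));	/* prints 42 */
+	    }
+	  ffi_closure_free(closure);
+	  return 0;
+	}
+
+   Calling fn enters the trampoline, which loads the closure address
+   into %eax and jumps here; FFI_CLOSURE_COPY_TRAMP_DATA then pulls
+   cif, fun and user_data out of the closure before calling
+   ffi_closure_inner.  */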
+
+ .balign 16
+ .globl C(ffi_go_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_go_closure_STDCALL))
+C(ffi_go_closure_STDCALL):
+L(UW21):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW22):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_STDCALL)
+L(UW23):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_STDCALL))
+
+/* For REGISTER, we have no available parameter registers, and so we
+   enter here having pushed the closure onto the stack. */
+
+ .balign 16
+ .globl C(ffi_closure_REGISTER)
+ FFI_HIDDEN(C(ffi_closure_REGISTER))
+C(ffi_closure_REGISTER):
+L(UW24):
+ # cfi_startproc
+ # cfi_def_cfa(%esp, 8)
+ # cfi_offset(%eip, -8)
+ subl $closure_FS-4, %esp
+L(UW25):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl closure_FS-4(%esp), %ecx /* load retaddr */
+ movl closure_FS(%esp), %eax /* load closure */
+ movl %ecx, closure_FS(%esp) /* move retaddr */
+ jmp L(do_closure_REGISTER)
+L(UW26):
+ # cfi_endproc
+ENDF(C(ffi_closure_REGISTER))
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+   the stack following the closure. The amount needing to be popped
+   is returned to us from ffi_closure_inner. */
+
+ .balign 16
+ .globl C(ffi_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_closure_STDCALL))
+C(ffi_closure_STDCALL):
+L(UW27):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW28):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+
+ /* Entry point from ffi_closure_REGISTER. */
+L(do_closure_REGISTER):
+
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+ /* Entry point from preceding Go closure. */
+L(do_closure_STDCALL):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(29)
+
+ movl %eax, %ecx
+ shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */
+ leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */
+ movl closure_FS(%esp), %edx /* move return address */
+ movl %edx, (%ecx)
+
+ /* From this point on, the value of %esp upon return is %ecx+4,
+    and we've copied the return address to %ecx to make return easy.
+    There's no point in representing this in the unwind info, as
+    there is always a window between the mov and the ret which
+    will be wrong from one point of view or another. */
+
+ FFI_CLOSURE_MASK_AND_JUMP(3, 30)
+
+ .balign 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT8)
+ movsbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT16)
+ movswl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT8)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT16)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT32)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_VOID)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTARG)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+
+ /* Fill out the table so that bad values are predictable. 
*/ +E(L(load_table3), X86_RET_UNUSED14) + ud2 +E(L(load_table3), X86_RET_UNUSED15) + ud2 + +L(UW31): + # cfi_endproc +ENDF(C(ffi_closure_STDCALL)) + +#if !FFI_NO_RAW_API + +#define raw_closure_S_FS (16+16+12) + + .balign 16 + .globl C(ffi_closure_raw_SYSV) + FFI_HIDDEN(C(ffi_closure_raw_SYSV)) +C(ffi_closure_raw_SYSV): +L(UW32): + # cfi_startproc + subl $raw_closure_S_FS, %esp +L(UW33): + # cfi_def_cfa_offset(raw_closure_S_FS + 4) + movl %ebx, raw_closure_S_FS-4(%esp) +L(UW34): + # cfi_rel_offset(%ebx, raw_closure_S_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx +#else + leal L(load_table4)(,%eax, 8), %ecx +#endif + movl raw_closure_S_FS-4(%esp), %ebx +L(UW35): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e4) +E(L(load_table4), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp +L(UW36): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret +L(UW37): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTPOP) + addl $raw_closure_S_FS, %esp +L(UW38): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret $4 +L(UW39): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTARG) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e4) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table4), X86_RET_UNUSED14) + ud2 +E(L(load_table4), X86_RET_UNUSED15) + ud2 + +L(UW40): + # cfi_endproc +ENDF(C(ffi_closure_raw_SYSV)) + +#define raw_closure_T_FS (16+16+8) + + .balign 16 + .globl C(ffi_closure_raw_THISCALL) + FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) +C(ffi_closure_raw_THISCALL): +L(UW41): + # cfi_startproc + /* Rearrange the stack such that %ecx is the first argument. + This means moving the return address. 
*/ + popl %edx +L(UW42): + # cfi_def_cfa_offset(0) + # cfi_register(%eip, %edx) + pushl %ecx +L(UW43): + # cfi_adjust_cfa_offset(4) + pushl %edx +L(UW44): + # cfi_adjust_cfa_offset(4) + # cfi_rel_offset(%eip, 0) + subl $raw_closure_T_FS, %esp +L(UW45): + # cfi_adjust_cfa_offset(raw_closure_T_FS) + movl %ebx, raw_closure_T_FS-4(%esp) +L(UW46): + # cfi_rel_offset(%ebx, raw_closure_T_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx +#else + leal L(load_table5)(,%eax, 8), %ecx +#endif + movl raw_closure_T_FS-4(%esp), %ebx +L(UW47): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e5) +E(L(load_table5), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp +L(UW48): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + /* Remove the extra %ecx argument we pushed. */ + ret $4 +L(UW49): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTPOP) + addl $raw_closure_T_FS, %esp +L(UW50): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + ret $8 +L(UW51): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTARG) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e5) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table5), X86_RET_UNUSED14) + ud2 +E(L(load_table5), X86_RET_UNUSED15) + ud2 + +L(UW52): + # cfi_endproc +ENDF(C(ffi_closure_raw_THISCALL)) + +#endif /* !FFI_NO_RAW_API */ + +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \ + .weak_definition X; \ + .private_extern X +#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + .globl X; \ + FFI_HIDDEN(X) +#else +# define COMDAT(X) +#endif + +#if defined(__PIC__) + COMDAT(C(__x86.get_pc_thunk.bx)) +C(__x86.get_pc_thunk.bx): + movl (%esp), %ebx + ret +ENDF(C(__x86.get_pc_thunk.bx)) +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE + COMDAT(C(__x86.get_pc_thunk.dx)) +C(__x86.get_pc_thunk.dx): + movl (%esp), %edx + ret +ENDF(C(__x86.get_pc_thunk.dx)) +#endif /* DARWIN || HIDDEN */ +#endif /* __PIC__ */ + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. 
*/ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(X86_WIN32) +.section .eh_frame,"r" +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,EH_FRAME_FLAGS,@unwind +#else +.section .eh_frame,EH_FRAME_FLAGS,@progbits +#endif + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 4 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x7c /* CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ + .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ + .balign 4 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW5)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ + .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ + ADV(UW2, UW1) + .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ + ADV(UW3, UW2) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ + .byte 0xc0+3 /* DW_CFA_restore, %ebx */ + .byte 0xc0+5 /* DW_CFA_restore, %ebp */ + ADV(UW4, UW3) + .byte 0xb /* DW_CFA_restore_state */ + .balign 4 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW6)) /* Initial location */ + .long L(UW8)-L(UW6) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW7, UW6) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW9)) /* Initial location */ + .long L(UW11)-L(UW9) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW10, UW9) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW20)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW14, UW13) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW15, UW14) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW16, UW15) +#else + ADV(UW16, UW13) +#endif + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW17, UW16) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW18, UW17) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW19, UW18) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW21)) /* Initial location */ + .long L(UW23)-L(UW21) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW22, UW21) + .byte 0xe, closure_FS+4 /* 
DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE5): + + .set L(set6),L(EFDE6)-L(SFDE6) + .long L(set6) /* FDE Length */ +L(SFDE6): + .long L(SFDE6)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW24)) /* Initial location */ + .long L(UW26)-L(UW24) /* Address range */ + .byte 0 /* Augmentation size */ + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ + ADV(UW25, UW24) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE6): + + .set L(set7),L(EFDE7)-L(SFDE7) + .long L(set7) /* FDE Length */ +L(SFDE7): + .long L(SFDE7)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW27)) /* Initial location */ + .long L(UW31)-L(UW27) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW28, UW27) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW29, UW28) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW30, UW29) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ +#endif + .balign 4 +L(EFDE7): + +#if !FFI_NO_RAW_API + .set L(set8),L(EFDE8)-L(SFDE8) + .long L(set8) /* FDE Length */ +L(SFDE8): + .long L(SFDE8)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW32)) /* Initial location */ + .long L(UW40)-L(UW32) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW33, UW32) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW34, UW33) + .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ + ADV(UW35, UW34) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW36, UW35) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW37, UW36) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW38, UW37) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW39, UW38) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE8): + + .set L(set9),L(EFDE9)-L(SFDE9) + .long L(set9) /* FDE Length */ +L(SFDE9): + .long L(SFDE9)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW41)) /* Initial location */ + .long L(UW52)-L(UW41) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW42, UW41) + .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ + .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ + ADV(UW43, UW42) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW44, UW43) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ + ADV(UW45, UW44) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW46, UW45) + .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ + ADV(UW47, UW46) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW48, UW47) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW49, UW48) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW50, UW49) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW51, UW50) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE9): +#endif /* !FFI_NO_RAW_API */ + +#endif /* ifndef __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/circuitpython/lib/libffi/src/x86/unix64.S b/circuitpython/lib/libffi/src/x86/unix64.S new file mode 100644 index 0000000..c83010c --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/unix64.S @@ -0,0 +1,546 @@ +/* ----------------------------------------------------------------------- + unix64.S - Copyright (c) 2013 The Written Word, Inc. 
+	     - Copyright (c) 2008 Red Hat, Inc
+	     - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+
+   x86-64 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include "internal64.h"
+
+ .text
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define PLT(X) X@PLT
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define PLT(X) X
+# define ENDF(X)
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+   actual table. The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+		    void *raddr, void (*fnaddr)(void));
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function. This has been allocated by ffi_call. We also
+   deallocate some of the stack that has been alloca'd. */
+
+ .balign 8
+ .globl C(ffi_call_unix64)
+ FFI_HIDDEN(C(ffi_call_unix64))
+
+C(ffi_call_unix64):
+L(UW0):
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
+
+ /* New stack frame based off rbp. This is an itty bit of unwind
+    trickery in that the CFA *has* changed. There is no easy way
+    to describe it correctly on entry to the function. Fortunately,
+    it doesn't matter too much since at all points we can correctly
+    unwind back to ffi_call. Note that the location to which we
+    moved the return address is (the new) CFA-8, so from the
+    perspective of the unwind info, it hasn't moved. */
+L(UW1):
+ /* cfi_def_cfa(%rbp, 32) */
+ /* cfi_rel_offset(%rbp, 16) */
+
+ movq %rdi, %r10 /* Save a copy of the register area. 
*/ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 0x08(%r10), %rsi + movq 0x10(%r10), %rdx + movq 0x18(%r10), %rcx + movq 0x20(%r10), %r8 + movq 0x28(%r10), %r9 + movl 0xb0(%r10), %eax + testl %eax, %eax + jnz L(load_sse) +L(ret_from_load_sse): + + /* Deallocate the reg arg area, except for r10, then load via pop. */ + leaq 0xb8(%r10), %rsp + popq %r10 + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +L(UW2): + /* cfi_remember_state */ + /* cfi_def_cfa(%rsp, 8) */ + /* cfi_restore(%rbp) */ + + /* The first byte of the flags contains the FFI_TYPE. */ + cmpb $UNIX64_RET_LAST, %cl + movzbl %cl, %r10d + leaq L(store_table)(%rip), %r11 + ja L(sa) + leaq (%r11, %r10, 8), %r10 + + /* Prep for the structure cases: scratch area in redzone. */ + leaq -20(%rsp), %rsi + jmp *%r10 + + .balign 8 +L(store_table): +E(L(store_table), UNIX64_RET_VOID) + ret +E(L(store_table), UNIX64_RET_UINT8) + movzbl %al, %eax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_UINT16) + movzwl %ax, %eax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_UINT32) + movl %eax, %eax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_SINT8) + movsbq %al, %rax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_SINT16) + movswq %ax, %rax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_SINT32) + cltq + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_INT64) + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_XMM32) + movd %xmm0, (%rdi) + ret +E(L(store_table), UNIX64_RET_XMM64) + movq %xmm0, (%rdi) + ret +E(L(store_table), UNIX64_RET_X87) + fstpt (%rdi) + ret +E(L(store_table), UNIX64_RET_X87_2) + fstpt (%rdi) + fstpt 16(%rdi) + ret +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) + movq %rax, 8(%rsi) + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) + movq %xmm0, 8(%rsi) + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) + movq %xmm1, 8(%rsi) + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_RDX) + movq %rdx, 8(%rsi) +L(s2): + movq %rax, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx + rep movsb + ret + .balign 8 +L(s3): + movq %xmm0, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx + rep movsb + ret + +L(sa): call PLT(C(abort)) + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. */ + .balign 2 +L(UW3): + /* cfi_restore_state */ +L(load_sse): + movdqa 0x30(%r10), %xmm0 + movdqa 0x40(%r10), %xmm1 + movdqa 0x50(%r10), %xmm2 + movdqa 0x60(%r10), %xmm3 + movdqa 0x70(%r10), %xmm4 + movdqa 0x80(%r10), %xmm5 + movdqa 0x90(%r10), %xmm6 + movdqa 0xa0(%r10), %xmm7 + jmp L(ret_from_load_sse) + +L(UW4): +ENDF(C(ffi_call_unix64)) + +/* 6 general registers, 8 vector registers, + 32 bytes of rvalue, 8 bytes of alignment. */ +#define ffi_closure_OFS_G 0 +#define ffi_closure_OFS_V (6*8) +#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16) +#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) + +/* The location of rvalue within the red zone after deallocating the frame. 
*/ +#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) + + .balign 2 + .globl C(ffi_closure_unix64_sse) + FFI_HIDDEN(C(ffi_closure_unix64_sse)) + +C(ffi_closure_unix64_sse): +L(UW5): + subq $ffi_closure_FS, %rsp +L(UW6): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry1) + +L(UW7): +ENDF(C(ffi_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_closure_unix64) + FFI_HIDDEN(C(ffi_closure_unix64)) + +C(ffi_closure_unix64): +L(UW8): + subq $ffi_closure_FS, %rsp +L(UW9): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry1): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */ + movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */ + movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */ +#else + movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ +#endif +L(do_closure): + leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ + movq %rsp, %r8 /* Load reg_args */ + leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ + call C(ffi_closure_unix64_inner) + + /* Deallocate stack frame early; return value is now in redzone. */ + addq $ffi_closure_FS, %rsp +L(UW10): + /* cfi_adjust_cfa_offset(-ffi_closure_FS) */ + + /* The first byte of the return value contains the FFI_TYPE. 
*/ + cmpb $UNIX64_RET_LAST, %al + movzbl %al, %r10d + leaq L(load_table)(%rip), %r11 + ja L(la) + leaq (%r11, %r10, 8), %r10 + leaq ffi_closure_RED_RVALUE(%rsp), %rsi + jmp *%r10 + + .balign 8 +L(load_table): +E(L(load_table), UNIX64_RET_VOID) + ret +E(L(load_table), UNIX64_RET_UINT8) + movzbl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_UINT16) + movzwl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_UINT32) + movl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_SINT8) + movsbl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_SINT16) + movswl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_SINT32) + movl (%rsi), %eax + ret +E(L(load_table), UNIX64_RET_INT64) + movq (%rsi), %rax + ret +E(L(load_table), UNIX64_RET_XMM32) + movd (%rsi), %xmm0 + ret +E(L(load_table), UNIX64_RET_XMM64) + movq (%rsi), %xmm0 + ret +E(L(load_table), UNIX64_RET_X87) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_X87_2) + fldt 16(%rsi) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) + movq 8(%rsi), %rax + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) + movq 8(%rsi), %xmm0 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) + movq 8(%rsi), %xmm1 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) + movq 8(%rsi), %rdx +L(l2): + movq (%rsi), %rax + ret + .balign 8 +L(l3): + movq (%rsi), %xmm0 + ret + +L(la): call PLT(C(abort)) + +L(UW11): +ENDF(C(ffi_closure_unix64)) + + .balign 2 + .globl C(ffi_go_closure_unix64_sse) + FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) + +C(ffi_go_closure_unix64_sse): +L(UW12): + subq $ffi_closure_FS, %rsp +L(UW13): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry2) + +L(UW14): +ENDF(C(ffi_go_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_go_closure_unix64) + FFI_HIDDEN(C(ffi_go_closure_unix64)) + +C(ffi_go_closure_unix64): +L(UW15): + subq $ffi_closure_FS, %rsp +L(UW16): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry2): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl 4(%r10), %edi /* Load cif */ + movl 8(%r10), %esi /* Load fun */ + movl %r10d, %edx /* Load closure (user_data) */ +#else + movq 8(%r10), %rdi /* Load cif */ + movq 16(%r10), %rsi /* Load fun */ + movq %r10, %rdx /* Load closure (user_data) */ +#endif + jmp L(do_closure) + +L(UW17): +ENDF(C(ffi_go_closure_unix64)) + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind +#else +.section .eh_frame,"a",@progbits +#endif + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. 
*/ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 8 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x78 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */ + .balign 8 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW4)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */ + .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */ + ADV(UW2, UW1) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */ + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + ADV(UW3, UW2) + .byte 0xb /* DW_CFA_restore_state */ + .balign 8 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW5)) /* Initial location */ + .long L(UW7)-L(UW5) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW6, UW5) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW8)) /* Initial location */ + .long L(UW11)-L(UW8) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW9, UW8) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + ADV(UW10, UW9) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */ +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW14)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW15)) /* Initial location */ + .long L(UW17)-L(UW15) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW16, UW15) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE5): +#ifdef __APPLE__ + .subsections_via_symbols +#endif + +#endif /* __x86_64__ */ +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/circuitpython/lib/libffi/src/x86/win64.S b/circuitpython/lib/libffi/src/x86/win64.S new file mode 100644 index 0000000..a5a20b6 --- /dev/null +++ b/circuitpython/lib/libffi/src/x86/win64.S @@ -0,0 +1,219 @@ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_cfi.h> + +#if defined(HAVE_AS_CFI_PSEUDO_OP) + .cfi_sections .debug_frame +#endif + +#define arg0 %rcx +#define arg1 %rdx +#define arg2 %r8 +#define arg3 %r9 + +#ifdef SYMBOL_UNDERSCORE +#define SYMBOL_NAME(name) _##name +#else +#define SYMBOL_NAME(name) name +#endif + +.macro E which + .align 
8 + .org 0b + \which * 8 +.endm + + .text + +/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 8 + .globl ffi_call_win64 + + .seh_proc ffi_call_win64 +ffi_call_win64: + cfi_startproc + /* Set up the local stack frame and install it in rbp/rsp. */ + movq (%rsp), %rax + movq %rbp, (arg1) + movq %rax, 8(arg1) + movq arg1, %rbp + cfi_def_cfa(%rbp, 16) + cfi_rel_offset(%rbp, 0) + .seh_pushreg %rbp + .seh_setframe %rbp, 0 + .seh_endprologue + movq arg0, %rsp + + movq arg2, %r10 + + /* Load all slots into both general and xmm registers. */ + movq (%rsp), %rcx + movsd (%rsp), %xmm0 + movq 8(%rsp), %rdx + movsd 8(%rsp), %xmm1 + movq 16(%rsp), %r8 + movsd 16(%rsp), %xmm2 + movq 24(%rsp), %r9 + movsd 24(%rsp), %xmm3 + + call *16(%rbp) + + movl 24(%rbp), %ecx + movq 32(%rbp), %r8 + leaq 0f(%rip), %r10 + cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx + leaq (%r10, %rcx, 8), %r10 + ja 99f + jmp *%r10 + +/* Below, we're space constrained most of the time. Thus we eschew the + modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ +.macro epilogue + leaveq + cfi_remember_state + cfi_def_cfa(%rsp, 8) + cfi_restore(%rbp) + ret + cfi_restore_state +.endm + + .align 8 +0: +E FFI_TYPE_VOID + epilogue +E FFI_TYPE_INT + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E FFI_TYPE_FLOAT + movss %xmm0, (%r8) + epilogue +E FFI_TYPE_DOUBLE + movsd %xmm0, (%r8) + epilogue +E FFI_TYPE_LONGDOUBLE + call abort +E FFI_TYPE_UINT8 + movzbl %al, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT8 + movsbq %al, %rax + jmp 98f +E FFI_TYPE_UINT16 + movzwl %ax, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT16 + movswq %ax, %rax + jmp 98f +E FFI_TYPE_UINT32 + movl %eax, %eax + movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT32 + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E FFI_TYPE_UINT64 +98: movq %rax, (%r8) + epilogue +E FFI_TYPE_SINT64 + movq %rax, (%r8) + epilogue +E FFI_TYPE_STRUCT + epilogue +E FFI_TYPE_POINTER + movq %rax, (%r8) + epilogue +E FFI_TYPE_COMPLEX + call abort +E FFI_TYPE_SMALL_STRUCT_1B + movb %al, (%r8) + epilogue +E FFI_TYPE_SMALL_STRUCT_2B + movw %ax, (%r8) + epilogue +E FFI_TYPE_SMALL_STRUCT_4B + movl %eax, (%r8) + epilogue + + .align 8 +99: call abort + +.purgem epilogue + + cfi_endproc + .seh_endproc + + +/* 32 bytes of outgoing register stack space, 8 bytes of alignment, + 16 bytes of result, 32 bytes of xmm registers. */ +#define ffi_clo_FS (32+8+16+32) +#define ffi_clo_OFF_R (32+8) +#define ffi_clo_OFF_X (32+8+16) + + .align 8 + .globl ffi_go_closure_win64 + + .seh_proc ffi_go_closure_win64 +ffi_go_closure_win64: + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq arg0, 8(%rsp) + movq arg1, 16(%rsp) + movq arg2, 24(%rsp) + movq arg3, 32(%rsp) + + movq 8(%r10), arg0 /* load cif */ + movq 16(%r10), arg1 /* load fun */ + movq %r10, arg2 /* closure is user_data */ + jmp 0f + cfi_endproc + .seh_endproc + + .align 8 + .globl ffi_closure_win64 + + .seh_proc ffi_closure_win64 +ffi_closure_win64: + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. 
*/
+	movq	arg0, 8(%rsp)
+	movq	arg1, 16(%rsp)
+	movq	arg2, 24(%rsp)
+	movq	arg3, 32(%rsp)
+
+	movq	FFI_TRAMPOLINE_SIZE(%r10), arg0		/* load cif */
+	movq	FFI_TRAMPOLINE_SIZE+8(%r10), arg1	/* load fun */
+	movq	FFI_TRAMPOLINE_SIZE+16(%r10), arg2	/* load user_data */
+0:
+	subq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(ffi_clo_FS)
+	.seh_stackalloc ffi_clo_FS
+	.seh_endprologue
+
+	/* Save all sse arguments into the stack frame. */
+	movsd	%xmm0, ffi_clo_OFF_X(%rsp)
+	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
+	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
+	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
+
+	leaq	ffi_clo_OFF_R(%rsp), arg3
+	call	ffi_closure_win64_inner
+
+	/* Load the result into both possible result registers. */
+	movq	ffi_clo_OFF_R(%rsp), %rax
+	movsd	ffi_clo_OFF_R(%rsp), %xmm0
+
+	addq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(-ffi_clo_FS)
+	ret
+
+	cfi_endproc
+	.seh_endproc
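+
+/* Note that the closure handler only ever writes its result to the
+   result slot in memory; as the code above shows, the assembly then
+   reloads those bytes into both %rax and %xmm0, since at this point
+   it cannot know which register the caller will read.  A sketch of a
+   handler returning double under that convention (the name and the
+   scaling by 2.0 are hypothetical, for illustration only):
+
+	#include <ffi.h>
+
+	static void dbl_handler(ffi_cif *cif, void *ret, void **args,
+				void *user_data)
+	{
+	  (void) cif; (void) user_data;
+	  /* Stored once; picked up by the movq/movsd pair above. */
+	  *(double *) ret = *(double *) args[0] * 2.0;
+	}
+
+   Prepared with ffi_prep_cif(..., &ffi_type_double, ...) and
+   ffi_prep_closure_loc as usual, the caller then finds the value in
+   whichever result register its own ABI rules dictate.  */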