author    Raghuram Subramani <raghus2247@gmail.com>  2022-06-19 19:47:51 +0530
committer Raghuram Subramani <raghus2247@gmail.com>  2022-06-19 19:47:51 +0530
commit    4fd287655a72b9aea14cdac715ad5b90ed082ed2 (patch)
tree      65d393bc0e699dd12d05b29ba568e04cea666207 /circuitpython/lib/libffi/src/x86
parent    0150f70ce9c39e9e6dd878766c0620c85e47bed0 (diff)
add circuitpython code
Diffstat (limited to 'circuitpython/lib/libffi/src/x86')
-rw-r--r--  circuitpython/lib/libffi/src/x86/ffi.c         729
-rw-r--r--  circuitpython/lib/libffi/src/x86/ffi64.c       824
-rw-r--r--  circuitpython/lib/libffi/src/x86/ffitarget.h   139
-rw-r--r--  circuitpython/lib/libffi/src/x86/ffiw64.c      281
-rw-r--r--  circuitpython/lib/libffi/src/x86/internal.h     29
-rw-r--r--  circuitpython/lib/libffi/src/x86/internal64.h   22
-rw-r--r--  circuitpython/lib/libffi/src/x86/sysv.S       1040
-rw-r--r--  circuitpython/lib/libffi/src/x86/unix64.S      546
-rw-r--r--  circuitpython/lib/libffi/src/x86/win64.S       219
9 files changed, 3829 insertions, 0 deletions
diff --git a/circuitpython/lib/libffi/src/x86/ffi.c b/circuitpython/lib/libffi/src/x86/ffi.c
new file mode 100644
index 0000000..feb5cbb
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/ffi.c
@@ -0,0 +1,729 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc.
+ Copyright (c) 2002 Ranjit Mathew
+ Copyright (c) 2002 Bo Thorsen
+ Copyright (c) 2002 Roger Sayle
+ Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+
+ x86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include "internal.h"
+
+/* Force FFI_TYPE_LONGDOUBLE to be different from FFI_TYPE_DOUBLE;
+ all further uses in this file will refer to the 80-bit type. */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+# error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
+#endif
+
+#if defined(__GNUC__) && !defined(__declspec)
+# define __declspec(x) __attribute__((x))
+#endif
+
+/* Perform machine dependent cif processing. */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ size_t bytes = 0;
+ int i, n, flags, cabi = cif->abi;
+
+ switch (cabi)
+ {
+ case FFI_SYSV:
+ case FFI_STDCALL:
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_MS_CDECL:
+ case FFI_PASCAL:
+ case FFI_REGISTER:
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
+
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ flags = X86_RET_VOID;
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = X86_RET_FLOAT;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = X86_RET_DOUBLE;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = X86_RET_LDOUBLE;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = X86_RET_UINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = X86_RET_UINT16;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = X86_RET_SINT8;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = X86_RET_SINT16;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+ flags = X86_RET_INT32;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ flags = X86_RET_INT64;
+ break;
+ case FFI_TYPE_STRUCT:
+#ifndef X86
+ /* ??? This should be a different ABI rather than an ifdef. */
+ if (cif->rtype->size == 1)
+ flags = X86_RET_STRUCT_1B;
+ else if (cif->rtype->size == 2)
+ flags = X86_RET_STRUCT_2B;
+ else if (cif->rtype->size == 4)
+ flags = X86_RET_INT32;
+ else if (cif->rtype->size == 8)
+ flags = X86_RET_INT64;
+ else
+#endif
+ {
+ do_struct:
+ switch (cabi)
+ {
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_STDCALL:
+ case FFI_MS_CDECL:
+ flags = X86_RET_STRUCTARG;
+ break;
+ default:
+ flags = X86_RET_STRUCTPOP;
+ break;
+ }
+ /* Allocate space for return value pointer. */
+ bytes += ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (cif->rtype->elements[0]->type)
+ {
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ goto do_struct;
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ flags = X86_RET_INT64;
+ break;
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+ flags = X86_RET_INT32;
+ break;
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+ flags = X86_RET_STRUCT_2B;
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
+ }
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
+ }
+ cif->flags = flags;
+
+ for (i = 0, n = cif->nargs; i < n; i++)
+ {
+ ffi_type *t = cif->arg_types[i];
+
+ bytes = ALIGN (bytes, t->alignment);
+ bytes += ALIGN (t->size, FFI_SIZEOF_ARG);
+ }
+ cif->bytes = ALIGN (bytes, 16);
+
+ return FFI_OK;
+}
+
+static ffi_arg
+extend_basic_type(void *arg, int type)
+{
+ switch (type)
+ {
+ case FFI_TYPE_SINT8:
+ return *(SINT8 *)arg;
+ case FFI_TYPE_UINT8:
+ return *(UINT8 *)arg;
+ case FFI_TYPE_SINT16:
+ return *(SINT16 *)arg;
+ case FFI_TYPE_UINT16:
+ return *(UINT16 *)arg;
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_FLOAT:
+ return *(UINT32 *)arg;
+
+ default:
+ abort();
+ }
+}
+
+struct call_frame
+{
+ void *ebp; /* 0 */
+ void *retaddr; /* 4 */
+ void (*fn)(void); /* 8 */
+ int flags; /* 12 */
+ void *rvalue; /* 16 */
+ unsigned regs[3]; /* 20-28 */
+};
+
+struct abi_params
+{
+ int dir; /* parameter growth direction */
+ int static_chain; /* the static chain register used by gcc */
+ int nregs; /* number of register parameters */
+ int regs[3];
+};
+
+static const struct abi_params abi_params[FFI_LAST_ABI] = {
+ [FFI_SYSV] = { 1, R_ECX, 0 },
+ [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } },
+ [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } },
+ [FFI_STDCALL] = { 1, R_ECX, 0 },
+ [FFI_PASCAL] = { -1, R_ECX, 0 },
+ /* ??? No defined static chain; gcc does not support REGISTER. */
+ [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } },
+ [FFI_MS_CDECL] = { 1, R_ECX, 0 }
+};
+
+extern void ffi_call_i386(struct call_frame *, char *)
+#if HAVE_FASTCALL
+ __declspec(fastcall)
+#endif
+ FFI_HIDDEN;
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ size_t rsize, bytes;
+ struct call_frame *frame;
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int flags, cabi, i, n, dir, narg_reg;
+ const struct abi_params *pabi;
+
+ flags = cif->flags;
+ cabi = cif->abi;
+ pabi = &abi_params[cabi];
+ dir = pabi->dir;
+
+ rsize = 0;
+ if (rvalue == NULL)
+ {
+ switch (flags)
+ {
+ case X86_RET_FLOAT:
+ case X86_RET_DOUBLE:
+ case X86_RET_LDOUBLE:
+ case X86_RET_STRUCTPOP:
+ case X86_RET_STRUCTARG:
+ /* The float cases need to pop the 387 stack.
+ The struct cases need to pass a valid pointer to the callee. */
+ rsize = cif->rtype->size;
+ break;
+ default:
+ /* We can pretend that the callee returns nothing. */
+ flags = X86_RET_VOID;
+ break;
+ }
+ }
+
+ bytes = cif->bytes;
+ stack = alloca(bytes + sizeof(*frame) + rsize);
+ argp = (dir < 0 ? stack + bytes : stack);
+ frame = (struct call_frame *)(stack + bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ frame->fn = fn;
+ frame->flags = flags;
+ frame->rvalue = rvalue;
+ frame->regs[pabi->static_chain] = (unsigned)closure;
+
+ narg_reg = 0;
+ switch (flags)
+ {
+ case X86_RET_STRUCTARG:
+ /* The pointer is passed as the first argument. */
+ if (pabi->nregs > 0)
+ {
+ frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+ narg_reg = 1;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ *(void **)argp = rvalue;
+ argp += sizeof(void *);
+ break;
+ }
+
+ arg_types = cif->arg_types;
+ for (i = 0, n = cif->nargs; i < n; i++)
+ {
+ ffi_type *ty = arg_types[i];
+ void *valp = avalue[i];
+ size_t z = ty->size;
+ int t = ty->type;
+
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+ {
+ ffi_arg val = extend_basic_type (valp, t);
+
+ if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+ frame->regs[pabi->regs[narg_reg++]] = val;
+ else if (dir < 0)
+ {
+ argp -= 4;
+ *(ffi_arg *)argp = val;
+ }
+ else
+ {
+ *(ffi_arg *)argp = val;
+ argp += 4;
+ }
+ }
+ else
+ {
+ size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* Alignment rules for arguments are quite complex. Vectors and
+ structures with 16 byte alignment get it. Note that long double
+ on Darwin does have 16 byte alignment, and does not get this
+ alignment if passed directly; a structure with a long double
+ inside, however, would get 16 byte alignment. Since libffi does
+ not support vectors, we need not concern ourselves with other
+ cases. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
+ if (dir < 0)
+ {
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
+ argp -= za;
+ memcpy (argp, valp, z);
+ }
+ else
+ {
+ argp = (char *)ALIGN (argp, align);
+ memcpy (argp, valp, z);
+ argp += za;
+ }
+ }
+ }
+ FFI_ASSERT (dir > 0 || argp == stack);
+
+ ffi_call_i386 (frame, stack);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+/** private members **/
+
+void FFI_HIDDEN ffi_closure_i386(void);
+void FFI_HIDDEN ffi_closure_STDCALL(void);
+void FFI_HIDDEN ffi_closure_REGISTER(void);
+
+struct closure_frame
+{
+ unsigned rettemp[4]; /* 0 */
+ unsigned regs[3]; /* 16-24 */
+ ffi_cif *cif; /* 28 */
+ void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */
+ void *user_data; /* 36 */
+};
+
+int FFI_HIDDEN
+#if HAVE_FASTCALL
+__declspec(fastcall)
+#endif
+ffi_closure_inner (struct closure_frame *frame, char *stack)
+{
+ ffi_cif *cif = frame->cif;
+ int cabi, i, n, flags, dir, narg_reg;
+ const struct abi_params *pabi;
+ ffi_type **arg_types;
+ char *argp;
+ void *rvalue;
+ void **avalue;
+
+ cabi = cif->abi;
+ flags = cif->flags;
+ narg_reg = 0;
+ rvalue = frame->rettemp;
+ pabi = &abi_params[cabi];
+ dir = pabi->dir;
+ argp = (dir < 0 ? stack + cif->bytes : stack);
+
+ switch (flags)
+ {
+ case X86_RET_STRUCTARG:
+ if (pabi->nregs > 0)
+ {
+ rvalue = (void *)frame->regs[pabi->regs[0]];
+ narg_reg = 1;
+ frame->rettemp[0] = (unsigned)rvalue;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ rvalue = *(void **)argp;
+ argp += sizeof(void *);
+ frame->rettemp[0] = (unsigned)rvalue;
+ break;
+ }
+
+ n = cif->nargs;
+ avalue = alloca(sizeof(void *) * n);
+
+ arg_types = cif->arg_types;
+ for (i = 0; i < n; ++i)
+ {
+ ffi_type *ty = arg_types[i];
+ size_t z = ty->size;
+ int t = ty->type;
+ void *valp;
+
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+ {
+ if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+ valp = &frame->regs[pabi->regs[narg_reg++]];
+ else if (dir < 0)
+ {
+ argp -= 4;
+ valp = argp;
+ }
+ else
+ {
+ valp = argp;
+ argp += 4;
+ }
+ }
+ else
+ {
+ size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* See the comment in ffi_call_int. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
+ if (dir < 0)
+ {
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
+ argp -= za;
+ valp = argp;
+ }
+ else
+ {
+ argp = (char *)ALIGN (argp, align);
+ valp = argp;
+ argp += za;
+ }
+ }
+
+ avalue[i] = valp;
+ }
+
+ frame->fun (cif, rvalue, avalue, frame->user_data);
+
+ if (cabi == FFI_STDCALL)
+ return flags + (cif->bytes << X86_RET_POP_SHIFT);
+ else
+ return flags;
+}
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ char *tramp = closure->tramp;
+ void (*dest)(void);
+ int op = 0xb8; /* movl imm, %eax */
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ case FFI_MS_CDECL:
+ dest = ffi_closure_i386;
+ break;
+ case FFI_STDCALL:
+ case FFI_PASCAL:
+ dest = ffi_closure_STDCALL;
+ break;
+ case FFI_REGISTER:
+ dest = ffi_closure_REGISTER;
+ op = 0x68; /* pushl imm */
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
+
+ /* movl or pushl immediate. */
+ tramp[0] = op;
+ *(void **)(tramp + 1) = codeloc;
+
+ /* jmp dest */
+ tramp[5] = 0xe9;
+ *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+void FFI_HIDDEN ffi_go_closure_EAX(void);
+void FFI_HIDDEN ffi_go_closure_ECX(void);
+void FFI_HIDDEN ffi_go_closure_STDCALL(void);
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*))
+{
+ void (*dest)(void);
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ case FFI_MS_CDECL:
+ dest = ffi_go_closure_ECX;
+ break;
+ case FFI_THISCALL:
+ case FFI_FASTCALL:
+ dest = ffi_go_closure_EAX;
+ break;
+ case FFI_STDCALL:
+ case FFI_PASCAL:
+ dest = ffi_go_closure_STDCALL;
+ break;
+ case FFI_REGISTER:
+ default:
+ return FFI_BAD_ABI;
+ }
+
+ closure->tramp = dest;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+void FFI_HIDDEN ffi_closure_raw_SYSV(void);
+void FFI_HIDDEN ffi_closure_raw_THISCALL(void);
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+ void *user_data,
+ void *codeloc)
+{
+ char *tramp = closure->tramp;
+ void (*dest)(void);
+ int i;
+
+ /* We currently don't support certain kinds of arguments for raw
+ closures. This should be implemented by a separate assembly
+ language routine, since it would require argument processing,
+ something we don't do now for performance. */
+ for (i = cif->nargs-1; i >= 0; i--)
+ switch (cif->arg_types[i]->type)
+ {
+ case FFI_TYPE_STRUCT:
+ case FFI_TYPE_LONGDOUBLE:
+ return FFI_BAD_TYPEDEF;
+ }
+
+ switch (cif->abi)
+ {
+ case FFI_THISCALL:
+ dest = ffi_closure_raw_THISCALL;
+ break;
+ case FFI_SYSV:
+ dest = ffi_closure_raw_SYSV;
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
+
+ /* movl imm, %eax. */
+ tramp[0] = 0xb8;
+ *(void **)(tramp + 1) = codeloc;
+
+ /* jmp dest */
+ tramp[5] = 0xe9;
+ *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
+{
+ size_t rsize, bytes;
+ struct call_frame *frame;
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int flags, cabi, i, n, narg_reg;
+ const struct abi_params *pabi;
+
+ flags = cif->flags;
+ cabi = cif->abi;
+ pabi = &abi_params[cabi];
+
+ rsize = 0;
+ if (rvalue == NULL)
+ {
+ switch (flags)
+ {
+ case X86_RET_FLOAT:
+ case X86_RET_DOUBLE:
+ case X86_RET_LDOUBLE:
+ case X86_RET_STRUCTPOP:
+ case X86_RET_STRUCTARG:
+ /* The float cases need to pop the 387 stack.
+ The struct cases need to pass a valid pointer to the callee. */
+ rsize = cif->rtype->size;
+ break;
+ default:
+ /* We can pretend that the callee returns nothing. */
+ flags = X86_RET_VOID;
+ break;
+ }
+ }
+
+ bytes = cif->bytes;
+ argp = stack = alloca(bytes + sizeof(*frame) + rsize);
+ frame = (struct call_frame *)(stack + bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ frame->fn = fn;
+ frame->flags = flags;
+ frame->rvalue = rvalue;
+
+ narg_reg = 0;
+ switch (flags)
+ {
+ case X86_RET_STRUCTARG:
+ /* The pointer is passed as the first argument. */
+ if (pabi->nregs > 0)
+ {
+ frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+ narg_reg = 1;
+ break;
+ }
+ /* fallthru */
+ case X86_RET_STRUCTPOP:
+ *(void **)argp = rvalue;
+ argp += sizeof(void *);
+ bytes -= sizeof(void *);
+ break;
+ }
+
+ arg_types = cif->arg_types;
+ for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++)
+ {
+ ffi_type *ty = arg_types[i];
+ size_t z = ty->size;
+ int t = ty->type;
+
+ if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT)
+ {
+ ffi_arg val = extend_basic_type (avalue, t);
+ frame->regs[pabi->regs[narg_reg++]] = val;
+ z = FFI_SIZEOF_ARG;
+ }
+ else
+ {
+ memcpy (argp, avalue, z);
+ z = ALIGN (z, FFI_SIZEOF_ARG);
+ argp += z;
+ }
+ avalue += z;
+ bytes -= z;
+ }
+ if (i < n)
+ memcpy (argp, avalue, bytes);
+
+ ffi_call_i386 (frame, stack);
+}
+#endif /* !FFI_NO_RAW_API */
+#endif /* !__x86_64__ */
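
For orientation, this file supplies the 32-bit halves of the public entry points: ffi_prep_cif_machdep computes cif->flags and cif->bytes, and ffi_call_int builds the call_frame consumed by sysv.S. A minimal sketch of a caller of that API, assuming a 32-bit build and the default ABI (this sketch is editorial, not part of the commit):

#include <ffi.h>
#include <stdio.h>

static int add2 (int a, int b) { return a + b; }

int
main (void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 2, b = 3;
  void *avalue[2] = { &a, &b };
  ffi_arg result;

  /* ffi_prep_cif calls into ffi_prep_cif_machdep above;
     ffi_call lands in ffi_call_int.  */
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, args) == FFI_OK)
    {
      ffi_call (&cif, FFI_FN (add2), &result, avalue);
      printf ("%d\n", (int) result);  /* prints 5 */
    }
  return 0;
}
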
diff --git a/circuitpython/lib/libffi/src/x86/ffi64.c b/circuitpython/lib/libffi/src/x86/ffi64.c
new file mode 100644
index 0000000..131b5e3
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/ffi64.c
@@ -0,0 +1,824 @@
+/* -----------------------------------------------------------------------
+ ffi64.c - Copyright (c) 2013 The Written Word, Inc.
+ Copyright (c) 2011 Anthony Green
+ Copyright (c) 2008, 2010 Red Hat, Inc.
+ Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "internal64.h"
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6
+#define MAX_SSE_REGS 8
+
+#if defined(__INTEL_COMPILER)
+#include "xmmintrin.h"
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+ UINT32 i32;
+ UINT64 i64;
+ UINT128 i128;
+};
+
+struct register_args
+{
+ /* Registers for argument passing. */
+ UINT64 gpr[MAX_GPR_REGS];
+ union big_int_union sse[MAX_SSE_REGS];
+ UINT64 rax; /* ssecount */
+ UINT64 r10; /* static chain */
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
+
+/* All reference to register classes here is identical to the code in
+ gcc/config/i386/i386.c. Do *not* change one without the other. */
+
+/* Register class used for passing given 64bit part of the argument.
+ These represent classes as documented by the PS ABI, with the
+ exception of SSESF, SSEDF classes, that are basically SSE class,
+ just gcc will use SF or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (upper half does contain padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
+
+#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
+ of this code is to classify each 8bytes of incoming argument by the register
+ class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ See the x86-64 PS ABI for details.
+*/
+static size_t
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+ size_t byte_offset)
+{
+ switch (type->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ do_integer:
+ {
+ size_t size = byte_offset + type->size;
+
+ if (size <= 4)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 8)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 12)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 16)
+ {
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ }
+ else
+ FFI_ASSERT (0);
+ }
+ case FFI_TYPE_FLOAT:
+ if (!(byte_offset % 8))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+#endif
+ case FFI_TYPE_STRUCT:
+ {
+ const size_t UNITS_PER_WORD = 8;
+ size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ ffi_type **ptr;
+ int i;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* If the struct is larger than 32 bytes, pass it on the stack. */
+ if (type->size > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero sized arrays or structures are NO_CLASS. We return 0 to
+ signal memory class, so handle them as a special case. */
+ if (!words)
+ {
+ case FFI_TYPE_VOID:
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Merge the fields of structure. */
+ for (ptr = type->elements; *ptr != NULL; ptr++)
+ {
+ size_t num;
+
+ byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
+
+ num = classify_argument (*ptr, subclasses, byte_offset % 8);
+ if (num == 0)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ size_t pos = byte_offset / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+
+ byte_offset += (*ptr)->size;
+ }
+
+ if (words > 2)
+ {
+ /* When size > 16 bytes, if the first eightbyte isn't
+ X86_64_SSE_CLASS or any of the later ones isn't
+ X86_64_SSEUP_CLASS, everything should be passed in
+ memory. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should be always preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ FFI_ASSERT (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ FFI_ASSERT (i != 0);
+ return 0;
+ }
+ }
+ return words;
+ }
+ case FFI_TYPE_COMPLEX:
+ {
+ ffi_type *inner = type->elements[0];
+ switch (inner->type)
+ {
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ goto do_integer;
+
+ case FFI_TYPE_FLOAT:
+ classes[0] = X86_64_SSE_CLASS;
+ if (byte_offset % 8)
+ {
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ return 1;
+ case FFI_TYPE_DOUBLE:
+ classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+#endif
+ }
+ }
+ }
+ abort();
+}
+
+/* Examine the argument and set the number of registers required in each
+ class. Return zero iff the parameter should be passed in memory,
+ otherwise the number of words it occupies. */
+
+static size_t
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+ _Bool in_return, int *pngpr, int *pnsse)
+{
+ size_t n;
+ int i, ngpr, nsse;
+
+ n = classify_argument (type, classes, 0);
+ if (n == 0)
+ return 0;
+
+ ngpr = nsse = 0;
+ for (i = 0; i < n; ++i)
+ switch (classes[i])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ ngpr++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ nsse++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return != 0;
+ default:
+ abort ();
+ }
+
+ *pngpr = ngpr;
+ *pnsse = nsse;
+
+ return n;
+}
+
+/* Perform machine dependent cif processing. */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int gprcount, ssecount, i, avn, ngpr, nsse, flags;
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ size_t bytes, n, rtype_size;
+ ffi_type *rtype;
+
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
+ gprcount = ssecount = 0;
+
+ rtype = cif->rtype;
+ rtype_size = rtype->size;
+ switch (rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ flags = UNIX64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = UNIX64_RET_UINT8;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = UNIX64_RET_SINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = UNIX64_RET_UINT16;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = UNIX64_RET_SINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = UNIX64_RET_UINT32;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = UNIX64_RET_SINT32;
+ break;
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM32;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87;
+ break;
+ case FFI_TYPE_STRUCT:
+ n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ if (n == 0)
+ {
+ /* The return value is passed in memory. A pointer to that
+ memory is the first argument. Allocate a register for it. */
+ gprcount++;
+ /* We don't have to do anything in asm for the return. */
+ flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
+ }
+ else
+ {
+ _Bool sse0 = SSE_CLASS_P (classes[0]);
+
+ if (rtype_size == 4 && sse0)
+ flags = UNIX64_RET_XMM32;
+ else if (rtype_size == 8)
+ flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+ else
+ {
+ _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+ if (sse0 && sse1)
+ flags = UNIX64_RET_ST_XMM0_XMM1;
+ else if (sse0)
+ flags = UNIX64_RET_ST_XMM0_RAX;
+ else if (sse1)
+ flags = UNIX64_RET_ST_RAX_XMM0;
+ else
+ flags = UNIX64_RET_ST_RAX_RDX;
+ flags |= rtype_size << UNIX64_SIZE_SHIFT;
+ }
+ }
+ break;
+ case FFI_TYPE_COMPLEX:
+ switch (rtype->elements[0]->type)
+ {
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags = UNIX64_RET_ST_RAX_RDX | (rtype_size << UNIX64_SIZE_SHIFT);
+ break;
+ case FFI_TYPE_FLOAT:
+ flags = UNIX64_RET_XMM64;
+ break;
+ case FFI_TYPE_DOUBLE:
+ flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+ break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags = UNIX64_RET_X87_2;
+ break;
+#endif
+ default:
+ return FFI_BAD_TYPEDEF;
+ }
+ break;
+ default:
+ return FFI_BAD_TYPEDEF;
+ }
+
+ /* Go over all arguments and determine the way they should be passed.
+ If it's in a register and there is space for it, let that be so. If
+ not, add its size to the stack byte count. */
+ for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+ {
+ if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = cif->arg_types[i]->alignment;
+
+ if (align < 8)
+ align = 8;
+
+ bytes = ALIGN (bytes, align);
+ bytes += cif->arg_types[i]->size;
+ }
+ else
+ {
+ gprcount += ngpr;
+ ssecount += nsse;
+ }
+ }
+ if (ssecount)
+ flags |= UNIX64_FLAG_XMM_ARGS;
+
+ cif->flags = flags;
+ cif->bytes = ALIGN (bytes, 8);
+
+ return FFI_OK;
+}
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ char *stack, *argp;
+ ffi_type **arg_types;
+ int gprcount, ssecount, ngpr, nsse, i, avn, flags;
+ struct register_args *reg_args;
+
+ /* Can't call 32-bit mode from 64-bit mode. */
+ FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+ /* If the return value is a struct and we don't have a return value
+ address then we need to make one. Otherwise we can ignore it. */
+ flags = cif->flags;
+ if (rvalue == NULL)
+ {
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ rvalue = alloca (cif->rtype->size);
+ else
+ flags = UNIX64_RET_VOID;
+ }
+
+ /* Allocate the space for the arguments, plus 4 words of temp space. */
+ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+ reg_args = (struct register_args *) stack;
+ argp = stack + sizeof (struct register_args);
+
+ reg_args->r10 = (uintptr_t) closure;
+
+ gprcount = ssecount = 0;
+
+ /* If the return value is passed in memory, add the pointer as the
+ first integer argument. */
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ for (i = 0; i < avn; ++i)
+ {
+ size_t n, size = arg_types[i]->size;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ memcpy (argp, avalue[i], size);
+ argp += size;
+ }
+ else
+ {
+ /* The argument is passed entirely in registers. */
+ char *a = (char *) avalue[i];
+ int j;
+
+ for (j = 0; j < n; j++, a += 8, size -= 8)
+ {
+ switch (classes[j])
+ {
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Sign-extend integer arguments passed in general
+ purpose registers, to cope with the fact that
+ LLVM incorrectly assumes that this will be done
+ (the x86-64 PS ABI does not specify this). */
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+ break;
+ case FFI_TYPE_SINT16:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+ break;
+ case FFI_TYPE_SINT32:
+ reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+ break;
+ default:
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size);
+ }
+ gprcount++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSEDF_CLASS:
+ reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+ break;
+ case X86_64_SSESF_CLASS:
+ reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+ break;
+ default:
+ abort();
+ }
+ }
+ }
+ }
+ reg_args->rax = ssecount;
+
+ ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+ flags, rvalue, fn);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+extern void ffi_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ void (*dest)(void);
+ char *tramp = closure->tramp;
+
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
+ if (cif->flags & UNIX64_FLAG_XMM_ARGS)
+ dest = ffi_closure_unix64_sse;
+ else
+ dest = ffi_closure_unix64;
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+int FFI_HIDDEN
+ffi_closure_unix64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *rvalue,
+ struct register_args *reg_args,
+ char *argp)
+{
+ void **avalue;
+ ffi_type **arg_types;
+ long i, avn;
+ int gprcount, ssecount, ngpr, nsse;
+ int flags;
+
+ avn = cif->nargs;
+ flags = cif->flags;
+ avalue = alloca(avn * sizeof(void *));
+ gprcount = ssecount = 0;
+
+ if (flags & UNIX64_FLAG_RET_IN_MEM)
+ {
+ /* On return, %rax will contain the address that was passed
+ by the caller in %rdi. */
+ void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++];
+ *(void **)rvalue = r;
+ rvalue = r;
+ flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+ }
+
+ arg_types = cif->arg_types;
+ for (i = 0; i < avn; ++i)
+ {
+ enum x86_64_reg_class classes[MAX_CLASSES];
+ size_t n;
+
+ n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+ if (n == 0
+ || gprcount + ngpr > MAX_GPR_REGS
+ || ssecount + nsse > MAX_SSE_REGS)
+ {
+ long align = arg_types[i]->alignment;
+
+ /* Stack arguments are *always* at least 8 byte aligned. */
+ if (align < 8)
+ align = 8;
+
+ /* Pass this argument in memory. */
+ argp = (void *) ALIGN (argp, align);
+ avalue[i] = argp;
+ argp += arg_types[i]->size;
+ }
+ /* If the argument is in a single register, or two consecutive
+ integer registers, then we can use that address directly. */
+ else if (n == 1
+ || (n == 2 && !(SSE_CLASS_P (classes[0])
+ || SSE_CLASS_P (classes[1]))))
+ {
+ /* The argument is in a single register. */
+ if (SSE_CLASS_P (classes[0]))
+ {
+ avalue[i] = &reg_args->sse[ssecount];
+ ssecount += n;
+ }
+ else
+ {
+ avalue[i] = &reg_args->gpr[gprcount];
+ gprcount += n;
+ }
+ }
+ /* Otherwise, allocate space to make them consecutive. */
+ else
+ {
+ char *a = alloca (16);
+ int j;
+
+ avalue[i] = a;
+ for (j = 0; j < n; j++, a += 8)
+ {
+ if (SSE_CLASS_P (classes[j]))
+ memcpy (a, &reg_args->sse[ssecount++], 8);
+ else
+ memcpy (a, &reg_args->gpr[gprcount++], 8);
+ }
+ }
+ }
+
+ /* Invoke the closure. */
+ fun (cif, rvalue, avalue, user_data);
+
+ /* Tell assembly how to perform return type promotions. */
+ return flags;
+}
+
+extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_UNIX64)
+ return FFI_BAD_ABI;
+
+ closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
+ ? ffi_go_closure_unix64_sse
+ : ffi_go_closure_unix64);
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+#endif /* __x86_64__ */
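
The closure path above is driven by ffi_prep_closure_loc: it copies a 16-byte PC-relative trampoline into closure->tramp and parks the target address in the 8 bytes that follow. A hedged usage sketch, assuming an x86-64 Unix target where the closure memory comes from ffi_closure_alloc so the trampoline page is executable:

#include <ffi.h>
#include <stdio.h>

static void
double_it (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  *(ffi_arg *) ret = *(int *) args[0] * 2;
}

int
main (void)
{
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
  ffi_cif cif;
  ffi_type *args[1] = { &ffi_type_sint };

  if (closure
      && ffi_prep_cif (&cif, FFI_UNIX64, 1, &ffi_type_sint, args) == FFI_OK
      && ffi_prep_closure_loc (closure, &cif, double_it, NULL, code) == FFI_OK)
    {
      int (*fn) (int) = (int (*)(int)) code;
      printf ("%d\n", fn (21));  /* prints 42 */
    }
  ffi_closure_free (closure);
  return 0;
}
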
diff --git a/circuitpython/lib/libffi/src/x86/ffitarget.h b/circuitpython/lib/libffi/src/x86/ffitarget.h
new file mode 100644
index 0000000..8c1dcac
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/ffitarget.h
@@ -0,0 +1,139 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 2012, 2014 Anthony Green
+ Copyright (c) 1996-2003, 2010 Red Hat, Inc.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ Target configuration macros for x86 and x86-64.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+/* For code common to all platforms on x86 and x86_64. */
+#define X86_ANY
+
+#if defined (X86_64) && defined (__i386__)
+#undef X86_64
+#define X86
+#endif
+
+#ifdef X86_WIN64
+#define FFI_SIZEOF_ARG 8
+#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
+#endif
+
+#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+#ifndef _MSC_VER
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM
+#ifdef X86_WIN64
+#ifdef _MSC_VER
+typedef unsigned __int64 ffi_arg;
+typedef __int64 ffi_sarg;
+#else
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#endif
+#else
+#if defined __x86_64__ && defined __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW 4
+typedef unsigned long long ffi_arg;
+typedef long long ffi_sarg;
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+#endif
+
+typedef enum ffi_abi {
+#if defined(X86_WIN64)
+ FFI_FIRST_ABI = 0,
+ FFI_WIN64,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_WIN64
+
+#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+ FFI_FIRST_ABI = 1,
+ FFI_UNIX64,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_UNIX64
+
+#elif defined(X86_WIN32)
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV = 1,
+ FFI_STDCALL = 2,
+ FFI_THISCALL = 3,
+ FFI_FASTCALL = 4,
+ FFI_MS_CDECL = 5,
+ FFI_PASCAL = 6,
+ FFI_REGISTER = 7,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_MS_CDECL
+#else
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV = 1,
+ FFI_THISCALL = 3,
+ FFI_FASTCALL = 4,
+ FFI_STDCALL = 5,
+ FFI_PASCAL = 6,
+ FFI_REGISTER = 7,
+ FFI_MS_CDECL = 8,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+#endif
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
+
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
+
+#if defined (X86_64) || defined(X86_WIN64) \
+ || (defined (__x86_64__) && defined (X86_DARWIN))
+# define FFI_TRAMPOLINE_SIZE 24
+# define FFI_NATIVE_RAW_API 0
+#else
+# define FFI_TRAMPOLINE_SIZE 12
+# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
+#endif
+
+#endif
+
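The ffi_abi enumeration above is what callers hand to ffi_prep_cif; FFI_DEFAULT_ABI is FFI_SYSV on a plain 32-bit ELF build, but any of the listed conventions can be requested per cif. A small sketch, assuming an i386 GCC target; the callee poke is hypothetical and only for illustration:

#include <ffi.h>

static void __attribute__ ((stdcall))
poke (unsigned x)  /* hypothetical stdcall callee */
{
  (void) x;
}

int
main (void)
{
  ffi_cif cif;
  ffi_type *args[1] = { &ffi_type_uint32 };
  unsigned v = 7;
  void *avalue[1] = { &v };

  /* Request the stdcall convention instead of FFI_DEFAULT_ABI;
     ffi_prep_cif_machdep records the callee-popped byte count.  */
  if (ffi_prep_cif (&cif, FFI_STDCALL, 1, &ffi_type_void, args) == FFI_OK)
    ffi_call (&cif, FFI_FN (poke), NULL, avalue);
  return 0;
}
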
diff --git a/circuitpython/lib/libffi/src/x86/ffiw64.c b/circuitpython/lib/libffi/src/x86/ffiw64.c
new file mode 100644
index 0000000..8a33a6c
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/ffiw64.c
@@ -0,0 +1,281 @@
+/* -----------------------------------------------------------------------
+ ffiw64.c - Copyright (c) 2014 Red Hat, Inc.
+
+ x86 win64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef X86_WIN64
+
+struct win64_call_frame
+{
+ UINT64 rbp; /* 0 */
+ UINT64 retaddr; /* 8 */
+ UINT64 fn; /* 16 */
+ UINT64 flags; /* 24 */
+ UINT64 rvalue; /* 32 */
+};
+
+extern void ffi_call_win64 (void *stack, struct win64_call_frame *,
+ void *closure) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ int flags, n;
+
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ flags = cif->rtype->type;
+ switch (flags)
+ {
+ default:
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ flags = FFI_TYPE_STRUCT;
+ break;
+ case FFI_TYPE_COMPLEX:
+ flags = FFI_TYPE_STRUCT;
+ /* FALLTHRU */
+ case FFI_TYPE_STRUCT:
+ switch (cif->rtype->size)
+ {
+ case 8:
+ flags = FFI_TYPE_UINT64;
+ break;
+ case 4:
+ flags = FFI_TYPE_SMALL_STRUCT_4B;
+ break;
+ case 2:
+ flags = FFI_TYPE_SMALL_STRUCT_2B;
+ break;
+ case 1:
+ flags = FFI_TYPE_SMALL_STRUCT_1B;
+ break;
+ }
+ break;
+ }
+ cif->flags = flags;
+
+ /* Each argument either fits in a register, an 8 byte slot, or is
+ passed by reference with the pointer in the 8 byte slot. */
+ n = cif->nargs;
+ n += (flags == FFI_TYPE_STRUCT);
+ if (n < 4)
+ n = 4;
+ cif->bytes = n * 8;
+
+ return FFI_OK;
+}
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ int i, j, n, flags;
+ UINT64 *stack;
+ size_t rsize;
+ struct win64_call_frame *frame;
+
+ FFI_ASSERT(cif->abi == FFI_WIN64);
+
+ flags = cif->flags;
+ rsize = 0;
+
+ /* If we have no return value for a structure, we need to create one.
+ Otherwise we can ignore the return type entirely. */
+ if (rvalue == NULL)
+ {
+ if (flags == FFI_TYPE_STRUCT)
+ rsize = cif->rtype->size;
+ else
+ flags = FFI_TYPE_VOID;
+ }
+
+ stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize);
+ frame = (struct win64_call_frame *)((char *)stack + cif->bytes);
+ if (rsize)
+ rvalue = frame + 1;
+
+ frame->fn = (uintptr_t)fn;
+ frame->flags = flags;
+ frame->rvalue = (uintptr_t)rvalue;
+
+ j = 0;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ stack[0] = (uintptr_t)rvalue;
+ j = 1;
+ }
+
+ for (i = 0, n = cif->nargs; i < n; ++i, ++j)
+ {
+ switch (cif->arg_types[i]->size)
+ {
+ case 8:
+ stack[j] = *(UINT64 *)avalue[i];
+ break;
+ case 4:
+ stack[j] = *(UINT32 *)avalue[i];
+ break;
+ case 2:
+ stack[j] = *(UINT16 *)avalue[i];
+ break;
+ case 1:
+ stack[j] = *(UINT8 *)avalue[i];
+ break;
+ default:
+ stack[j] = (uintptr_t)avalue[i];
+ break;
+ }
+ }
+
+ ffi_call_win64 (stack, frame, closure);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
+extern void ffi_closure_win64(void) FFI_HIDDEN;
+extern void ffi_go_closure_win64(void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ static const unsigned char trampoline[16] = {
+ /* leaq -0x7(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+ /* jmpq *0x3(%rip) # 0x10 */
+ 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+ /* nopl (%rax) */
+ 0x0f, 0x1f, 0x00
+ };
+ unsigned char *tramp = closure->tramp;
+
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+ *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*))
+{
+ if (cif->abi != FFI_WIN64)
+ return FFI_BAD_ABI;
+
+ closure->tramp = ffi_go_closure_win64;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+struct win64_closure_frame
+{
+ UINT64 rvalue[2];
+ UINT64 fargs[4];
+ UINT64 retaddr;
+ UINT64 args[];
+};
+
+int FFI_HIDDEN
+ffi_closure_win64_inner(ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ struct win64_closure_frame *frame)
+{
+ void **avalue;
+ void *rvalue;
+ int i, n, nreg, flags;
+
+ avalue = alloca(cif->nargs * sizeof(void *));
+ rvalue = frame->rvalue;
+ nreg = 0;
+
+ /* When returning a structure, the address is in the first argument.
+ We must also be prepared to return the same address in %rax, so
+ install that address in the frame and pretend we return a pointer. */
+ flags = cif->flags;
+ if (flags == FFI_TYPE_STRUCT)
+ {
+ rvalue = (void *)(uintptr_t)frame->args[0];
+ frame->rvalue[0] = frame->args[0];
+ nreg = 1;
+ }
+
+ for (i = 0, n = cif->nargs; i < n; ++i, ++nreg)
+ {
+ size_t size = cif->arg_types[i]->size;
+ size_t type = cif->arg_types[i]->type;
+ void *a;
+
+ if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT)
+ {
+ if (nreg < 4)
+ a = &frame->fargs[nreg];
+ else
+ a = &frame->args[nreg];
+ }
+ else if (size == 1 || size == 2 || size == 4 || size == 8)
+ a = &frame->args[nreg];
+ else
+ a = (void *)(uintptr_t)frame->args[nreg];
+
+ avalue[i] = a;
+ }
+
+ /* Invoke the closure. */
+ fun (cif, rvalue, avalue, user_data);
+ return flags;
+}
+
+#endif /* X86_WIN64 */
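
Note how ffi_call_int above implements the Microsoft x64 rule: an argument of exactly 1, 2, 4 or 8 bytes travels by value in one 8-byte slot, and every other size is passed as a pointer to the value (the default case stores avalue[i] itself). A one-line editorial restatement of that predicate:

#include <stddef.h>

/* Mirrors the switch in ffi_call_int: only sizes 1, 2, 4 and 8 go by
   value; everything else goes by reference.  */
static int
win64_passed_by_reference (size_t size)
{
  return !(size == 1 || size == 2 || size == 4 || size == 8);
}
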
diff --git a/circuitpython/lib/libffi/src/x86/internal.h b/circuitpython/lib/libffi/src/x86/internal.h
new file mode 100644
index 0000000..09771ba
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/internal.h
@@ -0,0 +1,29 @@
+#define X86_RET_FLOAT 0
+#define X86_RET_DOUBLE 1
+#define X86_RET_LDOUBLE 2
+#define X86_RET_SINT8 3
+#define X86_RET_SINT16 4
+#define X86_RET_UINT8 5
+#define X86_RET_UINT16 6
+#define X86_RET_INT64 7
+#define X86_RET_INT32 8
+#define X86_RET_VOID 9
+#define X86_RET_STRUCTPOP 10
+#define X86_RET_STRUCTARG 11
+#define X86_RET_STRUCT_1B 12
+#define X86_RET_STRUCT_2B 13
+#define X86_RET_UNUSED14 14
+#define X86_RET_UNUSED15 15
+
+#define X86_RET_TYPE_MASK 15
+#define X86_RET_POP_SHIFT 4
+
+#define R_EAX 0
+#define R_EDX 1
+#define R_ECX 2
+
+#ifdef __PCC__
+# define HAVE_FASTCALL 0
+#else
+# define HAVE_FASTCALL 1
+#endif
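
These constants are shared with sysv.S: the low nibble of the value ffi_closure_inner returns indexes the store/load jump tables, and for FFI_STDCALL the bytes the callee must pop are packed above X86_RET_POP_SHIFT (see the `flags + (cif->bytes << X86_RET_POP_SHIFT)` return in ffi.c). A sketch of the decoding, assuming the macros above are in scope; the helper names are illustrative:

/* Unpack the combined value built in ffi_closure_inner.  */
static unsigned
x86_ret_type (int flags)
{
  return flags & X86_RET_TYPE_MASK;  /* jump-table index */
}

static unsigned
x86_pop_bytes (int flags)
{
  return (unsigned) flags >> X86_RET_POP_SHIFT;  /* stdcall stack pop */
}
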
diff --git a/circuitpython/lib/libffi/src/x86/internal64.h b/circuitpython/lib/libffi/src/x86/internal64.h
new file mode 100644
index 0000000..512e955
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/internal64.h
@@ -0,0 +1,22 @@
+#define UNIX64_RET_VOID 0
+#define UNIX64_RET_UINT8 1
+#define UNIX64_RET_UINT16 2
+#define UNIX64_RET_UINT32 3
+#define UNIX64_RET_SINT8 4
+#define UNIX64_RET_SINT16 5
+#define UNIX64_RET_SINT32 6
+#define UNIX64_RET_INT64 7
+#define UNIX64_RET_XMM32 8
+#define UNIX64_RET_XMM64 9
+#define UNIX64_RET_X87 10
+#define UNIX64_RET_X87_2 11
+#define UNIX64_RET_ST_XMM0_RAX 12
+#define UNIX64_RET_ST_RAX_XMM0 13
+#define UNIX64_RET_ST_XMM0_XMM1 14
+#define UNIX64_RET_ST_RAX_RDX 15
+
+#define UNIX64_RET_LAST 15
+
+#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
+#define UNIX64_FLAG_XMM_ARGS (1 << 11)
+#define UNIX64_SIZE_SHIFT 12
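
ffi_prep_cif_machdep in ffi64.c packs several fields into cif->flags using these constants: the low bits pick a UNIX64_RET_* dispatch entry, bit 10 marks a memory return, bit 11 marks SSE register arguments, and for two-word struct returns the struct size sits above UNIX64_SIZE_SHIFT. A sketch of pulling them apart, assuming the macros above are in scope; the helper names are illustrative:

static int unix64_ret_kind (int flags)    { return flags & UNIX64_RET_LAST; }
static int unix64_ret_in_mem (int flags)  { return (flags & UNIX64_FLAG_RET_IN_MEM) != 0; }
static int unix64_xmm_args (int flags)    { return (flags & UNIX64_FLAG_XMM_ARGS) != 0; }
static int unix64_struct_size (int flags) { return flags >> UNIX64_SIZE_SHIFT; }
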
diff --git a/circuitpython/lib/libffi/src/x86/sysv.S b/circuitpython/lib/libffi/src/x86/sysv.S
new file mode 100644
index 0000000..78f245b
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/sysv.S
@@ -0,0 +1,1040 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
+
+ X86 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include "internal.h"
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef X86_DARWIN
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define ENDF(X)
+#endif
+
+/* Handle win32 fastcall name mangling. */
+#ifdef X86_WIN32
+# define ffi_call_i386 @ffi_call_i386@8
+# define ffi_closure_inner @ffi_closure_inner@8
+#else
+# define ffi_call_i386 C(ffi_call_i386)
+# define ffi_closure_inner C(ffi_closure_inner)
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
+
+ .text
+ .balign 16
+ .globl ffi_call_i386
+ FFI_HIDDEN(ffi_call_i386)
+
+/* This is declared as
+
+ void ffi_call_i386(struct call_frame *frame, char *argp)
+ __attribute__((fastcall));
+
+ Thus the arguments are present in
+
+ ecx: frame
+ edx: argp
+*/
+
+ffi_call_i386:
+L(UW0):
+ # cfi_startproc
+#if !HAVE_FASTCALL
+ movl 4(%esp), %ecx
+ movl 8(%esp), %edx
+#endif
+ movl (%esp), %eax /* move the return address */
+ movl %ebp, (%ecx) /* store %ebp into local frame */
+ movl %eax, 4(%ecx) /* store retaddr into local frame */
+
+ /* New stack frame based off ebp. This is a little bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-4, so from the
+ perspective of the unwind info, it hasn't moved. */
+ movl %ecx, %ebp
+L(UW1):
+ # cfi_def_cfa(%ebp, 8)
+ # cfi_rel_offset(%ebp, 0)
+
+ movl %edx, %esp /* set outgoing argument stack */
+ movl 20+R_EAX*4(%ebp), %eax /* set register arguments */
+ movl 20+R_EDX*4(%ebp), %edx
+ movl 20+R_ECX*4(%ebp), %ecx
+
+ call *8(%ebp)
+
+ movl 12(%ebp), %ecx /* load return type code */
+ movl %ebx, 8(%ebp) /* preserve %ebx */
+L(UW2):
+ # cfi_rel_offset(%ebx, 8)
+
+ andl $X86_RET_TYPE_MASK, %ecx
+#ifdef __PIC__
+ call C(__x86.get_pc_thunk.bx)
+L(pc1):
+ leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
+#else
+ leal L(store_table)(,%ecx, 8), %ebx
+#endif
+ movl 16(%ebp), %ecx /* load result address */
+ jmp *%ebx
+
+ .balign 8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+ fstps (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+ fstpl (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+ fstpt (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT8)
+ movsbl %al, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT16)
+ movswl %ax, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT8)
+ movzbl %al, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT16)
+ movzwl %ax, %eax
+ mov %eax, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_INT64)
+ movl %edx, 4(%ecx)
+ /* fallthru */
+E(L(store_table), X86_RET_INT32)
+ movl %eax, (%ecx)
+ /* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+ movl 8(%ebp), %ebx
+ movl %ebp, %esp
+ popl %ebp
+L(UW3):
+ # cfi_remember_state
+ # cfi_def_cfa(%esp, 4)
+ # cfi_restore(%ebx)
+ # cfi_restore(%ebp)
+ ret
+L(UW4):
+ # cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
+ movb %al, (%ecx)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+ movw %ax, (%ecx)
+ jmp L(e1)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(store_table), X86_RET_UNUSED14)
+ ud2
+E(L(store_table), X86_RET_UNUSED15)
+ ud2
+
+L(UW5):
+ # cfi_endproc
+ENDF(ffi_call_i386)
+
+/* The inner helper is declared as
+
+ void ffi_closure_inner(struct closure_frame *frame, char *argp)
+   __attribute__((fastcall));
+
+ Thus the arguments are placed in
+
+ ecx: frame
+ edx: argp
+*/
+
+/* Macros to help set up the closure_data structure. */
+
+#if HAVE_FASTCALL
+# define closure_FS (40 + 4)
+# define closure_CF 0
+#else
+# define closure_FS (8 + 40 + 12)
+# define closure_CF 8
+#endif
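+
+/* The stores below imply this layout at closure_CF(%esp) (a sketch;
+   compare struct closure_frame in ffi.c):
+
+	closure_CF+ 0: rettemp[4], scratch for the return value
+	closure_CF+16: regs[3], the incoming EAX/EDX/ECX arguments
+	closure_CF+28: cif
+	closure_CF+32: fun
+	closure_CF+36: user_data  */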
+
+#define FFI_CLOSURE_SAVE_REGS \
+ movl %eax, closure_CF+16+R_EAX*4(%esp); \
+ movl %edx, closure_CF+16+R_EDX*4(%esp); \
+ movl %ecx, closure_CF+16+R_ECX*4(%esp)
+
+#define FFI_CLOSURE_COPY_TRAMP_DATA \
+ movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \
+ movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \
+ movl %edx, closure_CF+28(%esp); \
+ movl %ecx, closure_CF+32(%esp); \
+ movl %eax, closure_CF+36(%esp)
+
+#if HAVE_FASTCALL
+# define FFI_CLOSURE_PREP_CALL \
+ movl %esp, %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */
+#else
+# define FFI_CLOSURE_PREP_CALL \
+ leal closure_CF(%esp), %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
+ movl %ecx, (%esp); \
+ movl %edx, 4(%esp)
+#endif
+
+#define FFI_CLOSURE_CALL_INNER(UWN) \
+ call ffi_closure_inner
+
+#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))(, %eax, 8), %edx; \
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+ jmp *%edx
+
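+/* The load of closure_CF(%esp) above is speculative: it fetches the
+   first word of rettemp before the return type is known.  Entries that
+   return something other than an integer simply ignore or overwrite
+   %eax.  */
+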
+#ifdef __PIC__
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ call C(__x86.get_pc_thunk.dx); \
+L(C1(pc,N)): \
+ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+ jmp *%edx
+# else
+# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
+# undef FFI_CLOSURE_CALL_INNER
+# define FFI_CLOSURE_CALL_INNER(UWN) \
+ movl %ebx, 40(%esp); /* save ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_rel_offset(%ebx, 40); \
+ call C(__x86.get_pc_thunk.bx); /* load got register */ \
+ addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
+ call ffi_closure_inner@PLT
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \
+ movl 40(%esp), %ebx; /* restore ebx */ \
+L(C1(UW,UWN)): \
+ # cfi_restore(%ebx); \
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+ jmp *%edx
+# endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+ .balign 16
+ .globl C(ffi_go_closure_EAX)
+ FFI_HIDDEN(C(ffi_go_closure_EAX))
+C(ffi_go_closure_EAX):
+L(UW6):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW7):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%eax), %edx /* copy cif */
+ movl 8(%eax), %ecx /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %ecx, closure_CF+32(%esp)
+ movl %eax, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW8):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_EAX))
+
+ .balign 16
+ .globl C(ffi_go_closure_ECX)
+ FFI_HIDDEN(C(ffi_go_closure_ECX))
+C(ffi_go_closure_ECX):
+L(UW9):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW10):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW11):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_ECX))
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+ On entry, %eax contains the address of the ffi_closure. */
+
+ .balign 16
+ .globl C(ffi_closure_i386)
+ FFI_HIDDEN(C(ffi_closure_i386))
+
+C(ffi_closure_i386):
+L(UW12):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW13):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closures. */
+L(do_closure_i386):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(14)
+ FFI_CLOSURE_MASK_AND_JUMP(2, 15)
+
+ .balign 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+ addl $closure_FS, %esp
+L(UW16):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret
+L(UW17):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+ addl $closure_FS, %esp
+L(UW18):
+ # cfi_adjust_cfa_offset(-closure_FS)
+ ret $4
+L(UW19):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e2)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table2), X86_RET_UNUSED14)
+ ud2
+E(L(load_table2), X86_RET_UNUSED15)
+ ud2
+
+L(UW20):
+ # cfi_endproc
+ENDF(C(ffi_closure_i386))
+
+ .balign 16
+ .globl C(ffi_go_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_go_closure_STDCALL))
+C(ffi_go_closure_STDCALL):
+L(UW21):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW22):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, closure_CF+28(%esp)
+ movl %eax, closure_CF+32(%esp)
+ movl %ecx, closure_CF+36(%esp) /* closure is user_data */
+ jmp L(do_closure_STDCALL)
+L(UW23):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_STDCALL))
+
+/* For REGISTER, we have no available parameter registers, and so we
+ enter here having pushed the closure onto the stack. */
+
+ .balign 16
+ .globl C(ffi_closure_REGISTER)
+ FFI_HIDDEN(C(ffi_closure_REGISTER))
+C(ffi_closure_REGISTER):
+L(UW24):
+ # cfi_startproc
+ # cfi_def_cfa(%esp, 8)
+ # cfi_offset(%eip, -8)
+ subl $closure_FS-4, %esp
+L(UW25):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl closure_FS-4(%esp), %ecx /* load retaddr */
+ movl closure_FS(%esp), %eax /* load closure */
+ movl %ecx, closure_FS(%esp) /* move retaddr */
+ jmp L(do_closure_REGISTER)
+L(UW26):
+ # cfi_endproc
+ENDF(C(ffi_closure_REGISTER))
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+   the stack following the closure.  The amount to pop is returned
+   to us by ffi_closure_inner.  */
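+
+/* A worked example of the encoding, assuming internal.h (where
+   X86_RET_POP_SHIFT is 4 and the type code occupies the low 4 bits):
+   for a stdcall target that pops 8 bytes and returns int, the value
+   returned from ffi_closure_inner is (8 << 4) | X86_RET_INT32, and the
+   shrl below recovers the 8.  */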
+
+ .balign 16
+ .globl C(ffi_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_closure_STDCALL))
+C(ffi_closure_STDCALL):
+L(UW27):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW28):
+ # cfi_def_cfa_offset(closure_FS + 4)
+
+ FFI_CLOSURE_SAVE_REGS
+
+ /* Entry point from ffi_closure_REGISTER. */
+L(do_closure_REGISTER):
+
+ FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closure. */
+L(do_closure_STDCALL):
+
+ FFI_CLOSURE_PREP_CALL
+ FFI_CLOSURE_CALL_INNER(29)
+
+ movl %eax, %ecx
+ shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */
+ leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */
+ movl closure_FS(%esp), %edx /* move return address */
+ movl %edx, (%ecx)
+
+ /* From this point on, the value of %esp upon return is %ecx+4,
+ and we've copied the return address to %ecx to make return easy.
+ There's no point in representing this in the unwind info, as
+ there is always a window between the mov and the ret which
+ will be wrong from one point of view or another. */
+
+ FFI_CLOSURE_MASK_AND_JUMP(3, 30)
+
+ .balign 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+ flds closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_DOUBLE)
+ fldl closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_LDOUBLE)
+ fldt closure_CF(%esp)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT8)
+ movsbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_SINT16)
+ movswl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT8)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_UINT16)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT64)
+ movl closure_CF+4(%esp), %edx
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_INT32)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_VOID)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCTARG)
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ movl %ecx, %esp
+ ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ movl %ecx, %esp
+ ret
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table3), X86_RET_UNUSED14)
+ ud2
+E(L(load_table3), X86_RET_UNUSED15)
+ ud2
+
+L(UW31):
+ # cfi_endproc
+ENDF(C(ffi_closure_STDCALL))
+
+#if !FFI_NO_RAW_API
+
+#define raw_closure_S_FS (16+16+12)
+
+ .balign 16
+ .globl C(ffi_closure_raw_SYSV)
+ FFI_HIDDEN(C(ffi_closure_raw_SYSV))
+C(ffi_closure_raw_SYSV):
+L(UW32):
+ # cfi_startproc
+ subl $raw_closure_S_FS, %esp
+L(UW33):
+ # cfi_def_cfa_offset(raw_closure_S_FS + 4)
+ movl %ebx, raw_closure_S_FS-4(%esp)
+L(UW34):
+ # cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
+ movl %edx, 12(%esp)
+ leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */
+ movl %edx, 8(%esp)
+ leal 16(%esp), %edx /* load &res */
+ movl %edx, 4(%esp)
+ movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */
+ movl %ebx, (%esp)
+ call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */
+
+ movl 20(%ebx), %eax /* load cif->flags */
+ andl $X86_RET_TYPE_MASK, %eax
+#ifdef __PIC__
+ call C(__x86.get_pc_thunk.bx)
+L(pc4):
+ leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
+#else
+ leal L(load_table4)(,%eax, 8), %ecx
+#endif
+ movl raw_closure_S_FS-4(%esp), %ebx
+L(UW35):
+ # cfi_restore(%ebx)
+ movl 16(%esp), %eax /* Optimistic load */
+ jmp *%ecx
+
+ .balign 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+ flds 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+ fldl 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+ fldt 16(%esp)
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT64)
+ movl 16+4(%esp), %edx
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+ addl $raw_closure_S_FS, %esp
+L(UW36):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret
+L(UW37):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+ addl $raw_closure_S_FS, %esp
+L(UW38):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret $4
+L(UW39):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e4)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table4), X86_RET_UNUSED14)
+ ud2
+E(L(load_table4), X86_RET_UNUSED15)
+ ud2
+
+L(UW40):
+ # cfi_endproc
+ENDF(C(ffi_closure_raw_SYSV))
+
+#define raw_closure_T_FS (16+16+8)
+
+ .balign 16
+ .globl C(ffi_closure_raw_THISCALL)
+ FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
+C(ffi_closure_raw_THISCALL):
+L(UW41):
+ # cfi_startproc
+ /* Rearrange the stack such that %ecx is the first argument.
+ This means moving the return address. */
+ popl %edx
+L(UW42):
+ # cfi_def_cfa_offset(0)
+ # cfi_register(%eip, %edx)
+ pushl %ecx
+L(UW43):
+ # cfi_adjust_cfa_offset(4)
+ pushl %edx
+L(UW44):
+ # cfi_adjust_cfa_offset(4)
+ # cfi_rel_offset(%eip, 0)
+ subl $raw_closure_T_FS, %esp
+L(UW45):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+ movl %ebx, raw_closure_T_FS-4(%esp)
+L(UW46):
+ # cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+ movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
+ movl %edx, 12(%esp)
+ leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */
+ movl %edx, 8(%esp)
+ leal 16(%esp), %edx /* load &res */
+ movl %edx, 4(%esp)
+ movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */
+ movl %ebx, (%esp)
+ call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */
+
+ movl 20(%ebx), %eax /* load cif->flags */
+ andl $X86_RET_TYPE_MASK, %eax
+#ifdef __PIC__
+ call C(__x86.get_pc_thunk.bx)
+L(pc5):
+ leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
+#else
+ leal L(load_table5)(,%eax, 8), %ecx
+#endif
+ movl raw_closure_T_FS-4(%esp), %ebx
+L(UW47):
+ # cfi_restore(%ebx)
+ movl 16(%esp), %eax /* Optimistic load */
+ jmp *%ecx
+
+ .balign 8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+ flds 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+ fldl 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+ fldt 16(%esp)
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT8)
+ movsbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT16)
+ movswl %ax, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT8)
+ movzbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT16)
+ movzwl %ax, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT64)
+ movl 16+4(%esp), %edx
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+ addl $raw_closure_T_FS, %esp
+L(UW48):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ /* Remove the extra %ecx argument we pushed. */
+ ret $4
+L(UW49):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+ addl $raw_closure_T_FS, %esp
+L(UW50):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ ret $8
+L(UW51):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+ movzbl %al, %eax
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+ movzwl %ax, %eax
+ jmp L(e5)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table5), X86_RET_UNUSED14)
+ ud2
+E(L(load_table5), X86_RET_UNUSED15)
+ ud2
+
+L(UW52):
+ # cfi_endproc
+ENDF(C(ffi_closure_raw_THISCALL))
+
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef X86_DARWIN
+# define COMDAT(X) \
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \
+ .weak_definition X; \
+ .private_extern X
+#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
+# define COMDAT(X) \
+ .section .text.X,"axG",@progbits,X,comdat; \
+ .globl X; \
+ FFI_HIDDEN(X)
+#else
+# define COMDAT(X)
+#endif
+
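+/* i386 has no PC-relative addressing mode, so PIC code discovers its
+   own address by calling a thunk, which copies its return address
+   (i.e. the address of the instruction after the call) into a
+   register.  */
+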
+#if defined(__PIC__)
+ COMDAT(C(__x86.get_pc_thunk.bx))
+C(__x86.get_pc_thunk.bx):
+ movl (%esp), %ebx
+ ret
+ENDF(C(__x86.get_pc_thunk.bx))
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+ COMDAT(C(__x86.get_pc_thunk.dx))
+C(__x86.get_pc_thunk.dx):
+ movl (%esp), %edx
+ ret
+ENDF(C(__x86.get_pc_thunk.dx))
+#endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(X86_WIN32)
+.section .eh_frame,"r"
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,EH_FRAME_FLAGS,@unwind
+#else
+.section .eh_frame,EH_FRAME_FLAGS,@progbits
+#endif
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
+
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
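+
+/* DW_CFA_advance_loc1 is opcode 0x02 followed by a one-byte delta in
+   code-alignment-factor units (1 here), hence ".byte 2, L(N)-L(P)".  */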
+
+ .balign 4
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x7c /* CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */
+ .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */
+ .balign 4
+L(ECIE):
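+
+/* Notes on the hand-assembled bytes: 0x7c is -4 as a signed LEB128, so
+   "DW_CFA_offset reg, N" (encoded as 0x80+reg, N) records reg saved at
+   CFA + N*-4.  In the i386 DWARF register numbering used here, column 3
+   is %ebx, 5 is %ebp, and 8 is the return address (%eip).  */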
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW5)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */
+ .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */
+ ADV(UW2, UW1)
+ .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */
+ ADV(UW3, UW2)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */
+ .byte 0xc0+3 /* DW_CFA_restore, %ebx */
+ .byte 0xc0+5 /* DW_CFA_restore, %ebp */
+ ADV(UW4, UW3)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 4
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW6)) /* Initial location */
+ .long L(UW8)-L(UW6) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW7, UW6)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW9)) /* Initial location */
+ .long L(UW11)-L(UW9) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW10, UW9)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW20)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW14, UW13)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW15, UW14)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW16, UW15)
+#else
+ ADV(UW16, UW13)
+#endif
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW17, UW16)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW18, UW17)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW19, UW18)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW21)) /* Initial location */
+ .long L(UW23)-L(UW21) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW22, UW21)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE5):
+
+ .set L(set6),L(EFDE6)-L(SFDE6)
+ .long L(set6) /* FDE Length */
+L(SFDE6):
+ .long L(SFDE6)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW24)) /* Initial location */
+ .long L(UW26)-L(UW24) /* Address range */
+ .byte 0 /* Augmentation size */
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */
+ ADV(UW25, UW24)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE6):
+
+ .set L(set7),L(EFDE7)-L(SFDE7)
+ .long L(set7) /* FDE Length */
+L(SFDE7):
+ .long L(SFDE7)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW27)) /* Initial location */
+ .long L(UW31)-L(UW27) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW28, UW27)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW29, UW28)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW30, UW29)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+#endif
+ .balign 4
+L(EFDE7):
+
+#if !FFI_NO_RAW_API
+ .set L(set8),L(EFDE8)-L(SFDE8)
+ .long L(set8) /* FDE Length */
+L(SFDE8):
+ .long L(SFDE8)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW32)) /* Initial location */
+ .long L(UW40)-L(UW32) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW33, UW32)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW34, UW33)
+ .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */
+ ADV(UW35, UW34)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW36, UW35)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW37, UW36)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW38, UW37)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW39, UW38)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE8):
+
+ .set L(set9),L(EFDE9)-L(SFDE9)
+ .long L(set9) /* FDE Length */
+L(SFDE9):
+ .long L(SFDE9)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW41)) /* Initial location */
+ .long L(UW52)-L(UW41) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW42, UW41)
+ .byte 0xe, 0 /* DW_CFA_def_cfa_offset */
+ .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */
+ ADV(UW43, UW42)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW44, UW43)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */
+ ADV(UW45, UW44)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW46, UW45)
+ .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */
+ ADV(UW47, UW46)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW48, UW47)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW49, UW48)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW50, UW49)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW51, UW50)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
+
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/circuitpython/lib/libffi/src/x86/unix64.S b/circuitpython/lib/libffi/src/x86/unix64.S
new file mode 100644
index 0000000..c83010c
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/unix64.S
@@ -0,0 +1,546 @@
+/* -----------------------------------------------------------------------
+ unix64.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 2008 Red Hat, Inc
+ - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+
+ x86-64 Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include "internal64.h"
+
+ .text
+
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define PLT(X) X@PLT
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define PLT(X) X
+# define ENDF(X)
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+ actual table. The entry points into the table are all 8 bytes.
+ The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+ void *raddr, void (*fnaddr)(void));
+
+   A bit of trickiness here -- ARGS+BYTES is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
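+
+/* The register area at ARGS is read via fixed offsets; a sketch of the
+   layout those offsets imply (compare struct register_args in ffi64.c):
+
+	0x00-0x2f: six GP argument registers (rdi, rsi, rdx, rcx, r8, r9)
+	0x30-0xaf: eight XMM argument registers, 16 bytes each
+	0xb0:      nonzero if the SSE registers need to be loaded
+	0xb8:      value popped into %r10 below (the static-chain slot)
+	0xc0:      first stack-passed argument  */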
+
+ .balign 8
+ .globl C(ffi_call_unix64)
+ FFI_HIDDEN(C(ffi_call_unix64))
+
+C(ffi_call_unix64):
+L(UW0):
+ movq (%rsp), %r10 /* Load return address. */
+ leaq (%rdi, %rsi), %rax /* Find local stack base. */
+ movq %rdx, (%rax) /* Save flags. */
+ movq %rcx, 8(%rax) /* Save raddr. */
+ movq %rbp, 16(%rax) /* Save old frame pointer. */
+ movq %r10, 24(%rax) /* Relocate return address. */
+ movq %rax, %rbp /* Finalize local stack frame. */
+
+	/* New stack frame based off rbp.  This is an itty bit of unwind
+ trickery in that the CFA *has* changed. There is no easy way
+ to describe it correctly on entry to the function. Fortunately,
+ it doesn't matter too much since at all points we can correctly
+ unwind back to ffi_call. Note that the location to which we
+ moved the return address is (the new) CFA-8, so from the
+ perspective of the unwind info, it hasn't moved. */
+L(UW1):
+ /* cfi_def_cfa(%rbp, 32) */
+ /* cfi_rel_offset(%rbp, 16) */
+
+ movq %rdi, %r10 /* Save a copy of the register area. */
+ movq %r8, %r11 /* Save a copy of the target fn. */
+ movl %r9d, %eax /* Set number of SSE registers. */
+
+ /* Load up all argument registers. */
+ movq (%r10), %rdi
+ movq 0x08(%r10), %rsi
+ movq 0x10(%r10), %rdx
+ movq 0x18(%r10), %rcx
+ movq 0x20(%r10), %r8
+ movq 0x28(%r10), %r9
+ movl 0xb0(%r10), %eax
+ testl %eax, %eax
+ jnz L(load_sse)
+L(ret_from_load_sse):
+
+ /* Deallocate the reg arg area, except for r10, then load via pop. */
+ leaq 0xb8(%r10), %rsp
+ popq %r10
+
+ /* Call the user function. */
+ call *%r11
+
+ /* Deallocate stack arg area; local stack frame in redzone. */
+ leaq 24(%rbp), %rsp
+
+ movq 0(%rbp), %rcx /* Reload flags. */
+ movq 8(%rbp), %rdi /* Reload raddr. */
+ movq 16(%rbp), %rbp /* Reload old frame pointer. */
+L(UW2):
+ /* cfi_remember_state */
+ /* cfi_def_cfa(%rsp, 8) */
+ /* cfi_restore(%rbp) */
+
+	/* The first byte of the flags contains the UNIX64_RET_* type code. */
+ cmpb $UNIX64_RET_LAST, %cl
+ movzbl %cl, %r10d
+ leaq L(store_table)(%rip), %r11
+ ja L(sa)
+ leaq (%r11, %r10, 8), %r10
+
+ /* Prep for the structure cases: scratch area in redzone. */
+ leaq -20(%rsp), %rsi
+ jmp *%r10
+
+ .balign 8
+L(store_table):
+E(L(store_table), UNIX64_RET_VOID)
+ ret
+E(L(store_table), UNIX64_RET_UINT8)
+ movzbl %al, %eax
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_UINT16)
+ movzwl %ax, %eax
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_UINT32)
+ movl %eax, %eax
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_SINT8)
+ movsbq %al, %rax
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_SINT16)
+ movswq %ax, %rax
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_SINT32)
+ cltq
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_INT64)
+ movq %rax, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_XMM32)
+ movd %xmm0, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_XMM64)
+ movq %xmm0, (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_X87)
+ fstpt (%rdi)
+ ret
+E(L(store_table), UNIX64_RET_X87_2)
+ fstpt (%rdi)
+ fstpt 16(%rdi)
+ ret
+E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ movq %rax, 8(%rsi)
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ movq %xmm0, 8(%rsi)
+ jmp L(s2)
+E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ movq %xmm1, 8(%rsi)
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ movq %rdx, 8(%rsi)
+L(s2):
+ movq %rax, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
+ rep movsb
+ ret
+ .balign 8
+L(s3):
+ movq %xmm0, (%rsi)
+ shrl $UNIX64_SIZE_SHIFT, %ecx
+ rep movsb
+ ret
+
+L(sa): call PLT(C(abort))
+
+ /* Many times we can avoid loading any SSE registers at all.
+ It's not worth an indirect jump to load the exact set of
+ SSE registers needed; zero or all is a good compromise. */
+ .balign 2
+L(UW3):
+ /* cfi_restore_state */
+L(load_sse):
+ movdqa 0x30(%r10), %xmm0
+ movdqa 0x40(%r10), %xmm1
+ movdqa 0x50(%r10), %xmm2
+ movdqa 0x60(%r10), %xmm3
+ movdqa 0x70(%r10), %xmm4
+ movdqa 0x80(%r10), %xmm5
+ movdqa 0x90(%r10), %xmm6
+ movdqa 0xa0(%r10), %xmm7
+ jmp L(ret_from_load_sse)
+
+L(UW4):
+ENDF(C(ffi_call_unix64))
+
+/* 6 general registers, 8 vector registers,
+ 32 bytes of rvalue, 8 bytes of alignment. */
+#define ffi_closure_OFS_G 0
+#define ffi_closure_OFS_V (6*8)
+#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16)
+#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8)
+
+/* The location of rvalue within the red zone after deallocating the frame. */
+#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
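+
+/* With the values above, ffi_closure_FS is 216 and RED_RVALUE is -40:
+   once the addq pops the frame, rvalue still lies inside the 128-byte
+   red zone the SysV x86-64 ABI guarantees below %rsp, so reading it
+   from there is safe.  */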
+
+ .balign 2
+ .globl C(ffi_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_closure_unix64_sse))
+
+C(ffi_closure_unix64_sse):
+L(UW5):
+ subq $ffi_closure_FS, %rsp
+L(UW6):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp L(sse_entry1)
+
+L(UW7):
+ENDF(C(ffi_closure_unix64_sse))
+
+ .balign 2
+ .globl C(ffi_closure_unix64)
+ FFI_HIDDEN(C(ffi_closure_unix64))
+
+C(ffi_closure_unix64):
+L(UW8):
+ subq $ffi_closure_FS, %rsp
+L(UW9):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry1):
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+ movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */
+ movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */
+ movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */
+#else
+ movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */
+#endif
+L(do_closure):
+ leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
+ movq %rsp, %r8 /* Load reg_args */
+ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
+ call C(ffi_closure_unix64_inner)
+
+ /* Deallocate stack frame early; return value is now in redzone. */
+ addq $ffi_closure_FS, %rsp
+L(UW10):
+ /* cfi_adjust_cfa_offset(-ffi_closure_FS) */
+
+	/* The first byte of the return value contains the UNIX64_RET_* code. */
+ cmpb $UNIX64_RET_LAST, %al
+ movzbl %al, %r10d
+ leaq L(load_table)(%rip), %r11
+ ja L(la)
+ leaq (%r11, %r10, 8), %r10
+ leaq ffi_closure_RED_RVALUE(%rsp), %rsi
+ jmp *%r10
+
+ .balign 8
+L(load_table):
+E(L(load_table), UNIX64_RET_VOID)
+ ret
+E(L(load_table), UNIX64_RET_UINT8)
+ movzbl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_UINT16)
+ movzwl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_UINT32)
+ movl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_SINT8)
+ movsbl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_SINT16)
+ movswl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_SINT32)
+ movl (%rsi), %eax
+ ret
+E(L(load_table), UNIX64_RET_INT64)
+ movq (%rsi), %rax
+ ret
+E(L(load_table), UNIX64_RET_XMM32)
+ movd (%rsi), %xmm0
+ ret
+E(L(load_table), UNIX64_RET_XMM64)
+ movq (%rsi), %xmm0
+ ret
+E(L(load_table), UNIX64_RET_X87)
+ fldt (%rsi)
+ ret
+E(L(load_table), UNIX64_RET_X87_2)
+ fldt 16(%rsi)
+ fldt (%rsi)
+ ret
+E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ movq 8(%rsi), %rax
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ movq 8(%rsi), %xmm0
+ jmp L(l2)
+E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ movq 8(%rsi), %xmm1
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ movq 8(%rsi), %rdx
+L(l2):
+ movq (%rsi), %rax
+ ret
+ .balign 8
+L(l3):
+ movq (%rsi), %xmm0
+ ret
+
+L(la): call PLT(C(abort))
+
+L(UW11):
+ENDF(C(ffi_closure_unix64))
+
+ .balign 2
+ .globl C(ffi_go_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
+
+C(ffi_go_closure_unix64_sse):
+L(UW12):
+ subq $ffi_closure_FS, %rsp
+L(UW13):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
+ movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
+ movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp)
+ movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp)
+ movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp)
+ movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
+ movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
+ movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
+ jmp L(sse_entry2)
+
+L(UW14):
+ENDF(C(ffi_go_closure_unix64_sse))
+
+ .balign 2
+ .globl C(ffi_go_closure_unix64)
+ FFI_HIDDEN(C(ffi_go_closure_unix64))
+
+C(ffi_go_closure_unix64):
+L(UW15):
+ subq $ffi_closure_FS, %rsp
+L(UW16):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry2):
+ movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
+ movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
+ movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
+ movq %rcx, ffi_closure_OFS_G+0x18(%rsp)
+ movq %r8, ffi_closure_OFS_G+0x20(%rsp)
+ movq %r9, ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+ movl 4(%r10), %edi /* Load cif */
+ movl 8(%r10), %esi /* Load fun */
+ movl %r10d, %edx /* Load closure (user_data) */
+#else
+ movq 8(%r10), %rdi /* Load cif */
+ movq 16(%r10), %rsi /* Load fun */
+ movq %r10, %rdx /* Load closure (user_data) */
+#endif
+ jmp L(do_closure)
+
+L(UW17):
+ENDF(C(ffi_go_closure_unix64))
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
+#else
+.section .eh_frame,"a",@progbits
+#endif
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
+
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
+
+ .balign 8
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x78 /* CIE Data Alignment Factor */
+ .byte 0x10 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */
+ .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */
+ .balign 8
+L(ECIE):
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW4)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */
+ .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */
+ ADV(UW2, UW1)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+ ADV(UW3, UW2)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 8
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW5)) /* Initial location */
+ .long L(UW7)-L(UW5) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW6, UW5)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW8)) /* Initial location */
+ .long L(UW11)-L(UW8) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW9, UW8)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ ADV(UW10, UW9)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW14)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW15)) /* Initial location */
+ .long L(UW17)-L(UW15) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW16, UW15)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */
+ .balign 8
+L(EFDE5):
+#ifdef __APPLE__
+ .subsections_via_symbols
+#endif
+
+#endif /* __x86_64__ */
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/circuitpython/lib/libffi/src/x86/win64.S b/circuitpython/lib/libffi/src/x86/win64.S
new file mode 100644
index 0000000..a5a20b6
--- /dev/null
+++ b/circuitpython/lib/libffi/src/x86/win64.S
@@ -0,0 +1,219 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+ .cfi_sections .debug_frame
+#endif
+
+#define arg0 %rcx
+#define arg1 %rdx
+#define arg2 %r8
+#define arg3 %r9
+
+#ifdef SYMBOL_UNDERSCORE
+#define SYMBOL_NAME(name) _##name
+#else
+#define SYMBOL_NAME(name) name
+#endif
+
+.macro E which
+ .align 8
+ .org 0b + \which * 8
+.endm
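+
+/* "0b" refers back to the most recent "0:" label, so each E entry is
+   asserted to begin exactly \which * 8 bytes into the table -- the same
+   trick as the symbolic-.org jump tables in sysv.S and unix64.S.  */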
+
+ .text
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+   A bit of trickiness here -- FRAME is the base of the stack frame
+ for this function. This has been allocated by ffi_call. We also
+ deallocate some of the stack that has been alloca'd. */
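+
+/* FRAME is addressed below via fixed offsets; a sketch of the
+   struct win64_call_frame layout they imply (compare ffiw64.c):
+
+	offset  0: caller's %rbp
+	offset  8: relocated return address
+	offset 16: target function pointer
+	offset 24: flags (return-type code)
+	offset 32: rvalue pointer  */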
+
+ .align 8
+ .globl ffi_call_win64
+
+ .seh_proc ffi_call_win64
+ffi_call_win64:
+ cfi_startproc
+ /* Set up the local stack frame and install it in rbp/rsp. */
+ movq (%rsp), %rax
+ movq %rbp, (arg1)
+ movq %rax, 8(arg1)
+ movq arg1, %rbp
+ cfi_def_cfa(%rbp, 16)
+ cfi_rel_offset(%rbp, 0)
+ .seh_pushreg %rbp
+ .seh_setframe %rbp, 0
+ .seh_endprologue
+ movq arg0, %rsp
+
+ movq arg2, %r10
+
+ /* Load all slots into both general and xmm registers. */
+ movq (%rsp), %rcx
+ movsd (%rsp), %xmm0
+ movq 8(%rsp), %rdx
+ movsd 8(%rsp), %xmm1
+ movq 16(%rsp), %r8
+ movsd 16(%rsp), %xmm2
+ movq 24(%rsp), %r9
+ movsd 24(%rsp), %xmm3
+
+ call *16(%rbp)
+
+ movl 24(%rbp), %ecx
+ movq 32(%rbp), %r8
+ leaq 0f(%rip), %r10
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
+ leaq (%r10, %rcx, 8), %r10
+ ja 99f
+ jmp *%r10
+
+/* Below, we're space constrained most of the time. Thus we eschew the
+ modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
+.macro epilogue
+ leaveq
+ cfi_remember_state
+ cfi_def_cfa(%rsp, 8)
+ cfi_restore(%rbp)
+ ret
+ cfi_restore_state
+.endm
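+
+/* "leaveq" is shorthand for "movq %rbp, %rsp; popq %rbp", restoring the
+   caller's stack and frame pointers in a single one-byte instruction.  */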
+
+ .align 8
+0:
+E FFI_TYPE_VOID
+ epilogue
+E FFI_TYPE_INT
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_FLOAT
+ movss %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_DOUBLE
+ movsd %xmm0, (%r8)
+ epilogue
+E FFI_TYPE_LONGDOUBLE
+ call abort
+E FFI_TYPE_UINT8
+ movzbl %al, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT8
+ movsbq %al, %rax
+ jmp 98f
+E FFI_TYPE_UINT16
+ movzwl %ax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT16
+ movswq %ax, %rax
+ jmp 98f
+E FFI_TYPE_UINT32
+ movl %eax, %eax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT32
+ movslq %eax, %rax
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_UINT64
+98: movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_SINT64
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_STRUCT
+ epilogue
+E FFI_TYPE_POINTER
+ movq %rax, (%r8)
+ epilogue
+E FFI_TYPE_COMPLEX
+ call abort
+E FFI_TYPE_SMALL_STRUCT_1B
+ movb %al, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_2B
+ movw %ax, (%r8)
+ epilogue
+E FFI_TYPE_SMALL_STRUCT_4B
+ movl %eax, (%r8)
+ epilogue
+
+ .align 8
+99: call abort
+
+.purgem epilogue
+
+ cfi_endproc
+ .seh_endproc
+
+
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+ 16 bytes of result, 32 bytes of xmm registers. */
+#define ffi_clo_FS (32+8+16+32)
+#define ffi_clo_OFF_R (32+8)
+#define ffi_clo_OFF_X (32+8+16)
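+
+/* The closure entries below first spill the four integer argument
+   registers into the 32-byte "home space" the Win64 ABI obliges every
+   caller to reserve at 8(%rsp), just above the return address, before
+   allocating this frame.  */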
+
+ .align 8
+ .globl ffi_go_closure_win64
+
+ .seh_proc ffi_go_closure_win64
+ffi_go_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq 8(%r10), arg0 /* load cif */
+ movq 16(%r10), arg1 /* load fun */
+ movq %r10, arg2 /* closure is user_data */
+ jmp 0f
+ cfi_endproc
+ .seh_endproc
+
+ .align 8
+ .globl ffi_closure_win64
+
+ .seh_proc ffi_closure_win64
+ffi_closure_win64:
+ cfi_startproc
+ /* Save all integer arguments into the incoming reg stack space. */
+ movq arg0, 8(%rsp)
+ movq arg1, 16(%rsp)
+ movq arg2, 24(%rsp)
+ movq arg3, 32(%rsp)
+
+ movq FFI_TRAMPOLINE_SIZE(%r10), arg0 /* load cif */
+ movq FFI_TRAMPOLINE_SIZE+8(%r10), arg1 /* load fun */
+ movq FFI_TRAMPOLINE_SIZE+16(%r10), arg2 /* load user_data */
+0:
+ subq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(ffi_clo_FS)
+ .seh_stackalloc ffi_clo_FS
+ .seh_endprologue
+
+ /* Save all sse arguments into the stack frame. */
+ movsd %xmm0, ffi_clo_OFF_X(%rsp)
+ movsd %xmm1, ffi_clo_OFF_X+8(%rsp)
+ movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
+ movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
+
+ leaq ffi_clo_OFF_R(%rsp), arg3
+ call ffi_closure_win64_inner
+
+ /* Load the result into both possible result registers. */
+ movq ffi_clo_OFF_R(%rsp), %rax
+ movsd ffi_clo_OFF_R(%rsp), %xmm0
+
+ addq $ffi_clo_FS, %rsp
+ cfi_adjust_cfa_offset(-ffi_clo_FS)
+ ret
+
+ cfi_endproc
+ .seh_endproc