diff options
Diffstat (limited to 'circuitpython/lib/libffi/src/aarch64/ffi.c')
| -rw-r--r-- | circuitpython/lib/libffi/src/aarch64/ffi.c | 1152 |
1 files changed, 1152 insertions, 0 deletions
diff --git a/circuitpython/lib/libffi/src/aarch64/ffi.c b/circuitpython/lib/libffi/src/aarch64/ffi.c new file mode 100644 index 0000000..f79602b --- /dev/null +++ b/circuitpython/lib/libffi/src/aarch64/ffi.c @@ -0,0 +1,1152 @@ +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <ffi.h> +#include <ffi_common.h> +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. */ +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 +#endif + +union _d +{ + UINT64 d; + UINT32 s[2]; +}; + +struct _v +{ + union _d d[2] __attribute__((aligned(16))); +}; + +struct call_context +{ + struct _v v[N_V_ARG_REG]; + UINT64 x[N_X_ARG_REG]; +}; + +#if defined (__clang__) && defined (__APPLE__) +extern void sys_icache_invalidate (void *start, size_t len); +#endif + +static inline void +ffi_clear_cache (void *start, void *end) +{ +#if defined (__clang__) && defined (__APPLE__) + sys_icache_invalidate (start, (char *)end - (char *)start); +#elif defined (__GNUC__) + __builtin___clear_cache (start, end); +#else +#error "Missing builtin to flush instruction cache" +#endif +} + +/* A subroutine of is_vfp_type. Given a structure type, return the type code + of the first non-structure element. Recurse for structure elements. + Return -1 if the structure is in fact empty, i.e. no nested elements. */ + +static int +is_hfa0 (const ffi_type *ty) +{ + ffi_type **elements = ty->elements; + int i, ret = -1; + + if (elements != NULL) + for (i = 0; elements[i]; ++i) + { + ret = elements[i]->type; + if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX) + { + ret = is_hfa0 (elements[i]); + if (ret < 0) + continue; + } + break; + } + + return ret; +} + +/* A subroutine of is_vfp_type. Given a structure type, return true if all + of the non-structure elements are the same as CANDIDATE. */ + +static int +is_hfa1 (const ffi_type *ty, int candidate) +{ + ffi_type **elements = ty->elements; + int i; + + if (elements != NULL) + for (i = 0; elements[i]; ++i) + { + int t = elements[i]->type; + if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) + { + if (!is_hfa1 (elements[i], candidate)) + return 0; + } + else if (t != candidate) + return 0; + } + + return 1; +} + +/* Determine if TY may be allocated to the FP registers. This is both an + fp scalar type as well as an homogenous floating point aggregate (HFA). + That is, a structure consisting of 1 to 4 members of all the same type, + where that type is an fp scalar. + + Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_* + constant for the type. */ + +static int +is_vfp_type (const ffi_type *ty) +{ + ffi_type **elements; + int candidate, i; + size_t size, ele_count; + + /* Quickest tests first. */ + candidate = ty->type; + switch (candidate) + { + default: + return 0; + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + ele_count = 1; + goto done; + case FFI_TYPE_COMPLEX: + candidate = ty->elements[0]->type; + switch (candidate) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + ele_count = 2; + goto done; + } + return 0; + case FFI_TYPE_STRUCT: + break; + } + + /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */ + size = ty->size; + if (size < 4 || size > 64) + return 0; + + /* Find the type of the first non-structure member. */ + elements = ty->elements; + candidate = elements[0]->type; + if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX) + { + for (i = 0; ; ++i) + { + candidate = is_hfa0 (elements[i]); + if (candidate >= 0) + break; + } + } + + /* If the first member is not a floating point type, it's not an HFA. + Also quickly re-check the size of the structure. */ + switch (candidate) + { + case FFI_TYPE_FLOAT: + ele_count = size / sizeof(float); + if (size != ele_count * sizeof(float)) + return 0; + break; + case FFI_TYPE_DOUBLE: + ele_count = size / sizeof(double); + if (size != ele_count * sizeof(double)) + return 0; + break; + case FFI_TYPE_LONGDOUBLE: + ele_count = size / sizeof(long double); + if (size != ele_count * sizeof(long double)) + return 0; + break; + default: + return 0; + } + if (ele_count > 4) + return 0; + + /* Finally, make sure that all scalar elements are the same type. */ + for (i = 0; elements[i]; ++i) + { + int t = elements[i]->type; + if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) + { + if (!is_hfa1 (elements[i], candidate)) + return 0; + } + else if (t != candidate) + return 0; + } + + /* All tests succeeded. Encode the result. */ + done: + return candidate * 4 + (4 - ele_count); +} + +/* Representation of the procedure call argument marshalling + state. + + The terse state variable names match the names used in the AARCH64 + PCS. */ + +struct arg_state +{ + unsigned ngrn; /* Next general-purpose register number. */ + unsigned nsrn; /* Next vector register number. */ + size_t nsaa; /* Next stack offset. */ + +#if defined (__APPLE__) + unsigned allocating_variadic; +#endif +}; + +/* Initialize a procedure call argument marshalling state. */ +static void +arg_init (struct arg_state *state) +{ + state->ngrn = 0; + state->nsrn = 0; + state->nsaa = 0; +#if defined (__APPLE__) + state->allocating_variadic = 0; +#endif +} + +/* Allocate an aligned slot on the stack and return a pointer to it. */ +static void * +allocate_to_stack (struct arg_state *state, void *stack, + size_t alignment, size_t size) +{ + size_t nsaa = state->nsaa; + + /* Round up the NSAA to the larger of 8 or the natural + alignment of the argument's type. */ +#if defined (__APPLE__) + if (state->allocating_variadic && alignment < 8) + alignment = 8; +#else + if (alignment < 8) + alignment = 8; +#endif + + nsaa = ALIGN (nsaa, alignment); + state->nsaa = nsaa + size; + + return (char *)stack + nsaa; +} + +static ffi_arg +extend_integer_type (void *source, int type) +{ + switch (type) + { + case FFI_TYPE_UINT8: + return *(UINT8 *) source; + case FFI_TYPE_SINT8: + return *(SINT8 *) source; + case FFI_TYPE_UINT16: + return *(UINT16 *) source; + case FFI_TYPE_SINT16: + return *(SINT16 *) source; + case FFI_TYPE_UINT32: + return *(UINT32 *) source; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + return *(SINT32 *) source; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + return *(UINT64 *) source; + break; + case FFI_TYPE_POINTER: + return *(uintptr_t *) source; + default: + abort(); + } +} + +static void +extend_hfa_type (void *dest, void *src, int h) +{ + int f = h - AARCH64_RET_S4; + void *x0; + + asm volatile ( + "adr %0, 0f\n" +" add %0, %0, %1\n" +" br %0\n" +"0: ldp s16, s17, [%3]\n" /* S4 */ +" ldp s18, s19, [%3, #8]\n" +" b 4f\n" +" ldp s16, s17, [%3]\n" /* S3 */ +" ldr s18, [%3, #8]\n" +" b 3f\n" +" ldp s16, s17, [%3]\n" /* S2 */ +" b 2f\n" +" nop\n" +" ldr s16, [%3]\n" /* S1 */ +" b 1f\n" +" nop\n" +" ldp d16, d17, [%3]\n" /* D4 */ +" ldp d18, d19, [%3, #16]\n" +" b 4f\n" +" ldp d16, d17, [%3]\n" /* D3 */ +" ldr d18, [%3, #16]\n" +" b 3f\n" +" ldp d16, d17, [%3]\n" /* D2 */ +" b 2f\n" +" nop\n" +" ldr d16, [%3]\n" /* D1 */ +" b 1f\n" +" nop\n" +" ldp q16, q17, [%3]\n" /* Q4 */ +" ldp q18, q19, [%3, #16]\n" +" b 4f\n" +" ldp q16, q17, [%3]\n" /* Q3 */ +" ldr q18, [%3, #16]\n" +" b 3f\n" +" ldp q16, q17, [%3]\n" /* Q2 */ +" b 2f\n" +" nop\n" +" ldr q16, [%3]\n" /* Q1 */ +" b 1f\n" +"4: str q19, [%2, #48]\n" +"3: str q18, [%2, #32]\n" +"2: str q17, [%2, #16]\n" +"1: str q16, [%2]" + : "=&r"(x0) + : "r"(f * 12), "r"(dest), "r"(src) + : "memory", "v16", "v17", "v18", "v19"); +} + +static void * +compress_hfa_type (void *dest, void *reg, int h) +{ + switch (h) + { + case AARCH64_RET_S1: + if (dest == reg) + { +#ifdef __AARCH64EB__ + dest += 12; +#endif + } + else + *(float *)dest = *(float *)reg; + break; + case AARCH64_RET_S2: + asm ("ldp q16, q17, [%1]\n\t" + "st2 { v16.s, v17.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); + break; + case AARCH64_RET_S3: + asm ("ldp q16, q17, [%1]\n\t" + "ldr q18, [%1, #32]\n\t" + "st3 { v16.s, v17.s, v18.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); + break; + case AARCH64_RET_S4: + asm ("ldp q16, q17, [%1]\n\t" + "ldp q18, q19, [%1, #32]\n\t" + "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); + break; + + case AARCH64_RET_D1: + if (dest == reg) + { +#ifdef __AARCH64EB__ + dest += 8; +#endif + } + else + *(double *)dest = *(double *)reg; + break; + case AARCH64_RET_D2: + asm ("ldp q16, q17, [%1]\n\t" + "st2 { v16.d, v17.d }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); + break; + case AARCH64_RET_D3: + asm ("ldp q16, q17, [%1]\n\t" + "ldr q18, [%1, #32]\n\t" + "st3 { v16.d, v17.d, v18.d }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); + break; + case AARCH64_RET_D4: + asm ("ldp q16, q17, [%1]\n\t" + "ldp q18, q19, [%1, #32]\n\t" + "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); + break; + + default: + if (dest != reg) + return memcpy (dest, reg, 16 * (4 - (h & 3))); + break; + } + return dest; +} + +/* Either allocate an appropriate register for the argument type, or if + none are available, allocate a stack slot and return a pointer + to the allocated space. */ + +static void * +allocate_int_to_reg_or_stack (struct call_context *context, + struct arg_state *state, + void *stack, size_t size) +{ + if (state->ngrn < N_X_ARG_REG) + return &context->x[state->ngrn++]; + + state->ngrn = N_X_ARG_REG; + return allocate_to_stack (state, stack, size, size); +} + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + ffi_type *rtype = cif->rtype; + size_t bytes = cif->bytes; + int flags, i, n; + + switch (rtype->type) + { + case FFI_TYPE_VOID: + flags = AARCH64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = AARCH64_RET_UINT8; + break; + case FFI_TYPE_UINT16: + flags = AARCH64_RET_UINT16; + break; + case FFI_TYPE_UINT32: + flags = AARCH64_RET_UINT32; + break; + case FFI_TYPE_SINT8: + flags = AARCH64_RET_SINT8; + break; + case FFI_TYPE_SINT16: + flags = AARCH64_RET_SINT16; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = AARCH64_RET_SINT32; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + flags = AARCH64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64); + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + flags = is_vfp_type (rtype); + if (flags == 0) + { + size_t s = rtype->size; + if (s > 16) + { + flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM; + bytes += 8; + } + else if (s == 16) + flags = AARCH64_RET_INT128; + else if (s == 8) + flags = AARCH64_RET_INT64; + else + flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY; + } + break; + + default: + abort(); + } + + for (i = 0, n = cif->nargs; i < n; i++) + if (is_vfp_type (cif->arg_types[i])) + { + flags |= AARCH64_FLAG_ARG_V; + break; + } + + /* Round the stack up to a multiple of the stack alignment requirement. */ + cif->bytes = ALIGN(bytes, 16); + cif->flags = flags; +#if defined (__APPLE__) + cif->aarch64_nfixedargs = 0; +#endif + + return FFI_OK; +} + +#if defined (__APPLE__) +/* Perform Apple-specific cif processing for variadic calls */ +ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, + unsigned int nfixedargs, + unsigned int ntotalargs) +{ + ffi_status status = ffi_prep_cif_machdep (cif); + cif->aarch64_nfixedargs = nfixedargs; + return status; +} +#endif /* __APPLE__ */ + +extern void ffi_call_SYSV (struct call_context *context, void *frame, + void (*fn)(void), void *rvalue, int flags, + void *closure) FFI_HIDDEN; + +/* Call a function with the provided arguments and capture the return + value. */ +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, + void **avalue, void *closure) +{ + struct call_context *context; + void *stack, *frame, *rvalue; + struct arg_state state; + size_t stack_bytes, rtype_size, rsize; + int i, nargs, flags; + ffi_type *rtype; + + flags = cif->flags; + rtype = cif->rtype; + rtype_size = rtype->size; + stack_bytes = cif->bytes; + + /* If the target function returns a structure via hidden pointer, + then we cannot allow a null rvalue. Otherwise, mash a null + rvalue to void return type. */ + rsize = 0; + if (flags & AARCH64_RET_IN_MEM) + { + if (orig_rvalue == NULL) + rsize = rtype_size; + } + else if (orig_rvalue == NULL) + flags &= AARCH64_FLAG_ARG_V; + else if (flags & AARCH64_RET_NEED_COPY) + rsize = 16; + + /* Allocate consectutive stack for everything we'll need. */ + context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize); + stack = context + 1; + frame = stack + stack_bytes; + rvalue = (rsize ? frame + 32 : orig_rvalue); + + arg_init (&state); + for (i = 0, nargs = cif->nargs; i < nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; + size_t s = ty->size; + void *a = avalue[i]; + int h, t; + + t = ty->type; + switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + /* If the argument is a basic type the argument is allocated to an + appropriate register, or if none are available, to the stack. */ + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + do_pointer: + { + ffi_arg ext = extend_integer_type (a, t); + if (state.ngrn < N_X_ARG_REG) + context->x[state.ngrn++] = ext; + else + { + void *d = allocate_to_stack (&state, stack, ty->alignment, s); + state.ngrn = N_X_ARG_REG; + /* Note that the default abi extends each argument + to a full 64-bit slot, while the iOS abi allocates + only enough space. */ +#ifdef __APPLE__ + memcpy(d, a, s); +#else + *(ffi_arg *)d = ext; +#endif + } + } + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + { + void *dest; + + h = is_vfp_type (ty); + if (h) + { + int elems = 4 - (h & 3); + if (state.nsrn + elems <= N_V_ARG_REG) + { + dest = &context->v[state.nsrn]; + state.nsrn += elems; + extend_hfa_type (dest, a, h); + break; + } + state.nsrn = N_V_ARG_REG; + dest = allocate_to_stack (&state, stack, ty->alignment, s); + } + else if (s > 16) + { + /* If the argument is a composite type that is larger than 16 + bytes, then the argument has been copied to memory, and + the argument is replaced by a pointer to the copy. */ + a = &avalue[i]; + t = FFI_TYPE_POINTER; + goto do_pointer; + } + else + { + size_t n = (s + 7) / 8; + if (state.ngrn + n <= N_X_ARG_REG) + { + /* If the argument is a composite type and the size in + double-words is not more than the number of available + X registers, then the argument is copied into + consecutive X registers. */ + dest = &context->x[state.ngrn]; + state.ngrn += n; + } + else + { + /* Otherwise, there are insufficient X registers. Further + X register allocations are prevented, the NSAA is + adjusted and the argument is copied to memory at the + adjusted NSAA. */ + state.ngrn = N_X_ARG_REG; + dest = allocate_to_stack (&state, stack, ty->alignment, s); + } + } + memcpy (dest, a, s); + } + break; + + default: + abort(); + } + +#if defined (__APPLE__) + if (i + 1 == cif->aarch64_nfixedargs) + { + state.ngrn = N_X_ARG_REG; + state.nsrn = N_V_ARG_REG; + state.allocating_variadic = 1; + } +#endif + } + + ffi_call_SYSV (context, frame, fn, rvalue, flags, closure); + + if (flags & AARCH64_RET_NEED_COPY) + memcpy (orig_rvalue, rvalue, rtype_size); +} + +void +ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +#ifdef FFI_GO_CLOSURES +void +ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} +#endif /* FFI_GO_CLOSURES */ + +/* Build a trampoline. */ + +extern void ffi_closure_SYSV (void) FFI_HIDDEN; +extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; + +#if FFI_EXEC_TRAMPOLINE_TABLE + +#include <mach/mach.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> + +extern void *ffi_closure_trampoline_table_page; + +typedef struct ffi_trampoline_table ffi_trampoline_table; +typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry; + +struct ffi_trampoline_table +{ + /* contiguous writable and executable pages */ + vm_address_t config_page; + vm_address_t trampoline_page; + + /* free list tracking */ + uint16_t free_count; + ffi_trampoline_table_entry *free_list; + ffi_trampoline_table_entry *free_list_pool; + + ffi_trampoline_table *prev; + ffi_trampoline_table *next; +}; + +struct ffi_trampoline_table_entry +{ + void *(*trampoline) (); + ffi_trampoline_table_entry *next; +}; + +/* The trampoline configuration is placed a page prior to the trampoline's entry point */ +#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) ((void **) (((uint8_t *) codeloc) - PAGE_SIZE)); + +/* Total number of trampolines that fit in one trampoline table */ +#define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE) + +static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER; +static ffi_trampoline_table *ffi_trampoline_tables = NULL; + +static ffi_trampoline_table * +ffi_trampoline_table_alloc () +{ + ffi_trampoline_table *table = NULL; + + /* Loop until we can allocate two contiguous pages */ + while (table == NULL) + { + vm_address_t config_page = 0x0; + kern_return_t kt; + + /* Try to allocate two pages */ + kt = + vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2, + VM_FLAGS_ANYWHERE); + if (kt != KERN_SUCCESS) + { + fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt, + __FILE__, __LINE__); + break; + } + + /* Now drop the second half of the allocation to make room for the trampoline table */ + vm_address_t trampoline_page = config_page + PAGE_SIZE; + kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE); + if (kt != KERN_SUCCESS) + { + fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, + __FILE__, __LINE__); + break; + } + + /* Remap the trampoline table to directly follow the config page */ + vm_prot_t cur_prot; + vm_prot_t max_prot; + + kt = + vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE, + mach_task_self (), + (vm_address_t) & ffi_closure_trampoline_table_page, FALSE, + &cur_prot, &max_prot, VM_INHERIT_SHARE); + + /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */ + if (kt != KERN_SUCCESS) + { + /* Log unexpected failures */ + if (kt != KERN_NO_SPACE) + { + fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt, + __FILE__, __LINE__); + } + + vm_deallocate (mach_task_self (), config_page, PAGE_SIZE); + continue; + } + + /* We have valid trampoline and config pages */ + table = calloc (1, sizeof (ffi_trampoline_table)); + table->free_count = FFI_TRAMPOLINE_COUNT; + table->config_page = config_page; + table->trampoline_page = trampoline_page; + + /* Create and initialize the free list */ + table->free_list_pool = + calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry)); + + uint16_t i; + for (i = 0; i < table->free_count; i++) + { + ffi_trampoline_table_entry *entry = &table->free_list_pool[i]; + entry->trampoline = + (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE)); + + if (i < table->free_count - 1) + entry->next = &table->free_list_pool[i + 1]; + } + + table->free_list = table->free_list_pool; + } + + return table; +} + +void * +ffi_closure_alloc (size_t size, void **code) +{ + /* Create the closure */ + ffi_closure *closure = malloc (size); + if (closure == NULL) + return NULL; + + pthread_mutex_lock (&ffi_trampoline_lock); + + /* Check for an active trampoline table with available entries. */ + ffi_trampoline_table *table = ffi_trampoline_tables; + if (table == NULL || table->free_list == NULL) + { + table = ffi_trampoline_table_alloc (); + if (table == NULL) + { + free (closure); + return NULL; + } + + /* Insert the new table at the top of the list */ + table->next = ffi_trampoline_tables; + if (table->next != NULL) + table->next->prev = table; + + ffi_trampoline_tables = table; + } + + /* Claim the free entry */ + ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list; + ffi_trampoline_tables->free_list = entry->next; + ffi_trampoline_tables->free_count--; + entry->next = NULL; + + pthread_mutex_unlock (&ffi_trampoline_lock); + + /* Initialize the return values */ + *code = entry->trampoline; + closure->trampoline_table = table; + closure->trampoline_table_entry = entry; + + return closure; +} + +void +ffi_closure_free (void *ptr) +{ + ffi_closure *closure = ptr; + + pthread_mutex_lock (&ffi_trampoline_lock); + + /* Fetch the table and entry references */ + ffi_trampoline_table *table = closure->trampoline_table; + ffi_trampoline_table_entry *entry = closure->trampoline_table_entry; + + /* Return the entry to the free list */ + entry->next = table->free_list; + table->free_list = entry; + table->free_count++; + + /* If all trampolines within this table are free, and at least one other table exists, deallocate + * the table */ + if (table->free_count == FFI_TRAMPOLINE_COUNT + && ffi_trampoline_tables != table) + { + /* Remove from the list */ + if (table->prev != NULL) + table->prev->next = table->next; + + if (table->next != NULL) + table->next->prev = table->prev; + + /* Deallocate pages */ + kern_return_t kt; + kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE); + if (kt != KERN_SUCCESS) + fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, + __FILE__, __LINE__); + + kt = + vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE); + if (kt != KERN_SUCCESS) + fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt, + __FILE__, __LINE__); + + /* Deallocate free list */ + free (table->free_list_pool); + free (table); + } + else if (ffi_trampoline_tables != table) + { + /* Otherwise, bump this table to the top of the list */ + table->prev = NULL; + table->next = ffi_trampoline_tables; + if (ffi_trampoline_tables != NULL) + ffi_trampoline_tables->prev = table; + + ffi_trampoline_tables = table; + } + + pthread_mutex_unlock (&ffi_trampoline_lock); + + /* Free the closure */ + free (closure); +} + +#endif + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; + + void (*start)(void); + + if (cif->flags & AARCH64_FLAG_ARG_V) + start = ffi_closure_SYSV_V; + else + start = ffi_closure_SYSV; + +#if FFI_EXEC_TRAMPOLINE_TABLE + void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc); + config[0] = closure; + config[1] = start; +#else + static const unsigned char trampoline[16] = { + 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */ + 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */ + 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ + }; + char *tramp = closure->tramp; + + memcpy (tramp, trampoline, sizeof(trampoline)); + + *(UINT64 *)(tramp + 16) = (uintptr_t)start; + + ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE); +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +#ifdef FFI_GO_CLOSURES +extern void ffi_go_closure_SYSV (void) FFI_HIDDEN; +extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN; + +ffi_status +ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*)) +{ + void (*start)(void); + + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; + + if (cif->flags & AARCH64_FLAG_ARG_V) + start = ffi_go_closure_SYSV_V; + else + start = ffi_go_closure_SYSV; + + closure->tramp = start; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} +#endif /* FFI_GO_CLOSURES */ + +/* Primary handler to setup and invoke a function within a closure. + + A closure when invoked enters via the assembler wrapper + ffi_closure_SYSV(). The wrapper allocates a call context on the + stack, saves the interesting registers (from the perspective of + the calling convention) into the context then passes control to + ffi_closure_SYSV_inner() passing the saved context and a pointer to + the stack at the point ffi_closure_SYSV() was invoked. + + On the return path the assembler wrapper will reload call context + registers. + + ffi_closure_SYSV_inner() marshalls the call context into ffi value + descriptors, invokes the wrapped function, then marshalls the return + value back into the call context. */ + +int FFI_HIDDEN +ffi_closure_SYSV_inner (ffi_cif *cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + struct call_context *context, + void *stack, void *rvalue, void *struct_rvalue) +{ + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + int i, h, nargs, flags; + struct arg_state state; + + arg_init (&state); + + for (i = 0, nargs = cif->nargs; i < nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; + int t = ty->type; + size_t n, s = ty->size; + + switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s); + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + h = is_vfp_type (ty); + if (h) + { + n = 4 - (h & 3); + if (state.nsrn + n <= N_V_ARG_REG) + { + void *reg = &context->v[state.nsrn]; + state.nsrn += n; + + /* Eeek! We need a pointer to the structure, however the + homogeneous float elements are being passed in individual + registers, therefore for float and double the structure + is not represented as a contiguous sequence of bytes in + our saved register context. We don't need the original + contents of the register storage, so we reformat the + structure into the same memory. */ + avalue[i] = compress_hfa_type (reg, reg, h); + } + else + { + state.nsrn = N_V_ARG_REG; + avalue[i] = allocate_to_stack (&state, stack, + ty->alignment, s); + } + } + else if (s > 16) + { + /* Replace Composite type of size greater than 16 with a + pointer. */ + avalue[i] = *(void **) + allocate_int_to_reg_or_stack (context, &state, stack, + sizeof (void *)); + } + else + { + n = (s + 7) / 8; + if (state.ngrn + n <= N_X_ARG_REG) + { + avalue[i] = &context->x[state.ngrn]; + state.ngrn += n; + } + else + { + state.ngrn = N_X_ARG_REG; + avalue[i] = allocate_to_stack (&state, stack, + ty->alignment, s); + } + } + break; + + default: + abort(); + } + } + + flags = cif->flags; + if (flags & AARCH64_RET_IN_MEM) + rvalue = struct_rvalue; + + fun (cif, rvalue, avalue, user_data); + + return flags; +} |
