diff --git a/lib/luajit/src/Makefile b/lib/luajit/src/Makefile index fb0622a829..fdc3ddaccd 100644 --- a/lib/luajit/src/Makefile +++ b/lib/luajit/src/Makefile @@ -81,6 +81,14 @@ XCFLAGS= # with "make clean", followed by "make". # Note that most of these are NOT suitable for benchmarking or release mode! # +# Use the system provided memory allocator (realloc) instead of the +# bundled memory allocator. This is slower, but sometimes helpful for +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. +# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and +# the only way to get useful results from it for all other architectures. +#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC +# # This define is required to run LuaJIT under Valgrind. The Valgrind # header files must be installed. You should enable debug information, too. # Use --suppressions=lj.supp to avoid some false positives. @@ -228,7 +236,8 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_asm.o lj_trace.o lj_gdbjit.o lj_auditlog.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ - lj_lib.o lib_aux.o lj_dwarf_dwo.o \ + lj_lib.o lj_alloc.o lib_aux.o \ + lj_dwarf_dwo.o \ $(LJLIB_O) lib_init.o DWARF_DWO= lj_dwarf.dwo diff --git a/lib/luajit/src/Makefile.dep b/lib/luajit/src/Makefile.dep index cccc62aa42..78bc41819d 100644 --- a/lib/luajit/src/Makefile.dep +++ b/lib/luajit/src/Makefile.dep @@ -45,6 +45,8 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h +lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ + lj_prng.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h 
lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ @@ -199,9 +201,10 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h luajit.h + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ + lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h + lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h diff --git a/lib/luajit/src/lib_aux.c b/lib/luajit/src/lib_aux.c index 93af7f303f..c52c3e4c6b 100644 --- a/lib/luajit/src/lib_aux.c +++ b/lib/luajit/src/lib_aux.c @@ -308,6 +308,9 @@ static int panic(lua_State *L) return 0; } +#ifdef LUAJIT_USE_SYSMALLOC + + static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) { (void)ud; @@ -328,3 +331,18 @@ LUALIB_API lua_State *luaL_newstate(void) } return L; } + +#else + +LUALIB_API lua_State *luaL_newstate(void) +{ + lua_State *L; + L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); + if (L) { + G(L)->panic = panic; + } + return L; +} + +#endif + diff --git a/lib/luajit/src/lj_alloc.c b/lib/luajit/src/lj_alloc.c new file mode 100644 index 0000000000..d364d1ec21 --- /dev/null +++ b/lib/luajit/src/lj_alloc.c @@ -0,0 +1,1343 @@ +/* +** Bundled memory allocator. +** +** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc. 
+** The original bears the following remark: +** +** This is a version (aka dlmalloc) of malloc/free/realloc written by +** Doug Lea and released to the public domain, as explained at +** https://creativecommons.org/licenses/publicdomain. +** +** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) +** +** No additional copyright is claimed over the customizations. +** Please do NOT bother the original author about this version here! +** +** If you want to use dlmalloc in another project, you should get +** the original from: ftp://gee.cs.oswego.edu/pub/misc/ +** For thread-safe derivatives, take a look at: +** - ptmalloc: https://www.malloc.de/ +** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/ +*/ + +#define lj_alloc_c +#define LUA_CORE + +/* To get the mremap prototype. Must be defined before any system includes. */ +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "lj_def.h" +#include "lj_arch.h" +#include "lj_alloc.h" +#include "lj_prng.h" + +#ifndef LUAJIT_USE_SYSMALLOC + +#define MAX_SIZE_T (~(size_t)0) +#define MALLOC_ALIGNMENT ((size_t)8U) + +#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) +#define MAX_RELEASE_CHECK_RATE 255 + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define 
CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP support ------------------------------- */ + +#define MFAIL ((void *)(MAX_SIZE_T)) +#define CMFAIL ((char *)(MFAIL)) /* defined for convenience */ + +#define IS_DIRECT_BIT (SIZE_T_ONE) + + +/* Determine system-specific block allocation method. */ + +#include +/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ +#include + +#define LJ_ALLOC_MMAP 1 + +#define LJ_ALLOC_MMAP_PROBE 1 + +#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ + +#define LJ_ALLOC_MREMAP 1 + + +#if LJ_ALLOC_MMAP + +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) + +#if LJ_ALLOC_MMAP_PROBE + +#ifdef MAP_TRYFIXED +#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) +#else +#define MMAP_FLAGS_PROBE MMAP_FLAGS +#endif + +#define LJ_ALLOC_MMAP_PROBE_MAX 30 +#define LJ_ALLOC_MMAP_PROBE_LINEAR 5 + +#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) + +static void *mmap_probe(PRNGState *rs, size_t size) +{ + /* Hint for next allocation. Doesn't need to be thread-safe. */ + static uintptr_t hint_addr = 0; + int olderr = errno; + int retry; + for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { + void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); + uintptr_t addr = (uintptr_t)p; + if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && + ((addr + size) >> LJ_ALLOC_MBITS) == 0) { + /* We got a suitable address. Bump the hint address. 
*/ + hint_addr = addr + size; + errno = olderr; + return p; + } + if (p != MFAIL) { + munmap(p, size); + } else if (errno == ENOMEM) { + return MFAIL; + } + if (hint_addr) { + /* First, try linear probing. */ + if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { + hint_addr += 0x1000000; + if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) + hint_addr = 0; + continue; + } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { + /* Next, try a no-hint probe to get back an ASLR address. */ + hint_addr = 0; + continue; + } + } + /* Finally, try pseudo-random probing. */ + do { + hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define chunksize(p) ((p)->head & ~(INUSE_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. */ +#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS))) +#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +#define is_direct(p)\ + (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_direct(p)? 
DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* ---------------------- Overlaid data structures ----------------------- */ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk *fd; + struct malloc_tree_chunk *bk; + + struct malloc_tree_chunk *child[2]; + struct malloc_tree_chunk *parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk *tchunkptr; +typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +struct malloc_segment { + char *base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment *next; /* ptr to next segment */ +}; + +typedef struct malloc_segment msegment; +typedef struct malloc_segment *msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + msegment seg; + PRNGState *prng; +}; + +typedef struct malloc_state *mstate; + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* page-align a size */ +#define 
page_align(S)\ + (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\ + & ~(DEFAULT_GRANULARITY - SIZE_T_ONE)) + +#define mmap_align(S) page_align(S) + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char *)(A) >= S->base && (char *)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char *addr) +{ + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) +{ + msegmentptr sp = &m->seg; + for (;;) { + if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) ((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. 
See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I */ +#define compute_tree_index(S, I)\ +{\ + unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\ + if (X == 0) {\ + I = 0;\ + } else if (X > 0xFFFF) {\ + I = NTREEBINS-1;\ + } else {\ + unsigned int K = lj_fls(X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | (~(x<<1)+1)) + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and 
pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else\ + F = B->fd;\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + if (F == B) {\ + clear_smallmap(M, I);\ + } else {\ + F->bk = B;\ + B->fd = F;\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + if (B == F) {\ + clear_smallmap(M, I);\ + } else {\ + B->fd = F;\ + F->bk = B;\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr *H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if (!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + } else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + 
tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0) {\ + T = *C;\ + } else {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + } else {\ + tchunkptr F = T->fd;\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + }\ + }\ +} + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + F->bk = R;\ + R->fd = F;\ + } else {\ + tchunkptr *RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr *CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + *RP = 0;\ + }\ + }\ + if (XP != 0) {\ + tbinptr *H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + } else {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + if (R != 0) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + if ((C1 = X->child[1]) != 0) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) { insert_small_chunk(M, P, S)\ + } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) { unlink_small_chunk(M, P, S)\ + } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +static void *direct_alloc(mstate m, size_t nb) +{ + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ + char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - 
DIRECT_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset | IS_DIRECT_BIT; + p->head = psize|CINUSE_BIT; + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + return chunk2mem(p); + } + } + UNUSED(m); + return NULL; +} + +static mchunkptr direct_resize(mchunkptr oldp, size_t nb) +{ + size_t oldsize = chunksize(oldp); + if (is_small(nb)) /* Can't shrink direct regions below small size */ + return NULL; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) { + return oldp; + } else { + size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT; + size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char *cp = (char *)CALL_MREMAP((char *)oldp - offset, + oldmmsize, newmmsize, CALL_MREMAP_MV); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - DIRECT_FOOT_PAD; + newp->head = psize|CINUSE_BIT; + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + return newp; + } + } + return NULL; +} + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) +{ + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char *)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) +{ + /* Establish circular links for smallbins */ + bindex_t i; + for 
(i = 0; i < NSMALLBINS; i++) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) +{ + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (size_t)((char *)oldfirst - (char *)p); + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + } else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } else { + if (!cinuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + } + + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char *tbase, size_t tsize) +{ + /* Determine locations and sizes of segment, fenceposts, old top */ + char *old_top = (char *)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char *old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char *asp = rawsp + offset; + char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + if ((char *)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = (size_t)(csp - old_top); + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } +} + +/* -------------------------- System allocation -------------------------- */ + +static void *alloc_sys(mstate m, size_t nb) +{ + char *tbase = CMFAIL; + size_t tsize = 0; + + /* Directly map large chunks */ + if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { + void *mem = direct_alloc(m, nb); + if (mem != 0) + return mem; + } + + { + size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; + size_t rsize = granularity_align(req); + if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ + char *mp = (char *)(CALL_MMAP(m->prng, rsize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = rsize; + } + } + } + + if (tbase != CMFAIL) { + msegmentptr sp = &m->seg; + /* Try to merge with an existing segment */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = sp->next; + if (sp != 0 && segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } else { + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = sp->next; + if (sp != 0) { + char *oldbase = sp->base; + sp->base = 
tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } else { + add_segment(m, tbase, tsize); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + return chunk2mem(p); + } + } + + return NULL; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) +{ + size_t released = 0; + size_t nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char *base = sp->base; + size_t size = sp->size; + msegmentptr next = sp->next; + nsegs++; + { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + pred = sp; + sp = next; + } + /* Reset check counter */ + m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ? 
+ nsegs : MAX_RELEASE_CHECK_RATE; + return released; +} + +static int alloc_trim(mstate m, size_t pad) +{ + size_t released = 0; + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = DEFAULT_GRANULARITY; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char *)m->top); + + if (sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + + if (released != 0) { + sp->size -= released; + init_top(m, m->top, m->topsize - released); + } + } + + /* Unmap any unused mmapped segments */ + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 
1 : 0; +} + +/* ---------------------------- malloc support --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void *tmalloc_large(mstate m, size_t nb) +{ + tchunkptr v = 0; + size_t rsize = ~nb+1; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) + t = *treebin_at(m, lj_ffs(leftbits)); + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return NULL so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + mchunkptr r = chunk_plus_offset(v, nb); + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(m, v, (rsize + nb)); + } else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + return NULL; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void *tmalloc_small(mstate m, size_t nb) +{ + tchunkptr t, v; + mchunkptr r; + size_t rsize; + bindex_t i = lj_ffs(m->treemap); + + v = t = 
*treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + r = chunk_plus_offset(v, nb); + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(m, v, (rsize + nb)); + } else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); +} + +/* ----------------------------------------------------------------------- */ + +void *lj_alloc_create(PRNGState *rs) +{ + size_t tsize = DEFAULT_GRANULARITY; + char *tbase; + INIT_MMAP(); + UNUSED(rs); + tbase = (char *)(CALL_MMAP(rs, tsize)); + if (tbase != CMFAIL) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + msp->head = (msize|PINUSE_BIT|CINUSE_BIT); + m->seg.base = tbase; + m->seg.size = tsize; + m->release_checks = MAX_RELEASE_CHECK_RATE; + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE); + return m; + } + return NULL; +} + +void lj_alloc_setprng(void *msp, PRNGState *rs) +{ + mstate ms = (mstate)msp; + ms->prng = rs; +} + +void lj_alloc_destroy(void *msp) +{ + mstate ms = (mstate)msp; + msegmentptr sp = &ms->seg; + while (sp != 0) { + char *base = sp->base; + size_t size = sp->size; + sp = sp->next; + CALL_MUNMAP(base, size); + } +} + +static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize) +{ + mstate ms = (mstate)msp; + void *mem; + size_t nb; + if (nsize <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + return mem; + } else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + bindex_t i = lj_ffs(leftbits); + b = smallbin_at(ms, i); + p = b->fd; + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) { + set_inuse_and_pinuse(ms, p, small_index2size(i)); + } else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + return mem; + } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + return mem; + } + } + } else if (nsize >= MAX_REQUEST) { + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + } else { + nb = pad_request(nsize); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + return mem; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + return mem; + } else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + return mem; + } + return alloc_sys(ms, nb); +} + +static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr) +{ + if (ptr != 0) { + mchunkptr p = mem2chunk(ptr); + mstate fm = (mstate)msp; + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if ((prevsize & IS_DIRECT_BIT) != 0) { + prevsize &= ~IS_DIRECT_BIT; + psize += prevsize + DIRECT_FOOT_PAD; + CALL_MUNMAP((char *)p - prevsize, psize); + return NULL; + } else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + return NULL; + } + } + } + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (tsize > fm->trim_check) + alloc_trim(fm, 0); + return NULL; + } else 
if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + return NULL; + } else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + return NULL; + } + } + } else { + set_free_with_pinuse(p, psize, next); + } + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + } else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + } + return NULL; +} + +static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) +{ + if (nsize >= MAX_REQUEST) { + return NULL; + } else { + mstate m = (mstate)msp; + mchunkptr oldp = mem2chunk(ptr); + size_t oldsize = chunksize(oldp); + mchunkptr next = chunk_plus_offset(oldp, oldsize); + mchunkptr newp = 0; + size_t nb = request2size(nsize); + + /* Try to either shrink or extend into top. Else malloc-copy-free */ + if (is_direct(oldp)) { + newp = direct_resize(oldp, nb); /* this may return NULL. */ + } else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + newp = oldp; + if (rsize >= MIN_CHUNK_SIZE) { + mchunkptr rem = chunk_plus_offset(newp, nb); + set_inuse(m, newp, nb); + set_inuse(m, rem, rsize); + lj_alloc_free(m, chunk2mem(rem)); + } + } else if (next == m->top && oldsize + m->topsize > nb) { + /* Expand into top */ + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(oldp, nb); + set_inuse(m, oldp, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = oldp; + } + + if (newp != 0) { + return chunk2mem(newp); + } else { + void *newmem = lj_alloc_malloc(m, nsize); + if (newmem != 0) { + size_t oc = oldsize - overhead_for(oldp); + memcpy(newmem, ptr, oc < nsize ? 
oc : nsize); + lj_alloc_free(m, ptr); + } + return newmem; + } + } +} + +void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize) +{ + (void)osize; + if (nsize == 0) { + return lj_alloc_free(msp, ptr); + } else if (ptr == NULL) { + return lj_alloc_malloc(msp, nsize); + } else { + return lj_alloc_realloc(msp, ptr, nsize); + } +} + +#endif diff --git a/lib/luajit/src/lj_alloc.h b/lib/luajit/src/lj_alloc.h new file mode 100644 index 0000000000..669f50b79e --- /dev/null +++ b/lib/luajit/src/lj_alloc.h @@ -0,0 +1,18 @@ +/* +** Bundled memory allocator. +** Donated to the public domain. +*/ + +#ifndef _LJ_ALLOC_H +#define _LJ_ALLOC_H + +#include "lj_def.h" + +#ifndef LUAJIT_USE_SYSMALLOC +LJ_FUNC void *lj_alloc_create(PRNGState *rs); +LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); +LJ_FUNC void lj_alloc_destroy(void *msp); +LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); +#endif + +#endif diff --git a/lib/luajit/src/lj_state.c b/lib/luajit/src/lj_state.c index 05e11f668b..43488743ac 100644 --- a/lib/luajit/src/lj_state.c +++ b/lib/luajit/src/lj_state.c @@ -25,6 +25,7 @@ #include "lj_vm.h" #include "lj_prng.h" #include "lj_lex.h" +#include "lj_alloc.h" #include "luajit.h" /* -- Stack handling ------------------------------------------------------ */ @@ -183,10 +184,15 @@ static void close_state(lua_State *L) lj_assertG(g->gc.total == sizeof(GG_State), "memory leak of %lld bytes", (long long)(g->gc.total - sizeof(GG_State))); - g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); +#ifndef LUAJIT_USE_SYSMALLOC + if (g->allocf == lj_alloc_f) + lj_alloc_destroy(g->allocd); + else +#endif + g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) +LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) { PRNGState prng; if (!lj_prng_seed_secure(&prng)) { @@ -194,7 +200,14 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) /* Can only return NULL here, so this 
errors with "not enough memory". */ return NULL; } - GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == LJ_ALLOCF_INTERNAL) { + allocd = lj_alloc_create(&prng); + if (!allocd) return NULL; + allocf = lj_alloc_f; + } +#endif + GG_State *GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); lua_State *L = &GG->L; global_State *g = &GG->g; jit_State *J = &GG->J; @@ -209,9 +222,14 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; g->strempty.marked = LJ_GC_WHITE0; g->strempty.gct = ~LJ_TSTR; - g->allocf = f; - g->allocd = ud; + g->allocf = allocf; + g->allocd = allocd; g->prng = prng; +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == lj_alloc_f) { + lj_alloc_setprng(allocd, &g->prng); + } +#endif setgcref(g->mainthref, obj2gco(L)); setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); diff --git a/lib/luajit/src/lj_state.h b/lib/luajit/src/lj_state.h index df4a592e59..da17a5fd2e 100644 --- a/lib/luajit/src/lj_state.h +++ b/lib/luajit/src/lj_state.h @@ -29,4 +29,6 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) LJ_FUNC lua_State *lj_state_new(lua_State *L); LJ_FUNC void lj_state_free(global_State *g, lua_State *L); +#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) + #endif diff --git a/src/apps/ipfix/README.templates.md b/src/apps/ipfix/README.templates.md index f96c9cfaac..5baf224416 100644 --- a/src/apps/ipfix/README.templates.md +++ b/src/apps/ipfix/README.templates.md @@ -13,8 +13,18 @@ | v4 | 256 | v4 | ip | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced` | | v4_DNS | 258 | v4 | ip and udp port 53 | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, 
`destinationTransportPort`, `dnsFlagsCodes`, `dnsQuestionCount`, `dnsAnswerCount`, `dnsQuestionName=64`, `dnsQuestionType`, `dnsQuestionClass`, `dnsAnswerName=64`, `dnsAnswerType`, `dnsAnswerClass`, `dnsAnswerTtl`, `dnsAnswerRdata=64`, `dnsAnswerRdataLen` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount` | | v4_HTTP | 257 | v4 | ip and tcp dst port 80 | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `httpRequestMethod=8`, `httpRequestHost=32`, `httpRequestTarget=64` | -| v4_extended | 1256 | v4 | ip | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `tcpControlBitsReduced`, `icmpTypeCodeIPv4`, `ingressInterface`, `egressInterface` | `mac_to_as`, `vlan_to_ifindex`, `pfx4_to_as` +| v4_HTTPS_Flowmon | 259 | v4 | ip and tcp and (dst port 443 or dst port 8443) | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `fmTlsSNI=64`, `fmTlsSNILength` | +| v4_HTTP_Flowmon | 260 | v4 | ip and tcp dst port 80 | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `fmHttpRequestMethod`, `fmHttpRequestHost=32`, `fmHttpRequestTarget=64` | +| v4_extended | 1256 | v4 | ip | 
`sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv4` | `mac_to_as`, `vlan_to_ifindex`, `pfx4_to_as` +| v4_extended_HTTP | 1257 | v4 | ip and tcp dst port 80 | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv4`, `httpRequestMethod=8`, `httpRequestHost=32`, `httpRequestTarget=64` | +| v4_extended_HTTPS_Flowmon | 1258 | v4 | ip and tcp and (dst port 443 or dst port 8443) | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv4`, `fmTlsSNI=64`, `fmTlsSNILength` | +| v4_extended_HTTP_Flowmon | 1259 | v4 | ip and tcp dst port 80 | `sourceIPv4Address`, `destinationIPv4Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, 
`tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv4`, `fmHttpRequestMethod`, `fmHttpRequestHost=32`, `fmHttpRequestTarget=64` | | v6 | 512 | v6 | ip6 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced` | | v6_DNS | 514 | v6 | ip6 and udp port 53 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort`, `dnsFlagsCodes`, `dnsQuestionCount`, `dnsAnswerCount`, `dnsQuestionName=64`, `dnsQuestionType`, `dnsQuestionClass`, `dnsAnswerName=64`, `dnsAnswerType`, `dnsAnswerClass`, `dnsAnswerTtl`, `dnsAnswerRdata=64`, `dnsAnswerRdataLen` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount` | | v6_HTTP | 513 | v6 | ip6 and tcp dst port 80 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `httpRequestMethod=8`, `httpRequestHost=32`, `httpRequestTarget=64` | -| v6_extended | 1512 | v6 | ip6 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpNextAdjacentAsNumber`, `bgpPrevAdjacentAsNumber`, `tcpControlBitsReduced`, `icmpTypeCodeIPv6`, `ingressInterface`, `egressInterface` | `mac_to_as`, `vlan_to_ifindex`, `pfx6_to_as` +| 
v6_HTTPS_Flowmon | 515 | v6 | ip6 and tcp and (dst port 443 or dst port 8443) | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `fmTlsSNI=64`, `fmTlsSNILength` | +| v6_HTTP_Flowmon | 516 | v6 | ip6 and tcp dst port 80 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `fmHttpRequestMethod`, `fmHttpRequestHost=32`, `fmHttpRequestTarget=64` | +| v6_extended | 1512 | v6 | ip6 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv6` | `mac_to_as`, `vlan_to_ifindex`, `pfx6_to_as` +| v6_extended_HTTP | 1513 | v6 | ip6 and tcp dst port 80 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv6`, `httpRequestMethod=8`, `httpRequestHost=32`, `httpRequestTarget=64` | +| v6_extended_HTTPS_Flowmon | 1514 | v6 | ip6 and tcp and (dst port 443 or dst port 8443) | `sourceIPv6Address`, 
`destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv6`, `fmTlsSNI=64`, `fmTlsSNILength` | +| v6_extended_HTTP_Flowmon | 1515 | v6 | ip6 and tcp dst port 80 | `sourceIPv6Address`, `destinationIPv6Address`, `protocolIdentifier`, `sourceTransportPort`, `destinationTransportPort` | `flowStartMilliseconds`, `flowEndMilliseconds`, `packetDeltaCount`, `octetDeltaCount`, `tcpControlBitsReduced`, `sourceMacAddress`, `postDestinationMacAddress`, `vlanId`, `ipClassOfService`, `bgpSourceAsNumber`, `bgpDestinationAsNumber`, `bgpPrevAdjacentAsNumber`, `bgpNextAdjacentAsNumber`, `ingressInterface`, `egressInterface`, `icmpTypeCodeIPv6`, `fmHttpRequestMethod`, `fmHttpRequestHost=32`, `fmHttpRequestTarget=64` | diff --git a/src/apps/ipfix/http.lua b/src/apps/ipfix/http.lua new file mode 100644 index 0000000000..2365a26662 --- /dev/null +++ b/src/apps/ipfix/http.lua @@ -0,0 +1,212 @@ +module(..., package.seeall) + +local ffi = require("ffi") +local lib = require("core.lib") +local ctable = require("lib.ctable") +local metadata = require("apps.rss.metadata") + +-- Poor man's method to create a perfect hash table for a small number +-- of keys. This is not guaranteed to work. In that case, just use the +-- best one we found (with the smallest maximum displacement). The +-- keys are assumed to be strings. 
+local function perfect_hash(table) + local min, best + local i, key_type_size = 0, 0 + for key, _ in pairs(table) do + if #key > key_type_size then + key_type_size = #key + end + end + while i < 50 do + local t = ctable.new({ + key_type = ffi.typeof("char[$]", key_type_size), + -- Used for Flowmon + value_type = ffi.typeof("uint16_t"), + initial_size = #table*2, + }) + local entry = t.entry_type() + for key, value in pairs(table) do + ffi.fill(entry, ffi.sizeof(entry)) + entry.key = key + entry.value = value + t:add(entry.key, entry.value) + end + if t.max_displacement == 0 then return t end + if min == nil or t.max_displacement < min then + min = t.max_displacement + best = t + end + i = i + 1 + end + return best +end + +-- RFC9110, section 9. The values in this table are only used for the +-- HTTP_Flowmon template, which stores the request method as a bitmask +-- rather than as a string like the standard IPFIX element. +local methods = { + ["GET"] = 0x0001, + ["POST"] = 0x0002, + ["HEAD"] = 0x0004, + ["PUT"] = 0x0008, + ["OPTIONS"] = 0x0010, + ["DELETE"] = 0x0020, + ["TRACE"] = 0x0040, + ["CONNECT"] = 0x0100, + -- Not implemented + ["PATCH"] = 0x0080, + ["SSL"] = 0x0200, +} + +-- Pre-allocated objects used in accumulate() +local methods_t = perfect_hash(methods) +local methods_t_entry = methods_t.entry_type() +local methods_t_key_size = ffi.sizeof(methods_t_entry.key) +local message, field = {}, {} + +--- Utility functions to search for specific sequences of bytes in a +--- region of size length starting at ptr +local function init (str, ptr, length) + str.start = ptr + str.bytes = length + str.pos = 0 +end + +-- Return pointer and length of the string at the current position +local function str (str) + return str.start + str.pos, str.bytes - str.pos +end + +-- Scan the buffer from the current location until the match condition +-- is met. Returns a pointer to the start position and a length that +-- does not include the matching pattern. 
Advances the current +-- position to the first byte following the matching pattern. +local function mk_search_fn (ctype, match_fn) + local ptr_t = ffi.typeof("$*", ctype) + return function (str) + local pos = str.pos + local start = str.start + local ptr = start + pos + local found = false + while (pos < str.bytes and not found) do + found = match_fn(ffi.cast(ptr_t, start + pos)) + pos = pos + 1 + end + local length = pos - str.pos - 1 + str.pos = pos - 1 + ffi.sizeof(ctype) + return found, ptr, length + end +end + +local function find_bytes (type, bytes) + return mk_search_fn( + ffi.typeof(type), + function (str) + return str[0] == bytes + end + ) +end + +local find_spc = find_bytes("uint8_t", 0x20) +local find_colon = find_bytes("uint8_t", 0x3a) +local find_crlf = find_bytes("uint16_t", 0x0a0d) -- correct for endianness +local wspc = { + [0x20] = true, + [0x09] = true, + [0x0d] = true, + [0x0a] = true +} +-- Strip leading and trailing white space from str +local function strip_wspc (str) + local pos = str.pos + while wspc[str.start[pos]] and pos < str.bytes do + pos = pos + 1 + end + str.pos = pos + pos = str.bytes + while wspc[str.start[pos-1]] and pos > str.pos do + pos = pos - 1 + end + str.bytes = pos +end + +local function copy (ptr, length, obj) + local obj_len = ffi.sizeof(obj) + local eff_length = math.min(length, obj_len) + ffi.fill(obj, obj_len) + ffi.copy(obj, ptr, eff_length) +end + +local headers = { + { + name = "host", + fn = function (entry, ptr, length, flowmon) + if flowmon then + copy(ptr, length, entry.fmHttpRequestHost) + else + copy(ptr, length, entry.httpRequestHost) + end + end + } +} + +local function decode_field (entry, ptr, length, flowmon) + init(field, ptr, length) + local found, ptr, length = find_colon(field) + if not found then return end + for _, header in ipairs(headers) do + if length == #header.name and ffi.C.strncasecmp(ptr, header.name, length) == 0 then + strip_wspc(field) + local ptr, length = str(field) + header.fn(entry, 
ptr, length, flowmon) + end + end +end + +function accumulate (self, entry, pkt, flowmon) + local md = metadata.get(pkt) + local tcp_header_size = 4 * bit.rshift(ffi.cast("uint8_t*", md.l4)[12], 4) + local payload = md.l4 + tcp_header_size + local size = pkt.data + pkt.length - payload + if (md.length_delta > 0) then + -- Remove padding + size = size - md.length_delta + end + if (size == 0) then + return + end + -- Only process the first packet with non-zero payload after the + -- TCP handshake is completed. + entry.state.done = 1 + self.counters.HTTP_flows_examined = self.counters.HTTP_flows_examined + 1 + init(message, payload, size) + local found, ptr, length = find_spc(message) + if not found or length > methods_t_key_size then return end + copy(ptr, length, methods_t_entry.key) + local method = methods_t:lookup_ptr(methods_t_entry.key) + if method == nil then + self.counters.HTTP_invalid_method = self.counters.HTTP_invalid_method + 1 + return + end + if flowmon then + entry.fmHttpRequestMethod = method.value + else + copy(ptr, length, entry.httpRequestMethod) + end + found, ptr, length = find_spc(message) + if not found then return end + if flowmon then + copy(ptr, length, entry.fmHttpRequestTarget) + else + copy(ptr, length, entry.httpRequestTarget) + end + -- Skip HTTP version + found, _, _ = find_crlf(message) + if not found then return end + while true do + found, ptr, length = find_crlf(message) + -- The sequence of fields is terminated by a an empty line + if not found or length == 0 then break end + decode_field(entry, ptr, length, flowmon) + end +end diff --git a/src/apps/ipfix/ipfix-information-elements-local.inc b/src/apps/ipfix/ipfix-information-elements-local.inc index 8e3c2ea15c..6d86351ed8 100644 --- a/src/apps/ipfix/ipfix-information-elements-local.inc +++ b/src/apps/ipfix/ipfix-information-elements-local.inc @@ -13,5 +13,19 @@ this is the name of the first non-CNAME answer record or the name of the last CN 
2946:110,dnsAnswerRdata,octetArray,"On-the-wire encoding of the answer record's rdata section. For well-known record types, compressed domain names have been replaced with their uncompressed counterparts",,,,,,,, 2946:111,dnsAnswerRdataLen,unsigned16,,,,,,,,, -39499:338,tlsSNI,string,DNS name from the TLS Server Name Indication extension,,,,,,,, -39499:339,tlsSNILength,unsigned16,Length of tlsSNI in bytes,,,,,,,, +39499:338,fmTlsSNI,string,DNS name from the TLS Server Name Indication extension,,,,,,,, +39499:339,fmTlsSNILength,unsigned16,Length of tlsSNI in bytes,,,,,,,, +39499:1,fmHttpRequestHost,string,,,,,,,,,, +39499:2,fmHttpRequestTarget,string,,,,,,,,,, +39499:4,fmHttpRequestMethod,unsigned16,"Bit mask of HTTP request types +GET 0x0001 +POST 0x0002 +HEAD 0x0004 +PUT 0x0008 +OPTIONS 0x0010 +DELETE 0x0020 +TRACE 0x0040 +PATCH 0x0080 +CONNECT 0x0100 +SSL 0x0200 +",,,,,,,,, diff --git a/src/apps/ipfix/strings.lua b/src/apps/ipfix/strings.lua deleted file mode 100644 index 90d2947a1e..0000000000 --- a/src/apps/ipfix/strings.lua +++ /dev/null @@ -1,86 +0,0 @@ -module(..., package.seeall) - -local ffi = require("ffi") - -ct_t = ffi.typeof([[ - struct { - uint8_t *text; - uint16_t length; - uint16_t pos; - } -]]) - -function ct_set(ct, pos) - ct.pos = pos -end - -function ct_get(ct) - return ct.pos -end - -function ct_at(ct) - return ct.text + ct.pos -end - -function ct_init(ct, text, length, pos) - ct.text = text - ct.length = length - ct.pos = pos or 0 -end - -function search(string, ct, tail) - local slen = string.len - local pos = ct.pos - while (pos + slen < ct.length) do - if ffi.C.strncasecmp(string.buf, ct.text + pos, slen) == 0 then - if tail then pos = pos + slen end - ct.pos = pos - return pos - end - pos = pos + 1 - end - return nil -end - -function upto_space_or_cr(ct) - local text = ct.text - local pos = ct.pos - local pos_start = pos - while (pos < ct.length and text[pos] ~= 32 and text[pos] ~= 13) do - pos = pos + 1 - end - ct.pos = pos - return pos, 
pos - pos_start -end - -function skip_space(ct) - local text = ct.text - local pos = ct.pos - local pos_start = pos - while (pos < ct.length and text[pos] == 32) do - pos = pos + 1 - end - ct.pos = pos - return pos, pos - pos_start -end - -function string_to_buf(s) - -- Using ffi.new("uint8_t[?]", #s) results in trace aborts due to - -- "bad argument type" in ffi.sizeof() - local buf = ffi.new("uint8_t["..#s.."]") - for i = 1, #s do - buf[i-1] = s:byte(i,i) - end - return buf -end - -function strings_to_buf(t) - local result = {} - for k, v in pairs(t) do - result[k] = { - buf = string_to_buf(v), - len = #v - } - end - return result -end diff --git a/src/apps/ipfix/template.lua b/src/apps/ipfix/template.lua index 0bb34b6225..c8dbae0dee 100644 --- a/src/apps/ipfix/template.lua +++ b/src/apps/ipfix/template.lua @@ -13,8 +13,8 @@ local ethernet = require("lib.protocol.ethernet") local ipv4 = require("lib.protocol.ipv4") local ipv6 = require("lib.protocol.ipv6") local metadata = require("apps.rss.metadata") -local strings = require("apps.ipfix.strings") local dns = require("apps.ipfix.dns") +local http = require("apps.ipfix.http") local tls = require("apps.ipfix.tls") local S = require("syscall") @@ -427,81 +427,37 @@ local function v6_extract (self, pkt, timestamp, entry) end end ---- Helper functions for HTTP templates +--- Helper functions for HTTP, HTTPS templates --- We want to be able to find a "Host:" header even if it is not in --- the same TCP segment as the GET request, which requires to keep --- state. local HTTP_state_t = ffi.typeof([[ struct { - uint8_t have_GET; - uint8_t have_host; - uint8_t examined; + uint8_t done; } __attribute__((packed)) ]]) --- The number of TCP segments to scan for the first GET request --- (including the SYN segment, which is skipped). Most requests are --- found in the first non-handshake packet (segment #3 from the --- client). Empirical evidence shows a strong peak there with a long --- tail. 
A cutoff of 10 is expected to find at least 80% of the GET --- requests. -local HTTP_scan_threshold = 10 + -- HTTP-specific statistics counters local function HTTP_counters() return { + -- Flows for which we have seen a complete TCP handshake and one + -- packet with non-zero payload HTTP_flows_examined = 0, - HTTP_GET_matches = 0, - HTTP_host_matches = 0 + -- Subset of examined flows with invalid HTTP request methods + HTTP_invalid_method = 0, } end - -local HTTP_strings = strings.strings_to_buf({ - GET = 'GET ', - Host = 'Host:' -}) - -local HTTP_ct = strings.ct_t() - -local function HTTP_accumulate(self, dst, new, pkt) - local md = metadata_get(pkt) - if ((dst.value.packetDeltaCount >= HTTP_scan_threshold or - -- TCP SYN - bit.band(new.value.tcpControlBitsReduced, 0x02) == 0x02)) then - return - end - local state = dst.value.state - if state.examined == 0 then - self.counters.HTTP_flows_examined = - self.counters.HTTP_flows_examined + 1 - state.examined = 1 - end - strings.ct_init(HTTP_ct, pkt.data, pkt.length, md.l4 - pkt.data) - if (state.have_GET == 0 and - strings.search(HTTP_strings.GET, HTTP_ct, true)) then - ffi.copy(dst.value.httpRequestMethod, 'GET') - state.have_GET = 1 - strings.skip_space(HTTP_ct) - local start = strings.ct_at(HTTP_ct) - local _, length = strings.upto_space_or_cr(HTTP_ct) - length = math.min(length, ffi.sizeof(dst.value.httpRequestTarget) - 1) - ffi.copy(dst.value.httpRequestTarget, start, length) - self.counters.HTTP_GET_matches = self.counters.HTTP_GET_matches + 1 - end - if (state.have_GET == 1 and state.have_host == 0 and - strings.search(HTTP_strings.Host, HTTP_ct, true)) then - state.have_host = 1 - strings.skip_space(HTTP_ct) - local start = strings.ct_at(HTTP_ct) - local _, length = strings.upto_space_or_cr(HTTP_ct) - length = math.min(length, ffi.sizeof(dst.value.httpRequestHost) - 1) - ffi.copy(dst.value.httpRequestHost, start, length) - self.counters.HTTP_host_matches = self.counters.HTTP_host_matches + 1 +local function 
HTTP_accumulate(self, dst, new, pkt, flowmon) + accumulate_generic(dst, new) + accumulate_tcp_flags_reduced(dst, new) + if (dst.value.state.done == 0 and bit.band(dst.value.tcpControlBitsReduced, 0x12) == 0x12) then + -- Handshake complete (SYN/ACK) + http.accumulate(self, dst.value, pkt, flowmon) end end -- HTTPS-specific statistics counters local function HTTPS_counters() return { + HTTPS_flows_examined = 0, HTTPS_client_hellos = 0, HTTPS_extensions_present = 0, HTTPS_snis = 0, @@ -511,7 +467,10 @@ end local function HTTPS_accumulate(self, dst, new, pkt) accumulate_generic(dst, new) accumulate_tcp_flags_reduced(dst, new) - tls.accumulate(self, dst.value, pkt) + if (dst.value.state.done == 0 and bit.band(dst.value.tcpControlBitsReduced, 0x12) == 0x12) then + -- Handshake complete (SYN/ACK) + tls.accumulate(self, dst.value, pkt) + end end local function DNS_extract(self, pkt, timestamp, entry, extract_addr_fn) @@ -646,21 +605,110 @@ local function v6_extended_accumulate (self, dst, new) end end +local function concat_lists(...) 
+ local arg = {...} + local result = {} + for _, l in ipairs(arg) do + for i = 1, #l do + result[#result+1] = l[i] + end + end + return result +end + +local keys_ipv4 = { + "sourceIPv4Address", + "destinationIPv4Address", + "protocolIdentifier", + "sourceTransportPort", + "destinationTransportPort" +} +local keys_ipv6 = { + "sourceIPv6Address", + "destinationIPv6Address", + "protocolIdentifier", + "sourceTransportPort", + "destinationTransportPort" +} +local keys_dns = { + "dnsFlagsCodes", + "dnsQuestionCount", + "dnsAnswerCount", + "dnsQuestionName=64", + "dnsQuestionType", + "dnsQuestionClass", + "dnsAnswerName=64", + "dnsAnswerType", + "dnsAnswerClass", + "dnsAnswerTtl", + "dnsAnswerRdata=64", + "dnsAnswerRdataLen" +} +local keys_ipv4_dns = concat_lists(keys_ipv4, keys_dns) +local keys_ipv6_dns = concat_lists(keys_ipv6, keys_dns) + +local values_min = { + "flowStartMilliseconds", + "flowEndMilliseconds", + "packetDeltaCount", + "octetDeltaCount" +} +local values = concat_lists( + values_min, + { + "tcpControlBitsReduced" + } +) +local values_extended = concat_lists( + values, + { + "sourceMacAddress", + "postDestinationMacAddress", + "vlanId", + "ipClassOfService", + "bgpSourceAsNumber", + "bgpDestinationAsNumber", + "bgpPrevAdjacentAsNumber", + "bgpNextAdjacentAsNumber", + "ingressInterface", + "egressInterface" + } +) +local values_extended_ipv4 = concat_lists( + values_extended, + { + "icmpTypeCodeIPv4", + } +) +local values_extended_ipv6 = concat_lists( + values_extended, + { + "icmpTypeCodeIPv6", + } +) +local values_HTTP = { + "httpRequestMethod=8", + "httpRequestHost=32", + "httpRequestTarget=64" +} +-- Proprietary Flowmon HTTP/HTTPS fields +local values_HTTP_Flowmon = { + "fmHttpRequestMethod", + "fmHttpRequestHost=32", + "fmHttpRequestTarget=64" +} +local values_HTTPS_Flowmon = { + "fmTlsSNI=64", + "fmTlsSNILength" +} + templates = { v4 = { id = 256, filter = "ip", aggregation_type = 'v4', - keys = { "sourceIPv4Address", - "destinationIPv4Address", - 
"protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced" }, + keys = keys_ipv4, + values = values, extract = v4_extract, accumulate = function (self, dst, new) accumulate_generic(dst, new) @@ -684,122 +732,102 @@ templates = { id = 257, filter = "ip and tcp dst port 80", aggregation_type = 'v4', - keys = { "sourceIPv4Address", - "destinationIPv4Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced", - "httpRequestMethod=8", - "httpRequestHost=32", - "httpRequestTarget=64" }, + keys = keys_ipv4, + values = concat_lists(values, values_HTTP), state_t = HTTP_state_t, counters = HTTP_counters(), extract = v4_extract, - accumulate = function (self, dst, new, pkt) - accumulate_generic(dst, new) - accumulate_tcp_flags_reduced(dst, new) - HTTP_accumulate(self, dst, new, pkt) - end + accumulate = HTTP_accumulate, }, v4_DNS = { id = 258, filter = "ip and udp port 53", aggregation_type = 'v4', - keys = { "sourceIPv4Address", - "destinationIPv4Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort", - "dnsFlagsCodes", - "dnsQuestionCount", - "dnsAnswerCount", - "dnsQuestionName=64", - "dnsQuestionType", - "dnsQuestionClass", - "dnsAnswerName=64", - "dnsAnswerType", - "dnsAnswerClass", - "dnsAnswerTtl", - "dnsAnswerRdata=64", - "dnsAnswerRdataLen" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount" }, + keys = keys_ipv4_dns, + values = values_min, extract = function (self, pkt, timestamp, entry) DNS_extract(self, pkt, timestamp, entry, extract_v4_addr) end, accumulate = DNS_accumulate }, - v4_HTTPS = { + v4_HTTPS_Flowmon = { id = 259, filter = "ip and tcp and (dst port 443 or 
dst port 8443)", aggregation_type = 'v4', - keys = { "sourceIPv4Address", - "destinationIPv4Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced", - "tlsSNI=64", - "tlsSNILength"}, + keys = keys_ipv4, + values = concat_lists(values, values_HTTPS_Flowmon), + state_t = HTTP_state_t, counters = HTTPS_counters(), extract = v4_extract, accumulate = HTTPS_accumulate, }, + v4_HTTP_Flowmon = { + id = 260, + filter = "ip and tcp dst port 80", + aggregation_type = 'v4', + keys = keys_ipv4, + values = concat_lists(values, values_HTTP_Flowmon), + state_t = HTTP_state_t, + counters = HTTP_counters(), + extract = v4_extract, + accumulate = function (self, dst, new, pkt) + HTTP_accumulate(self, dst, new, pkt, "flowmon") + end + }, v4_extended = { id = 1256, filter = "ip", aggregation_type = 'v4', - keys = { "sourceIPv4Address", - "destinationIPv4Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "sourceMacAddress", - -- This is destinationMacAddress per NetFlowV9 - "postDestinationMacAddress", - "vlanId", - "ipClassOfService", - "bgpSourceAsNumber", - "bgpDestinationAsNumber", - "bgpPrevAdjacentAsNumber", - "bgpNextAdjacentAsNumber", - "tcpControlBitsReduced", - "icmpTypeCodeIPv4", - "ingressInterface", - "egressInterface" }, + keys = keys_ipv4, + values = values_extended_ipv4, require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx4_to_as' }, extract = v4_extended_extract, accumulate = v4_extended_accumulate }, + v4_extended_HTTP = { + id = 1257, + filter = "ip and tcp dst port 80", + aggregation_type = 'v4', + keys = keys_ipv4, + values = concat_lists(values_extended_ipv4, values_HTTP), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx4_to_as' }, + state_t = 
HTTP_state_t, + counters = HTTP_counters(), + extract = v4_extended_extract, + accumulate = HTTP_accumulate, + }, + v4_extended_HTTPS_Flowmon = { + id = 1258, + filter = "ip and tcp and (dst port 443 or dst port 8443)", + aggregation_type = 'v4', + keys = keys_ipv4, + values = concat_lists(values_extended_ipv4, values_HTTPS_Flowmon), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx4_to_as' }, + state_t = HTTP_state_t, + counters = HTTPS_counters(), + extract = v4_extended_extract, + accumulate = HTTPS_accumulate, + }, + v4_extended_HTTP_Flowmon = { + id = 1259, + filter = "ip and tcp dst port 80", + aggregation_type = 'v4', + keys = keys_ipv4, + values = concat_lists(values_extended_ipv4, values_HTTP_Flowmon), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx4_to_as' }, + state_t = HTTP_state_t, + counters = HTTP_counters(), + extract = v4_extended_extract, + accumulate = function (self, dst, new, pkt) + HTTP_accumulate(self, dst, new, pkt, "flowmon") + end + }, v6 = { id = 512, filter = "ip6", aggregation_type = 'v6', - keys = { "sourceIPv6Address", - "destinationIPv6Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced" }, + keys = keys_ipv6, + values = values, extract = v6_extract, accumulate = function (self, dst, new) accumulate_generic(dst, new) @@ -823,108 +851,96 @@ templates = { id = 513, filter = "ip6 and tcp dst port 80", aggregation_type = 'v6', - keys = { "sourceIPv6Address", - "destinationIPv6Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced", - "httpRequestMethod=8", - "httpRequestHost=32", - "httpRequestTarget=64" }, + keys = keys_ipv6, + values = concat_lists(values, values_HTTP), state_t = 
HTTP_state_t, counters = HTTP_counters(), extract = v6_extract, - accumulate = function (self, dst, new, pkt) - accumulate_generic(dst, new) - accumulate_tcp_flags_reduced(dst, new) - HTTP_accumulate(self, dst, new, pkt) - end + accumulate = HTTP_accumulate, }, v6_DNS = { id = 514, filter = "ip6 and udp port 53", aggregation_type = 'v6', - keys = { "sourceIPv6Address", - "destinationIPv6Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort", - "dnsFlagsCodes", - "dnsQuestionCount", - "dnsAnswerCount", - "dnsQuestionName=64", - "dnsQuestionType", - "dnsQuestionClass", - "dnsAnswerName=64", - "dnsAnswerType", - "dnsAnswerClass", - "dnsAnswerTtl", - "dnsAnswerRdata=64", - "dnsAnswerRdataLen" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount" }, + keys = keys_ipv6_dns, + values = values_min, extract = function (self, pkt, timestamp, entry) DNS_extract(self, pkt, timestamp, entry, extract_v6_addr) end, accumulate = DNS_accumulate }, - v6_HTTPS = { + v6_HTTPS_Flowmon = { id = 515, filter = "ip6 and tcp and (dst port 443 or dst port 8443)", aggregation_type = 'v6', - keys = { "sourceIPv6Address", - "destinationIPv6Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "tcpControlBitsReduced", - "tlsSNI=64", - "tlsSNILength"}, + keys = keys_ipv6, + values = concat_lists(values, values_HTTPS_Flowmon), + state_t = HTTP_state_t, counters = HTTPS_counters(), extract = v6_extract, accumulate = HTTPS_accumulate, }, + v6_HTTP_Flowmon = { + id = 516, + filter = "ip6 and tcp dst port 80", + aggregation_type = 'v6', + keys = keys_ipv6, + values = concat_lists(values, values_HTTP_Flowmon), + state_t = HTTP_state_t, + counters = HTTP_counters(), + extract = v6_extract, + accumulate = function (self, dst, new, pkt) + HTTP_accumulate(self, dst, new, pkt, 
"flowmon") + end + }, v6_extended = { id = 1512, filter = "ip6", aggregation_type = 'v6', - keys = { "sourceIPv6Address", - "destinationIPv6Address", - "protocolIdentifier", - "sourceTransportPort", - "destinationTransportPort" }, - values = { "flowStartMilliseconds", - "flowEndMilliseconds", - "packetDeltaCount", - "octetDeltaCount", - "sourceMacAddress", - -- This is destinationMacAddress per NetFlowV9 - "postDestinationMacAddress", - "vlanId", - "ipClassOfService", - "bgpSourceAsNumber", - "bgpDestinationAsNumber", - "bgpNextAdjacentAsNumber", - "bgpPrevAdjacentAsNumber", - "tcpControlBitsReduced", - "icmpTypeCodeIPv6", - "ingressInterface", - "egressInterface" }, + keys = keys_ipv6, + values = values_extended_ipv6, require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx6_to_as' }, extract = v6_extended_extract, accumulate = v6_extended_accumulate, }, + v6_extended_HTTP = { + id = 1513, + filter = "ip6 and tcp dst port 80", + aggregation_type = 'v6', + keys = keys_ipv6, + values = concat_lists(values_extended_ipv6, values_HTTP), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx6_to_as' }, + state_t = HTTP_state_t, + counters = HTTP_counters(), + extract = v6_extended_extract, + accumulate = HTTP_accumulate, + }, + v6_extended_HTTPS_Flowmon = { + id = 1514, + filter = "ip6 and tcp and (dst port 443 or dst port 8443)", + aggregation_type = 'v6', + keys = keys_ipv6, + values = concat_lists(values_extended_ipv6, values_HTTPS_Flowmon), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx6_to_as' }, + state_t = HTTP_state_t, + counters = HTTPS_counters(), + extract = v6_extended_extract, + accumulate = HTTPS_accumulate, + }, + v6_extended_HTTP_Flowmon = { + id = 1515, + filter = "ip6 and tcp dst port 80", + aggregation_type = 'v6', + keys = keys_ipv6, + values = concat_lists(values_extended_ipv6, values_HTTP_Flowmon), + require_maps = { 'mac_to_as', 'vlan_to_ifindex', 'pfx6_to_as' }, + state_t = HTTP_state_t, + counters = HTTP_counters(), + extract = 
v6_extended_extract, + accumulate = function (self, dst, new, pkt) + HTTP_accumulate(self, dst, new, pkt, "flowmon") + end + } } local templates_legend = [[ diff --git a/src/apps/ipfix/tls.lua b/src/apps/ipfix/tls.lua index 00c71bde40..de76b750a3 100644 --- a/src/apps/ipfix/tls.lua +++ b/src/apps/ipfix/tls.lua @@ -80,35 +80,29 @@ local function skip_lv2(data) return tlv.data + ntohs(tlv.length) end -local function tcp_header_size(l4) - local offset = bit.rshift(ffi.cast("uint8_t*", l4)[12], 4) - return offset * 4 -end - local function out_of_bounds (eop, ptr, size) return ffi.cast("uint8_t *", ptr) + size > eop end function accumulate(self, entry, pkt) local md = metadata_get(pkt) - -- The TLS handshake starts right after the TCP handshake, - -- i.e. either in the second (piggy-backed on the handshake ACK) or - -- third packet of the flow. - local payload = md.l4 + tcp_header_size(md.l4) - -- The effective payload size is the amount of the payload that is - -- actually present. This can be smaller than the actual payload - -- size if the packet has been truncated, e.g. by a port-mirror. It - -- can also be larger if the packet has been padded to the minimum - -- frame size (64 bytes). This can be safely ignored. 
- local eff_payload_size = pkt.length - md.l3_offset - (payload - md.l3) - if ((entry.packetDeltaCount == 1 or -- SYN - (entry.packetDeltaCount == 2 and eff_payload_size == 0) or -- Empty ACK - entry.packetDeltaCount > 3)) then + local tcp_header_size = 4 * bit.rshift(ffi.cast("uint8_t*", md.l4)[12], 4) + local payload = md.l4 + tcp_header_size + local eff_payload_size = pkt.data + pkt.length - payload + if (md.length_delta > 0) then + -- Remove padding + eff_payload_size = eff_payload_size - md.length_delta + end + if (eff_payload_size == 0) then return end -- End Of Payload (first byte after the effective payload), used -- for bounds check local eop = payload + eff_payload_size + -- Only process the first packet with non-zero payload after the + -- TCP handshake is completed. + entry.state.done = 1 + self.counters.HTTPS_flows_examined = self.counters.HTTPS_flows_examined + 1 -- Check bounds for the fixed-size part of the message if out_of_bounds(eop, payload, @@ -165,8 +159,8 @@ function accumulate(self, entry, pkt) if sni.name_type ~= 0 then return end local name_length = ntohs(sni.name_length) if out_of_bounds(eop, sni.name, name_length) then return end - ffi.copy(entry.tlsSNI, sni.name, math.min(ffi.sizeof(entry.tlsSNI), name_length)) - entry.tlsSNILength = name_length + ffi.copy(entry.fmTlsSNI, sni.name, math.min(ffi.sizeof(entry.fmTlsSNI), name_length)) + entry.fmTlsSNILength = name_length self.counters.HTTPS_snis = self.counters.HTTPS_snis + 1 return end diff --git a/src/apps/mellanox/connectx.lua b/src/apps/mellanox/connectx.lua index b03835970d..9e98292a24 100644 --- a/src/apps/mellanox/connectx.lua +++ b/src/apps/mellanox/connectx.lua @@ -848,7 +848,11 @@ end -- Provide the NIC with freshly allocated memory. function HCA:alloc_pages (num_pages) - assert(num_pages > 0) + -- Assume that num_pages is the result of a call to query_pages(), + -- i.e. 0 is a legal value and a negative value indicates that + -- pages can be reclaimed. 
The reclaim is done via notifications on + the event queue. + if num_pages <= 0 then return end if debug_info then print(("Allocating %d pages to HW"):format(num_pages)) end diff --git a/src/apps/mellanox/connectx_test.lua b/src/apps/mellanox/connectx_test.lua index 87c0e3b2b5..fe0c448317 100644 --- a/src/apps/mellanox/connectx_test.lua +++ b/src/apps/mellanox/connectx_test.lua @@ -166,14 +166,13 @@ function switch (pci0, pci1, npackets, ncores, minlen, maxlen, minburst, maxburs --C.usleep(100) for id, app in pairs(io0) do app:pull() app:push() dump(pci0, id, app) end for id, app in pairs(io1) do app:pull() app:push() dump(pci1, id, app) end - -- Simulate breathing end engine.setvmprofile("engine") -- Receive any last packets - C.usleep(100) - for i = 1, 10 do + for i = 1, 3 do for id, app in pairs(io0) do app:pull() app:push() dump(pci0, id, app) end for id, app in pairs(io1) do app:pull() app:push() dump(pci1, id, app) end + C.usleep(50) end local finish = engine.now() print("reporting...") diff --git a/src/lib/pmu_x86.dasl b/src/lib/pmu_x86.dasl index 086f2dad06..779639d83a 100644 --- a/src/lib/pmu_x86.dasl +++ b/src/lib/pmu_x86.dasl @@ -236,8 +236,8 @@ function selftest () enable_rdpmc() print("nfixed", nfixed, "ngeneral", ngeneral) if vendor == "GenuineIntel" then - -- Expected values for Sandy Bridge - Skylake - assert(nfixed == 3, "nfixed: " .. nfixed) + -- Expected values for Sandy Bridge - Skylake - Sapphire Rapids + assert(nfixed >= 3, "nfixed: " .. nfixed) assert(ngeneral == 4 or ngeneral == 8, "ngeneral: " .. ngeneral) elseif vendor == "AuthenticAMD" and family+extfamily >= 0x15 then assert(nfixed == 0, "nfixed: " .. nfixed)