diff --git a/libmcount/mcount.c b/libmcount/mcount.c index 37995aa11..ba29a0baa 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -32,6 +32,21 @@ #include "utils/utils.h" #include "version.h" +/* + * mcount global variables. + * + * These are to control various features in the libmcount. + * They are set during initialization (mcount_startup) which, I believe, runs in + * a single thread. After that multiple threads (mostly) read the value so it's + * not protected by lock or something. So special care needs to be taken if you + * want to change it at runtime (like in the agent). + * + * Some are marked as 'maybe unused' because they are only used when filter + * functions are implemented. Note that libmcount is built with different + * settings like -fast and -single to be more efficient in some situations like + * when no filter is specified in the command line and/or single-thread only. + */ + /* time filter in nsec */ uint64_t mcount_threshold; @@ -55,7 +70,13 @@ unsigned long mcount_global_flags = MCOUNT_GFL_SETUP; /* TSD key to save mtd below */ pthread_key_t mtd_key = (pthread_key_t)-1; -/* thread local data to trace function execution */ +/* + * Thread local data to trace function execution. + * While this is itself TLS so ok to be accessed safely by each thread, + * mcount routines use TSD APIs to access it for performance reasons. + * Also TSD provides destructor so it can release the resources when the thread + * exits. + */ TLS struct mcount_thread_data mtd; /* pipe file descriptor to communite to uftrace */ @@ -106,6 +127,10 @@ __weak void dynamic_return(void) static LIST_HEAD(mcount_watch_list); #ifdef DISABLE_MCOUNT_FILTER +/* + * These functions are only for the FAST version of libmcount libraries which don't + * implement filters (other than time and size filters). 
+ */ static void mcount_filter_init(struct uftrace_filter_setting *filter_setting, bool force) { @@ -129,6 +154,9 @@ static void mcount_watch_finish(void) } #else +/* + * Here go the regular libmcount's filter and trigger functions. + */ /* be careful: this can be called from signal handler */ static void mcount_finish_trigger(void) @@ -581,6 +609,10 @@ static void mcount_watch_release(struct mcount_thread_data *mtdp) #endif /* DISABLE_MCOUNT_FILTER */ +/* + * These are common routines used in every libmcount library. + */ + static void send_session_msg(struct mcount_thread_data *mtdp, const char *sess_id) { struct uftrace_msg_sess sess = { @@ -919,6 +951,10 @@ static bool mcount_check_rstack(struct mcount_thread_data *mtdp) } #ifndef DISABLE_MCOUNT_FILTER +/* + * Again, this implements filter functionality used in !fast versions. + */ + extern void *get_argbuf(struct mcount_thread_data *, struct mcount_ret_stack *); /** @@ -1309,6 +1345,10 @@ void mcount_exit_filter_record(struct mcount_thread_data *mtdp, struct mcount_re } #else /* DISABLE_MCOUNT_FILTER */ +/* + * Here fast versions don't implement filters. + */ + enum filter_result mcount_entry_filter_check(struct mcount_thread_data *mtdp, unsigned long child, struct uftrace_trigger *tr) { @@ -2093,14 +2133,20 @@ static void mcount_cleanup(void) */ #define UFTRACE_ALIAS(_func) void uftrace_##_func(void *, void *) __alias(_func) +/* This is the historic startup routine for mcount but not used here. */ void __visible_default __monstartup(unsigned long low, unsigned long high) { } +/* This is the historic cleanup routine for mcount but not used here. */ void __visible_default _mcleanup(void) { } +/* + * This is a non-standard external function to work around some stack + * corruption problems in the past. I hope we don't need it anymore. 
+ */ void __visible_default mcount_restore(void) { struct mcount_thread_data *mtdp; @@ -2112,6 +2158,10 @@ void __visible_default mcount_restore(void) mcount_rstack_restore(mtdp); } +/* + * This is a non-standard external function to work around some stack + * corruption problems in the past. I hope we don't need it anymore. + */ void __visible_default mcount_reset(void) { struct mcount_thread_data *mtdp; @@ -2123,6 +2173,10 @@ void __visible_default mcount_reset(void) mcount_rstack_rehook(mtdp); } +/* + * External entry points for -finstrument-functions. The alias was added to + * avoid calling them through PLT. + */ void __visible_default __cyg_profile_func_enter(void *child, void *parent) { cygprof_entry((unsigned long)parent, (unsigned long)child); diff --git a/libmcount/record.c b/libmcount/record.c index b480b6fd5..e8264ec74 100644 --- a/libmcount/record.c +++ b/libmcount/record.c @@ -240,6 +240,10 @@ static struct mcount_event *get_event_pointer(void *base, unsigned idx) } #ifndef DISABLE_MCOUNT_FILTER +/* + * These functions are for regular libmcount with filters. + */ + void *get_argbuf(struct mcount_thread_data *mtdp, struct mcount_ret_stack *rstack) { ptrdiff_t idx = rstack - mtdp->rstack; @@ -818,6 +822,10 @@ void save_watchpoint(struct mcount_thread_data *mtdp, struct mcount_ret_stack *r } #else +/* + * These are for fast libmcount libraries without filters. + */ + void *get_argbuf(struct mcount_thread_data *mtdp, struct mcount_ret_stack *rstack) { return NULL; @@ -893,7 +901,17 @@ static int record_event(struct mcount_thread_data *mtdp, struct mcount_event *ev /* * instead of set bit fields, do the bit operations manually. - * this would be good for both performance and portability. 
+ * this would be good for both performance and portability, + * and should be equivalent to the following: + * + * struct uftrace_record *data = curr_buf->data + curr_buf->size; + * + * data->time = event->time; + * data->type = UFTRACE_EVENT; + * data->magic = RECORD_MAGIC; + * data->more = 0; + * data->depth = 0; + * data->addr = event->id; */ rec->data = UFTRACE_EVENT | RECORD_MAGIC << 3; rec->data += (uint64_t)event->id << 16; @@ -971,19 +989,19 @@ static int record_ret_stack(struct mcount_thread_data *mtdp, enum uftrace_record if (curr_buf == NULL) return mtdp->shmem.done ? 0 : -1; -#if 0 - frstack = (void *)(curr_buf->data + curr_buf->size); - - frstack->time = timestamp; - frstack->type = type; - frstack->magic = RECORD_MAGIC; - frstack->more = !!argbuf; - frstack->depth = mrstack->depth; - frstack->addr = mrstack->child_ip; -#else /* * instead of set bit fields, do the bit operations manually. - * this would be good for both performance and portability. + * this would be good for both performance and portability, + * and should be equivalent to the following: + * + * frstack = (void *)(curr_buf->data + curr_buf->size); + * + * frstack->time = timestamp; + * frstack->type = type; + * frstack->magic = RECORD_MAGIC; + * frstack->more = !!argbuf; + * frstack->depth = mrstack->depth; + * frstack->addr = mrstack->child_ip; */ rec = type | RECORD_MAGIC << 3; rec += argbuf ? 4 : 0; @@ -993,7 +1011,6 @@ static int record_ret_stack(struct mcount_thread_data *mtdp, enum uftrace_record buf = (void *)(curr_buf->data + curr_buf->size); buf[0] = timestamp; buf[1] = rec; -#endif curr_buf->size += sizeof(*frstack); mrstack->flags |= MCOUNT_FL_WRITTEN; @@ -1033,6 +1050,30 @@ static int record_ret_stack(struct mcount_thread_data *mtdp, enum uftrace_record return 0; } +/* + * For performance reasons and time filter, it doesn't record trace data one at + * a time. 
Instead it usually writes the data when an EXIT record is ready so + * it needs to record ENTRY data in the current and maybe in the parent functions. + * + * For example, suppose it has a time filter of 1 usec: + * + * foo() { + * bar() { + * leaf1(); // takes 0.5 usec + * leaf2(); // takes 1.2 usec + * + * Then it can start to record when leaf2 function returns (at this moment, + * mcount_ret_stack for leaf1 is gone) then it'd save the following records + * (unless ENTRY foo or bar is saved by an earlier child before leaf[12]). + * + * ENTRY (foo) + * ENTRY (bar) + * ENTRY (leaf2) + * EXIT (leaf2) + * + * Then it adds MCOUNT_FL_WRITTEN flag to the parents (foo and bar) so that they + * are never written again by another child function. + */ int record_trace_data(struct mcount_thread_data *mtdp, struct mcount_ret_stack *mrstack, long *retval) {