From 675fd4eecd75a7156c83bb814ecfdbbcfbf27ea1 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Wed, 20 Apr 2016 00:32:15 +0200 Subject: Add kernel profiling through sampling * NEWS: Advertise feature. * configfrag.ac (--enable-kernsample): Add option. * kern/pc_sample.h (take_pc_sample): Add usermode and pc parameters. (take_pc_sample_macro): Take usermode and pc parameters, pass as such to take_pc_sample. * kern/pc_sample.c (take_pc_sample): Use pc parameter when usermode is 0. * kern/mach_clock.c (clock_interrupt): Add pc parameter. Pass usermode and pc to take_pc_sample_macro call. * i386/i386/hardclock.c (hardclock): Pass regs->eip to clock_interrupt call on normal interrupts, NULL on interrupts taken from the interrupt stack. * vm/vm_fault.c (vm_fault_cleanup): Set usermode to 1 and pc to NULL in take_pc_sample_macro call. --- NEWS | 2 ++ configfrag.ac | 9 +++++++++ i386/i386/hardclock.c | 9 +++++---- kern/mach_clock.c | 10 +++++++--- kern/mach_clock.h | 3 ++- kern/pc_sample.c | 10 ++++++++-- kern/pc_sample.h | 10 ++++++---- vm/vm_fault.c | 2 +- 8 files changed, 40 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index e80cf460..12394824 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ heap addressing important scalability issues. A synchronization mechanism was added, similar to the Linux futexes, to allow efficient and powerful userland synchronization. + +Support for profiling kernel code from userland through sampling was added. Version 1.6 (2015-10-31) diff --git a/configfrag.ac b/configfrag.ac index cfc34c76..3d7033ec 100644 --- a/configfrag.ac +++ b/configfrag.ac @@ -85,6 +85,15 @@ AC_DEFINE([MACH_PAGEMAP], [1], [MACH_PAGEMAP]) # Do pc sample histogram. AC_DEFINE([MACH_PCSAMPLE], [1], [MACH_PCSAMPLE]) +# Sample kernel too. 
+AC_ARG_ENABLE([kernsample], + AS_HELP_STRING([--enable-kernsample], [enable sampling kernel])) +[if [ x"$enable_kernsample" = xyes ]; then] + AC_DEFINE([MACH_KERNSAMPLE], [1], [MACH_KERNSAMPLE]) +[else] + AC_DEFINE([MACH_KERNSAMPLE], [0], [MACH_KERNSAMPLE]) +[fi] + # TTD Remote Kernel Debugging. AC_DEFINE([MACH_TTD], [0], [MACH_TTD]) diff --git a/i386/i386/hardclock.c b/i386/i386/hardclock.c index 49ea82cd..0ce6f381 100644 --- a/i386/i386/hardclock.c +++ b/i386/i386/hardclock.c @@ -62,18 +62,19 @@ hardclock(iunit, old_ipl, irq, ret_addr, regs) (regs->efl & EFL_VM) || /* user mode */ ((regs->cs & 0x03) != 0), /* user mode */ #if defined(LINUX_DEV) - FALSE /* ignore SPL0 */ + FALSE, /* ignore SPL0 */ #else /* LINUX_DEV */ - old_ipl == SPL0 /* base priority */ + old_ipl == SPL0, /* base priority */ #endif /* LINUX_DEV */ - ); + regs->eip); /* interrupted eip */ else /* * Interrupt from interrupt stack. */ clock_interrupt(tick, /* usec per tick */ FALSE, /* kernel mode */ - FALSE); /* not SPL0 */ + FALSE, /* not SPL0 */ + NULL); /* interrupted eip */ #ifdef LINUX_DEV linux_timer_intr(); diff --git a/kern/mach_clock.c b/kern/mach_clock.c index 1817ce22..d6ebf23e 100644 --- a/kern/mach_clock.c +++ b/kern/mach_clock.c @@ -133,7 +133,8 @@ timer_elt_data_t timer_head; /* ordered list of timeouts */ void clock_interrupt( int usec, /* microseconds per tick */ boolean_t usermode, /* executing user code */ - boolean_t basepri) /* at base priority */ + boolean_t basepri, /* at base priority */ + vm_offset_t pc) /* address of interrupted instruction */ { int my_cpu = cpu_number(); thread_t thread = current_thread(); @@ -184,8 +185,11 @@ void clock_interrupt( * This had better be MP safe. It might be interesting * to keep track of cpu in the sample. 
*/ - if (usermode) { - take_pc_sample_macro(thread, SAMPLED_PC_PERIODIC); +#if !MACH_KERNSAMPLE /* configure always defines it, as 0 or 1 */ + if (usermode) +#endif /* !MACH_KERNSAMPLE */ + { + take_pc_sample_macro(thread, SAMPLED_PC_PERIODIC, usermode, pc); } #endif /* MACH_PCSAMPLE */ diff --git a/kern/mach_clock.h b/kern/mach_clock.h index 1af0cdae..977b43be 100644 --- a/kern/mach_clock.h +++ b/kern/mach_clock.h @@ -62,7 +62,8 @@ typedef struct timer_elt *timer_elt_t; extern void clock_interrupt( int usec, boolean_t usermode, - boolean_t basepri); + boolean_t basepri, + vm_offset_t pc); extern void softclock (void); diff --git a/kern/pc_sample.c b/kern/pc_sample.c index fcb9d71b..e9f0b16a 100644 --- a/kern/pc_sample.c +++ b/kern/pc_sample.c @@ -46,12 +46,18 @@ typedef sampled_pc_t sampled_pcs[MAX_PC_SAMPLES]; void take_pc_sample( const thread_t t, sample_control_t *cp, - sampled_pc_flavor_t flavor) + sampled_pc_flavor_t flavor, + boolean_t usermode, + vm_offset_t kern_pc) { vm_offset_t pc; struct sampled_pc *sample; - pc = interrupted_pc(t); + if (usermode) + pc = interrupted_pc(t); + else + pc = kern_pc; + cp->seqno++; sample = &((sampled_pc_t *)cp->buffer)[cp->seqno % MAX_PC_SAMPLES]; sample->id = (vm_offset_t)t; diff --git a/kern/pc_sample.h b/kern/pc_sample.h index 4832cb9f..04ca6671 100644 --- a/kern/pc_sample.h +++ b/kern/pc_sample.h @@ -71,22 +71,24 @@ typedef struct sample_control sample_control_t; extern void take_pc_sample( thread_t thread, sample_control_t *cp, - sampled_pc_flavor_t flavor); + sampled_pc_flavor_t flavor, + boolean_t usermode, + vm_offset_t pc); /* * Macro to do quick flavor check for sampling, * on both threads and tasks. 
*/ -#define take_pc_sample_macro(thread, flavor) \ +#define take_pc_sample_macro(thread, flavor, usermode, pc) \ MACRO_BEGIN \ task_t task; \ \ if ((thread)->pc_sample.sampletypes & (flavor)) \ - take_pc_sample((thread), &(thread)->pc_sample, (flavor)); \ + take_pc_sample((thread), &(thread)->pc_sample, (flavor), usermode, pc); \ \ task = (thread)->task; \ if (task->pc_sample.sampletypes & (flavor)) \ - take_pc_sample((thread), &task->pc_sample, (flavor)); \ + take_pc_sample((thread), &task->pc_sample, (flavor), usermode, pc); \ MACRO_END #endif /* _KERN_PC_SAMPLE_H_ */ diff --git a/vm/vm_fault.c b/vm/vm_fault.c index 09e2c54d..e61a223d 100644 --- a/vm/vm_fault.c +++ b/vm/vm_fault.c @@ -154,7 +154,7 @@ vm_fault_cleanup( thread_t _thread_ = current_thread(); \ \ if (_thread_ != THREAD_NULL) \ - take_pc_sample_macro(_thread_, (flavor)); \ + take_pc_sample_macro(_thread_, (flavor), 1, NULL); \ MACRO_END #else -- cgit v1.2.3