Diffstat (limited to 'vm/vm_pageout.c')
-rw-r--r--	vm/vm_pageout.c | 649
1 file changed, 101 insertions(+), 548 deletions(-)
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index a36c9905..dd0f995c 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,123 +53,17 @@
 #include <vm/vm_pageout.h>
 #include <machine/locore.h>
 
-
-
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef VM_PAGEOUT_BURST_MIN
-#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef VM_PAGEOUT_BURST_WAIT
-#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef VM_PAGEOUT_PAUSE_MAX
-#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
 /*
- * To obtain a reasonable LRU approximation, the inactive queue
- * needs to be large enough to give pages on it a chance to be
- * referenced a second time.  This macro defines the fraction
- * of active+inactive pages that should be inactive.
- * The pageout daemon uses it to update vm_page_inactive_target.
- *
- * If the number of free pages falls below vm_page_free_target and
- * vm_page_inactive_count is below vm_page_inactive_target,
- * then the pageout daemon starts running.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
  */
-
-#ifndef VM_PAGE_INACTIVE_TARGET
-#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
+static int vm_pageout_requested;
 
 /*
- * Once the pageout daemon starts running, it keeps going
- * until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
  */
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
-
-/*
- * The pageout daemon always starts running once the number of free pages
- * falls below vm_page_free_min.
- */
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/*
- * When the number of free pages falls below vm_page_free_reserved,
- * only vm-privileged threads can allocate pages.  vm-privilege
- * allows the pageout daemon and default pager (and any other
- * associated threads needed for default pageout) to continue
- * operation by dipping into the reserved pool of pages.  */
-
-#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED 500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_internal,
- * the pageout daemon no longer trusts external pagers to clean pages.
- * External pagers are probably all wedged waiting for a free page.
- * It forcibly double-pages dirty pages belonging to external objects,
- * getting the pages to the default pager to clean.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_INTERNAL
-#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_really,
- * the pageout daemon stops work entirely to let the default pager
- * catch up (assuming the default pager has pages to clean).
- * Beyond this point, it is too dangerous to consume memory
- * even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_REALLY
-#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0;	/* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0;	/* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- * These variables record the pageout daemon's actions:
- * how many pages it looks at and what happens to those pages.
- * No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0;		/* debugging */
-unsigned int vm_pageout_inactive = 0;		/* debugging */
-unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
-unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
-unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
-unsigned int vm_pageout_inactive_used = 0;	/* debugging */
-unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
-unsigned int vm_pageout_inactive_dirty = 0;	/* debugging */
-unsigned int vm_pageout_inactive_double = 0;	/* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
 
 /*
  *	Routine:	vm_pageout_setup
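The two static variables added above are never used as counters; only their addresses matter, as sleep/wakeup events that are always waited on and signalled under the free page queue lock. A minimal sketch, not part of this change, of how an allocation path could ask for pageout through vm_pageout_start() (defined at the bottom of this diff). The function name and the low_on_memory flag are made up for illustration, and the declarations are assumed to be visible the same way they are in vm_pageout.c:

/* Hypothetical caller: some other code has already decided memory is low. */
static void example_request_pageout(boolean_t low_on_memory)
{
	simple_lock(&vm_page_queue_free_lock);

	if (low_on_memory) {
		/* thread_wakeup_one(&vm_pageout_requested); a no-op before threads run. */
		vm_pageout_start();
	}

	simple_unlock(&vm_page_queue_free_lock);
}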
@@ -224,15 +118,20 @@ vm_pageout_setup(
 
 	/*
 	 *	If we are not flushing the page, allocate a
-	 *	page in the object.  If we cannot get the
-	 *	page, flush instead.
+	 *	page in the object.
 	 */
 	if (!flush) {
-		vm_object_lock(new_object);
-		new_m = vm_page_alloc(new_object, new_offset);
-		if (new_m == VM_PAGE_NULL)
-			flush = TRUE;
-		vm_object_unlock(new_object);
+		for (;;) {
+			vm_object_lock(new_object);
+			new_m = vm_page_alloc(new_object, new_offset);
+			vm_object_unlock(new_object);
+
+			if (new_m != VM_PAGE_NULL) {
+				break;
+			}
+
+			VM_PAGE_WAIT(NULL);
+		}
 	}
 
 	if (flush) {
@@ -337,26 +236,33 @@ vm_pageout_setup(
 	vm_page_lock_queues();
 	vm_stat.pageouts++;
 	if (m->laundry) {
+
 		/*
-		 * vm_pageout_scan is telling us to put this page
-		 * at the front of the inactive queue, so it will
-		 * be immediately paged out to the default pager.
+		 * The caller is telling us that it is going to
+		 * immediately double page this page to the default
+		 * pager.
		 */
 		assert(!old_object->internal);
 		m->laundry = FALSE;
-
-		queue_enter_first(&vm_page_queue_inactive, m,
-				  vm_page_t, pageq);
-		m->inactive = TRUE;
-		vm_page_inactive_count++;
 	} else if (old_object->internal) {
 		m->laundry = TRUE;
 		vm_page_laundry_count++;
 		vm_page_wire(m);
-	} else
+	} else {
 		vm_page_activate(m);
+
+		/*
+		 * If vm_page_external_pagedout is negative,
+		 * the pageout daemon isn't expecting to be
+		 * notified.
+		 */
+
+		if (vm_page_external_pagedout >= 0) {
+			vm_page_external_pagedout++;
+		}
+	}
 	vm_page_unlock_queues();
 
 	/*
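Two behavioural changes are made above. First, when vm_pageout_setup() is not flushing and vm_page_alloc() fails, it no longer degrades to a flush; it now blocks in VM_PAGE_WAIT() and retries until a page can be allocated. Second, a page marked for laundering is no longer pushed back to the front of the inactive queue (the caller double pages it to the default pager immediately), and ordinary external pageouts are counted in vm_page_external_pagedout when that counter is non-negative, i.e. when the pageout daemon expects to be notified. Stripped to its skeleton, the allocate-or-wait pattern looks like the sketch below; the helper name is hypothetical, the calls are the ones used in the hunk:

/* Hypothetical helper: allocate a page in an object, sleeping until one is free. */
static vm_page_t
example_alloc_page_blocking(vm_object_t object, vm_offset_t offset)
{
	vm_page_t m;

	for (;;) {
		vm_object_lock(object);
		m = vm_page_alloc(object, offset);
		vm_object_unlock(object);

		if (m != VM_PAGE_NULL)
			return m;

		/* Sleep until free pages become available, then retry. */
		VM_PAGE_WAIT(NULL);
	}
}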
@@ -487,455 +393,102 @@ vm_pageout_page(
 
 /*
  *	vm_pageout_scan does the dirty work for the pageout daemon.
- *	It returns with vm_page_queue_free_lock held and
- *	vm_page_free_wanted == 0.
+ *
+ *	Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ *	in which case should_wait indicates whether the pageout daemon
+ *	should wait to allow pagers to keep up.
+ *
+ *	It returns with vm_page_queue_free_lock held.
  */
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
 {
-	unsigned int burst_count;
-	unsigned int want_pages;
+	boolean_t done;
 
 	/*
-	 * We want to gradually dribble pages from the active queue
-	 * to the inactive queue.  If we let the inactive queue get
-	 * very small, and then suddenly dump many pages into it,
-	 * those pages won't get a sufficient chance to be referenced
-	 * before we start taking them from the inactive queue.
-	 *
-	 * We must limit the rate at which we send pages to the pagers.
-	 * data_write messages consume memory, for message buffers and
-	 * for map-copy objects.  If we get too far ahead of the pagers,
-	 * we can potentially run out of memory.
-	 *
-	 * We can use the laundry count to limit directly the number
-	 * of pages outstanding to the default pager.  A similar
-	 * strategy for external pagers doesn't work, because
-	 * external pagers don't have to deallocate the pages sent them,
-	 * and because we might have to send pages to external pagers
-	 * even if they aren't processing writes.  So we also
-	 * use a burst count to limit writes to external pagers.
-	 *
-	 * When memory is very tight, we can't rely on external pagers to
-	 * clean pages.  They probably aren't running, because they
-	 * aren't vm-privileged.  If we kept sending dirty pages to them,
-	 * we could exhaust the free list.  However, we can't just ignore
-	 * pages belonging to external objects, because there might be no
-	 * pages belonging to internal objects.  Hence, we get the page
-	 * into an internal object and then immediately double-page it,
-	 * sending it to the default pager.
-	 *
-	 * slab_collect should be last, because the other operations
-	 * might return memory to caches.  When we pause we use
-	 * vm_pageout_scan_continue as our continuation, so we will
-	 * reenter vm_pageout_scan periodically and attempt to reclaim
-	 * internal memory even if we never reach vm_page_free_target.
+	 * Try balancing pages among segments first, since this
+	 * may be enough to resume unprivileged allocations.
 	 */
-	stack_collect();
-	net_kmsg_collect();
-	consider_task_collect();
-	if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
-		  pointless to call consider_thread_collect. */
-		consider_thread_collect();
-	slab_collect();
-
-	for (burst_count = 0;;) {
-		vm_page_t m;
-		vm_object_t object;
-		unsigned long free_count;
-
-		/*
-		 * Recalculate vm_page_inactivate_target.
-		 */
-
-		vm_page_lock_queues();
-		vm_page_inactive_target =
-			VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
-						vm_page_inactive_count);
-
-		/*
-		 * Move pages from active to inactive.
-		 */
-
-		while ((vm_page_inactive_count < vm_page_inactive_target) &&
-		       !queue_empty(&vm_page_queue_active)) {
-			vm_object_t obj;
-
-			vm_pageout_active++;
-			m = (vm_page_t) queue_first(&vm_page_queue_active);
-			assert(m->active && !m->inactive);
-
-			obj = m->object;
-			if (!vm_object_lock_try(obj)) {
-				/*
-				 * Move page to end and continue.
-				 */
-
-				queue_remove(&vm_page_queue_active, m,
-					     vm_page_t, pageq);
-				queue_enter(&vm_page_queue_active, m,
-					    vm_page_t, pageq);
-				vm_page_unlock_queues();
-				vm_page_lock_queues();
-				continue;
-			}
-
-			/*
-			 * If the page is busy, then we pull it
-			 * off the active queue and leave it alone.
-			 */
-
-			if (m->busy) {
-				vm_object_unlock(obj);
-				queue_remove(&vm_page_queue_active, m,
-					     vm_page_t, pageq);
-				m->active = FALSE;
-				vm_page_active_count--;
-				continue;
-			}
-
-			/*
-			 * Deactivate the page while holding the object
-			 * locked, so we know the page is still not busy.
-			 * This should prevent races between pmap_enter
-			 * and pmap_clear_reference.  The page might be
-			 * absent or fictitious, but vm_page_deactivate
-			 * can handle that.
-			 */
-
-			vm_page_deactivate(m);
-			vm_object_unlock(obj);
-		}
-
-		/*
-		 * We are done if we have met our targets *and*
-		 * nobody is still waiting for a page.
-		 */
-
-		simple_lock(&vm_page_queue_free_lock);
-		free_count = vm_page_mem_free();
-		if ((free_count >= vm_page_free_target) &&
-		    (vm_page_free_wanted == 0)) {
-			vm_page_unlock_queues();
-			break;
-		}
-		want_pages = ((free_count < vm_page_free_target) ||
-			      vm_page_free_wanted);
-		simple_unlock(&vm_page_queue_free_lock);
-
-		/*
-		 * Sometimes we have to pause:
-		 *	1) No inactive pages - nothing to do.
-		 *	2) Flow control - wait for pagers to catch up.
-		 *	3) Extremely low memory - sending out dirty pages
-		 *	consumes memory.  We don't take the risk of doing
-		 *	this if the default pager already has work to do.
-		 */
-	    pause:
-		if (queue_empty(&vm_page_queue_inactive) ||
-		    (burst_count >= vm_pageout_burst_max) ||
-		    (vm_page_laundry_count >= vm_pageout_burst_max) ||
-		    ((free_count < vm_pageout_reserved_really) &&
-		     (vm_page_laundry_count > 0))) {
-			unsigned int pages, msecs;
-
-			/*
-			 * vm_pageout_burst_wait is msecs/page.
-			 * If there is nothing for us to do, we wait
-			 * at least vm_pageout_empty_wait msecs.
-			 */
-
-			if (vm_page_laundry_count > burst_count)
-				pages = vm_page_laundry_count;
-			else
-				pages = burst_count;
-			msecs = pages * vm_pageout_burst_wait;
-
-			if (queue_empty(&vm_page_queue_inactive) &&
-			    (msecs < vm_pageout_empty_wait))
-				msecs = vm_pageout_empty_wait;
-			vm_page_unlock_queues();
-
-			thread_will_wait_with_timeout(current_thread(), msecs);
-			counter(c_vm_pageout_scan_block++);
-			thread_block(vm_pageout_scan_continue);
-			call_continuation(vm_pageout_scan_continue);
-			/*NOTREACHED*/
-		}
-
-		vm_pageout_inactive++;
-
-		/* Find a page we are interested in paging out.  If we
-		   need pages, then we'll page anything out; otherwise
-		   we only page out external pages. */
-		m = (vm_page_t) queue_first (&vm_page_queue_inactive);
-		while (1)
-		  {
-		    assert (!m->active && m->inactive);
-		    if (want_pages || m->external)
-		      break;
-
-		    m = (vm_page_t) queue_next (&m->pageq);
-		    if (!m)
-		      goto pause;
-		  }
-
-		object = m->object;
+	/* This function returns with vm_page_queue_free_lock held */
+	done = vm_page_balance();
 
-		/*
-		 * Try to lock object; since we've got the
-		 * page queues lock, we can only try for this one.
-		 */
-
-		if (!vm_object_lock_try(object)) {
-			/*
-			 * Move page to end and continue.
-			 */
-
-			queue_remove(&vm_page_queue_inactive, m,
-				     vm_page_t, pageq);
-			queue_enter(&vm_page_queue_inactive, m,
-				    vm_page_t, pageq);
-			vm_page_unlock_queues();
-			vm_pageout_inactive_nolock++;
-			continue;
-		}
-
-		/*
-		 * Remove the page from the inactive list.
-		 */
-
-		queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
-		vm_page_inactive_count--;
-		m->inactive = FALSE;
-
-		if (m->busy || !object->alive) {
-			/*
-			 * Somebody is already playing with this page.
-			 * Leave it off the pageout queues.
-			 */
-
-			vm_page_unlock_queues();
-			vm_object_unlock(object);
-			vm_pageout_inactive_busy++;
-			continue;
-		}
-
-		/*
-		 * If it's absent, we can reclaim the page.
-		 */
-
-		if (want_pages && m->absent) {
-			vm_pageout_inactive_absent++;
-		    reclaim_page:
-			vm_page_free(m);
-			vm_page_unlock_queues();
-
-			if (vm_object_collectable(object))
-				vm_object_collect(object);
-			else
-				vm_object_unlock(object);
-
-			continue;
-		}
-
-		/*
-		 * If it's being used, reactivate.
-		 * (Fictitious pages are either busy or absent.)
-		 */
-
-		assert(!m->fictitious);
-		if (m->reference || pmap_is_referenced(m->phys_addr)) {
-			vm_object_unlock(object);
-			vm_page_activate(m);
-			vm_stat.reactivations++;
-			current_task()->reactivations++;
-			vm_page_unlock_queues();
-			vm_pageout_inactive_used++;
-			continue;
-		}
-
-		/*
-		 * Eliminate all mappings.
-		 */
-
-		m->busy = TRUE;
-		pmap_page_protect(m->phys_addr, VM_PROT_NONE);
-		if (!m->dirty)
-			m->dirty = pmap_is_modified(m->phys_addr);
-
-		/* If we don't actually need more memory, and the page
-		   is not dirty, put it on the tail of the inactive queue
-		   and move on to the next page. */
-		if (!want_pages && !m->dirty) {
-			queue_remove (&vm_page_queue_inactive, m,
-				      vm_page_t, pageq);
-			queue_enter (&vm_page_queue_inactive, m,
-				     vm_page_t, pageq);
-			vm_page_unlock_queues();
-			vm_pageout_inactive_cleaned_external++;
-			continue;
-		}
-
-		/*
-		 * If it's clean and not precious, we can free the page.
-		 */
-
-		if (!m->dirty && !m->precious) {
-			vm_pageout_inactive_clean++;
-			goto reclaim_page;
-		}
-
-		/*
-		 * If we are very low on memory, then we can't
-		 * rely on an external pager to clean a dirty page,
-		 * because external pagers are not vm-privileged.
-		 *
-		 * The laundry bit tells vm_pageout_setup to
-		 * put the page back at the front of the inactive
-		 * queue instead of activating the page.  Hence,
-		 * we will pick the page up again immediately and
-		 * resend it to the default pager.
-		 */
-
-		assert(!m->laundry);
-		if ((free_count < vm_pageout_reserved_internal) &&
-		    !object->internal) {
-			m->laundry = TRUE;
-			vm_pageout_inactive_double++;
-		}
-		vm_page_unlock_queues();
-
-		/*
-		 * If there is no memory object for the page, create
-		 * one and hand it to the default pager.
-		 * [First try to collapse, so we don't create
-		 * one unnecessarily.]
-		 */
-
-		if (!object->pager_initialized)
-			vm_object_collapse(object);
-		if (!object->pager_initialized)
-			vm_object_pager_create(object);
-		if (!object->pager_initialized)
-			panic("vm_pageout_scan");
-
-		vm_pageout_inactive_dirty++;
-		vm_pageout_page(m, FALSE, TRUE);	/* flush it */
-		vm_object_unlock(object);
-		burst_count++;
+	if (done) {
+		return TRUE;
 	}
-}
 
-void vm_pageout_scan_continue(void)
-{
+	simple_unlock(&vm_page_queue_free_lock);
+
 	/*
-	 * We just paused to let the pagers catch up.
-	 * If vm_page_laundry_count is still high,
-	 * then we aren't waiting long enough.
-	 * If we have paused some vm_pageout_pause_max times without
-	 * adjusting vm_pageout_burst_wait, it might be too big,
-	 * so we decrease it.
+	 * Balancing is not enough. Shrink caches and scan pages
+	 * for eviction.
 	 */
-	vm_page_lock_queues();
-	if (vm_page_laundry_count > vm_pageout_burst_min) {
-		vm_pageout_burst_wait++;
-		vm_pageout_pause_count = 0;
-	} else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
-		vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
-		if (vm_pageout_burst_wait < 1)
-			vm_pageout_burst_wait = 1;
-		vm_pageout_pause_count = 0;
-	}
-	vm_page_unlock_queues();
-
-	vm_pageout_continue();
-	/*NOTREACHED*/
-}
-
-/*
- *	vm_pageout is the high level pageout daemon.
- */
+	stack_collect();
+	net_kmsg_collect();
+	consider_task_collect();
+	if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
+		  pointless to call consider_thread_collect. */
+		consider_thread_collect();
 
-void vm_pageout_continue(void)
-{
 	/*
-	 * The pageout daemon is never done, so loop forever.
-	 * We should call vm_pageout_scan at least once each
-	 * time we are woken, even if vm_page_free_wanted is
-	 * zero, to check vm_page_free_target and
-	 * vm_page_inactive_target.
+	 * slab_collect should be last, because the other operations
+	 * might return memory to caches.
 	 */
+	slab_collect();
 
-	for (;;) {
-		vm_pageout_scan();
-		/* we hold vm_page_queue_free_lock now */
-		assert(vm_page_free_wanted == 0);
+	vm_page_refill_inactive();
 
-		assert_wait(&vm_page_free_wanted, FALSE);
-		simple_unlock(&vm_page_queue_free_lock);
-		counter(c_vm_pageout_block++);
-		thread_block(vm_pageout_continue);
-	}
+	/* This function returns with vm_page_queue_free_lock held */
+	return vm_page_evict(should_wait);
 }
 
 void vm_pageout(void)
 {
-	unsigned long free_after_reserve;
+	boolean_t done, should_wait;
 
 	current_thread()->vm_privilege = 1;
 	stack_privilege(current_thread());
 	thread_set_own_priority(0);
 
-	/*
-	 * Initialize some paging parameters.
-	 */
-
-	if (vm_pageout_burst_max == 0)
-		vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
-	if (vm_pageout_burst_min == 0)
-		vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
-	if (vm_pageout_burst_wait == 0)
-		vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
-	if (vm_pageout_empty_wait == 0)
-		vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
-	if (vm_page_free_reserved == 0)
-		vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
-	if (vm_pageout_pause_max == 0)
-		vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
-	if (vm_pageout_reserved_internal == 0)
-		vm_pageout_reserved_internal =
-			VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
-	if (vm_pageout_reserved_really == 0)
-		vm_pageout_reserved_really =
-			VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
-	free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
-	if (vm_page_free_min == 0)
-		vm_page_free_min = vm_page_free_reserved +
-			VM_PAGE_FREE_MIN(free_after_reserve);
+	for (;;) {
+		done = vm_pageout_scan(&should_wait);
+		/* we hold vm_page_queue_free_lock now */
 
-	if (vm_page_free_target == 0)
-		vm_page_free_target = vm_page_free_reserved +
-			VM_PAGE_FREE_TARGET(free_after_reserve);
+		if (done) {
+			thread_sleep(&vm_pageout_requested,
+				     simple_lock_addr(vm_page_queue_free_lock),
+				     FALSE);
+		} else if (should_wait) {
+			assert_wait(&vm_pageout_continue, FALSE);
+			thread_set_timeout(500);
+			simple_unlock(&vm_page_queue_free_lock);
+			thread_block(NULL);
+		} else {
+			simple_unlock(&vm_page_queue_free_lock);
+		}
+	}
+}
 
-	if (vm_page_free_target < vm_page_free_min + 5)
-		vm_page_free_target = vm_page_free_min + 5;
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
+{
+	if (!current_thread())
+		return;
 
-	/*
-	 * vm_pageout_scan will set vm_page_inactive_target.
-	 */
+	thread_wakeup_one(&vm_pageout_requested);
+}
 
-	vm_pageout_continue();
-	/*NOTREACHED*/
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+	thread_wakeup_one(&vm_pageout_continue);
 }
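The rewritten daemon loop above has three outcomes per call to vm_pageout_scan(): done, in which case the daemon sleeps on &vm_pageout_requested until vm_pageout_start() is called; not done with should_wait set, in which case it takes a bounded timed wait on &vm_pageout_continue that vm_pageout_resume() can cut short; or not done and not throttled, in which case it drops the lock and scans again immediately. A minimal sketch, not part of this change, of the resume side as a pager-completion path might use it; the enclosing function is hypothetical, the lock and vm_pageout_resume() are the ones above:

/* Hypothetical completion path: a pager has caught up on its writes. */
static void example_pageout_writes_completed(void)
{
	simple_lock(&vm_page_queue_free_lock);

	/* Ends the daemon's timed wait on &vm_pageout_continue early. */
	vm_pageout_resume();

	simple_unlock(&vm_page_queue_free_lock);
}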