Diffstat (limited to 'vm/vm_pageout.c')
-rw-r--r--  vm/vm_pageout.c | 649
1 file changed, 101 insertions(+), 548 deletions(-)
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index a36c9905..dd0f995c 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,123 +53,17 @@
#include <vm/vm_pageout.h>
#include <machine/locore.h>
-
-
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef VM_PAGEOUT_BURST_MIN
-#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef VM_PAGEOUT_BURST_WAIT
-#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef VM_PAGEOUT_PAUSE_MAX
-#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
/*
- * To obtain a reasonable LRU approximation, the inactive queue
- * needs to be large enough to give pages on it a chance to be
- * referenced a second time. This macro defines the fraction
- * of active+inactive pages that should be inactive.
- * The pageout daemon uses it to update vm_page_inactive_target.
- *
- * If the number of free pages falls below vm_page_free_target and
- * vm_page_inactive_count is below vm_page_inactive_target,
- * then the pageout daemon starts running.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_INACTIVE_TARGET
-#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
+static int vm_pageout_requested;
/*
- * Once the pageout daemon starts running, it keeps going
- * until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
-
-/*
- * The pageout daemon always starts running once the number of free pages
- * falls below vm_page_free_min.
- */
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/*
- * When the number of free pages falls below vm_page_free_reserved,
- * only vm-privileged threads can allocate pages. vm-privilege
- * allows the pageout daemon and default pager (and any other
- * associated threads needed for default pageout) to continue
- * operation by dipping into the reserved pool of pages. */
-
-#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED 500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_internal,
- * the pageout daemon no longer trusts external pagers to clean pages.
- * External pagers are probably all wedged waiting for a free page.
- * It forcibly double-pages dirty pages belonging to external objects,
- * getting the pages to the default pager to clean.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_INTERNAL
-#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_really,
- * the pageout daemon stops work entirely to let the default pager
- * catch up (assuming the default pager has pages to clean).
- * Beyond this point, it is too dangerous to consume memory
- * even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_REALLY
-#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- * These variables record the pageout daemon's actions:
- * how many pages it looks at and what happens to those pages.
- * No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0; /* debugging */
-unsigned int vm_pageout_inactive = 0; /* debugging */
-unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
-unsigned int vm_pageout_inactive_busy = 0; /* debugging */
-unsigned int vm_pageout_inactive_absent = 0; /* debugging */
-unsigned int vm_pageout_inactive_used = 0; /* debugging */
-unsigned int vm_pageout_inactive_clean = 0; /* debugging */
-unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
-unsigned int vm_pageout_inactive_double = 0; /* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
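+
+/*
+ * Illustrative sketch, not part of this change: the two variables
+ * above are never read or written; they only provide unique kernel
+ * addresses to use as Mach events. A waiter atomically releases the
+ * free page queue lock and sleeps on such an address; a waker holding
+ * the same lock wakes one sleeper.
+ */
+static void example_wait_for_request(void)
+{
+	simple_lock(&vm_page_queue_free_lock);
+	thread_sleep(&vm_pageout_requested,
+		     simple_lock_addr(vm_page_queue_free_lock),
+		     FALSE);
+}
+
+static void example_post_request(void)
+{
+	/* The caller must hold vm_page_queue_free_lock. */
+	thread_wakeup_one(&vm_pageout_requested);
+}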
/*
* Routine: vm_pageout_setup
@@ -224,15 +118,20 @@ vm_pageout_setup(
/*
* If we are not flushing the page, allocate a
- * page in the object. If we cannot get the
- * page, flush instead.
+ * page in the object.
*/
if (!flush) {
- vm_object_lock(new_object);
- new_m = vm_page_alloc(new_object, new_offset);
- if (new_m == VM_PAGE_NULL)
- flush = TRUE;
- vm_object_unlock(new_object);
+ for (;;) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ vm_object_unlock(new_object);
+
+ if (new_m != VM_PAGE_NULL) {
+ break;
+ }
+
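+			/*
+			 * The free list is exhausted: sleep until
+			 * pages are freed, then retry the allocation.
+			 */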
+ VM_PAGE_WAIT(NULL);
+ }
}
if (flush) {
@@ -337,26 +236,33 @@ vm_pageout_setup(
vm_page_lock_queues();
vm_stat.pageouts++;
if (m->laundry) {
+
/*
- * vm_pageout_scan is telling us to put this page
- * at the front of the inactive queue, so it will
- * be immediately paged out to the default pager.
+		 * The caller is telling us that it is going to
+		 * immediately double-page this page to the default
+		 * pager.
*/
assert(!old_object->internal);
m->laundry = FALSE;
-
- queue_enter_first(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
} else if (old_object->internal) {
m->laundry = TRUE;
vm_page_laundry_count++;
vm_page_wire(m);
- } else
+ } else {
vm_page_activate(m);
+
+			/*
+			 * If vm_page_external_pagedout is negative,
+			 * the pageout daemon isn't expecting to be
+			 * notified. Otherwise, incrementing it tells
+			 * the daemon that one more page has been sent
+			 * to an external pager.
+			 */
+
+ if (vm_page_external_pagedout >= 0) {
+ vm_page_external_pagedout++;
+ }
+ }
vm_page_unlock_queues();
/*
@@ -487,455 +393,102 @@ vm_pageout_page(
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
- * It returns with vm_page_queue_free_lock held and
- * vm_page_free_wanted == 0.
+ *
+ * Return TRUE if the pageout daemon is done for now, FALSE otherwise.
+ * In the latter case, should_wait indicates whether the pageout
+ * daemon should pause to let pagers catch up.
+ *
+ * It returns with vm_page_queue_free_lock held.
*/
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
{
- unsigned int burst_count;
- unsigned int want_pages;
+ boolean_t done;
/*
- * We want to gradually dribble pages from the active queue
- * to the inactive queue. If we let the inactive queue get
- * very small, and then suddenly dump many pages into it,
- * those pages won't get a sufficient chance to be referenced
- * before we start taking them from the inactive queue.
- *
- * We must limit the rate at which we send pages to the pagers.
- * data_write messages consume memory, for message buffers and
- * for map-copy objects. If we get too far ahead of the pagers,
- * we can potentially run out of memory.
- *
- * We can use the laundry count to limit directly the number
- * of pages outstanding to the default pager. A similar
- * strategy for external pagers doesn't work, because
- * external pagers don't have to deallocate the pages sent them,
- * and because we might have to send pages to external pagers
- * even if they aren't processing writes. So we also
- * use a burst count to limit writes to external pagers.
- *
- * When memory is very tight, we can't rely on external pagers to
- * clean pages. They probably aren't running, because they
- * aren't vm-privileged. If we kept sending dirty pages to them,
- * we could exhaust the free list. However, we can't just ignore
- * pages belonging to external objects, because there might be no
- * pages belonging to internal objects. Hence, we get the page
- * into an internal object and then immediately double-page it,
- * sending it to the default pager.
- *
- * slab_collect should be last, because the other operations
- * might return memory to caches. When we pause we use
- * vm_pageout_scan_continue as our continuation, so we will
- * reenter vm_pageout_scan periodically and attempt to reclaim
- * internal memory even if we never reach vm_page_free_target.
+ * Try balancing pages among segments first, since this
+ * may be enough to resume unprivileged allocations.
*/
- stack_collect();
- net_kmsg_collect();
- consider_task_collect();
- if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
- pointless to call consider_thread_collect. */
- consider_thread_collect();
- slab_collect();
-
- for (burst_count = 0;;) {
- vm_page_t m;
- vm_object_t object;
- unsigned long free_count;
-
- /*
- * Recalculate vm_page_inactivate_target.
- */
-
- vm_page_lock_queues();
- vm_page_inactive_target =
- VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
-
- /*
- * Move pages from active to inactive.
- */
-
- while ((vm_page_inactive_count < vm_page_inactive_target) &&
- !queue_empty(&vm_page_queue_active)) {
- vm_object_t obj;
-
- vm_pageout_active++;
- m = (vm_page_t) queue_first(&vm_page_queue_active);
- assert(m->active && !m->inactive);
-
- obj = m->object;
- if (!vm_object_lock_try(obj)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_active, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_page_lock_queues();
- continue;
- }
-
- /*
- * If the page is busy, then we pull it
- * off the active queue and leave it alone.
- */
-
- if (m->busy) {
- vm_object_unlock(obj);
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- m->active = FALSE;
- vm_page_active_count--;
- continue;
- }
-
- /*
- * Deactivate the page while holding the object
- * locked, so we know the page is still not busy.
- * This should prevent races between pmap_enter
- * and pmap_clear_reference. The page might be
- * absent or fictitious, but vm_page_deactivate
- * can handle that.
- */
-
- vm_page_deactivate(m);
- vm_object_unlock(obj);
- }
-
- /*
- * We are done if we have met our targets *and*
- * nobody is still waiting for a page.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- free_count = vm_page_mem_free();
- if ((free_count >= vm_page_free_target) &&
- (vm_page_free_wanted == 0)) {
- vm_page_unlock_queues();
- break;
- }
- want_pages = ((free_count < vm_page_free_target) ||
- vm_page_free_wanted);
- simple_unlock(&vm_page_queue_free_lock);
-
- /*
- * Sometimes we have to pause:
- * 1) No inactive pages - nothing to do.
- * 2) Flow control - wait for pagers to catch up.
- * 3) Extremely low memory - sending out dirty pages
- * consumes memory. We don't take the risk of doing
- * this if the default pager already has work to do.
- */
- pause:
- if (queue_empty(&vm_page_queue_inactive) ||
- (burst_count >= vm_pageout_burst_max) ||
- (vm_page_laundry_count >= vm_pageout_burst_max) ||
- ((free_count < vm_pageout_reserved_really) &&
- (vm_page_laundry_count > 0))) {
- unsigned int pages, msecs;
-
- /*
- * vm_pageout_burst_wait is msecs/page.
- * If there is nothing for us to do, we wait
- * at least vm_pageout_empty_wait msecs.
- */
-
- if (vm_page_laundry_count > burst_count)
- pages = vm_page_laundry_count;
- else
- pages = burst_count;
- msecs = pages * vm_pageout_burst_wait;
-
- if (queue_empty(&vm_page_queue_inactive) &&
- (msecs < vm_pageout_empty_wait))
- msecs = vm_pageout_empty_wait;
- vm_page_unlock_queues();
-
- thread_will_wait_with_timeout(current_thread(), msecs);
- counter(c_vm_pageout_scan_block++);
- thread_block(vm_pageout_scan_continue);
- call_continuation(vm_pageout_scan_continue);
- /*NOTREACHED*/
- }
-
- vm_pageout_inactive++;
-
- /* Find a page we are interested in paging out. If we
- need pages, then we'll page anything out; otherwise
- we only page out external pages. */
- m = (vm_page_t) queue_first (&vm_page_queue_inactive);
- while (1)
- {
- assert (!m->active && m->inactive);
- if (want_pages || m->external)
- break;
-
- m = (vm_page_t) queue_next (&m->pageq);
- if (!m)
- goto pause;
- }
-
- object = m->object;
+ /* This function returns with vm_page_queue_free_lock held */
+ done = vm_page_balance();
- /*
- * Try to lock object; since we've got the
- * page queues lock, we can only try for this one.
- */
-
- if (!vm_object_lock_try(object)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_nolock++;
- continue;
- }
-
- /*
- * Remove the page from the inactive list.
- */
-
- queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
-
- if (m->busy || !object->alive) {
- /*
- * Somebody is already playing with this page.
- * Leave it off the pageout queues.
- */
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
- vm_pageout_inactive_busy++;
- continue;
- }
-
- /*
- * If it's absent, we can reclaim the page.
- */
-
- if (want_pages && m->absent) {
- vm_pageout_inactive_absent++;
- reclaim_page:
- vm_page_free(m);
- vm_page_unlock_queues();
-
- if (vm_object_collectable(object))
- vm_object_collect(object);
- else
- vm_object_unlock(object);
-
- continue;
- }
-
- /*
- * If it's being used, reactivate.
- * (Fictitious pages are either busy or absent.)
- */
-
- assert(!m->fictitious);
- if (m->reference || pmap_is_referenced(m->phys_addr)) {
- vm_object_unlock(object);
- vm_page_activate(m);
- vm_stat.reactivations++;
- current_task()->reactivations++;
- vm_page_unlock_queues();
- vm_pageout_inactive_used++;
- continue;
- }
-
- /*
- * Eliminate all mappings.
- */
-
- m->busy = TRUE;
- pmap_page_protect(m->phys_addr, VM_PROT_NONE);
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_addr);
-
- /* If we don't actually need more memory, and the page
- is not dirty, put it on the tail of the inactive queue
- and move on to the next page. */
- if (!want_pages && !m->dirty) {
- queue_remove (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_cleaned_external++;
- continue;
- }
-
- /*
- * If it's clean and not precious, we can free the page.
- */
-
- if (!m->dirty && !m->precious) {
- vm_pageout_inactive_clean++;
- goto reclaim_page;
- }
-
- /*
- * If we are very low on memory, then we can't
- * rely on an external pager to clean a dirty page,
- * because external pagers are not vm-privileged.
- *
- * The laundry bit tells vm_pageout_setup to
- * put the page back at the front of the inactive
- * queue instead of activating the page. Hence,
- * we will pick the page up again immediately and
- * resend it to the default pager.
- */
-
- assert(!m->laundry);
- if ((free_count < vm_pageout_reserved_internal) &&
- !object->internal) {
- m->laundry = TRUE;
- vm_pageout_inactive_double++;
- }
- vm_page_unlock_queues();
-
- /*
- * If there is no memory object for the page, create
- * one and hand it to the default pager.
- * [First try to collapse, so we don't create
- * one unnecessarily.]
- */
-
- if (!object->pager_initialized)
- vm_object_collapse(object);
- if (!object->pager_initialized)
- vm_object_pager_create(object);
- if (!object->pager_initialized)
- panic("vm_pageout_scan");
-
- vm_pageout_inactive_dirty++;
- vm_pageout_page(m, FALSE, TRUE); /* flush it */
- vm_object_unlock(object);
- burst_count++;
+ if (done) {
+ return TRUE;
}
-}
-void vm_pageout_scan_continue(void)
-{
+ simple_unlock(&vm_page_queue_free_lock);
+
/*
- * We just paused to let the pagers catch up.
- * If vm_page_laundry_count is still high,
- * then we aren't waiting long enough.
- * If we have paused some vm_pageout_pause_max times without
- * adjusting vm_pageout_burst_wait, it might be too big,
- * so we decrease it.
+ * Balancing is not enough. Shrink caches and scan pages
+ * for eviction.
*/
- vm_page_lock_queues();
- if (vm_page_laundry_count > vm_pageout_burst_min) {
- vm_pageout_burst_wait++;
- vm_pageout_pause_count = 0;
- } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
- vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
- if (vm_pageout_burst_wait < 1)
- vm_pageout_burst_wait = 1;
- vm_pageout_pause_count = 0;
- }
- vm_page_unlock_queues();
-
- vm_pageout_continue();
- /*NOTREACHED*/
-}
-
-/*
- * vm_pageout is the high level pageout daemon.
- */
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+ if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
+ pointless to call consider_thread_collect. */
+ consider_thread_collect();
-void vm_pageout_continue(void)
-{
/*
- * The pageout daemon is never done, so loop forever.
- * We should call vm_pageout_scan at least once each
- * time we are woken, even if vm_page_free_wanted is
- * zero, to check vm_page_free_target and
- * vm_page_inactive_target.
+ * slab_collect should be last, because the other operations
+ * might return memory to caches.
*/
+ slab_collect();
- for (;;) {
- vm_pageout_scan();
- /* we hold vm_page_queue_free_lock now */
- assert(vm_page_free_wanted == 0);
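+	/*
+	 * Replenish the inactive queues from the active queues, so
+	 * that eviction has candidate pages to scan.
+	 */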
+ vm_page_refill_inactive();
- assert_wait(&vm_page_free_wanted, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- counter(c_vm_pageout_block++);
- thread_block(vm_pageout_continue);
- }
+ /* This function returns with vm_page_queue_free_lock held */
+ return vm_page_evict(should_wait);
}
void vm_pageout(void)
{
- unsigned long free_after_reserve;
+ boolean_t done, should_wait;
current_thread()->vm_privilege = 1;
stack_privilege(current_thread());
thread_set_own_priority(0);
- /*
- * Initialize some paging parameters.
- */
-
- if (vm_pageout_burst_max == 0)
- vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
- if (vm_pageout_burst_min == 0)
- vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
- if (vm_pageout_burst_wait == 0)
- vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
- if (vm_pageout_empty_wait == 0)
- vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
- if (vm_page_free_reserved == 0)
- vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
- if (vm_pageout_pause_max == 0)
- vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
- if (vm_pageout_reserved_internal == 0)
- vm_pageout_reserved_internal =
- VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
- if (vm_pageout_reserved_really == 0)
- vm_pageout_reserved_really =
- VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
- free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
- if (vm_page_free_min == 0)
- vm_page_free_min = vm_page_free_reserved +
- VM_PAGE_FREE_MIN(free_after_reserve);
+ for (;;) {
+ done = vm_pageout_scan(&should_wait);
+ /* we hold vm_page_queue_free_lock now */
- if (vm_page_free_target == 0)
- vm_page_free_target = vm_page_free_reserved +
- VM_PAGE_FREE_TARGET(free_after_reserve);
+ if (done) {
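+			/*
+			 * Targets are met: sleep, atomically releasing
+			 * the free page queue lock, until
+			 * vm_pageout_start() reports a new shortage.
+			 */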
+ thread_sleep(&vm_pageout_requested,
+ simple_lock_addr(vm_page_queue_free_lock),
+ FALSE);
+ } else if (should_wait) {
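+			/*
+			 * Declare interest in the event before dropping
+			 * the free page queue lock, so that a wakeup
+			 * from vm_pageout_resume() cannot be lost
+			 * between the unlock and the block.
+			 */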
+ assert_wait(&vm_pageout_continue, FALSE);
+ thread_set_timeout(500);
+ simple_unlock(&vm_page_queue_free_lock);
+ thread_block(NULL);
+ } else {
+ simple_unlock(&vm_page_queue_free_lock);
+ }
+ }
+}
- if (vm_page_free_target < vm_page_free_min + 5)
- vm_page_free_target = vm_page_free_min + 5;
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
+{
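+	/*
+	 * During early bootstrap there is no current thread and the
+	 * scheduler cannot handle wakeups yet.
+	 */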
+ if (!current_thread())
+ return;
- /*
- * vm_pageout_scan will set vm_page_inactive_target.
- */
+ thread_wakeup_one(&vm_pageout_requested);
+}
- vm_pageout_continue();
- /*NOTREACHED*/
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+ thread_wakeup_one(&vm_pageout_continue);
}