-rw-r--r--  kern/slab.c          2
-rw-r--r--  vm/vm_page.c      1244
-rw-r--r--  vm/vm_page.h       114
-rw-r--r--  vm/vm_pageout.c    649
-rw-r--r--  vm/vm_pageout.h      4
-rw-r--r--  vm/vm_resident.c   316
6 files changed, 1457 insertions, 872 deletions
diff --git a/kern/slab.c b/kern/slab.c
index 9d21c428..d4ef847e 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -389,7 +389,7 @@ kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size)
assert(size == PAGE_SIZE);
page = vm_page_lookup_pa(kvtophys(addr));
assert(page != NULL);
- vm_page_release(page);
+ vm_page_release(page, FALSE, FALSE);
}
static vm_offset_t
diff --git a/vm/vm_page.c b/vm/vm_page.c
index f966e4dc..4c11ea7a 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -27,10 +27,13 @@
* multiprocessor systems. When a pool is empty and cannot provide a page,
* it is filled by transferring multiple pages from the backend buddy system.
* The symmetric case is handled likewise.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
*/
#include <string.h>
#include <kern/assert.h>
+#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/list.h>
@@ -42,6 +45,7 @@
#include <machine/pmap.h>
#include <sys/types.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#define DEBUG 0
@@ -100,12 +104,96 @@ struct vm_page_free_list {
};
/*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process is dependent on the amount of physical memory, we scale
+ * the minimum number of free pages from it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+ struct list internal_pages;
+ struct list external_pages;
+};
+
+/*
* Segment name buffer size.
*/
#define VM_PAGE_NAME_SIZE 16
/*
* Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since one or both of the
+ * page queues lock and the free page queue lock is held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
*/
struct vm_page_seg {
struct vm_page_cpu_pool cpu_pools[NCPUS];
@@ -117,6 +205,19 @@ struct vm_page_seg {
simple_lock_data_t lock;
struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
unsigned long nr_free_pages;
+
+ /* Free memory thresholds */
+ unsigned long min_free_pages; /* Privileged allocations only */
+ unsigned long low_free_pages; /* Pageout daemon starts scanning */
+ unsigned long high_free_pages; /* Pageout daemon stops scanning,
+ unprivileged allocations resume */
+
+ /* Page cache related data */
+ struct vm_page_queue active_pages;
+ unsigned long nr_active_pages;
+ unsigned long high_active_pages;
+ struct vm_page_queue inactive_pages;
+ unsigned long nr_inactive_pages;
};
/*
@@ -160,6 +261,16 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
*/
static unsigned int vm_page_segs_size __read_mostly;
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
static void __init
vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
{
@@ -183,6 +294,40 @@ vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
page[i].type = type;
}
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+ return (page->object != NULL)
+ && (page->wire_count == 0)
+ && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+ /*
+ * This function is called on pages pulled from the page queues,
+ * implying they're pageable, which is why the wire count isn't
+ * checked here.
+ */
+
+ return !page->busy
+ && !page->wanted
+ && !page->absent
+ && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+ page->busy = TRUE;
+ pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+ if (!page->dirty) {
+ page->dirty = pmap_is_modified(page->phys_addr);
+ }
+}
+
static void __init
vm_page_free_list_init(struct vm_page_free_list *free_list)
{
@@ -219,6 +364,19 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
assert(order < VM_PAGE_NR_FREE_LISTS);
+ if (vm_page_alloc_paused && current_thread()
+ && !current_thread()->vm_privilege) {
+ return NULL;
+ } else if (seg->nr_free_pages <= seg->low_free_pages) {
+ vm_pageout_start();
+
+ if ((seg->nr_free_pages <= seg->min_free_pages)
+ && current_thread() && !current_thread()->vm_privilege) {
+ vm_page_alloc_paused = TRUE;
+ return NULL;
+ }
+ }
+
for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
free_list = &seg->free_lists[i];
@@ -241,6 +399,11 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
}
seg->nr_free_pages -= (1 << order);
+
+ if (seg->nr_free_pages < seg->min_free_pages) {
+ vm_page_alloc_paused = TRUE;
+ }
+
return page;
}
@@ -364,6 +527,65 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
simple_unlock(&seg->lock);
}
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+ list_init(&queue->internal_pages);
+ list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+ if (page->external) {
+ list_insert_tail(&queue->external_pages, &page->node);
+ } else {
+ list_insert_tail(&queue->internal_pages, &page->node);
+ }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+ (void)queue;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+ struct vm_page *page;
+
+ if (!list_empty(&queue->external_pages)) {
+ page = list_first_entry(&queue->external_pages, struct vm_page, node);
+ return page;
+ }
+
+ if (!external_only && !list_empty(&queue->internal_pages)) {
+ page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+ return page;
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+ assert(index < vm_page_segs_size);
+ return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+ unsigned int index;
+
+ index = seg - vm_page_segs;
+ assert(index < vm_page_segs_size);
+ return index;
+}
+
static phys_addr_t __init
vm_page_seg_size(struct vm_page_seg *seg)
{
@@ -386,6 +608,39 @@ vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
}
static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+ if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+ panic("vm_page: segment too small");
+ }
+
+ seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+ / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+ if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+ seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+ }
+
+ seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+ / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+ if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+ seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+ }
+
+ seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+ / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+ if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+ seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+ }
+}
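As a worked illustration of the scaling above (not part of the patch): a hypothetical 512 MiB segment with 4 KiB pages has nr_pages = 131072, so

    min_free_pages  = 131072 *  5 / 100 =  6553   (above the  500 floor)
    low_free_pages  = 131072 *  6 / 100 =  7864   (above the  600 floor)
    high_free_pages = 131072 * 10 / 100 = 13107   (above the 1000 floor)

i.e. unprivileged allocations are paused at or below 6553 free pages, the pageout daemon is woken at or below 7864, and the segment is considered usable again once it climbs back to 13107 or more.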
+
+static void __init
vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
struct vm_page *pages)
{
@@ -408,7 +663,15 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
vm_page_free_list_init(&seg->free_lists[i]);
seg->nr_free_pages = 0;
- i = seg - vm_page_segs;
+
+ vm_page_seg_compute_pageout_thresholds(seg);
+
+ vm_page_queue_init(&seg->active_pages);
+ seg->nr_active_pages = 0;
+ vm_page_queue_init(&seg->inactive_pages);
+ seg->nr_inactive_pages = 0;
+
+ i = vm_page_seg_index(seg);
for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
@@ -485,6 +748,502 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
}
}
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->active = TRUE;
+ page->reference = TRUE;
+ vm_page_queue_push(&seg->active_pages, page);
+ seg->nr_active_pages++;
+ vm_page_active_count++;
+}
+
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && page->active && !page->inactive);
+ page->active = FALSE;
+ vm_page_queue_remove(&seg->active_pages, page);
+ seg->nr_active_pages--;
+ vm_page_active_count--;
+}
+
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->inactive = TRUE;
+ vm_page_queue_push(&seg->inactive_pages, page);
+ seg->nr_inactive_pages++;
+ vm_page_inactive_count++;
+}
+
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && page->inactive);
+ page->inactive = FALSE;
+ vm_page_queue_remove(&seg->inactive_pages, page);
+ seg->nr_inactive_pages--;
+ vm_page_inactive_count--;
+}
+
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->active_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_active_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_active_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_active_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_inactive_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_inactive_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+ boolean_t external_only,
+ boolean_t *was_active)
+{
+ struct vm_page *page;
+
+ page = vm_page_seg_pull_inactive_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = FALSE;
+ return page;
+ }
+
+ page = vm_page_seg_pull_active_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = TRUE;
+ return page;
+ }
+
+ return NULL;
+}
+
+static boolean_t
+vm_page_seg_min_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->min_free_pages);
+}
+
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->high_free_pages);
+}
+
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages >= seg->high_free_pages);
+}
+
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ assert(seg1 != seg2);
+
+ if (seg1 < seg2) {
+ simple_lock(&seg1->lock);
+ simple_lock(&seg2->lock);
+ } else {
+ simple_lock(&seg2->lock);
+ simple_lock(&seg1->lock);
+ }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ simple_unlock(&seg1->lock);
+ simple_unlock(&seg2->lock);
+}
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+ struct vm_page_seg *remote_seg)
+{
+ struct vm_page *src, *dest;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t was_active;
+
+ vm_page_lock_queues();
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_seg_double_lock(seg, remote_seg);
+
+ if (vm_page_seg_usable(seg)
+ || !vm_page_seg_page_available(remote_seg)) {
+ goto error;
+ }
+
+ src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+ if (src == NULL) {
+ goto error;
+ }
+
+ assert(src->object != NULL);
+ assert(!src->fictitious && !src->private);
+ assert(src->wire_count == 0);
+ assert(src->type != VM_PT_FREE);
+ assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+ dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+ assert(dest != NULL);
+
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+ src->reference = TRUE;
+ }
+
+ object = src->object;
+ offset = src->offset;
+ vm_page_remove(src);
+
+ vm_page_remove_mappings(src);
+
+ vm_page_set_type(dest, 0, src->type);
+ memcpy(&dest->vm_page_header, &src->vm_page_header,
+ sizeof(*dest) - VM_PAGE_HEADER_SIZE);
+ vm_page_copy(src, dest);
+
+ if (!src->dirty) {
+ pmap_clear_modify(dest->phys_addr);
+ }
+
+ dest->busy = FALSE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_init(src);
+ src->free = TRUE;
+ simple_lock(&seg->lock);
+ vm_page_set_type(src, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(seg, src, 0);
+ simple_unlock(&seg->lock);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ vm_page_insert(dest, object, offset);
+ vm_object_unlock(object);
+
+ if (was_active) {
+ vm_page_activate(dest);
+ } else {
+ vm_page_deactivate(dest);
+ }
+
+ vm_page_unlock_queues();
+
+ return TRUE;
+
+error:
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+ struct vm_page_seg *remote_seg;
+ unsigned int i;
+ boolean_t balanced;
+
+ /*
+ * It's important here that pages are moved to lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ remote_seg = vm_page_seg_get(i);
+
+ if (remote_seg == seg) {
+ continue;
+ }
+
+ balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg,
+ boolean_t external_only, boolean_t low_memory)
+{
+ struct vm_page *page;
+ boolean_t reclaim, laundry;
+ vm_object_t object;
+ boolean_t was_active;
+
+ page = NULL;
+ object = NULL;
+
+restart:
+ vm_page_lock_queues();
+ simple_lock(&seg->lock);
+
+ if (page != NULL) {
+ vm_object_lock(page->object);
+ } else {
+ page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+ if (page == NULL) {
+ goto out;
+ }
+ }
+
+ assert(page->object != NULL);
+ assert(!page->fictitious && !page->private);
+ assert(page->wire_count == 0);
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ object = page->object;
+
+ if (!was_active
+ && (page->reference || pmap_is_referenced(page->phys_addr))) {
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ vm_object_unlock(object);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ vm_page_unlock_queues();
+ page = NULL;
+ goto restart;
+ }
+
+ vm_page_remove_mappings(page);
+
+ if (!page->dirty && !page->precious) {
+ reclaim = TRUE;
+ goto out;
+ }
+
+ reclaim = FALSE;
+
+ /*
+ * If we are very low on memory, then we can't rely on an external
+ * pager to clean a dirty page, because external pagers are not
+ * vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup not to do any special
+ * processing of this page since it's immediately going to be
+ * double paged out to the default pager. The laundry bit is
+ * reset and the page is inserted into an internal object by
+ * vm_pageout_setup before the double paging pass.
+ */
+
+ assert(!page->laundry);
+
+ if (object->internal || !low_memory) {
+ laundry = FALSE;
+ } else {
+ laundry = page->laundry = TRUE;
+ }
+
+out:
+ simple_unlock(&seg->lock);
+
+ if (object == NULL) {
+ vm_page_unlock_queues();
+ return FALSE;
+ }
+
+ if (reclaim) {
+ vm_page_free(page);
+ vm_page_unlock_queues();
+
+ if (vm_object_collectable(object)) {
+ vm_object_collect(object);
+ } else {
+ vm_object_unlock(object);
+ }
+
+ return TRUE;
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create one and hand it
+ * to the default pager. First try to collapse, so we don't create
+ * one unnecessarily.
+ */
+
+ if (!object->pager_initialized) {
+ vm_object_collapse(object);
+ }
+
+ if (!object->pager_initialized) {
+ vm_object_pager_create(object);
+ }
+
+ if (!object->pager_initialized) {
+ panic("vm_page_seg_evict");
+ }
+
+ vm_pageout_page(page, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+
+ if (laundry) {
+ goto restart;
+ }
+
+ return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+ seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+ / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+
+ simple_lock(&seg->lock);
+
+ vm_page_seg_compute_high_active_page(seg);
+
+ while (seg->nr_active_pages > seg->high_active_pages) {
+ page = vm_page_seg_pull_active_page(seg, FALSE);
+
+ if (page == NULL) {
+ break;
+ }
+
+ page->reference = FALSE;
+ pmap_clear_reference(page->phys_addr);
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ }
+
+ simple_unlock(&seg->lock);
+}
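For scale (illustration only): with the 1/3 ratio defined above, a segment holding 30000 active and 60000 inactive pages gets high_active_pages = 90000 / 3 = 30000, so this pass moves nothing; with the split reversed (60000 active, 30000 inactive) it deactivates pages, clearing their reference bits, until only 30000 remain active.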
+
void __init
vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
{
@@ -712,6 +1471,77 @@ vm_page_lookup_pa(phys_addr_t pa)
return NULL;
}
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+ if (page->fictitious) {
+ if (page->private) {
+ panic("vm_page: page both fictitious and private");
+ }
+
+ if (page->phys_addr != vm_page_fictitious_addr) {
+ panic("vm_page: invalid fictitious page");
+ }
+ } else {
+ struct vm_page_seg *seg;
+
+ if (page->phys_addr == vm_page_fictitious_addr) {
+ panic("vm_page: real page has fictitious address");
+ }
+
+ seg = vm_page_lookup_seg(page);
+
+ if (seg == NULL) {
+ if (!page->private) {
+ panic("vm_page: page claims it's managed but not in any segment");
+ }
+ } else {
+ if (page->private) {
+ struct vm_page *real_page;
+
+ if (vm_page_pageable(page)) {
+ panic("vm_page: private page is pageable");
+ }
+
+ real_page = vm_page_lookup_pa(page->phys_addr);
+
+ if (vm_page_pageable(real_page)) {
+ panic("vm_page: page underlying private page is pageable");
+ }
+
+ if ((real_page->type == VM_PT_FREE)
+ || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+ panic("vm_page: page underlying private pagei is free");
+ }
+ } else {
+ unsigned int index;
+
+ index = vm_page_seg_index(seg);
+
+ if (index != page->seg_index) {
+ panic("vm_page: page segment mismatch");
+ }
+ }
+ }
+ }
+}
+
struct vm_page *
vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
{
@@ -725,8 +1555,8 @@ vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
return page;
}
- if (type == VM_PT_PMAP)
- panic("vm_page: unable to allocate pmap page");
+ if (!current_thread() || current_thread()->vm_privilege)
+ panic("vm_page: privileged thread unable to allocate page");
return NULL;
}
@@ -769,6 +1599,9 @@ vm_page_info_all(void)
printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+ vm_page_seg_name(vm_page_seg_index(seg)),
+ seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
}
}
@@ -879,3 +1712,408 @@ vm_page_mem_free(void)
return total;
}
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+ VM_PAGE_CHECK(page);
+
+ if (page->wire_count == 0) {
+ vm_page_queues_remove(page);
+
+ if (!page->private && !page->fictitious) {
+ vm_page_wire_count++;
+ }
+ }
+
+ page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ assert(page->wire_count != 0);
+ page->wire_count--;
+
+ if ((page->wire_count != 0)
+ || page->fictitious
+ || page->private) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+
+ vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (page->active || (page->inactive && page->reference)) {
+ if (!page->fictitious && !page->private && !page->absent) {
+ pmap_clear_reference(page->phys_addr);
+ }
+
+ page->reference = FALSE;
+ vm_page_queues_remove(page);
+ }
+
+ if ((page->wire_count == 0) && !page->fictitious
+ && !page->private && !page->inactive) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_inactive_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * Unconditionally remove so that, even if the page was already
+ * active, it gets back to the end of the active queue.
+ */
+ vm_page_queues_remove(page);
+
+ if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ if (page->active)
+ panic("vm_page_activate: already active");
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ assert(!page->active || !page->inactive);
+
+ if (!page->active && !page->inactive) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+
+ if (page->active) {
+ vm_page_seg_remove_active_page(seg, page);
+ } else {
+ vm_page_seg_remove_inactive_page(seg, page);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t usable;
+ unsigned int i;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ usable = vm_page_seg_usable(seg);
+ simple_unlock(&seg->lock);
+
+ if (!usable) {
+ return FALSE;
+ }
+ }
+
+ vm_page_external_pagedout = -1;
+ vm_page_alloc_paused = FALSE;
+ thread_wakeup(&vm_page_alloc_paused);
+ return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t page_available;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ page_available = vm_page_seg_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (page_available) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+ boolean_t balanced;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are moved from higher priority
+ * segments first.
+ */
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+ boolean_t balanced;
+
+ while (vm_page_may_balance()) {
+ balanced = vm_page_balance_once();
+
+ if (!balanced) {
+ break;
+ }
+ }
+
+ return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only)
+{
+ struct vm_page_seg *seg;
+ boolean_t low_memory, min_page_available, evicted;
+ unsigned int i;
+
+ /*
+ * XXX Page allocation currently only uses the DIRECTMAP selector,
+ * allowing us to know which segments to look at when determining
+ * whether we're very low on memory.
+ */
+ low_memory = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ if (i > VM_PAGE_SEG_DIRECTMAP) {
+ break;
+ }
+
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ min_page_available = vm_page_seg_min_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (min_page_available) {
+ low_memory = FALSE;
+ break;
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * It's important here that pages are evicted from lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ evicted = vm_page_seg_evict(vm_page_seg_get(i),
+ external_only, low_memory);
+
+ if (evicted) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY 5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+ boolean_t pause, evicted, external_only;
+ unsigned int i;
+
+ *should_wait = TRUE;
+ external_only = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_external_pagedout = 0;
+ simple_unlock(&vm_page_queue_free_lock);
+
+again:
+ vm_page_lock_queues();
+ pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+ vm_page_unlock_queues();
+
+ if (pause) {
+ simple_lock(&vm_page_queue_free_lock);
+ return FALSE;
+ }
+
+ for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+ evicted = vm_page_evict_once(external_only);
+
+ if (!evicted) {
+ break;
+ }
+ }
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Keep in mind eviction may not cause pageouts, since non-precious
+ * clean pages are simply released.
+ */
+ if ((vm_page_external_pagedout == 0) || (vm_page_laundry_count == 0)) {
+ /*
+ * No pageout, but some clean pages were freed. Start a complete
+ * scan again without waiting.
+ */
+ if (evicted) {
+ *should_wait = FALSE;
+ return FALSE;
+ }
+
+ /*
+ * Eviction failed, consider pages from internal objects on the
+ * next attempt.
+ */
+ if (external_only) {
+ simple_unlock(&vm_page_queue_free_lock);
+ external_only = FALSE;
+ goto again;
+ }
+
+ /*
+ * TODO Find out what could cause this and how to deal with it.
+ * This will likely require an out-of-memory killer.
+ */
+ panic("vm_page: unable to recycle any page");
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+ unsigned int i;
+
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ vm_page_seg_refill_inactive(vm_page_seg_get(i));
+ }
+
+ vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+ assert(!current_thread()->vm_privilege);
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ if (!vm_page_alloc_paused) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return;
+ }
+
+ assert_wait(&vm_page_alloc_paused, FALSE);
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+}
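For orientation, a minimal sketch (hypothetical caller, not part of this commit) of how an unprivileged allocation path is expected to cooperate with vm_page_alloc_paused and vm_page_wait(); the retry shape mirrors the VM_PAGE_WAIT(NULL) loop added to vm_pageout_setup() later in this diff:

    vm_page_t m;

    for (;;) {
            m = vm_page_grab();     /* may fail while allocations are paused */

            if (m != VM_PAGE_NULL)
                    break;

            /*
             * Sleeps on vm_page_alloc_paused; vm_page_check_usable()
             * issues the wakeup once every segment is usable again.
             */
            VM_PAGE_WAIT(NULL);
    }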
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 164ab6d4..eb684c1b 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -40,6 +40,7 @@
#include <vm/vm_object.h>
#include <vm/vm_types.h>
#include <kern/queue.h>
+#include <kern/list.h>
#include <kern/lock.h>
#include <kern/log2.h>
@@ -77,8 +78,7 @@
*/
struct vm_page {
- /* Members used in the vm_page module only */
- struct list node;
+ struct list node; /* page queues or free list (P) */
unsigned short type;
unsigned short seg_index;
unsigned short order;
@@ -90,15 +90,13 @@ struct vm_page {
*/
phys_addr_t phys_addr;
+ queue_chain_t listq; /* all pages in same object (O) */
+ struct vm_page *next; /* VP bucket link (O) */
+
/* We use an empty struct as the delimiter. */
struct {} vm_page_header;
#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header)
- queue_chain_t pageq; /* queue info for FIFO
- * queue or free list (P) */
- queue_chain_t listq; /* all pages in same object (O) */
- struct vm_page *next; /* VP bucket link (O) */
-
vm_object_t object; /* which object am I in (O,P) */
vm_offset_t offset; /* offset into that object (O,P) */
@@ -136,7 +134,9 @@ struct vm_page {
* some useful check on a page structure.
*/
-#define VM_PAGE_CHECK(mem)
+#define VM_PAGE_CHECK(mem) vm_page_check(mem)
+
+void vm_page_check(const struct vm_page *page);
/*
* Each pageable resident page falls into one of three lists:
@@ -155,13 +155,6 @@ struct vm_page {
*/
extern
-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
-extern
-queue_head_t vm_page_queue_active; /* active memory queue */
-extern
-queue_head_t vm_page_queue_inactive; /* inactive memory queue */
-
-extern
int vm_page_fictitious_count;/* How many fictitious pages are free? */
extern
int vm_page_active_count; /* How many pages are active? */
@@ -170,25 +163,15 @@ int vm_page_inactive_count; /* How many pages are inactive? */
extern
int vm_page_wire_count; /* How many pages are wired? */
extern
-int vm_page_free_target; /* How many do we want free? */
-extern
-int vm_page_free_min; /* When to wakeup pageout */
-extern
-int vm_page_inactive_target;/* How many do we want inactive? */
-extern
-int vm_page_free_reserved; /* How many pages reserved to do pageout */
-extern
int vm_page_laundry_count; /* How many pages being laundered? */
-
+extern
+int vm_page_external_pagedout; /* How many external pages being paged out? */
decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
page queues */
decl_simple_lock_data(extern,vm_page_queue_free_lock)
/* lock on free page queue */
-extern unsigned int vm_page_free_wanted;
- /* how many threads are waiting for memory */
-
extern phys_addr_t vm_page_fictitious_addr;
/* (fake) phys_addr of fictitious pages */
@@ -204,7 +187,7 @@ extern vm_page_t vm_page_grab_fictitious(void);
extern boolean_t vm_page_convert(vm_page_t *);
extern void vm_page_more_fictitious(void);
extern vm_page_t vm_page_grab(void);
-extern void vm_page_release(vm_page_t);
+extern void vm_page_release(vm_page_t, boolean_t, boolean_t);
extern phys_addr_t vm_page_grab_phys_addr(void);
extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int);
extern void vm_page_free_contig(vm_page_t, vm_size_t);
@@ -294,22 +277,7 @@ extern unsigned int vm_page_info(
#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
-#define VM_PAGE_QUEUES_REMOVE(mem) \
- MACRO_BEGIN \
- if (mem->active) { \
- queue_remove(&vm_page_queue_active, \
- mem, vm_page_t, pageq); \
- mem->active = FALSE; \
- vm_page_active_count--; \
- } \
- \
- if (mem->inactive) { \
- queue_remove(&vm_page_queue_inactive, \
- mem, vm_page_t, pageq); \
- mem->inactive = FALSE; \
- vm_page_inactive_count--; \
- } \
- MACRO_END
+#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem)
/*
* Copyright (c) 2010-2014 Richard Braun.
@@ -358,18 +326,11 @@ extern unsigned int vm_page_info(
/*
* Page usage types.
- *
- * Failing to allocate pmap pages will cause a kernel panic.
- * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
- * pages.
*/
#define VM_PT_FREE 0 /* Page unused */
#define VM_PT_RESERVED 1 /* Page reserved at boot time */
#define VM_PT_TABLE 2 /* Page is part of the page table */
-#define VM_PT_PMAP 3 /* Page stores pmap-specific data */
-#define VM_PT_KMEM 4 /* Page is part of a kmem slab */
-#define VM_PT_STACK 5 /* Type for generic kernel allocations */
-#define VM_PT_KERNEL 6 /* Type for generic kernel allocations */
+#define VM_PT_KERNEL 3 /* Type for generic kernel allocations */
static inline unsigned short
vm_page_type(const struct vm_page *page)
@@ -521,4 +482,53 @@ phys_addr_t vm_page_mem_size(void);
*/
unsigned long vm_page_mem_free(void);
+/*
+ * Remove the given page from any page queue it might be in.
+ */
+void vm_page_queues_remove(struct vm_page *page);
+
+/*
+ * Balance physical pages among segments.
+ *
+ * This function should be called first by the pageout daemon
+ * on memory pressure, since it may be unnecessary to perform any
+ * other operation, let alone shrink caches, if balancing is
+ * enough to make enough free pages.
+ *
+ * Return TRUE if balancing made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_balance(void);
+
+/*
+ * Evict physical pages.
+ *
+ * This function should be called by the pageout daemon after balancing
+ * the segments and shrinking kernel caches.
+ *
+ * Return TRUE if eviction made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * Otherwise, report whether the pageout daemon should wait (some pages
+ * have been paged out) or not (only clean pages have been released).
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_evict(boolean_t *should_wait);
+
+/*
+ * Turn active pages into inactive ones for second-chance LRU
+ * approximation.
+ *
+ * This function should be called by the pageout daemon on memory pressure,
+ * i.e. right before evicting pages.
+ *
+ * XXX This is probably not the best strategy, compared to keeping the
+ * active/inactive ratio in check at all times, but this means less
+ * frequent refills.
+ */
+void vm_page_refill_inactive(void);
+
#endif /* _VM_VM_PAGE_H_ */
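Taken together, these declarations imply a fixed calling order for the pageout daemon. A minimal sketch, using a hypothetical wrapper name (the real driver is the reworked vm_pageout_scan() in vm_pageout.c below, which also calls stack_collect() and the other cache shrinkers):

    static boolean_t
    pageout_pass(boolean_t *should_wait)
    {
            /* Returns with vm_page_queue_free_lock held. */
            if (vm_page_balance())
                    return TRUE;

            simple_unlock(&vm_page_queue_free_lock);

            slab_collect();                 /* shrink kernel caches */
            vm_page_refill_inactive();      /* second-chance LRU refill */

            /* Also returns with vm_page_queue_free_lock held. */
            return vm_page_evict(should_wait);
    }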
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index a36c9905..dd0f995c 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,123 +53,17 @@
#include <vm/vm_pageout.h>
#include <machine/locore.h>
-
-
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef VM_PAGEOUT_BURST_MIN
-#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef VM_PAGEOUT_BURST_WAIT
-#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef VM_PAGEOUT_PAUSE_MAX
-#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
/*
- * To obtain a reasonable LRU approximation, the inactive queue
- * needs to be large enough to give pages on it a chance to be
- * referenced a second time. This macro defines the fraction
- * of active+inactive pages that should be inactive.
- * The pageout daemon uses it to update vm_page_inactive_target.
- *
- * If the number of free pages falls below vm_page_free_target and
- * vm_page_inactive_count is below vm_page_inactive_target,
- * then the pageout daemon starts running.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_INACTIVE_TARGET
-#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
+static int vm_pageout_requested;
/*
- * Once the pageout daemon starts running, it keeps going
- * until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
-
-/*
- * The pageout daemon always starts running once the number of free pages
- * falls below vm_page_free_min.
- */
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/*
- * When the number of free pages falls below vm_page_free_reserved,
- * only vm-privileged threads can allocate pages. vm-privilege
- * allows the pageout daemon and default pager (and any other
- * associated threads needed for default pageout) to continue
- * operation by dipping into the reserved pool of pages. */
-
-#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED 500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_internal,
- * the pageout daemon no longer trusts external pagers to clean pages.
- * External pagers are probably all wedged waiting for a free page.
- * It forcibly double-pages dirty pages belonging to external objects,
- * getting the pages to the default pager to clean.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_INTERNAL
-#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_really,
- * the pageout daemon stops work entirely to let the default pager
- * catch up (assuming the default pager has pages to clean).
- * Beyond this point, it is too dangerous to consume memory
- * even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_REALLY
-#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- * These variables record the pageout daemon's actions:
- * how many pages it looks at and what happens to those pages.
- * No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0; /* debugging */
-unsigned int vm_pageout_inactive = 0; /* debugging */
-unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
-unsigned int vm_pageout_inactive_busy = 0; /* debugging */
-unsigned int vm_pageout_inactive_absent = 0; /* debugging */
-unsigned int vm_pageout_inactive_used = 0; /* debugging */
-unsigned int vm_pageout_inactive_clean = 0; /* debugging */
-unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
-unsigned int vm_pageout_inactive_double = 0; /* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
/*
* Routine: vm_pageout_setup
@@ -224,15 +118,20 @@ vm_pageout_setup(
/*
* If we are not flushing the page, allocate a
- * page in the object. If we cannot get the
- * page, flush instead.
+ * page in the object.
*/
if (!flush) {
- vm_object_lock(new_object);
- new_m = vm_page_alloc(new_object, new_offset);
- if (new_m == VM_PAGE_NULL)
- flush = TRUE;
- vm_object_unlock(new_object);
+ for (;;) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ vm_object_unlock(new_object);
+
+ if (new_m != VM_PAGE_NULL) {
+ break;
+ }
+
+ VM_PAGE_WAIT(NULL);
+ }
}
if (flush) {
@@ -337,26 +236,33 @@ vm_pageout_setup(
vm_page_lock_queues();
vm_stat.pageouts++;
if (m->laundry) {
+
/*
- * vm_pageout_scan is telling us to put this page
- * at the front of the inactive queue, so it will
- * be immediately paged out to the default pager.
+ * The caller is telling us that it is going to
+ * immediately double page this page to the default
+ * pager.
*/
assert(!old_object->internal);
m->laundry = FALSE;
-
- queue_enter_first(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
} else if (old_object->internal) {
m->laundry = TRUE;
vm_page_laundry_count++;
vm_page_wire(m);
- } else
+ } else {
vm_page_activate(m);
+
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout >= 0) {
+ vm_page_external_pagedout++;
+ }
+ }
vm_page_unlock_queues();
/*
@@ -487,455 +393,102 @@ vm_pageout_page(
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
- * It returns with vm_page_queue_free_lock held and
- * vm_page_free_wanted == 0.
+ *
+ * Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ * in which case should_wait indicates whether the pageout daemon
+ * should wait to allow pagers to keep up.
+ *
+ * It returns with vm_page_queue_free_lock held.
*/
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
{
- unsigned int burst_count;
- unsigned int want_pages;
+ boolean_t done;
/*
- * We want to gradually dribble pages from the active queue
- * to the inactive queue. If we let the inactive queue get
- * very small, and then suddenly dump many pages into it,
- * those pages won't get a sufficient chance to be referenced
- * before we start taking them from the inactive queue.
- *
- * We must limit the rate at which we send pages to the pagers.
- * data_write messages consume memory, for message buffers and
- * for map-copy objects. If we get too far ahead of the pagers,
- * we can potentially run out of memory.
- *
- * We can use the laundry count to limit directly the number
- * of pages outstanding to the default pager. A similar
- * strategy for external pagers doesn't work, because
- * external pagers don't have to deallocate the pages sent them,
- * and because we might have to send pages to external pagers
- * even if they aren't processing writes. So we also
- * use a burst count to limit writes to external pagers.
- *
- * When memory is very tight, we can't rely on external pagers to
- * clean pages. They probably aren't running, because they
- * aren't vm-privileged. If we kept sending dirty pages to them,
- * we could exhaust the free list. However, we can't just ignore
- * pages belonging to external objects, because there might be no
- * pages belonging to internal objects. Hence, we get the page
- * into an internal object and then immediately double-page it,
- * sending it to the default pager.
- *
- * slab_collect should be last, because the other operations
- * might return memory to caches. When we pause we use
- * vm_pageout_scan_continue as our continuation, so we will
- * reenter vm_pageout_scan periodically and attempt to reclaim
- * internal memory even if we never reach vm_page_free_target.
+ * Try balancing pages among segments first, since this
+ * may be enough to resume unprivileged allocations.
*/
- stack_collect();
- net_kmsg_collect();
- consider_task_collect();
- if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
- pointless to call consider_thread_collect. */
- consider_thread_collect();
- slab_collect();
-
- for (burst_count = 0;;) {
- vm_page_t m;
- vm_object_t object;
- unsigned long free_count;
-
- /*
- * Recalculate vm_page_inactivate_target.
- */
-
- vm_page_lock_queues();
- vm_page_inactive_target =
- VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
-
- /*
- * Move pages from active to inactive.
- */
-
- while ((vm_page_inactive_count < vm_page_inactive_target) &&
- !queue_empty(&vm_page_queue_active)) {
- vm_object_t obj;
-
- vm_pageout_active++;
- m = (vm_page_t) queue_first(&vm_page_queue_active);
- assert(m->active && !m->inactive);
-
- obj = m->object;
- if (!vm_object_lock_try(obj)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_active, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_page_lock_queues();
- continue;
- }
-
- /*
- * If the page is busy, then we pull it
- * off the active queue and leave it alone.
- */
-
- if (m->busy) {
- vm_object_unlock(obj);
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- m->active = FALSE;
- vm_page_active_count--;
- continue;
- }
-
- /*
- * Deactivate the page while holding the object
- * locked, so we know the page is still not busy.
- * This should prevent races between pmap_enter
- * and pmap_clear_reference. The page might be
- * absent or fictitious, but vm_page_deactivate
- * can handle that.
- */
-
- vm_page_deactivate(m);
- vm_object_unlock(obj);
- }
-
- /*
- * We are done if we have met our targets *and*
- * nobody is still waiting for a page.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- free_count = vm_page_mem_free();
- if ((free_count >= vm_page_free_target) &&
- (vm_page_free_wanted == 0)) {
- vm_page_unlock_queues();
- break;
- }
- want_pages = ((free_count < vm_page_free_target) ||
- vm_page_free_wanted);
- simple_unlock(&vm_page_queue_free_lock);
-
- /*
- * Sometimes we have to pause:
- * 1) No inactive pages - nothing to do.
- * 2) Flow control - wait for pagers to catch up.
- * 3) Extremely low memory - sending out dirty pages
- * consumes memory. We don't take the risk of doing
- * this if the default pager already has work to do.
- */
- pause:
- if (queue_empty(&vm_page_queue_inactive) ||
- (burst_count >= vm_pageout_burst_max) ||
- (vm_page_laundry_count >= vm_pageout_burst_max) ||
- ((free_count < vm_pageout_reserved_really) &&
- (vm_page_laundry_count > 0))) {
- unsigned int pages, msecs;
-
- /*
- * vm_pageout_burst_wait is msecs/page.
- * If there is nothing for us to do, we wait
- * at least vm_pageout_empty_wait msecs.
- */
-
- if (vm_page_laundry_count > burst_count)
- pages = vm_page_laundry_count;
- else
- pages = burst_count;
- msecs = pages * vm_pageout_burst_wait;
-
- if (queue_empty(&vm_page_queue_inactive) &&
- (msecs < vm_pageout_empty_wait))
- msecs = vm_pageout_empty_wait;
- vm_page_unlock_queues();
-
- thread_will_wait_with_timeout(current_thread(), msecs);
- counter(c_vm_pageout_scan_block++);
- thread_block(vm_pageout_scan_continue);
- call_continuation(vm_pageout_scan_continue);
- /*NOTREACHED*/
- }
-
- vm_pageout_inactive++;
-
- /* Find a page we are interested in paging out. If we
- need pages, then we'll page anything out; otherwise
- we only page out external pages. */
- m = (vm_page_t) queue_first (&vm_page_queue_inactive);
- while (1)
- {
- assert (!m->active && m->inactive);
- if (want_pages || m->external)
- break;
-
- m = (vm_page_t) queue_next (&m->pageq);
- if (!m)
- goto pause;
- }
-
- object = m->object;
+ /* This function returns with vm_page_queue_free_lock held */
+ done = vm_page_balance();
- /*
- * Try to lock object; since we've got the
- * page queues lock, we can only try for this one.
- */
-
- if (!vm_object_lock_try(object)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_nolock++;
- continue;
- }
-
- /*
- * Remove the page from the inactive list.
- */
-
- queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
-
- if (m->busy || !object->alive) {
- /*
- * Somebody is already playing with this page.
- * Leave it off the pageout queues.
- */
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
- vm_pageout_inactive_busy++;
- continue;
- }
-
- /*
- * If it's absent, we can reclaim the page.
- */
-
- if (want_pages && m->absent) {
- vm_pageout_inactive_absent++;
- reclaim_page:
- vm_page_free(m);
- vm_page_unlock_queues();
-
- if (vm_object_collectable(object))
- vm_object_collect(object);
- else
- vm_object_unlock(object);
-
- continue;
- }
-
- /*
- * If it's being used, reactivate.
- * (Fictitious pages are either busy or absent.)
- */
-
- assert(!m->fictitious);
- if (m->reference || pmap_is_referenced(m->phys_addr)) {
- vm_object_unlock(object);
- vm_page_activate(m);
- vm_stat.reactivations++;
- current_task()->reactivations++;
- vm_page_unlock_queues();
- vm_pageout_inactive_used++;
- continue;
- }
-
- /*
- * Eliminate all mappings.
- */
-
- m->busy = TRUE;
- pmap_page_protect(m->phys_addr, VM_PROT_NONE);
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_addr);
-
- /* If we don't actually need more memory, and the page
- is not dirty, put it on the tail of the inactive queue
- and move on to the next page. */
- if (!want_pages && !m->dirty) {
- queue_remove (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_cleaned_external++;
- continue;
- }
-
- /*
- * If it's clean and not precious, we can free the page.
- */
-
- if (!m->dirty && !m->precious) {
- vm_pageout_inactive_clean++;
- goto reclaim_page;
- }
-
- /*
- * If we are very low on memory, then we can't
- * rely on an external pager to clean a dirty page,
- * because external pagers are not vm-privileged.
- *
- * The laundry bit tells vm_pageout_setup to
- * put the page back at the front of the inactive
- * queue instead of activating the page. Hence,
- * we will pick the page up again immediately and
- * resend it to the default pager.
- */
-
- assert(!m->laundry);
- if ((free_count < vm_pageout_reserved_internal) &&
- !object->internal) {
- m->laundry = TRUE;
- vm_pageout_inactive_double++;
- }
- vm_page_unlock_queues();
-
- /*
- * If there is no memory object for the page, create
- * one and hand it to the default pager.
- * [First try to collapse, so we don't create
- * one unnecessarily.]
- */
-
- if (!object->pager_initialized)
- vm_object_collapse(object);
- if (!object->pager_initialized)
- vm_object_pager_create(object);
- if (!object->pager_initialized)
- panic("vm_pageout_scan");
-
- vm_pageout_inactive_dirty++;
- vm_pageout_page(m, FALSE, TRUE); /* flush it */
- vm_object_unlock(object);
- burst_count++;
+ if (done) {
+ return TRUE;
}
-}
-void vm_pageout_scan_continue(void)
-{
+ simple_unlock(&vm_page_queue_free_lock);
+
/*
- * We just paused to let the pagers catch up.
- * If vm_page_laundry_count is still high,
- * then we aren't waiting long enough.
- * If we have paused some vm_pageout_pause_max times without
- * adjusting vm_pageout_burst_wait, it might be too big,
- * so we decrease it.
+ * Balancing is not enough. Shrink caches and scan pages
+ * for eviction.
*/
- vm_page_lock_queues();
- if (vm_page_laundry_count > vm_pageout_burst_min) {
- vm_pageout_burst_wait++;
- vm_pageout_pause_count = 0;
- } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
- vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
- if (vm_pageout_burst_wait < 1)
- vm_pageout_burst_wait = 1;
- vm_pageout_pause_count = 0;
- }
- vm_page_unlock_queues();
-
- vm_pageout_continue();
- /*NOTREACHED*/
-}
-
-/*
- * vm_pageout is the high level pageout daemon.
- */
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+ if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
+ pointless to call consider_thread_collect. */
+ consider_thread_collect();
-void vm_pageout_continue(void)
-{
/*
- * The pageout daemon is never done, so loop forever.
- * We should call vm_pageout_scan at least once each
- * time we are woken, even if vm_page_free_wanted is
- * zero, to check vm_page_free_target and
- * vm_page_inactive_target.
+ * slab_collect should be last, because the other operations
+ * might return memory to caches.
*/
+ slab_collect();
- for (;;) {
- vm_pageout_scan();
- /* we hold vm_page_queue_free_lock now */
- assert(vm_page_free_wanted == 0);
+ vm_page_refill_inactive();
- assert_wait(&vm_page_free_wanted, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- counter(c_vm_pageout_block++);
- thread_block(vm_pageout_continue);
- }
+ /* This function returns with vm_page_queue_free_lock held */
+ return vm_page_evict(should_wait);
}
void vm_pageout(void)
{
- unsigned long free_after_reserve;
+ boolean_t done, should_wait;
current_thread()->vm_privilege = 1;
stack_privilege(current_thread());
thread_set_own_priority(0);
- /*
- * Initialize some paging parameters.
- */
-
- if (vm_pageout_burst_max == 0)
- vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
- if (vm_pageout_burst_min == 0)
- vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
- if (vm_pageout_burst_wait == 0)
- vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
- if (vm_pageout_empty_wait == 0)
- vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
- if (vm_page_free_reserved == 0)
- vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
- if (vm_pageout_pause_max == 0)
- vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
- if (vm_pageout_reserved_internal == 0)
- vm_pageout_reserved_internal =
- VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
- if (vm_pageout_reserved_really == 0)
- vm_pageout_reserved_really =
- VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
- free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
- if (vm_page_free_min == 0)
- vm_page_free_min = vm_page_free_reserved +
- VM_PAGE_FREE_MIN(free_after_reserve);
+ for (;;) {
+ done = vm_pageout_scan(&should_wait);
+ /* we hold vm_page_queue_free_lock now */
- if (vm_page_free_target == 0)
- vm_page_free_target = vm_page_free_reserved +
- VM_PAGE_FREE_TARGET(free_after_reserve);
+ if (done) {
+ thread_sleep(&vm_pageout_requested,
+ simple_lock_addr(vm_page_queue_free_lock),
+ FALSE);
+ } else if (should_wait) {
+ assert_wait(&vm_pageout_continue, FALSE);
+ thread_set_timeout(500);
+ simple_unlock(&vm_page_queue_free_lock);
+ thread_block(NULL);
+ } else {
+ simple_unlock(&vm_page_queue_free_lock);
+ }
+ }
+}
- if (vm_page_free_target < vm_page_free_min + 5)
- vm_page_free_target = vm_page_free_min + 5;
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
+{
+ if (!current_thread())
+ return;
- /*
- * vm_pageout_scan will set vm_page_inactive_target.
- */
+ thread_wakeup_one(&vm_pageout_requested);
+}
- vm_pageout_continue();
- /*NOTREACHED*/
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+ thread_wakeup_one(&vm_pageout_continue);
}
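
Illustrative aside, not part of the patch: the rewritten vm_pageout() loop above sleeps on vm_pageout_requested when the scan reports it is done, takes a timed wait on vm_pageout_continue when laundry is still in flight (should_wait), and otherwise rescans immediately; vm_pageout_start() and vm_pageout_resume() are the matching wakeups, both called with the free page queue lock held. Below is a rough pthread-based sketch of that protocol, assuming nothing beyond POSIX threads; pageout_daemon, scan_memory, pageout_start, pageout_resume and free_lock are hypothetical stand-ins, and the condition variables only approximate thread_sleep()/assert_wait()/thread_wakeup_one().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pageout_requested = PTHREAD_COND_INITIALIZER;
static pthread_cond_t pageout_continue = PTHREAD_COND_INITIALIZER;

/* Stand-in for vm_pageout_scan(): reports "done" and sets *should_wait. */
static bool scan_memory(bool *should_wait)
{
	*should_wait = false;	/* pretend memory is always fine */
	return true;
}

/* Analogue of vm_pageout_start(): caller holds free_lock. */
static void pageout_start(void)
{
	pthread_cond_signal(&pageout_requested);
}

/* Analogue of vm_pageout_resume(): caller holds free_lock. */
static void pageout_resume(void)
{
	pthread_cond_signal(&pageout_continue);
}

static void *pageout_daemon(void *arg)
{
	bool done, should_wait;

	(void)arg;

	for (;;) {
		pthread_mutex_lock(&free_lock);
		done = scan_memory(&should_wait);

		if (done) {
			/* Nothing to do: sleep until someone requests pageout. */
			pthread_cond_wait(&pageout_requested, &free_lock);
		} else if (should_wait) {
			/* Laundry in flight: wait for a resume or roughly 500ms. */
			struct timespec ts;

			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_nsec += 500 * 1000 * 1000;
			if (ts.tv_nsec >= 1000000000L) {
				ts.tv_sec++;
				ts.tv_nsec -= 1000000000L;
			}
			pthread_cond_timedwait(&pageout_continue, &free_lock, &ts);
		}
		/* else: progress was made, scan again right away. */
		pthread_mutex_unlock(&free_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, pageout_daemon, NULL);

	/* An allocator noticing memory pressure pokes the daemon: */
	pthread_mutex_lock(&free_lock);
	pageout_start();
	pthread_mutex_unlock(&free_lock);

	/* A completed laundry or external pageout would resume it: */
	pthread_mutex_lock(&free_lock);
	pageout_resume();
	pthread_mutex_unlock(&free_lock);

	printf("daemon started; the demo loops forever, stop with Ctrl-C\n");
	pthread_join(tid, NULL);	/* never returns */
	return 0;
}
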
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
index ea6cfaf4..6ddd821c 100644
--- a/vm/vm_pageout.h
+++ b/vm/vm_pageout.h
@@ -46,8 +46,8 @@ extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t);
extern void vm_pageout(void) __attribute__((noreturn));
-extern void vm_pageout_continue(void) __attribute__((noreturn));
+extern void vm_pageout_start(void);
-extern void vm_pageout_scan_continue(void) __attribute__((noreturn));
+extern void vm_pageout_resume(void);
#endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index eac0f50c..e276fe68 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -39,6 +39,7 @@
#include <mach/vm_prot.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/list.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -95,22 +96,13 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
unsigned long vm_page_bucket_count = 0; /* How big is array? */
unsigned long vm_page_hash_mask; /* Mask for hash function */
-vm_page_t vm_page_queue_fictitious;
+static struct list vm_page_queue_fictitious;
decl_simple_lock_data(,vm_page_queue_free_lock)
-unsigned int vm_page_free_wanted;
int vm_page_fictitious_count;
-int vm_page_external_count;
int vm_object_external_count;
int vm_object_external_pages;
/*
- * This variable isn't directly used. It's merely a placeholder for the
- * address used to synchronize threads waiting for pages to become
- * available. The real value is returned by vm_page_free_mem().
- */
-unsigned int vm_page_free_avail;
-
-/*
* Occasionally, the virtual memory system uses
* resident page structures that do not refer to
* real pages, for example to leave a page with
@@ -136,8 +128,6 @@ phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
* defined here, but are shared by the pageout
* module.
*/
-queue_head_t vm_page_queue_active;
-queue_head_t vm_page_queue_inactive;
decl_simple_lock_data(,vm_page_queue_lock)
int vm_page_active_count;
int vm_page_inactive_count;
@@ -149,11 +139,8 @@ int vm_page_wire_count;
* (done here in vm_page_alloc) can trigger the
* pageout daemon.
*/
-int vm_page_free_target = 0;
-int vm_page_free_min = 0;
-int vm_page_inactive_target = 0;
-int vm_page_free_reserved = 0;
int vm_page_laundry_count = 0;
+int vm_page_external_pagedout = 0;
/*
@@ -191,11 +178,7 @@ void vm_page_bootstrap(
simple_lock_init(&vm_page_queue_free_lock);
simple_lock_init(&vm_page_queue_lock);
- vm_page_queue_fictitious = VM_PAGE_NULL;
- queue_init(&vm_page_queue_active);
- queue_init(&vm_page_queue_inactive);
-
- vm_page_free_wanted = 0;
+ list_init(&vm_page_queue_fictitious);
/*
* Allocate (and initialize) the virtual-to-physical
@@ -330,6 +313,7 @@ void vm_page_module_init(void)
* table and object list.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_insert(
@@ -407,6 +391,7 @@ void vm_page_insert(
* and we don't do deactivate-behind.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_replace(
@@ -457,6 +442,7 @@ void vm_page_replace(
listq);
m->tabled = FALSE;
object->resident_page_count--;
+ VM_PAGE_QUEUES_REMOVE(m);
if (m->external) {
m->external = FALSE;
@@ -501,9 +487,10 @@ void vm_page_replace(
* vm_page_remove: [ internal use only ]
*
* Removes the given mem entry from the object/offset-page
- * table and the object page list.
+ * table, the object page list, and the page queues.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_remove(
@@ -551,6 +538,8 @@ void vm_page_remove(
mem->tabled = FALSE;
+ VM_PAGE_QUEUES_REMOVE(mem);
+
if (mem->external) {
mem->external = FALSE;
vm_object_external_pages--;
@@ -665,11 +654,15 @@ vm_page_t vm_page_grab_fictitious(void)
vm_page_t m;
simple_lock(&vm_page_queue_free_lock);
- m = vm_page_queue_fictitious;
- if (m != VM_PAGE_NULL) {
- vm_page_fictitious_count--;
- vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
+ if (list_empty(&vm_page_queue_fictitious)) {
+ m = VM_PAGE_NULL;
+ } else {
+ m = list_first_entry(&vm_page_queue_fictitious,
+ struct vm_page, node);
+ assert(m->fictitious);
+ list_remove(&m->node);
m->free = FALSE;
+ vm_page_fictitious_count--;
}
simple_unlock(&vm_page_queue_free_lock);
@@ -689,8 +682,7 @@ static void vm_page_release_fictitious(
if (m->free)
panic("vm_page_release_fictitious");
m->free = TRUE;
- m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
- vm_page_queue_fictitious = m;
+ list_insert_head(&vm_page_queue_fictitious, &m->node);
vm_page_fictitious_count++;
simple_unlock(&vm_page_queue_free_lock);
}
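
Illustrative aside, not part of the patch: the two hunks above replace the hand-rolled singly linked vm_page_queue_fictitious with an intrusive list threaded through the page's embedded node field. The self-contained user-space sketch below mimics the same grab/release pattern; the list_* helpers are a minimal stand-in written for the example and are only assumed to match the behaviour of the kern/list.h API used by the patch (list_init, list_empty, list_insert_head, list_first_entry, list_remove).

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node { struct node *next, *prev; };

static void list_init(struct node *head) { head->next = head->prev = head; }
static bool list_empty(const struct node *head) { return head->next == head; }

static void list_insert_head(struct node *head, struct node *n)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_remove(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* list_first_entry: recover the enclosing structure from its list node. */
#define list_first_entry(head, type, member) \
	((type *)((char *)(head)->next - offsetof(type, member)))

struct page {
	struct node node;	/* models the page's embedded "node" field */
	bool free;
};

static struct node fictitious_queue;

static void release_fictitious(struct page *p)
{
	assert(!p->free);
	p->free = true;
	list_insert_head(&fictitious_queue, &p->node);
}

static struct page *grab_fictitious(void)
{
	struct page *p;

	if (list_empty(&fictitious_queue))
		return NULL;

	p = list_first_entry(&fictitious_queue, struct page, node);
	list_remove(&p->node);
	p->free = false;
	return p;
}

int main(void)
{
	struct page a = { .free = false };

	list_init(&fictitious_queue);
	release_fictitious(&a);
	printf("grabbed %p\n", (void *)grab_fictitious());
	return 0;
}
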
@@ -779,18 +771,6 @@ vm_page_t vm_page_grab(void)
simple_lock(&vm_page_queue_free_lock);
- /*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
- */
-
- if ((vm_page_mem_free() < vm_page_free_reserved)
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
if (mem == NULL) {
@@ -801,22 +781,6 @@ vm_page_t vm_page_grab(void)
mem->free = FALSE;
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
@@ -836,38 +800,37 @@ phys_addr_t vm_page_grab_phys_addr(void)
*/
void vm_page_release(
- vm_page_t mem)
+ vm_page_t mem,
+ boolean_t laundry,
+ boolean_t external)
{
simple_lock(&vm_page_queue_free_lock);
if (mem->free)
panic("vm_page_release");
mem->free = TRUE;
vm_page_free_pa(mem, 0);
+ if (laundry) {
+ vm_page_laundry_count--;
- /*
- * Check if we should wake up someone waiting for page.
- * But don't bother waking them unless they can allocate.
- *
- * We wakeup only one thread, to prevent starvation.
- * Because the scheduling system handles wait queues FIFO,
- * if we wakeup all waiting threads, one greedy thread
- * can starve multiple niceguy threads. When the threads
- * all wakeup, the greedy threads runs first, grabs the page,
- * and waits for another page. It will be the first to run
- * when the next page is freed.
- *
- * However, there is a slight danger here.
- * The thread we wake might not use the free page.
- * Then the other threads could wait indefinitely
- * while the page goes unused. To forestall this,
- * the pageout daemon will keep making free pages
- * as long as vm_page_free_wanted is non-zero.
- */
+ if (vm_page_laundry_count == 0) {
+ vm_pageout_resume();
+ }
+ }
+ if (external) {
+
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout > 0) {
+ vm_page_external_pagedout--;
+ }
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
+ if (vm_page_external_pagedout == 0) {
+ vm_pageout_resume();
+ }
}
simple_unlock(&vm_page_queue_free_lock);
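
Illustrative aside, not part of the patch: with the hunk above, vm_page_release() also acts as the completion callback for pageouts, so freeing the last laundry page or the last tracked external page wakes the blocked daemon via vm_pageout_resume(). The short stand-alone C model below mirrors that accounting; laundry_count, external_pagedout, resume_pageout and page_released are local stand-ins for the kernel counters and wakeup, and in the real code everything runs under the free page queue lock.

#include <stdbool.h>
#include <stdio.h>

static int laundry_count;		/* laundry pages still being written */
static int external_pagedout = -1;	/* negative: daemon not expecting notifications */

static void resume_pageout(void)
{
	printf("wake pageout daemon\n");
}

static void page_released(bool laundry, bool external)
{
	if (laundry) {
		laundry_count--;
		if (laundry_count == 0)
			resume_pageout();	/* last laundry page came back */
	}

	if (external) {
		/* A negative count means the daemon isn't waiting for us. */
		if (external_pagedout > 0)
			external_pagedout--;
		if (external_pagedout == 0)
			resume_pageout();	/* all external pageouts done */
	}
}

int main(void)
{
	/* Daemon not expecting notifications: nothing happens. */
	page_released(false, true);

	/* Daemon waiting for one laundry page and two external pageouts. */
	laundry_count = 1;
	external_pagedout = 2;
	page_released(true, false);	/* wakes the daemon: laundry drained */
	page_released(false, true);
	page_released(false, true);	/* wakes the daemon: external pageouts done */
	return 0;
}
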
@@ -892,18 +855,6 @@ vm_page_t vm_page_grab_contig(
simple_lock(&vm_page_queue_free_lock);
- /*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
- */
-
- if (((vm_page_mem_free() - nr_pages) <= vm_page_free_reserved)
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
/* TODO Allow caller to pass type */
mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
@@ -918,22 +869,6 @@ vm_page_t vm_page_grab_contig(
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
@@ -961,52 +896,10 @@ void vm_page_free_contig(vm_page_t mem, vm_size_t size)
vm_page_free_pa(mem, order);
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
- }
-
simple_unlock(&vm_page_queue_free_lock);
}
/*
- * vm_page_wait:
- *
- * Wait for a page to become available.
- * If there are plenty of free pages, then we don't sleep.
- */
-
-void vm_page_wait(
- void (*continuation)(void))
-{
-
- /*
- * We can't use vm_page_free_reserved to make this
- * determination. Consider: some thread might
- * need to allocate two pages. The first allocation
- * succeeds, the second fails. After the first page is freed,
- * a call to vm_page_wait must really block.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- if ((vm_page_mem_free() < vm_page_free_target)) {
- if (vm_page_free_wanted++ == 0)
- thread_wakeup((event_t)&vm_page_free_wanted);
- assert_wait((event_t)&vm_page_free_avail, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- if (continuation != 0) {
- counter(c_vm_page_wait_block_user++);
- thread_block(continuation);
- } else {
- counter(c_vm_page_wait_block_kernel++);
- thread_block((void (*)(void)) 0);
- }
- } else
- simple_unlock(&vm_page_queue_free_lock);
-}
-
-/*
* vm_page_alloc:
*
* Allocate and return a memory cell associated
@@ -1046,9 +939,11 @@ void vm_page_free(
if (mem->free)
panic("vm_page_free");
- if (mem->tabled)
+ if (mem->tabled) {
vm_page_remove(mem);
- VM_PAGE_QUEUES_REMOVE(mem);
+ }
+
+ assert(!mem->active && !mem->inactive);
if (mem->wire_count != 0) {
if (!mem->private && !mem->fictitious)
@@ -1056,11 +951,6 @@ void vm_page_free(
mem->wire_count = 0;
}
- if (mem->laundry) {
- vm_page_laundry_count--;
- mem->laundry = FALSE;
- }
-
PAGE_WAKEUP_DONE(mem);
if (mem->absent)
@@ -1077,116 +967,10 @@ void vm_page_free(
mem->fictitious = TRUE;
vm_page_release_fictitious(mem);
} else {
+ boolean_t laundry = mem->laundry;
+ boolean_t external = mem->external;
vm_page_init(mem);
- vm_page_release(mem);
- }
-}
-
-/*
- * vm_page_wire:
- *
- * Mark this page as wired down by yet
- * another map, removing it from paging queues
- * as necessary.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_wire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (mem->wire_count == 0) {
- VM_PAGE_QUEUES_REMOVE(mem);
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count++;
- }
- mem->wire_count++;
-}
-
-/*
- * vm_page_unwire:
- *
- * Release one wiring of this page, potentially
- * enabling it to be paged again.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_unwire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (--mem->wire_count == 0) {
- queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
- vm_page_active_count++;
- mem->active = TRUE;
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count--;
- }
-}
-
-/*
- * vm_page_deactivate:
- *
- * Returns the given page to the inactive list,
- * indicating that no physical maps have access
- * to this page. [Used by the physical mapping system.]
- *
- * The page queues must be locked.
- */
-void vm_page_deactivate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- /*
- * This page is no longer very interesting. If it was
- * interesting (active or inactive/referenced), then we
- * clear the reference bit and (re)enter it in the
- * inactive queue. Note wired pages should not have
- * their reference bit cleared.
- */
-
- if (m->active || (m->inactive && m->reference)) {
- if (!m->fictitious && !m->absent)
- pmap_clear_reference(m->phys_addr);
- m->reference = FALSE;
- VM_PAGE_QUEUES_REMOVE(m);
- }
- if (m->wire_count == 0 && !m->inactive) {
- queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
- }
-}
-
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- *
- * The page queues must be locked.
- */
-
-void vm_page_activate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- if (m->inactive) {
- queue_remove(&vm_page_queue_inactive, m, vm_page_t,
- pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
- }
- if (m->wire_count == 0) {
- if (m->active)
- panic("vm_page_activate: already active");
-
- queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
- m->active = TRUE;
- vm_page_active_count++;
+ vm_page_release(mem, laundry, external);
}
}
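
Illustrative aside, not part of the patch: the last hunk makes vm_page_free() capture the laundry and external flags before vm_page_init() resets the page and then forward them to vm_page_release(), so the accounting still sees them even though the flags themselves have been cleared. A trivial sketch of that ordering constraint, using hypothetical page_init/page_release/page_free stand-ins and modelling vm_page_init() as a plain reset:

#include <stdbool.h>
#include <string.h>

struct page {
	bool laundry;
	bool external;
	/* ... other fields reset on reinitialization ... */
};

static void page_init(struct page *p)
{
	memset(p, 0, sizeof(*p));	/* wipes laundry/external too */
}

static void page_release(struct page *p, bool laundry, bool external)
{
	(void)p; (void)laundry; (void)external;
	/* would update the laundry/external accounting, then free the frame */
}

static void page_free(struct page *p)
{
	/* Save the flags first: page_init() is about to clear them. */
	bool laundry = p->laundry;
	bool external = p->external;

	page_init(p);
	page_release(p, laundry, external);
}

int main(void)
{
	struct page p = { .laundry = true, .external = false };

	page_free(&p);
	return 0;
}
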