author     Richard Braun <rbraun@sceen.net>   2016-09-20 23:44:23 +0200
committer  Richard Braun <rbraun@sceen.net>   2016-09-21 00:21:42 +0200
commit     5d1258459ad618481a4f239e8ce020bdecda1d3f (patch)
tree       507285e032d20fc29237e5a415e9d76380130607
parent     783ad37f65384994dfa5387ab3847a8a4d77b90b (diff)
Rework pageout to handle multiple segments
As we're about to use a new HIGHMEM segment, potentially much larger
than the existing DMA and DIRECTMAP ones, it's now compulsory to make
the pageout daemon aware of those segments. And while we're at it,
let's fix some of the defects that have been plaguing pageout forever,
such as throttling, and pageout of internal versus external pages
(this commit notably introduces a hardcoded policy in which as many
external pages as possible are selected before considering internal
pages).

* kern/slab.c (kmem_pagefree_physmem): Update call to vm_page_release.
* vm/vm_page.c: Include <kern/counters.h> and <vm/vm_pageout.h>.
(VM_PAGE_SEG_THRESHOLD_MIN_NUM, VM_PAGE_SEG_THRESHOLD_MIN_DENOM,
VM_PAGE_SEG_THRESHOLD_MIN, VM_PAGE_SEG_THRESHOLD_LOW_NUM,
VM_PAGE_SEG_THRESHOLD_LOW_DENOM, VM_PAGE_SEG_THRESHOLD_LOW,
VM_PAGE_SEG_THRESHOLD_HIGH_NUM, VM_PAGE_SEG_THRESHOLD_HIGH_DENOM,
VM_PAGE_SEG_THRESHOLD_HIGH, VM_PAGE_SEG_MIN_PAGES,
VM_PAGE_HIGH_ACTIVE_PAGE_NUM, VM_PAGE_HIGH_ACTIVE_PAGE_DENOM): New macros.
(struct vm_page_queue): New type.
(struct vm_page_seg): Add new members `min_free_pages', `low_free_pages',
`high_free_pages', `active_pages', `nr_active_pages', `high_active_pages',
`inactive_pages', `nr_inactive_pages'.
(vm_page_alloc_paused): New variable.
(vm_page_pageable, vm_page_can_move, vm_page_remove_mappings): New functions.
(vm_page_seg_alloc_from_buddy): Pause allocations and start the pageout
daemon as appropriate.
(vm_page_queue_init, vm_page_queue_push, vm_page_queue_remove,
vm_page_queue_first, vm_page_seg_get, vm_page_seg_index,
vm_page_seg_compute_pageout_thresholds): New functions.
(vm_page_seg_init): Initialize the new segment members.
(vm_page_seg_add_active_page, vm_page_seg_remove_active_page,
vm_page_seg_add_inactive_page, vm_page_seg_remove_inactive_page,
vm_page_seg_pull_active_page, vm_page_seg_pull_inactive_page,
vm_page_seg_pull_cache_page): New functions.
(vm_page_seg_min_page_available, vm_page_seg_page_available,
vm_page_seg_usable, vm_page_seg_double_lock, vm_page_seg_double_unlock,
vm_page_seg_balance_page, vm_page_seg_balance, vm_page_seg_evict,
vm_page_seg_compute_high_active_page, vm_page_seg_refill_inactive,
vm_page_lookup_seg, vm_page_check): New functions.
(vm_page_alloc_pa): Handle allocation failure from VM privileged thread.
(vm_page_info_all): Display additional segment properties.
(vm_page_wire, vm_page_unwire, vm_page_deactivate, vm_page_activate,
vm_page_wait): Move from vm/vm_resident.c and rewrite to use segments.
(vm_page_queues_remove, vm_page_check_usable, vm_page_may_balance,
vm_page_balance_once, vm_page_balance, vm_page_evict_once): New functions.
(VM_PAGE_MAX_LAUNDRY, VM_PAGE_MAX_EVICTIONS): New macros.
(vm_page_evict, vm_page_refill_inactive): New functions.
* vm/vm_page.h: Include <kern/list.h>.
(struct vm_page): Remove member `pageq', reuse the `node' member instead,
move the `listq' and `next' members above `vm_page_header'.
(VM_PAGE_CHECK): Define as an alias to vm_page_check.
(vm_page_check): New function declaration.
(vm_page_queue_fictitious, vm_page_queue_active, vm_page_queue_inactive,
vm_page_free_target, vm_page_free_min, vm_page_inactive_target,
vm_page_free_reserved, vm_page_free_wanted): Remove extern declarations.
(vm_page_external_pagedout): New extern declaration.
(vm_page_release): Update declaration.
(VM_PAGE_QUEUES_REMOVE): Define as an alias to vm_page_queues_remove.
(VM_PT_PMAP, VM_PT_KMEM, VM_PT_STACK): Remove macros.
(VM_PT_KERNEL): Update value.
(vm_page_queues_remove, vm_page_balance, vm_page_evict,
vm_page_refill_inactive): New function declarations.
* vm/vm_pageout.c (VM_PAGEOUT_BURST_MAX, VM_PAGEOUT_BURST_MIN,
VM_PAGEOUT_BURST_WAIT, VM_PAGEOUT_EMPTY_WAIT, VM_PAGEOUT_PAUSE_MAX,
VM_PAGE_INACTIVE_TARGET, VM_PAGE_FREE_TARGET, VM_PAGE_FREE_MIN,
VM_PAGE_FREE_RESERVED, VM_PAGEOUT_RESERVED_INTERNAL,
VM_PAGEOUT_RESERVED_REALLY): Remove macros.
(vm_pageout_reserved_internal, vm_pageout_reserved_really,
vm_pageout_burst_max, vm_pageout_burst_min, vm_pageout_burst_wait,
vm_pageout_empty_wait, vm_pageout_pause_count, vm_pageout_pause_max,
vm_pageout_active, vm_pageout_inactive, vm_pageout_inactive_nolock,
vm_pageout_inactive_busy, vm_pageout_inactive_absent,
vm_pageout_inactive_used, vm_pageout_inactive_clean,
vm_pageout_inactive_dirty, vm_pageout_inactive_double,
vm_pageout_inactive_cleaned_external): Remove variables.
(vm_pageout_requested, vm_pageout_continue): New variables.
(vm_pageout_setup): Wait for page allocation to succeed instead of
falling back to flush, update double paging protocol with caller,
add pageout throttling setup.
(vm_pageout_scan): Rewrite to use the new vm_page balancing, eviction
and inactive queue refill functions.
(vm_pageout_scan_continue, vm_pageout_continue): Remove functions.
(vm_pageout): Rewrite.
(vm_pageout_start, vm_pageout_resume): New functions.
* vm/vm_pageout.h (vm_pageout_continue, vm_pageout_scan_continue):
Remove function declarations.
(vm_pageout_start, vm_pageout_resume): New function declarations.
* vm/vm_resident.c: Include <kern/list.h>.
(vm_page_queue_fictitious): Define as a struct list.
(vm_page_free_wanted, vm_page_external_count, vm_page_free_avail,
vm_page_queue_active, vm_page_queue_inactive, vm_page_free_target,
vm_page_free_min, vm_page_inactive_target, vm_page_free_reserved):
Remove variables.
(vm_page_external_pagedout): New variable.
(vm_page_bootstrap): Don't initialize removed variable, update
initialization of vm_page_queue_fictitious.
(vm_page_replace): Call VM_PAGE_QUEUES_REMOVE where appropriate.
(vm_page_remove): Likewise.
(vm_page_grab_fictitious): Update to use list_xxx functions.
(vm_page_release_fictitious): Likewise.
(vm_page_grab): Remove pageout related code.
(vm_page_release): Add `laundry' and `external' parameters for
pageout throttling.
(vm_page_grab_contig): Remove pageout related code.
(vm_page_free_contig): Likewise.
(vm_page_free): Remove pageout related code, update call to
vm_page_release.
(vm_page_wait, vm_page_wire, vm_page_unwire, vm_page_deactivate,
vm_page_activate): Move to vm/vm_page.c.
-rw-r--r--   kern/slab.c         2
-rw-r--r--   vm/vm_page.c     1244
-rw-r--r--   vm/vm_page.h      114
-rw-r--r--   vm/vm_pageout.c   649
-rw-r--r--   vm/vm_pageout.h     4
-rw-r--r--   vm/vm_resident.c  316
6 files changed, 1457 insertions, 872 deletions
diff --git a/kern/slab.c b/kern/slab.c
index 9d21c428..d4ef847e 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -389,7 +389,7 @@ kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size)
assert(size == PAGE_SIZE);
page = vm_page_lookup_pa(kvtophys(addr));
assert(page != NULL);
- vm_page_release(page);
+ vm_page_release(page, FALSE, FALSE);
}
static vm_offset_t
diff --git a/vm/vm_page.c b/vm/vm_page.c
index f966e4dc..4c11ea7a 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -27,10 +27,13 @@
* multiprocessor systems. When a pool is empty and cannot provide a page,
* it is filled by transferring multiple pages from the backend buddy system.
* The symmetric case is handled likewise.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
*/
#include <string.h>
#include <kern/assert.h>
+#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/list.h>
@@ -42,6 +45,7 @@
#include <machine/pmap.h>
#include <sys/types.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#define DEBUG 0
@@ -100,12 +104,96 @@ struct vm_page_free_list {
};
/*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process is dependent on the amount of physical memory, we scale
+ * the minimum number of free pages from it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+ struct list internal_pages;
+ struct list external_pages;
+};
+
+/*
* Segment name buffer size.
*/
#define VM_PAGE_NAME_SIZE 16
/*
* Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since one or both of the
+ * page queues lock and the free page queue lock is held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
*/
struct vm_page_seg {
struct vm_page_cpu_pool cpu_pools[NCPUS];
@@ -117,6 +205,19 @@ struct vm_page_seg {
simple_lock_data_t lock;
struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
unsigned long nr_free_pages;
+
+ /* Free memory thresholds */
+ unsigned long min_free_pages; /* Privileged allocations only */
+ unsigned long low_free_pages; /* Pageout daemon starts scanning */
+ unsigned long high_free_pages; /* Pageout daemon stops scanning,
+ unprivileged allocations resume */
+
+ /* Page cache related data */
+ struct vm_page_queue active_pages;
+ unsigned long nr_active_pages;
+ unsigned long high_active_pages;
+ struct vm_page_queue inactive_pages;
+ unsigned long nr_inactive_pages;
};
/*
@@ -160,6 +261,16 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
*/
static unsigned int vm_page_segs_size __read_mostly;
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
static void __init
vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
{
@@ -183,6 +294,40 @@ vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
page[i].type = type;
}
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+ return (page->object != NULL)
+ && (page->wire_count == 0)
+ && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+ /*
+ * This function is called on pages pulled from the page queues,
+ * implying they're pageable, which is why the wire count isn't
+ * checked here.
+ */
+
+ return !page->busy
+ && !page->wanted
+ && !page->absent
+ && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+ page->busy = TRUE;
+ pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+ if (!page->dirty) {
+ page->dirty = pmap_is_modified(page->phys_addr);
+ }
+}
+
static void __init
vm_page_free_list_init(struct vm_page_free_list *free_list)
{
@@ -219,6 +364,19 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
assert(order < VM_PAGE_NR_FREE_LISTS);
+ if (vm_page_alloc_paused && current_thread()
+ && !current_thread()->vm_privilege) {
+ return NULL;
+ } else if (seg->nr_free_pages <= seg->low_free_pages) {
+ vm_pageout_start();
+
+ if ((seg->nr_free_pages <= seg->min_free_pages)
+ && current_thread() && !current_thread()->vm_privilege) {
+ vm_page_alloc_paused = TRUE;
+ return NULL;
+ }
+ }
+
for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
free_list = &seg->free_lists[i];
@@ -241,6 +399,11 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
}
seg->nr_free_pages -= (1 << order);
+
+ if (seg->nr_free_pages < seg->min_free_pages) {
+ vm_page_alloc_paused = TRUE;
+ }
+
return page;
}
@@ -364,6 +527,65 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
simple_unlock(&seg->lock);
}
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+ list_init(&queue->internal_pages);
+ list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+ if (page->external) {
+ list_insert_tail(&queue->external_pages, &page->node);
+ } else {
+ list_insert_tail(&queue->internal_pages, &page->node);
+ }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+ (void)queue;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+ struct vm_page *page;
+
+ if (!list_empty(&queue->external_pages)) {
+ page = list_first_entry(&queue->external_pages, struct vm_page, node);
+ return page;
+ }
+
+ if (!external_only && !list_empty(&queue->internal_pages)) {
+ page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+ return page;
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+ assert(index < vm_page_segs_size);
+ return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+ unsigned int index;
+
+ index = seg - vm_page_segs;
+ assert(index < vm_page_segs_size);
+ return index;
+}
+
static phys_addr_t __init
vm_page_seg_size(struct vm_page_seg *seg)
{
@@ -386,6 +608,39 @@ vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
}
static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+ if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+ panic("vm_page: segment too small");
+ }
+
+ seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+ / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+ if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+ seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+ }
+
+ seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+ / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+ if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+ seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+ }
+
+ seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+ / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+ if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+ seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+ }
+}
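
To make the effect of these ratios concrete, here is a rough worked example for a hypothetical 512 MiB segment, i.e. 131072 pages of 4 KiB (the actual numbers depend on the segment sizes detected at boot; the fixed floors only kick in for much smaller segments):

/*
 * nr_pages        = 131072
 * min_free_pages  = 131072 *  5 / 100 =  6553   (floor VM_PAGE_SEG_THRESHOLD_MIN  = 500)
 * low_free_pages  = 131072 *  6 / 100 =  7864   (floor VM_PAGE_SEG_THRESHOLD_LOW  = 600)
 * high_free_pages = 131072 * 10 / 100 = 13107   (floor VM_PAGE_SEG_THRESHOLD_HIGH = 1000)
 */

Per the allocation path above, the pageout daemon is started once the segment's free pages drop to low_free_pages, unprivileged allocations pause once they drop to min_free_pages, and both resume only when every segment is back at or above high_free_pages.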
+
+static void __init
vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
struct vm_page *pages)
{
@@ -408,7 +663,15 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
vm_page_free_list_init(&seg->free_lists[i]);
seg->nr_free_pages = 0;
- i = seg - vm_page_segs;
+
+ vm_page_seg_compute_pageout_thresholds(seg);
+
+ vm_page_queue_init(&seg->active_pages);
+ seg->nr_active_pages = 0;
+ vm_page_queue_init(&seg->inactive_pages);
+ seg->nr_inactive_pages = 0;
+
+ i = vm_page_seg_index(seg);
for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
@@ -485,6 +748,502 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
}
}
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->active = TRUE;
+ page->reference = TRUE;
+ vm_page_queue_push(&seg->active_pages, page);
+ seg->nr_active_pages++;
+ vm_page_active_count++;
+}
+
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && page->active && !page->inactive);
+ page->active = FALSE;
+ vm_page_queue_remove(&seg->active_pages, page);
+ seg->nr_active_pages--;
+ vm_page_active_count--;
+}
+
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->inactive = TRUE;
+ vm_page_queue_push(&seg->inactive_pages, page);
+ seg->nr_inactive_pages++;
+ vm_page_inactive_count++;
+}
+
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && page->inactive);
+ page->inactive = FALSE;
+ vm_page_queue_remove(&seg->inactive_pages, page);
+ seg->nr_inactive_pages--;
+ vm_page_inactive_count--;
+}
+
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->active_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_active_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_active_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_active_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_inactive_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_inactive_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+ boolean_t external_only,
+ boolean_t *was_active)
+{
+ struct vm_page *page;
+
+ page = vm_page_seg_pull_inactive_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = FALSE;
+ return page;
+ }
+
+ page = vm_page_seg_pull_active_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = TRUE;
+ return page;
+ }
+
+ return NULL;
+}
+
+static boolean_t
+vm_page_seg_min_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->min_free_pages);
+}
+
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->high_free_pages);
+}
+
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages >= seg->high_free_pages);
+}
+
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ assert(seg1 != seg2);
+
+ if (seg1 < seg2) {
+ simple_lock(&seg1->lock);
+ simple_lock(&seg2->lock);
+ } else {
+ simple_lock(&seg2->lock);
+ simple_lock(&seg1->lock);
+ }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ simple_unlock(&seg1->lock);
+ simple_unlock(&seg2->lock);
+}
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+ struct vm_page_seg *remote_seg)
+{
+ struct vm_page *src, *dest;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t was_active;
+
+ vm_page_lock_queues();
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_seg_double_lock(seg, remote_seg);
+
+ if (vm_page_seg_usable(seg)
+ || !vm_page_seg_page_available(remote_seg)) {
+ goto error;
+ }
+
+ src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+ if (src == NULL) {
+ goto error;
+ }
+
+ assert(src->object != NULL);
+ assert(!src->fictitious && !src->private);
+ assert(src->wire_count == 0);
+ assert(src->type != VM_PT_FREE);
+ assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+ dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+ assert(dest != NULL);
+
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+ src->reference = TRUE;
+ }
+
+ object = src->object;
+ offset = src->offset;
+ vm_page_remove(src);
+
+ vm_page_remove_mappings(src);
+
+ vm_page_set_type(dest, 0, src->type);
+ memcpy(&dest->vm_page_header, &src->vm_page_header,
+ sizeof(*dest) - VM_PAGE_HEADER_SIZE);
+ vm_page_copy(src, dest);
+
+ if (!src->dirty) {
+ pmap_clear_modify(dest->phys_addr);
+ }
+
+ dest->busy = FALSE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_init(src);
+ src->free = TRUE;
+ simple_lock(&seg->lock);
+ vm_page_set_type(src, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(seg, src, 0);
+ simple_unlock(&seg->lock);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ vm_page_insert(dest, object, offset);
+ vm_object_unlock(object);
+
+ if (was_active) {
+ vm_page_activate(dest);
+ } else {
+ vm_page_deactivate(dest);
+ }
+
+ vm_page_unlock_queues();
+
+ return TRUE;
+
+error:
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+ struct vm_page_seg *remote_seg;
+ unsigned int i;
+ boolean_t balanced;
+
+ /*
+ * It's important here that pages are moved to lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ remote_seg = vm_page_seg_get(i);
+
+ if (remote_seg == seg) {
+ continue;
+ }
+
+ balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg,
+ boolean_t external_only, boolean_t low_memory)
+{
+ struct vm_page *page;
+ boolean_t reclaim, laundry;
+ vm_object_t object;
+ boolean_t was_active;
+
+ page = NULL;
+ object = NULL;
+
+restart:
+ vm_page_lock_queues();
+ simple_lock(&seg->lock);
+
+ if (page != NULL) {
+ vm_object_lock(page->object);
+ } else {
+ page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+ if (page == NULL) {
+ goto out;
+ }
+ }
+
+ assert(page->object != NULL);
+ assert(!page->fictitious && !page->private);
+ assert(page->wire_count == 0);
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ object = page->object;
+
+ if (!was_active
+ && (page->reference || pmap_is_referenced(page->phys_addr))) {
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ vm_object_unlock(object);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ vm_page_unlock_queues();
+ page = NULL;
+ goto restart;
+ }
+
+ vm_page_remove_mappings(page);
+
+ if (!page->dirty && !page->precious) {
+ reclaim = TRUE;
+ goto out;
+ }
+
+ reclaim = FALSE;
+
+ /*
+ * If we are very low on memory, then we can't rely on an external
+ * pager to clean a dirty page, because external pagers are not
+ * vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup not to do any special
+ * processing of this page since it's immediately going to be
+ * double paged out to the default pager. The laundry bit is
+ * reset and the page is inserted into an internal object by
+ * vm_pageout_setup before the double paging pass.
+ */
+
+ assert(!page->laundry);
+
+ if (object->internal || !low_memory) {
+ laundry = FALSE;
+ } else {
+ laundry = page->laundry = TRUE;
+ }
+
+out:
+ simple_unlock(&seg->lock);
+
+ if (object == NULL) {
+ vm_page_unlock_queues();
+ return FALSE;
+ }
+
+ if (reclaim) {
+ vm_page_free(page);
+ vm_page_unlock_queues();
+
+ if (vm_object_collectable(object)) {
+ vm_object_collect(object);
+ } else {
+ vm_object_unlock(object);
+ }
+
+ return TRUE;
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create one and hand it
+ * to the default pager. First try to collapse, so we don't create
+ * one unnecessarily.
+ */
+
+ if (!object->pager_initialized) {
+ vm_object_collapse(object);
+ }
+
+ if (!object->pager_initialized) {
+ vm_object_pager_create(object);
+ }
+
+ if (!object->pager_initialized) {
+ panic("vm_page_seg_evict");
+ }
+
+ vm_pageout_page(page, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+
+ if (laundry) {
+ goto restart;
+ }
+
+ return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+ seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+ / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
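
As an illustration with made-up numbers: a segment holding 30000 active and 60000 inactive pages gets high_active_pages = 90000 * 1 / 3 = 30000, so vm_page_seg_refill_inactive below only starts deactivating pages once more than a third of the segment's pageable pages are active.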
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+
+ simple_lock(&seg->lock);
+
+ vm_page_seg_compute_high_active_page(seg);
+
+ while (seg->nr_active_pages > seg->high_active_pages) {
+ page = vm_page_seg_pull_active_page(seg, FALSE);
+
+ if (page == NULL) {
+ break;
+ }
+
+ page->reference = FALSE;
+ pmap_clear_reference(page->phys_addr);
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
void __init
vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
{
@@ -712,6 +1471,77 @@ vm_page_lookup_pa(phys_addr_t pa)
return NULL;
}
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+ if (page->fictitious) {
+ if (page->private) {
+ panic("vm_page: page both fictitious and private");
+ }
+
+ if (page->phys_addr != vm_page_fictitious_addr) {
+ panic("vm_page: invalid fictitious page");
+ }
+ } else {
+ struct vm_page_seg *seg;
+
+ if (page->phys_addr == vm_page_fictitious_addr) {
+ panic("vm_page: real page has fictitious address");
+ }
+
+ seg = vm_page_lookup_seg(page);
+
+ if (seg == NULL) {
+ if (!page->private) {
+ panic("vm_page: page claims it's managed but not in any segment");
+ }
+ } else {
+ if (page->private) {
+ struct vm_page *real_page;
+
+ if (vm_page_pageable(page)) {
+ panic("vm_page: private page is pageable");
+ }
+
+ real_page = vm_page_lookup_pa(page->phys_addr);
+
+ if (vm_page_pageable(real_page)) {
+ panic("vm_page: page underlying private page is pageable");
+ }
+
+ if ((real_page->type == VM_PT_FREE)
+ || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+ panic("vm_page: page underlying private pagei is free");
+ }
+ } else {
+ unsigned int index;
+
+ index = vm_page_seg_index(seg);
+
+ if (index != page->seg_index) {
+ panic("vm_page: page segment mismatch");
+ }
+ }
+ }
+ }
+}
+
struct vm_page *
vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
{
@@ -725,8 +1555,8 @@ vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
return page;
}
- if (type == VM_PT_PMAP)
- panic("vm_page: unable to allocate pmap page");
+ if (!current_thread() || current_thread()->vm_privilege)
+ panic("vm_page: privileged thread unable to allocate page");
return NULL;
}
@@ -769,6 +1599,9 @@ vm_page_info_all(void)
printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+ vm_page_seg_name(vm_page_seg_index(seg)),
+ seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
}
}
@@ -879,3 +1712,408 @@ vm_page_mem_free(void)
return total;
}
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+ VM_PAGE_CHECK(page);
+
+ if (page->wire_count == 0) {
+ vm_page_queues_remove(page);
+
+ if (!page->private && !page->fictitious) {
+ vm_page_wire_count++;
+ }
+ }
+
+ page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ assert(page->wire_count != 0);
+ page->wire_count--;
+
+ if ((page->wire_count != 0)
+ || page->fictitious
+ || page->private) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+
+ vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (page->active || (page->inactive && page->reference)) {
+ if (!page->fictitious && !page->private && !page->absent) {
+ pmap_clear_reference(page->phys_addr);
+ }
+
+ page->reference = FALSE;
+ vm_page_queues_remove(page);
+ }
+
+ if ((page->wire_count == 0) && !page->fictitious
+ && !page->private && !page->inactive) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_inactive_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * Unconditionally remove so that, even if the page was already
+ * active, it gets back to the end of the active queue.
+ */
+ vm_page_queues_remove(page);
+
+ if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ if (page->active)
+ panic("vm_page_activate: already active");
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ assert(!page->active || !page->inactive);
+
+ if (!page->active && !page->inactive) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+
+ if (page->active) {
+ vm_page_seg_remove_active_page(seg, page);
+ } else {
+ vm_page_seg_remove_inactive_page(seg, page);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t usable;
+ unsigned int i;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ usable = vm_page_seg_usable(seg);
+ simple_unlock(&seg->lock);
+
+ if (!usable) {
+ return FALSE;
+ }
+ }
+
+ vm_page_external_pagedout = -1;
+ vm_page_alloc_paused = FALSE;
+ thread_wakeup(&vm_page_alloc_paused);
+ return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t page_available;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ page_available = vm_page_seg_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (page_available) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+ boolean_t balanced;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are moved from higher priority
+ * segments first.
+ */
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+ boolean_t balanced;
+
+ while (vm_page_may_balance()) {
+ balanced = vm_page_balance_once();
+
+ if (!balanced) {
+ break;
+ }
+ }
+
+ return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only)
+{
+ struct vm_page_seg *seg;
+ boolean_t low_memory, min_page_available, evicted;
+ unsigned int i;
+
+ /*
+ * XXX Page allocation currently only uses the DIRECTMAP selector,
+ * allowing us to know which segments to look at when determining
+ * whether we're very low on memory.
+ */
+ low_memory = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ if (i > VM_PAGE_SEG_DIRECTMAP) {
+ break;
+ }
+
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ min_page_available = vm_page_seg_min_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (min_page_available) {
+ low_memory = FALSE;
+ break;
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * It's important here that pages are evicted from lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ evicted = vm_page_seg_evict(vm_page_seg_get(i),
+ external_only, low_memory);
+
+ if (evicted) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY 5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+ boolean_t pause, evicted, external_only;
+ unsigned int i;
+
+ *should_wait = TRUE;
+ external_only = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_external_pagedout = 0;
+ simple_unlock(&vm_page_queue_free_lock);
+
+again:
+ vm_page_lock_queues();
+ pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+ vm_page_unlock_queues();
+
+ if (pause) {
+ simple_lock(&vm_page_queue_free_lock);
+ return FALSE;
+ }
+
+ for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+ evicted = vm_page_evict_once(external_only);
+
+ if (!evicted) {
+ break;
+ }
+ }
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Keep in mind eviction may not cause pageouts, since non-precious
+ * clean pages are simply released.
+ */
+ if ((vm_page_external_pagedout == 0) || (vm_page_laundry_count == 0)) {
+ /*
+ * No pageout, but some clean pages were freed. Start a complete
+ * scan again without waiting.
+ */
+ if (evicted) {
+ *should_wait = FALSE;
+ return FALSE;
+ }
+
+ /*
+ * Eviction failed, consider pages from internal objects on the
+ * next attempt.
+ */
+ if (external_only) {
+ simple_unlock(&vm_page_queue_free_lock);
+ external_only = FALSE;
+ goto again;
+ }
+
+ /*
+ * TODO Find out what could cause this and how to deal with it.
+ * This will likely require an out-of-memory killer.
+ */
+ panic("vm_page: unable to recycle any page");
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+ unsigned int i;
+
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ vm_page_seg_refill_inactive(vm_page_seg_get(i));
+ }
+
+ vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+ assert(!current_thread()->vm_privilege);
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ if (!vm_page_alloc_paused) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return;
+ }
+
+ assert_wait(&vm_page_alloc_paused, FALSE);
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+}
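
As a usage illustration (not part of this commit), an unprivileged allocation path is expected to retry and block in vm_page_wait until the pageout daemon clears vm_page_alloc_paused and wakes it up, much like the VM_PAGE_WAIT loop added to vm_pageout_setup further below. The helper name here is made up:

/* Hypothetical helper, for illustration only. */
static struct vm_page *
example_grab_page(void)
{
    struct vm_page *m;

    for (;;) {
        m = vm_page_grab();

        if (m != VM_PAGE_NULL) {
            return m;
        }

        /* Sleeps only while unprivileged allocations are paused. */
        VM_PAGE_WAIT(NULL);
    }
}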
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 164ab6d4..eb684c1b 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -40,6 +40,7 @@
#include <vm/vm_object.h>
#include <vm/vm_types.h>
#include <kern/queue.h>
+#include <kern/list.h>
#include <kern/lock.h>
#include <kern/log2.h>
@@ -77,8 +78,7 @@
*/
struct vm_page {
- /* Members used in the vm_page module only */
- struct list node;
+ struct list node; /* page queues or free list (P) */
unsigned short type;
unsigned short seg_index;
unsigned short order;
@@ -90,15 +90,13 @@ struct vm_page {
*/
phys_addr_t phys_addr;
+ queue_chain_t listq; /* all pages in same object (O) */
+ struct vm_page *next; /* VP bucket link (O) */
+
/* We use an empty struct as the delimiter. */
struct {} vm_page_header;
#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header)
- queue_chain_t pageq; /* queue info for FIFO
- * queue or free list (P) */
- queue_chain_t listq; /* all pages in same object (O) */
- struct vm_page *next; /* VP bucket link (O) */
-
vm_object_t object; /* which object am I in (O,P) */
vm_offset_t offset; /* offset into that object (O,P) */
@@ -136,7 +134,9 @@ struct vm_page {
* some useful check on a page structure.
*/
-#define VM_PAGE_CHECK(mem)
+#define VM_PAGE_CHECK(mem) vm_page_check(mem)
+
+void vm_page_check(const struct vm_page *page);
/*
* Each pageable resident page falls into one of three lists:
@@ -155,13 +155,6 @@ struct vm_page {
*/
extern
-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
-extern
-queue_head_t vm_page_queue_active; /* active memory queue */
-extern
-queue_head_t vm_page_queue_inactive; /* inactive memory queue */
-
-extern
int vm_page_fictitious_count;/* How many fictitious pages are free? */
extern
int vm_page_active_count; /* How many pages are active? */
@@ -170,25 +163,15 @@ int vm_page_inactive_count; /* How many pages are inactive? */
extern
int vm_page_wire_count; /* How many pages are wired? */
extern
-int vm_page_free_target; /* How many do we want free? */
-extern
-int vm_page_free_min; /* When to wakeup pageout */
-extern
-int vm_page_inactive_target;/* How many do we want inactive? */
-extern
-int vm_page_free_reserved; /* How many pages reserved to do pageout */
-extern
int vm_page_laundry_count; /* How many pages being laundered? */
-
+extern
+int vm_page_external_pagedout; /* How many external pages being paged out? */
decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
page queues */
decl_simple_lock_data(extern,vm_page_queue_free_lock)
/* lock on free page queue */
-extern unsigned int vm_page_free_wanted;
- /* how many threads are waiting for memory */
-
extern phys_addr_t vm_page_fictitious_addr;
/* (fake) phys_addr of fictitious pages */
@@ -204,7 +187,7 @@ extern vm_page_t vm_page_grab_fictitious(void);
extern boolean_t vm_page_convert(vm_page_t *);
extern void vm_page_more_fictitious(void);
extern vm_page_t vm_page_grab(void);
-extern void vm_page_release(vm_page_t);
+extern void vm_page_release(vm_page_t, boolean_t, boolean_t);
extern phys_addr_t vm_page_grab_phys_addr(void);
extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int);
extern void vm_page_free_contig(vm_page_t, vm_size_t);
@@ -294,22 +277,7 @@ extern unsigned int vm_page_info(
#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
-#define VM_PAGE_QUEUES_REMOVE(mem) \
- MACRO_BEGIN \
- if (mem->active) { \
- queue_remove(&vm_page_queue_active, \
- mem, vm_page_t, pageq); \
- mem->active = FALSE; \
- vm_page_active_count--; \
- } \
- \
- if (mem->inactive) { \
- queue_remove(&vm_page_queue_inactive, \
- mem, vm_page_t, pageq); \
- mem->inactive = FALSE; \
- vm_page_inactive_count--; \
- } \
- MACRO_END
+#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem)
/*
* Copyright (c) 2010-2014 Richard Braun.
@@ -358,18 +326,11 @@ extern unsigned int vm_page_info(
/*
* Page usage types.
- *
- * Failing to allocate pmap pages will cause a kernel panic.
- * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
- * pages.
*/
#define VM_PT_FREE 0 /* Page unused */
#define VM_PT_RESERVED 1 /* Page reserved at boot time */
#define VM_PT_TABLE 2 /* Page is part of the page table */
-#define VM_PT_PMAP 3 /* Page stores pmap-specific data */
-#define VM_PT_KMEM 4 /* Page is part of a kmem slab */
-#define VM_PT_STACK 5 /* Type for generic kernel allocations */
-#define VM_PT_KERNEL 6 /* Type for generic kernel allocations */
+#define VM_PT_KERNEL 3 /* Type for generic kernel allocations */
static inline unsigned short
vm_page_type(const struct vm_page *page)
@@ -521,4 +482,53 @@ phys_addr_t vm_page_mem_size(void);
*/
unsigned long vm_page_mem_free(void);
+/*
+ * Remove the given page from any page queue it might be in.
+ */
+void vm_page_queues_remove(struct vm_page *page);
+
+/*
+ * Balance physical pages among segments.
+ *
+ * This function should be called first by the pageout daemon
+ * on memory pressure, since it may be unnecessary to perform any
+ * other operation, let alone shrink caches, if balancing is
+ * enough to make enough free pages.
+ *
+ * Return TRUE if balancing made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_balance(void);
+
+/*
+ * Evict physical pages.
+ *
+ * This function should be called by the pageout daemon after balancing
+ * the segments and shrinking kernel caches.
+ *
+ * Return TRUE if eviction made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * Otherwise, report whether the pageout daemon should wait (some pages
+ * have been paged out) or not (only clean pages have been released).
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_evict(boolean_t *should_wait);
+
+/*
+ * Turn active pages into inactive ones for second-chance LRU
+ * approximation.
+ *
+ * This function should be called by the pageout daemon on memory pressure,
+ * i.e. right before evicting pages.
+ *
+ * XXX This is probably not the best strategy, compared to keeping the
+ * active/inactive ratio in check at all times, but this means less
+ * frequent refills.
+ */
+void vm_page_refill_inactive(void);
+
#endif /* _VM_VM_PAGE_H_ */
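
Taken together, the comments above spell out the calling order expected from the rewritten pageout daemon: balance first, shrink caches, refill the inactive queue, then evict. A minimal sketch under those documented contracts, not a quote of the actual vm_pageout_scan rewrite (which is only partially shown below), could look like this:

/* Illustrative only; cache shrinking and continuation handling elided. */
static boolean_t
example_pageout_pass(boolean_t *should_wait)
{
    /* Balancing alone may free enough pages; either way it returns
       with vm_page_queue_free_lock held. */
    if (vm_page_balance()) {
        return TRUE;
    }

    simple_unlock(&vm_page_queue_free_lock);

    /* Shrink kernel caches here, then top up the inactive queue
       before evicting. */
    vm_page_refill_inactive();

    /* Returns with vm_page_queue_free_lock held; *should_wait tells
       the daemon whether to throttle before the next pass. */
    return vm_page_evict(should_wait);
}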
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index a36c9905..dd0f995c 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,123 +53,17 @@
#include <vm/vm_pageout.h>
#include <machine/locore.h>
-
-
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef VM_PAGEOUT_BURST_MIN
-#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef VM_PAGEOUT_BURST_WAIT
-#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef VM_PAGEOUT_PAUSE_MAX
-#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
/*
- * To obtain a reasonable LRU approximation, the inactive queue
- * needs to be large enough to give pages on it a chance to be
- * referenced a second time. This macro defines the fraction
- * of active+inactive pages that should be inactive.
- * The pageout daemon uses it to update vm_page_inactive_target.
- *
- * If the number of free pages falls below vm_page_free_target and
- * vm_page_inactive_count is below vm_page_inactive_target,
- * then the pageout daemon starts running.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_INACTIVE_TARGET
-#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
+static int vm_pageout_requested;
/*
- * Once the pageout daemon starts running, it keeps going
- * until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
-
-/*
- * The pageout daemon always starts running once the number of free pages
- * falls below vm_page_free_min.
- */
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/*
- * When the number of free pages falls below vm_page_free_reserved,
- * only vm-privileged threads can allocate pages. vm-privilege
- * allows the pageout daemon and default pager (and any other
- * associated threads needed for default pageout) to continue
- * operation by dipping into the reserved pool of pages. */
-
-#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED 500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_internal,
- * the pageout daemon no longer trusts external pagers to clean pages.
- * External pagers are probably all wedged waiting for a free page.
- * It forcibly double-pages dirty pages belonging to external objects,
- * getting the pages to the default pager to clean.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_INTERNAL
-#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_really,
- * the pageout daemon stops work entirely to let the default pager
- * catch up (assuming the default pager has pages to clean).
- * Beyond this point, it is too dangerous to consume memory
- * even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_REALLY
-#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- * These variables record the pageout daemon's actions:
- * how many pages it looks at and what happens to those pages.
- * No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0; /* debugging */
-unsigned int vm_pageout_inactive = 0; /* debugging */
-unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
-unsigned int vm_pageout_inactive_busy = 0; /* debugging */
-unsigned int vm_pageout_inactive_absent = 0; /* debugging */
-unsigned int vm_pageout_inactive_used = 0; /* debugging */
-unsigned int vm_pageout_inactive_clean = 0; /* debugging */
-unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
-unsigned int vm_pageout_inactive_double = 0; /* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
/*
* Routine: vm_pageout_setup
@@ -224,15 +118,20 @@ vm_pageout_setup(
/*
* If we are not flushing the page, allocate a
- * page in the object. If we cannot get the
- * page, flush instead.
+ * page in the object.
*/
if (!flush) {
- vm_object_lock(new_object);
- new_m = vm_page_alloc(new_object, new_offset);
- if (new_m == VM_PAGE_NULL)
- flush = TRUE;
- vm_object_unlock(new_object);
+ for (;;) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ vm_object_unlock(new_object);
+
+ if (new_m != VM_PAGE_NULL) {
+ break;
+ }
+
+ VM_PAGE_WAIT(NULL);
+ }
}
if (flush) {
@@ -337,26 +236,33 @@ vm_pageout_setup(
vm_page_lock_queues();
vm_stat.pageouts++;
if (m->laundry) {
+
/*
- * vm_pageout_scan is telling us to put this page
- * at the front of the inactive queue, so it will
- * be immediately paged out to the default pager.
+ * The caller is telling us that it is going to
+ * immediately double page this page to the default
+ * pager.
*/
assert(!old_object->internal);
m->laundry = FALSE;
-
- queue_enter_first(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
} else if (old_object->internal) {
m->laundry = TRUE;
vm_page_laundry_count++;
vm_page_wire(m);
- } else
+ } else {
vm_page_activate(m);
+
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout >= 0) {
+ vm_page_external_pagedout++;
+ }
+ }
vm_page_unlock_queues();
/*
@@ -487,455 +393,102 @@ vm_pageout_page(
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
- * It returns with vm_page_queue_free_lock held and
- * vm_page_free_wanted == 0.
+ *
+ * Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ * in which case should_wait indicates whether the pageout daemon
+ * should wait to allow pagers to keep up.
+ *
+ * It returns with vm_page_queue_free_lock held.
*/
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
{
- unsigned int burst_count;
- unsigned int want_pages;
+ boolean_t done;
/*
- * We want to gradually dribble pages from the active queue
- * to the inactive queue. If we let the inactive queue get
- * very small, and then suddenly dump many pages into it,
- * those pages won't get a sufficient chance to be referenced
- * before we start taking them from the inactive queue.
- *
- * We must limit the rate at which we send pages to the pagers.
- * data_write messages consume memory, for message buffers and
- * for map-copy objects. If we get too far ahead of the pagers,
- * we can potentially run out of memory.
- *
- * We can use the laundry count to limit directly the number
- * of pages outstanding to the default pager. A similar
- * strategy for external pagers doesn't work, because
- * external pagers don't have to deallocate the pages sent them,
- * and because we might have to send pages to external pagers
- * even if they aren't processing writes. So we also
- * use a burst count to limit writes to external pagers.
- *
- * When memory is very tight, we can't rely on external pagers to
- * clean pages. They probably aren't running, because they
- * aren't vm-privileged. If we kept sending dirty pages to them,
- * we could exhaust the free list. However, we can't just ignore
- * pages belonging to external objects, because there might be no
- * pages belonging to internal objects. Hence, we get the page
- * into an internal object and then immediately double-page it,
- * sending it to the default pager.
- *
- * slab_collect should be last, because the other operations
- * might return memory to caches. When we pause we use
- * vm_pageout_scan_continue as our continuation, so we will
- * reenter vm_pageout_scan periodically and attempt to reclaim
- * internal memory even if we never reach vm_page_free_target.
+ * Try balancing pages among segments first, since this
+ * may be enough to resume unprivileged allocations.
*/
- stack_collect();
- net_kmsg_collect();
- consider_task_collect();
- if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
- pointless to call consider_thread_collect. */
- consider_thread_collect();
- slab_collect();
-
- for (burst_count = 0;;) {
- vm_page_t m;
- vm_object_t object;
- unsigned long free_count;
-
- /*
- * Recalculate vm_page_inactivate_target.
- */
-
- vm_page_lock_queues();
- vm_page_inactive_target =
- VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
-
- /*
- * Move pages from active to inactive.
- */
-
- while ((vm_page_inactive_count < vm_page_inactive_target) &&
- !queue_empty(&vm_page_queue_active)) {
- vm_object_t obj;
-
- vm_pageout_active++;
- m = (vm_page_t) queue_first(&vm_page_queue_active);
- assert(m->active && !m->inactive);
-
- obj = m->object;
- if (!vm_object_lock_try(obj)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_active, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_page_lock_queues();
- continue;
- }
-
- /*
- * If the page is busy, then we pull it
- * off the active queue and leave it alone.
- */
-
- if (m->busy) {
- vm_object_unlock(obj);
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- m->active = FALSE;
- vm_page_active_count--;
- continue;
- }
-
- /*
- * Deactivate the page while holding the object
- * locked, so we know the page is still not busy.
- * This should prevent races between pmap_enter
- * and pmap_clear_reference. The page might be
- * absent or fictitious, but vm_page_deactivate
- * can handle that.
- */
-
- vm_page_deactivate(m);
- vm_object_unlock(obj);
- }
-
- /*
- * We are done if we have met our targets *and*
- * nobody is still waiting for a page.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- free_count = vm_page_mem_free();
- if ((free_count >= vm_page_free_target) &&
- (vm_page_free_wanted == 0)) {
- vm_page_unlock_queues();
- break;
- }
- want_pages = ((free_count < vm_page_free_target) ||
- vm_page_free_wanted);
- simple_unlock(&vm_page_queue_free_lock);
-
- /*
- * Sometimes we have to pause:
- * 1) No inactive pages - nothing to do.
- * 2) Flow control - wait for pagers to catch up.
- * 3) Extremely low memory - sending out dirty pages
- * consumes memory. We don't take the risk of doing
- * this if the default pager already has work to do.
- */
- pause:
- if (queue_empty(&vm_page_queue_inactive) ||
- (burst_count >= vm_pageout_burst_max) ||
- (vm_page_laundry_count >= vm_pageout_burst_max) ||
- ((free_count < vm_pageout_reserved_really) &&
- (vm_page_laundry_count > 0))) {
- unsigned int pages, msecs;
-
- /*
- * vm_pageout_burst_wait is msecs/page.
- * If there is nothing for us to do, we wait
- * at least vm_pageout_empty_wait msecs.
- */
-
- if (vm_page_laundry_count > burst_count)
- pages = vm_page_laundry_count;
- else
- pages = burst_count;
- msecs = pages * vm_pageout_burst_wait;
-
- if (queue_empty(&vm_page_queue_inactive) &&
- (msecs < vm_pageout_empty_wait))
- msecs = vm_pageout_empty_wait;
- vm_page_unlock_queues();
-
- thread_will_wait_with_timeout(current_thread(), msecs);
- counter(c_vm_pageout_scan_block++);
- thread_block(vm_pageout_scan_continue);
- call_continuation(vm_pageout_scan_continue);
- /*NOTREACHED*/
- }
-
- vm_pageout_inactive++;
-
- /* Find a page we are interested in paging out. If we
- need pages, then we'll page anything out; otherwise
- we only page out external pages. */
- m = (vm_page_t) queue_first (&vm_page_queue_inactive);
- while (1)
- {
- assert (!m->active && m->inactive);
- if (want_pages || m->external)
- break;
-
- m = (vm_page_t) queue_next (&m->pageq);
- if (!m)
- goto pause;
- }
-
- object = m->object;
+ /* This function returns with vm_page_queue_free_lock held */
+ done = vm_page_balance();
- /*
- * Try to lock object; since we've got the
- * page queues lock, we can only try for this one.
- */
-
- if (!vm_object_lock_try(object)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_nolock++;
- continue;
- }
-
- /*
- * Remove the page from the inactive list.
- */
-
- queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
-
- if (m->busy || !object->alive) {
- /*
- * Somebody is already playing with this page.
- * Leave it off the pageout queues.
- */
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
- vm_pageout_inactive_busy++;
- continue;
- }
-
- /*
- * If it's absent, we can reclaim the page.
- */
-
- if (want_pages && m->absent) {
- vm_pageout_inactive_absent++;
- reclaim_page:
- vm_page_free(m);
- vm_page_unlock_queues();
-
- if (vm_object_collectable(object))
- vm_object_collect(object);
- else
- vm_object_unlock(object);
-
- continue;
- }
-
- /*
- * If it's being used, reactivate.
- * (Fictitious pages are either busy or absent.)
- */
-
- assert(!m->fictitious);
- if (m->reference || pmap_is_referenced(m->phys_addr)) {
- vm_object_unlock(object);
- vm_page_activate(m);
- vm_stat.reactivations++;
- current_task()->reactivations++;
- vm_page_unlock_queues();
- vm_pageout_inactive_used++;
- continue;
- }
-
- /*
- * Eliminate all mappings.
- */
-
- m->busy = TRUE;
- pmap_page_protect(m->phys_addr, VM_PROT_NONE);
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_addr);
-
- /* If we don't actually need more memory, and the page
- is not dirty, put it on the tail of the inactive queue
- and move on to the next page. */
- if (!want_pages && !m->dirty) {
- queue_remove (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_cleaned_external++;
- continue;
- }
-
- /*
- * If it's clean and not precious, we can free the page.
- */
-
- if (!m->dirty && !m->precious) {
- vm_pageout_inactive_clean++;
- goto reclaim_page;
- }
-
- /*
- * If we are very low on memory, then we can't
- * rely on an external pager to clean a dirty page,
- * because external pagers are not vm-privileged.
- *
- * The laundry bit tells vm_pageout_setup to
- * put the page back at the front of the inactive
- * queue instead of activating the page. Hence,
- * we will pick the page up again immediately and
- * resend it to the default pager.
- */
-
- assert(!m->laundry);
- if ((free_count < vm_pageout_reserved_internal) &&
- !object->internal) {
- m->laundry = TRUE;
- vm_pageout_inactive_double++;
- }
- vm_page_unlock_queues();
-
- /*
- * If there is no memory object for the page, create
- * one and hand it to the default pager.
- * [First try to collapse, so we don't create
- * one unnecessarily.]
- */
-
- if (!object->pager_initialized)
- vm_object_collapse(object);
- if (!object->pager_initialized)
- vm_object_pager_create(object);
- if (!object->pager_initialized)
- panic("vm_pageout_scan");
-
- vm_pageout_inactive_dirty++;
- vm_pageout_page(m, FALSE, TRUE); /* flush it */
- vm_object_unlock(object);
- burst_count++;
+ if (done) {
+ return TRUE;
}
-}
-void vm_pageout_scan_continue(void)
-{
+ simple_unlock(&vm_page_queue_free_lock);
+
/*
- * We just paused to let the pagers catch up.
- * If vm_page_laundry_count is still high,
- * then we aren't waiting long enough.
- * If we have paused some vm_pageout_pause_max times without
- * adjusting vm_pageout_burst_wait, it might be too big,
- * so we decrease it.
+ * Balancing is not enough. Shrink caches and scan pages
+ * for eviction.
*/
- vm_page_lock_queues();
- if (vm_page_laundry_count > vm_pageout_burst_min) {
- vm_pageout_burst_wait++;
- vm_pageout_pause_count = 0;
- } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
- vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
- if (vm_pageout_burst_wait < 1)
- vm_pageout_burst_wait = 1;
- vm_pageout_pause_count = 0;
- }
- vm_page_unlock_queues();
-
- vm_pageout_continue();
- /*NOTREACHED*/
-}
-
-/*
- * vm_pageout is the high level pageout daemon.
- */
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+ if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
+ pointless to call consider_thread_collect. */
+ consider_thread_collect();
-void vm_pageout_continue(void)
-{
/*
- * The pageout daemon is never done, so loop forever.
- * We should call vm_pageout_scan at least once each
- * time we are woken, even if vm_page_free_wanted is
- * zero, to check vm_page_free_target and
- * vm_page_inactive_target.
+ * slab_collect should be last, because the other operations
+ * might return memory to caches.
*/
+ slab_collect();
- for (;;) {
- vm_pageout_scan();
- /* we hold vm_page_queue_free_lock now */
- assert(vm_page_free_wanted == 0);
+ vm_page_refill_inactive();
- assert_wait(&vm_page_free_wanted, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- counter(c_vm_pageout_block++);
- thread_block(vm_pageout_continue);
- }
+ /* This function returns with vm_page_queue_free_lock held */
+ return vm_page_evict(should_wait);
}
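
For readers tracing the new control flow above: the reworked scan tries segment balancing first, then shrinks kernel caches (with slab_collect last), refills the inactive queues, and finally attempts eviction. The stand-alone sketch below only models that ordering under those assumptions; the model_* names are illustrative stand-ins, not symbols from this tree.

/* Illustrative sketch of the scan ordering only (balance -> shrink
 * caches -> refill inactive -> evict).  Not the kernel code. */
#include <stdbool.h>
#include <stdio.h>

static bool model_balance(void)         { puts("balance segments");        return false; }
static void model_shrink_caches(void)   { puts("shrink caches, slab last"); }
static void model_refill_inactive(void) { puts("refill inactive queues");  }
static bool model_evict(bool *wait)     { puts("evict pages"); *wait = true; return false; }

/* Returns true when enough memory was freed; *should_wait asks the
 * caller to throttle until pagers catch up. */
static bool model_scan(bool *should_wait)
{
	*should_wait = false;

	/* Balancing alone may be enough to resume unprivileged allocations. */
	if (model_balance())
		return true;

	/* Otherwise shrink kernel caches and scan pages for eviction. */
	model_shrink_caches();
	model_refill_inactive();
	return model_evict(should_wait);
}

int main(void)
{
	bool wait;
	bool done = model_scan(&wait);
	printf("done=%d should_wait=%d\n", done, wait);
	return 0;
}
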
void vm_pageout(void)
{
- unsigned long free_after_reserve;
+ boolean_t done, should_wait;
current_thread()->vm_privilege = 1;
stack_privilege(current_thread());
thread_set_own_priority(0);
- /*
- * Initialize some paging parameters.
- */
-
- if (vm_pageout_burst_max == 0)
- vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
- if (vm_pageout_burst_min == 0)
- vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
- if (vm_pageout_burst_wait == 0)
- vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
- if (vm_pageout_empty_wait == 0)
- vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
- if (vm_page_free_reserved == 0)
- vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
- if (vm_pageout_pause_max == 0)
- vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
- if (vm_pageout_reserved_internal == 0)
- vm_pageout_reserved_internal =
- VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
- if (vm_pageout_reserved_really == 0)
- vm_pageout_reserved_really =
- VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
- free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
- if (vm_page_free_min == 0)
- vm_page_free_min = vm_page_free_reserved +
- VM_PAGE_FREE_MIN(free_after_reserve);
+ for (;;) {
+ done = vm_pageout_scan(&should_wait);
+ /* we hold vm_page_queue_free_lock now */
- if (vm_page_free_target == 0)
- vm_page_free_target = vm_page_free_reserved +
- VM_PAGE_FREE_TARGET(free_after_reserve);
+ if (done) {
+ thread_sleep(&vm_pageout_requested,
+ simple_lock_addr(vm_page_queue_free_lock),
+ FALSE);
+ } else if (should_wait) {
+ assert_wait(&vm_pageout_continue, FALSE);
+ thread_set_timeout(500);
+ simple_unlock(&vm_page_queue_free_lock);
+ thread_block(NULL);
+ } else {
+ simple_unlock(&vm_page_queue_free_lock);
+ }
+ }
+}
- if (vm_page_free_target < vm_page_free_min + 5)
- vm_page_free_target = vm_page_free_min + 5;
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
+{
+ if (!current_thread())
+ return;
- /*
- * vm_pageout_scan will set vm_page_inactive_target.
- */
+ thread_wakeup_one(&vm_pageout_requested);
+}
- vm_pageout_continue();
- /*NOTREACHED*/
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+ thread_wakeup_one(&vm_pageout_continue);
}
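
The loop in vm_pageout() above has three outcomes per pass: done (sleep until vm_pageout_start() is called), throttled (timed wait until vm_pageout_resume() or a timeout), or keep scanning. The sketch below models that wait protocol in user space, with pthread primitives standing in for thread_sleep/assert_wait/thread_set_timeout; the model_* names and the scripted scan results are illustrative assumptions, not Mach symbols.

/* User-space model of the daemon's three outcomes per iteration. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;   /* ~vm_page_queue_free_lock */
static pthread_cond_t  requested = PTHREAD_COND_INITIALIZER;    /* ~vm_pageout_requested */
static pthread_cond_t  resumed   = PTHREAD_COND_INITIALIZER;    /* ~vm_pageout_continue */

/* Called by an allocator, with free_lock held, when memory runs low. */
static void model_pageout_start(void)  { pthread_cond_signal(&requested); }
/* Called, with free_lock held, when laundry/external pageouts complete. */
static void model_pageout_resume(void) { pthread_cond_signal(&resumed); }

static int scan_calls;

/* Stub scan: returns with free_lock held, mirroring the real scan. */
static bool model_scan(bool *should_wait)
{
	pthread_mutex_lock(&free_lock);
	scan_calls++;
	*should_wait = (scan_calls % 2 == 0);  /* pretend every other pass must throttle */
	return (scan_calls % 3 == 0);          /* pretend every third pass meets its targets */
}

static void *model_pageout(void *arg)
{
	(void)arg;
	for (;;) {
		bool should_wait;
		bool done = model_scan(&should_wait);
		/* free_lock is held here. */
		if (done) {
			/* Nothing to do: sleep until an allocator asks for pageout. */
			pthread_cond_wait(&requested, &free_lock);
			pthread_mutex_unlock(&free_lock);
		} else if (should_wait) {
			/* Throttle: wait for pagers to catch up, or time out. */
			struct timespec ts;
			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_nsec += 100 * 1000000L;
			ts.tv_sec  += ts.tv_nsec / 1000000000L;
			ts.tv_nsec %= 1000000000L;
			pthread_cond_timedwait(&resumed, &free_lock, &ts);
			pthread_mutex_unlock(&free_lock);
		} else {
			/* Progress made but targets not met: scan again. */
			pthread_mutex_unlock(&free_lock);
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, model_pageout, NULL);
	sleep(1);
	pthread_mutex_lock(&free_lock);    /* an allocator noticing low memory */
	model_pageout_start();
	pthread_mutex_unlock(&free_lock);
	sleep(1);
	return 0;                          /* exiting main ends the model */
}
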
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
index ea6cfaf4..6ddd821c 100644
--- a/vm/vm_pageout.h
+++ b/vm/vm_pageout.h
@@ -46,8 +46,8 @@ extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t);
extern void vm_pageout(void) __attribute__((noreturn));
-extern void vm_pageout_continue(void) __attribute__((noreturn));
+extern void vm_pageout_start(void);
-extern void vm_pageout_scan_continue(void) __attribute__((noreturn));
+extern void vm_pageout_resume(void);
#endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index eac0f50c..e276fe68 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -39,6 +39,7 @@
#include <mach/vm_prot.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/list.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -95,22 +96,13 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
unsigned long vm_page_bucket_count = 0; /* How big is array? */
unsigned long vm_page_hash_mask; /* Mask for hash function */
-vm_page_t vm_page_queue_fictitious;
+static struct list vm_page_queue_fictitious;
decl_simple_lock_data(,vm_page_queue_free_lock)
-unsigned int vm_page_free_wanted;
int vm_page_fictitious_count;
-int vm_page_external_count;
int vm_object_external_count;
int vm_object_external_pages;
/*
- * This variable isn't directly used. It's merely a placeholder for the
- * address used to synchronize threads waiting for pages to become
- * available. The real value is returned by vm_page_free_mem().
- */
-unsigned int vm_page_free_avail;
-
-/*
* Occasionally, the virtual memory system uses
* resident page structures that do not refer to
* real pages, for example to leave a page with
@@ -136,8 +128,6 @@ phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
* defined here, but are shared by the pageout
* module.
*/
-queue_head_t vm_page_queue_active;
-queue_head_t vm_page_queue_inactive;
decl_simple_lock_data(,vm_page_queue_lock)
int vm_page_active_count;
int vm_page_inactive_count;
@@ -149,11 +139,8 @@ int vm_page_wire_count;
* (done here in vm_page_alloc) can trigger the
* pageout daemon.
*/
-int vm_page_free_target = 0;
-int vm_page_free_min = 0;
-int vm_page_inactive_target = 0;
-int vm_page_free_reserved = 0;
int vm_page_laundry_count = 0;
+int vm_page_external_pagedout = 0;
/*
@@ -191,11 +178,7 @@ void vm_page_bootstrap(
simple_lock_init(&vm_page_queue_free_lock);
simple_lock_init(&vm_page_queue_lock);
- vm_page_queue_fictitious = VM_PAGE_NULL;
- queue_init(&vm_page_queue_active);
- queue_init(&vm_page_queue_inactive);
-
- vm_page_free_wanted = 0;
+ list_init(&vm_page_queue_fictitious);
/*
* Allocate (and initialize) the virtual-to-physical
@@ -330,6 +313,7 @@ void vm_page_module_init(void)
* table and object list.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_insert(
@@ -407,6 +391,7 @@ void vm_page_insert(
* and we don't do deactivate-behind.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_replace(
@@ -457,6 +442,7 @@ void vm_page_replace(
listq);
m->tabled = FALSE;
object->resident_page_count--;
+ VM_PAGE_QUEUES_REMOVE(m);
if (m->external) {
m->external = FALSE;
@@ -501,9 +487,10 @@ void vm_page_replace(
* vm_page_remove: [ internal use only ]
*
* Removes the given mem entry from the object/offset-page
- * table and the object page list.
+ * table, the object page list, and the page queues.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_remove(
@@ -551,6 +538,8 @@ void vm_page_remove(
mem->tabled = FALSE;
+ VM_PAGE_QUEUES_REMOVE(mem);
+
if (mem->external) {
mem->external = FALSE;
vm_object_external_pages--;
@@ -665,11 +654,15 @@ vm_page_t vm_page_grab_fictitious(void)
vm_page_t m;
simple_lock(&vm_page_queue_free_lock);
- m = vm_page_queue_fictitious;
- if (m != VM_PAGE_NULL) {
- vm_page_fictitious_count--;
- vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
+ if (list_empty(&vm_page_queue_fictitious)) {
+ m = VM_PAGE_NULL;
+ } else {
+ m = list_first_entry(&vm_page_queue_fictitious,
+ struct vm_page, node);
+ assert(m->fictitious);
+ list_remove(&m->node);
m->free = FALSE;
+ vm_page_fictitious_count--;
}
simple_unlock(&vm_page_queue_free_lock);
@@ -689,8 +682,7 @@ static void vm_page_release_fictitious(
if (m->free)
panic("vm_page_release_fictitious");
m->free = TRUE;
- m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
- vm_page_queue_fictitious = m;
+ list_insert_head(&vm_page_queue_fictitious, &m->node);
vm_page_fictitious_count++;
simple_unlock(&vm_page_queue_free_lock);
}
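
The two hunks above replace the hand-rolled singly linked chain through pageq.next with the generic intrusive list from kern/list.h. The stand-alone model below reimplements just enough list machinery to show the same push-to-head/pop-first pattern; it is an illustration of the usage, not the kern/list.h code, and the helper names are reused only for readability.

/* Minimal model of the new fictitious-page free list. */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *head)        { head->prev = head->next = head; }
static bool list_empty(const struct node *head) { return head->next == head; }

static void list_insert_head(struct node *head, struct node *n)
{
	n->prev = head;
	n->next = head->next;
	head->next->prev = n;
	head->next = n;
}

static void list_remove(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

struct page {
	struct node node;   /* linkage reused for the free list */
	bool free;
	int id;
};

#define page_of(n) ((struct page *)((char *)(n) - offsetof(struct page, node)))

static struct node fict_queue;
static int fict_count;

static void release_fictitious(struct page *p)
{
	assert(!p->free);
	p->free = true;
	list_insert_head(&fict_queue, &p->node);
	fict_count++;
}

static struct page *grab_fictitious(void)
{
	if (list_empty(&fict_queue))
		return NULL;
	struct page *p = page_of(fict_queue.next);  /* first entry */
	list_remove(&p->node);
	p->free = false;
	fict_count--;
	return p;
}

int main(void)
{
	struct page pages[3] = {{.id = 0}, {.id = 1}, {.id = 2}};
	struct page *p;

	list_init(&fict_queue);
	for (int i = 0; i < 3; i++)
		release_fictitious(&pages[i]);
	while ((p = grab_fictitious()) != NULL)
		printf("grabbed page %d, %d left\n", p->id, fict_count);
	return 0;
}
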
@@ -779,18 +771,6 @@ vm_page_t vm_page_grab(void)
simple_lock(&vm_page_queue_free_lock);
- /*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
- */
-
- if ((vm_page_mem_free() < vm_page_free_reserved)
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
if (mem == NULL) {
@@ -801,22 +781,6 @@ vm_page_t vm_page_grab(void)
mem->free = FALSE;
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
@@ -836,38 +800,37 @@ phys_addr_t vm_page_grab_phys_addr(void)
*/
void vm_page_release(
- vm_page_t mem)
+ vm_page_t mem,
+ boolean_t laundry,
+ boolean_t external)
{
simple_lock(&vm_page_queue_free_lock);
if (mem->free)
panic("vm_page_release");
mem->free = TRUE;
vm_page_free_pa(mem, 0);
+ if (laundry) {
+ vm_page_laundry_count--;
- /*
- * Check if we should wake up someone waiting for page.
- * But don't bother waking them unless they can allocate.
- *
- * We wakeup only one thread, to prevent starvation.
- * Because the scheduling system handles wait queues FIFO,
- * if we wakeup all waiting threads, one greedy thread
- * can starve multiple niceguy threads. When the threads
- * all wakeup, the greedy threads runs first, grabs the page,
- * and waits for another page. It will be the first to run
- * when the next page is freed.
- *
- * However, there is a slight danger here.
- * The thread we wake might not use the free page.
- * Then the other threads could wait indefinitely
- * while the page goes unused. To forestall this,
- * the pageout daemon will keep making free pages
- * as long as vm_page_free_wanted is non-zero.
- */
+ if (vm_page_laundry_count == 0) {
+ vm_pageout_resume();
+ }
+ }
+ if (external) {
+
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout > 0) {
+ vm_page_external_pagedout--;
+ }
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
+ if (vm_page_external_pagedout == 0) {
+ vm_pageout_resume();
+ }
}
simple_unlock(&vm_page_queue_free_lock);
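
A compact model of the new bookkeeping in the release path above: the daemon is resumed once the last laundry page comes back, and once the last expected external pageout completes, with a negative external counter meaning the daemon is not waiting. This is a user-space sketch of that accounting, not the kernel code, and the names are illustrative.

/* Model of the laundry/external accounting in the release path. */
#include <stdbool.h>
#include <stdio.h>

static int laundry_count;
static int external_pagedout;
static bool daemon_resumed;

static void pageout_resume(void) { daemon_resumed = true; }

static void release_page(bool laundry, bool external)
{
	if (laundry) {
		laundry_count--;
		if (laundry_count == 0)
			pageout_resume();
	}

	if (external) {
		/* A negative value means the daemon isn't expecting
		 * to be notified about external pageouts. */
		if (external_pagedout > 0)
			external_pagedout--;
		if (external_pagedout == 0)
			pageout_resume();
	}
}

int main(void)
{
	laundry_count = 2;
	external_pagedout = 1;

	release_page(true, false);               /* one laundry page comes back */
	printf("resumed=%d\n", daemon_resumed);   /* 0: one laundry page left */

	release_page(true, true);                 /* last laundry page, last external pageout */
	printf("resumed=%d\n", daemon_resumed);   /* 1 */
	return 0;
}
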
@@ -892,18 +855,6 @@ vm_page_t vm_page_grab_contig(
simple_lock(&vm_page_queue_free_lock);
- /*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
- */
-
- if (((vm_page_mem_free() - nr_pages) <= vm_page_free_reserved)
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
/* TODO Allow caller to pass type */
mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
@@ -918,22 +869,6 @@ vm_page_t vm_page_grab_contig(
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
@@ -961,52 +896,10 @@ void vm_page_free_contig(vm_page_t mem, vm_size_t size)
vm_page_free_pa(mem, order);
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
- }
-
simple_unlock(&vm_page_queue_free_lock);
}
/*
- * vm_page_wait:
- *
- * Wait for a page to become available.
- * If there are plenty of free pages, then we don't sleep.
- */
-
-void vm_page_wait(
- void (*continuation)(void))
-{
-
- /*
- * We can't use vm_page_free_reserved to make this
- * determination. Consider: some thread might
- * need to allocate two pages. The first allocation
- * succeeds, the second fails. After the first page is freed,
- * a call to vm_page_wait must really block.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- if ((vm_page_mem_free() < vm_page_free_target)) {
- if (vm_page_free_wanted++ == 0)
- thread_wakeup((event_t)&vm_page_free_wanted);
- assert_wait((event_t)&vm_page_free_avail, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- if (continuation != 0) {
- counter(c_vm_page_wait_block_user++);
- thread_block(continuation);
- } else {
- counter(c_vm_page_wait_block_kernel++);
- thread_block((void (*)(void)) 0);
- }
- } else
- simple_unlock(&vm_page_queue_free_lock);
-}
-
-/*
* vm_page_alloc:
*
* Allocate and return a memory cell associated
@@ -1046,9 +939,11 @@ void vm_page_free(
if (mem->free)
panic("vm_page_free");
- if (mem->tabled)
+ if (mem->tabled) {
vm_page_remove(mem);
- VM_PAGE_QUEUES_REMOVE(mem);
+ }
+
+ assert(!mem->active && !mem->inactive);
if (mem->wire_count != 0) {
if (!mem->private && !mem->fictitious)
@@ -1056,11 +951,6 @@ void vm_page_free(
mem->wire_count = 0;
}
- if (mem->laundry) {
- vm_page_laundry_count--;
- mem->laundry = FALSE;
- }
-
PAGE_WAKEUP_DONE(mem);
if (mem->absent)
@@ -1077,116 +967,10 @@ void vm_page_free(
mem->fictitious = TRUE;
vm_page_release_fictitious(mem);
} else {
+ boolean_t laundry = mem->laundry;
+ boolean_t external = mem->external;
vm_page_init(mem);
- vm_page_release(mem);
- }
-}
-
-/*
- * vm_page_wire:
- *
- * Mark this page as wired down by yet
- * another map, removing it from paging queues
- * as necessary.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_wire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (mem->wire_count == 0) {
- VM_PAGE_QUEUES_REMOVE(mem);
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count++;
- }
- mem->wire_count++;
-}
-
-/*
- * vm_page_unwire:
- *
- * Release one wiring of this page, potentially
- * enabling it to be paged again.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_unwire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (--mem->wire_count == 0) {
- queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
- vm_page_active_count++;
- mem->active = TRUE;
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count--;
- }
-}
-
-/*
- * vm_page_deactivate:
- *
- * Returns the given page to the inactive list,
- * indicating that no physical maps have access
- * to this page. [Used by the physical mapping system.]
- *
- * The page queues must be locked.
- */
-void vm_page_deactivate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- /*
- * This page is no longer very interesting. If it was
- * interesting (active or inactive/referenced), then we
- * clear the reference bit and (re)enter it in the
- * inactive queue. Note wired pages should not have
- * their reference bit cleared.
- */
-
- if (m->active || (m->inactive && m->reference)) {
- if (!m->fictitious && !m->absent)
- pmap_clear_reference(m->phys_addr);
- m->reference = FALSE;
- VM_PAGE_QUEUES_REMOVE(m);
- }
- if (m->wire_count == 0 && !m->inactive) {
- queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
- }
-}
-
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- *
- * The page queues must be locked.
- */
-
-void vm_page_activate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- if (m->inactive) {
- queue_remove(&vm_page_queue_inactive, m, vm_page_t,
- pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
- }
- if (m->wire_count == 0) {
- if (m->active)
- panic("vm_page_activate: already active");
-
- queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
- m->active = TRUE;
- vm_page_active_count++;
+ vm_page_release(mem, laundry, external);
}
}
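
The queue-manipulation helpers removed from vm_resident.c above maintained a simple invariant: a page sits on at most one of the active/inactive queues, and on neither while wired. As a reference for those transitions, here is a toy user-space model of the old logic; it is not the rewritten, segment-aware replacement, and every name in it is an illustrative stand-in.

/* Toy model of the wire/unwire/deactivate/activate transitions. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct page {
	bool active, inactive;
	unsigned wire_count;
};

static int active_count, inactive_count, wire_count;

static void queues_remove(struct page *m)
{
	if (m->active)   { m->active = false;   active_count--; }
	if (m->inactive) { m->inactive = false; inactive_count--; }
}

static void page_wire(struct page *m)
{
	if (m->wire_count++ == 0) {
		queues_remove(m);       /* wired pages leave the paging queues */
		wire_count++;
	}
}

static void page_unwire(struct page *m)
{
	assert(m->wire_count > 0);
	if (--m->wire_count == 0) {
		m->active = true;       /* back on the active queue */
		active_count++;
		wire_count--;
	}
}

static void page_deactivate(struct page *m)
{
	queues_remove(m);
	if (m->wire_count == 0) {
		m->inactive = true;
		inactive_count++;
	}
}

static void page_activate(struct page *m)
{
	assert(!m->active);             /* "already active" would be a bug */
	queues_remove(m);
	if (m->wire_count == 0) {
		m->active = true;
		active_count++;
	}
}

int main(void)
{
	struct page p = {0};

	page_activate(&p);
	page_deactivate(&p);
	page_wire(&p);
	page_unwire(&p);
	printf("active=%d inactive=%d wired=%d\n",
	       active_count, inactive_count, wire_count);
	return 0;
}
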