VM cache policy change

This patch lets the kernel unconditionally cache non-empty unreferenced objects instead of using a fixed arbitrary limit. As the pageout daemon evicts pages, it collects cached objects that have become empty. The effective result is a graceful adjustment of the number of objects related to memory management (virtual memory objects, their associated ports, and potentially objects maintained in the external memory managers). Physical memory can now be almost entirely filled up with cached pages. In addition, these cached pages are not automatically deactivated, as objects can quickly be referenced again. There are problems with this patch, however. The first is that, on machines with a large amount of physical memory (above 1 GiB, though it also depends on usage patterns), scalability issues are exposed. For example, file systems which don't throttle their writeback requests can create thread storms, strongly reducing system responsiveness. Other issues, such as linear scans of memory objects, also add visible CPU overhead. The second is that, as most memory is used, it increases the chances of swapping deadlocks. Applications that map large objects and quickly cause lots of page faults can still easily bring the system to its knees.
author: Richard Braun <rbraun@sceen.net> 2013-10-09 11:51:54 +0200
committer: Richard Braun <rbraun@sceen.net> 2013-10-09 11:51:54 +0200
commit: 98d64d1a78172b1efc26cac36a367eec8496926f (patch)
tree: 6a6837406ad1ec12cc724c18a3d30293b41765c0
parent: 54357e27b6f3f727357b6ae93808cc5da41291a2 (diff)
4 files changed, 63 insertions, 120 deletions
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 18a909f8..9a019f6e 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -65,8 +65,6 @@ void memory_object_release(
 	pager_request_t	pager_request,
 	ipc_port_t	pager_name); /* forward */
 
-void vm_object_deactivate_pages(vm_object_t);
-
 /*
  *	Virtual memory objects maintain the actual data
  *	associated with allocated virtual memory.  A given
@@ -167,8 +165,9 @@ vm_object_t		kernel_object = &kernel_object_store;
  *
  *	The kernel may choose to terminate objects from this
  *	queue in order to reclaim storage.  The current policy
- *	is to permit a fixed maximum number of unreferenced
- *	objects (vm_object_cached_max).
+ *	is to let memory pressure dynamically adjust the number
+ *	of unreferenced objects. The pageout daemon attempts to
+ *	collect objects after removing pages from them.
  *
  *	A simple lock (accessed by routines
  *	vm_object_cache_{lock,lock_try,unlock}) governs the
@@ -184,7 +183,6 @@ vm_object_t		kernel_object = &kernel_object_store;
  */
 queue_head_t	vm_object_cached_list;
 int		vm_object_cached_count;
-int		vm_object_cached_max = 4000;	/* may be patched*/
 
 decl_simple_lock_data(,vm_object_cached_lock_data)
 
@@ -347,6 +345,33 @@ void vm_object_init(void)
 			IKOT_PAGING_NAME);
 }
 
+void vm_object_collect(
+	register vm_object_t	object)
+{
+	vm_object_unlock(object);
+
+	/*
+	 *	The cache lock must be acquired in the proper order.
+	 */
+
+	vm_object_cache_lock();
+	vm_object_lock(object);
+
+	/*
+	 *	If the object was referenced while the lock was
+	 *	dropped, cancel the termination.
+	 */
+
+	if (!vm_object_collectable(object)) {
+		vm_object_unlock(object);
+		vm_object_cache_unlock();
+		return;
+	}
+
+	queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list);
+	vm_object_terminate(object);
+}
+
 /*
  *	vm_object_reference:
  *
@@ -407,103 +432,35 @@ void vm_object_deallocate(
 
 		/*
 		 *	See whether this object can persist.  If so, enter
-		 *	it in the cache, then deactivate all of its
-		 *	pages.
+		 *	it in the cache.
 		 */
-		if (object->can_persist) {
-			boolean_t	overflow;
-
-			/*
-			 *	Enter the object onto the queue
-			 *	of "cached" objects.  Remember whether
-			 *	we've caused the queue to overflow,
-			 *	as a hint.
-			 */
-
+		if (object->can_persist && (object->resident_page_count > 0)) {
 			queue_enter(&vm_object_cached_list, object,
 				vm_object_t, cached_list);
-			overflow = (++vm_object_cached_count > vm_object_cached_max);
+			vm_object_cached_count++;
 			vm_object_cached_pages_update(object->resident_page_count);
 			vm_object_cache_unlock();
 
-			vm_object_deactivate_pages(object);
 			vm_object_unlock(object);
+			return;
+		}
 
-			/*
-			 *	If we didn't overflow, or if the queue has
-			 *	been reduced back to below the specified
-			 *	minimum, then quit.
-			 */
-			if (!overflow)
-				return;
-
-			while (TRUE) {
-				vm_object_cache_lock();
-				if (vm_object_cached_count <=
-				    vm_object_cached_max) {
-					vm_object_cache_unlock();
-					return;
-				}
-
-				/*
-				 *	If we must trim down the queue, take
-				 *	the first object, and proceed to
-				 *	terminate it instead of the original
-				 *	object.	 Have to wait for pager init.
-				 *  if it's in progress.
-				 */
-				object= (vm_object_t)
-				    queue_first(&vm_object_cached_list);
-				vm_object_lock(object);
-
-				if (!(object->pager_created &&
-				    !object->pager_initialized)) {
-
-					/*
-					 *  Ok to terminate, hang on to lock.
-					 */
-					break;
-				}
-
-				vm_object_assert_wait(object,
-					VM_OBJECT_EVENT_INITIALIZED, FALSE);
-				vm_object_unlock(object);
-				vm_object_cache_unlock();
-				thread_block((void (*)()) 0);
-
-				/*
-				 *  Continue loop to check if cache still
-				 *  needs to be trimmed.
-				 */
-			}
+		if (object->pager_created &&
+		    !object->pager_initialized) {
 
 			/*
-			 *	Actually remove object from cache.
+			 *	Have to wait for initialization.
+			 *	Put reference back and retry
+			 *	when it's initialized.
 			 */
 
-			queue_remove(&vm_object_cached_list, object,
-					vm_object_t, cached_list);
-			vm_object_cached_count--;
-
-			assert(object->ref_count == 0);
-		}
-		else {
-			if (object->pager_created &&
-			    !object->pager_initialized) {
-
-				/*
-				 *	Have to wait for initialization.
-				 *	Put reference back and retry
-				 *	when it's initialized.
-				 */
-				object->ref_count++;
-				vm_object_assert_wait(object,
-					VM_OBJECT_EVENT_INITIALIZED, FALSE);
-				vm_object_unlock(object);
-				vm_object_cache_unlock();
-				thread_block((void (*)()) 0);
-				continue;
-			  }
+			object->ref_count++;
+			vm_object_assert_wait(object,
+				VM_OBJECT_EVENT_INITIALIZED, FALSE);
+			vm_object_unlock(object);
+			vm_object_cache_unlock();
+			thread_block((void (*)()) 0);
+			continue;
 		}
 
 		/*
@@ -530,8 +487,6 @@ void vm_object_deallocate(
 	}
 }
 
-boolean_t	vm_object_terminate_remove_all = FALSE;
-
 /*
  *	Routine:	vm_object_terminate
  *	Purpose:
@@ -884,28 +839,6 @@ kern_return_t memory_object_destroy(
 }
 
 /*
- *	vm_object_deactivate_pages
- *
- *	Deactivate all pages in the specified object.  (Keep its pages
- *	in memory even though it is no longer referenced.)
- *
- *	The object must be locked.
- */
-void vm_object_deactivate_pages(
-	register vm_object_t	object)
-{
-	register vm_page_t	p;
-
-	queue_iterate(&object->memq, p, vm_page_t, listq) {
-		vm_page_lock_queues();
-		if (!p->busy)
-			vm_page_deactivate(p);
-		vm_page_unlock_queues();
-	}
-}
-
-
-/*
  *	Routine:	vm_object_pmap_protect
  *
  *	Purpose:
@@ -2761,7 +2694,7 @@ void vm_object_page_remove(
 	 *	It balances vm_object_lookup vs iteration.
 	 */
 
-	if (atop(end - start) < (unsigned)object->resident_page_count/16) {
+	if (atop(end - start) < object->resident_page_count/16) {
 		vm_object_page_remove_lookup++;
 
 		for (; start < end; start += PAGE_SIZE) {
@@ -2989,7 +2922,7 @@ void vm_object_print(
 
 	iprintf("Object 0x%X: size=0x%X",
 		(vm_offset_t) object, (vm_offset_t) object->size);
-	 printf(", %d references, %d resident pages,", object->ref_count,
+	 printf(", %d references, %lu resident pages,", object->ref_count,
 		object->resident_page_count);
 	 printf(" %d absent pages,", object->absent_count);
 	 printf(" %d paging ops\n", object->paging_in_progress);
diff --git a/vm/vm_object.h b/vm/vm_object.h
index adeff657..95798790 100644
--- a/vm/vm_object.h
+++ b/vm/vm_object.h
@@ -72,7 +72,7 @@ struct vm_object {
 						 */
 
 	int			ref_count;	/* Number of references */
-	int			resident_page_count;
+	unsigned long		resident_page_count;
 						/* number of resident pages */
 
 	struct vm_object	*copy;		/* Object that should receive
@@ -169,6 +169,7 @@ vm_object_t	kernel_object;		/* the single kernel object */
 
 extern void		vm_object_bootstrap(void);
 extern void		vm_object_init(void);
+extern void		vm_object_collect(vm_object_t);
 extern void		vm_object_terminate(vm_object_t);
 extern vm_object_t	vm_object_allocate(vm_size_t);
 extern void		vm_object_reference(vm_object_t);
@@ -280,6 +281,10 @@ extern void vm_object_pager_wakeup(ipc_port_t  pager);
  *	Routines implemented as macros
  */
 
+#define vm_object_collectable(object)					\
+	(((object)->ref_count == 0)					\
+	&& ((object)->resident_page_count == 0))
+
 #define	vm_object_paging_begin(object) 					\
 	((object)->paging_in_progress++)
 
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index 661675f0..eb75b975 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -750,7 +750,12 @@ void vm_pageout_scan()
 		    reclaim_page:
 			vm_page_free(m);
 			vm_page_unlock_queues();
-			vm_object_unlock(object);
+
+			if (vm_object_collectable(object))
+				vm_object_collect(object);
+			else
+				vm_object_unlock(object);
+
 			continue;
 		}
 
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index 7906b583..66ab51f0 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -523,7 +523,7 @@ void vm_page_insert(
 	 */
 
 	object->resident_page_count++;
-	assert(object->resident_page_count >= 0);
+	assert(object->resident_page_count != 0);
 
 	if (object->can_persist && (object->ref_count == 0))
 		vm_object_cached_pages_update(1);
@@ -630,7 +630,7 @@ void vm_page_replace(
 	 */
 
 	object->resident_page_count++;
-	assert(object->resident_page_count >= 0);
+	assert(object->resident_page_count != 0);
 
 	if (object->can_persist && (object->ref_count == 0))
 		vm_object_cached_pages_update(1);
author	Richard Braun <rbraun@sceen.net>	2013-10-09 11:51:54 +0200
committer	Richard Braun <rbraun@sceen.net>	2013-10-09 11:51:54 +0200
commit	98d64d1a78172b1efc26cac36a367eec8496926f (patch)
tree	6a6837406ad1ec12cc724c18a3d30293b41765c0
parent	54357e27b6f3f727357b6ae93808cc5da41291a2 (diff)