author     Richard Braun <rbraun@sceen.net>    2016-01-23 21:30:07 +0100
committer  Richard Braun <rbraun@sceen.net>    2016-01-23 21:30:07 +0100
commit     8bde80c03f475f06b62962905dc6766f5bc28c4c (patch)
tree       28c0dd9f3b460b09ce1d9f8e7926b02bb93576b1
parent     bee3f0799656116cee095f79f6aa91d18157c0f0 (diff)
parent     e835160b6b95f3b904fbc429392a63be1e4ed6b8 (diff)
Merge branch 'rbraun/vm_page'
-rw-r--r--  Makefrag.am                             1
-rw-r--r--  i386/Makefrag.am                        2
-rw-r--r--  i386/i386/vm_param.h                   40
-rw-r--r--  i386/i386at/biosmem.c                 844
-rw-r--r--  i386/i386at/biosmem.h                  84
-rw-r--r--  i386/i386at/elf.h                      61
-rw-r--r--  i386/i386at/model_dep.c               286
-rw-r--r--  i386/include/mach/i386/multiboot.h    105
-rw-r--r--  i386/include/mach/i386/vm_types.h       5
-rw-r--r--  kern/bootstrap.c                       16
-rw-r--r--  kern/cpu_number.h                       2
-rw-r--r--  kern/log2.h                            50
-rw-r--r--  kern/slab.h                             5
-rw-r--r--  kern/startup.c                          2
-rw-r--r--  linux/dev/glue/glue.h                   4
-rw-r--r--  linux/dev/glue/kmem.c                   6
-rw-r--r--  linux/dev/init/main.c                 143
-rw-r--r--  linux/pcmcia-cs/glue/ds.c               6
-rw-r--r--  vm/pmap.h                              15
-rw-r--r--  vm/vm_fault.c                           4
-rw-r--r--  vm/vm_init.c                            1
-rw-r--r--  vm/vm_object.c                          3
-rw-r--r--  vm/vm_page.c                          762
-rw-r--r--  vm/vm_page.h                          220
-rw-r--r--  vm/vm_resident.c                      546
25 files changed, 2378 insertions(+), 835 deletions(-)
diff --git a/Makefrag.am b/Makefrag.am
index 823ece5d..bb600e0c 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -259,6 +259,7 @@ libkernel_a_SOURCES += \
vm/vm_map.h \
vm/vm_object.c \
vm/vm_object.h \
+ vm/vm_page.c \
vm/vm_page.h \
vm/vm_pageout.c \
vm/vm_pageout.h \
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index ef393d5d..e6cfedd7 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -29,6 +29,8 @@ libkernel_a_SOURCES += \
if PLATFORM_at
libkernel_a_SOURCES += \
+ i386/i386at/biosmem.c \
+ i386/i386at/biosmem.h \
i386/i386at/boothdr.S \
i386/i386at/com.c \
i386/i386at/com.h \
diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h
index 6292ca25..da3126c0 100644
--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -23,6 +23,8 @@
#ifndef _I386_KERNEL_I386_VM_PARAM_
#define _I386_KERNEL_I386_VM_PARAM_
+#include <kern/macros.h>
+
/* XXX use xu/vm_param.h */
#include <mach/vm_param.h>
#ifdef MACH_PV_PAGETABLES
@@ -101,4 +103,42 @@
#define kvtolin(a) ((vm_offset_t)(a) - VM_MIN_KERNEL_ADDRESS + LINEAR_MIN_KERNEL_ADDRESS)
#define lintokv(a) ((vm_offset_t)(a) - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
+/*
+ * Physical memory properties.
+ */
+#define VM_PAGE_DMA_LIMIT DECL_CONST(0x1000000, UL)
+
+#ifdef __LP64__
+#define VM_PAGE_MAX_SEGS 4
+#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL)
+#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL)
+#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, UL)
+#else /* __LP64__ */
+#define VM_PAGE_DIRECTMAP_LIMIT (VM_MAX_KERNEL_ADDRESS \
+ - VM_MIN_KERNEL_ADDRESS \
+ - VM_KERNEL_MAP_SIZE + 1)
+#ifdef PAE
+#define VM_PAGE_MAX_SEGS 3
+#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL)
+#else /* PAE */
+#define VM_PAGE_MAX_SEGS 3
+#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0xfffff000, UL)
+#endif /* PAE */
+#endif /* __LP64__ */
+
+/*
+ * Physical segment indexes.
+ */
+#define VM_PAGE_SEG_DMA 0
+
+#ifdef __LP64__
+#define VM_PAGE_SEG_DMA32 1
+#define VM_PAGE_SEG_DIRECTMAP 2
+#define VM_PAGE_SEG_HIGHMEM 3
+#else /* __LP64__ */
+#define VM_PAGE_SEG_DMA32 1 /* Alias for the DIRECTMAP segment */
+#define VM_PAGE_SEG_DIRECTMAP 1
+#define VM_PAGE_SEG_HIGHMEM 2
+#endif /* __LP64__ */
+
#endif /* _I386_KERNEL_I386_VM_PARAM_ */
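[Editor's note: the limits and indexes above partition physical memory by how
the kernel can reach it. As a hedged illustration, not part of the commit and
with an invented helper name, classifying an address on a 32-bit non-PAE
build could look like:

static inline unsigned int
phys_to_seg_index(phys_addr_t pa)
{
    if (pa < VM_PAGE_DMA_LIMIT)
        return VM_PAGE_SEG_DMA;          /* ISA DMA, below 16 MiB */
    else if (pa < VM_PAGE_DIRECTMAP_LIMIT)
        return VM_PAGE_SEG_DIRECTMAP;    /* permanently mapped by the kernel */
    else
        return VM_PAGE_SEG_HIGHMEM;      /* must be mapped on demand */
}
]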
diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c
new file mode 100644
index 00000000..5b4fbddc
--- /dev/null
+++ b/i386/i386at/biosmem.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <i386/model_dep.h>
+#include <i386at/biosmem.h>
+#include <i386at/elf.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/macros.h>
+#include <kern/printf.h>
+#include <mach/vm_param.h>
+#include <mach/machine/multiboot.h>
+#include <sys/types.h>
+#include <vm/vm_page.h>
+
+#define __boot
+#define __bootdata
+#define __init
+
+#define boot_memmove memmove
+#define boot_panic panic
+#define boot_strlen strlen
+
+#define BOOT_CGAMEM phystokv(0xb8000)
+#define BOOT_CGACHARS (80 * 25)
+#define BOOT_CGACOLOR 0x7
+
+extern char _start, _end;
+
+/*
+ * Maximum number of entries in the BIOS memory map.
+ *
+ * Because of adjustments of overlapping ranges, the memory map can grow
+ * to twice this size.
+ */
+#define BIOSMEM_MAX_MAP_SIZE 128
+
+/*
+ * Memory range types.
+ */
+#define BIOSMEM_TYPE_AVAILABLE 1
+#define BIOSMEM_TYPE_RESERVED 2
+#define BIOSMEM_TYPE_ACPI 3
+#define BIOSMEM_TYPE_NVS 4
+#define BIOSMEM_TYPE_UNUSABLE 5
+#define BIOSMEM_TYPE_DISABLED 6
+
+/*
+ * Memory map entry.
+ */
+struct biosmem_map_entry {
+ uint64_t base_addr;
+ uint64_t length;
+ unsigned int type;
+};
+
+/*
+ * Contiguous block of physical memory.
+ *
+ * The "available" range records what has been passed to the VM system as
+ * available inside the segment.
+ */
+struct biosmem_segment {
+ phys_addr_t start;
+ phys_addr_t end;
+ phys_addr_t avail_start;
+ phys_addr_t avail_end;
+};
+
+/*
+ * Memory map built from the information passed by the boot loader.
+ *
+ * If the boot loader didn't pass a valid memory map, a simple map is built
+ * based on the mem_lower and mem_upper multiboot fields.
+ */
+static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2]
+ __bootdata;
+static unsigned int biosmem_map_size __bootdata;
+
+/*
+ * Physical segment boundaries.
+ */
+static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata;
+
+/*
+ * Boundaries of the simple bootstrap heap.
+ *
+ * This heap is located above BIOS memory.
+ */
+static uint32_t biosmem_heap_start __bootdata;
+static uint32_t biosmem_heap_cur __bootdata;
+static uint32_t biosmem_heap_end __bootdata;
+
+static char biosmem_panic_toobig_msg[] __bootdata
+ = "biosmem: too many memory map entries";
+static char biosmem_panic_setup_msg[] __bootdata
+ = "biosmem: unable to set up the early memory allocator";
+static char biosmem_panic_noseg_msg[] __bootdata
+ = "biosmem: unable to find any memory segment";
+static char biosmem_panic_inval_msg[] __bootdata
+ = "biosmem: attempt to allocate 0 page";
+static char biosmem_panic_nomem_msg[] __bootdata
+ = "biosmem: unable to allocate memory";
+
+static void __boot
+biosmem_map_build(const struct multiboot_raw_info *mbi)
+{
+ struct multiboot_raw_mmap_entry *mb_entry, *mb_end;
+ struct biosmem_map_entry *start, *entry, *end;
+ unsigned long addr;
+
+ addr = phystokv(mbi->mmap_addr);
+ mb_entry = (struct multiboot_raw_mmap_entry *)addr;
+ mb_end = (struct multiboot_raw_mmap_entry *)(addr + mbi->mmap_length);
+ start = biosmem_map;
+ entry = start;
+ end = entry + BIOSMEM_MAX_MAP_SIZE;
+
+ while ((mb_entry < mb_end) && (entry < end)) {
+ entry->base_addr = mb_entry->base_addr;
+ entry->length = mb_entry->length;
+ entry->type = mb_entry->type;
+
+ mb_entry = (void *)mb_entry + sizeof(mb_entry->size) + mb_entry->size;
+ entry++;
+ }
+
+ biosmem_map_size = entry - start;
+}
+
+static void __boot
+biosmem_map_build_simple(const struct multiboot_raw_info *mbi)
+{
+ struct biosmem_map_entry *entry;
+
+ entry = biosmem_map;
+ entry->base_addr = 0;
+ entry->length = mbi->mem_lower << 10;
+ entry->type = BIOSMEM_TYPE_AVAILABLE;
+
+ entry++;
+ entry->base_addr = BIOSMEM_END;
+ entry->length = mbi->mem_upper << 10;
+ entry->type = BIOSMEM_TYPE_AVAILABLE;
+
+ biosmem_map_size = 2;
+}
+
+static int __boot
+biosmem_map_entry_is_invalid(const struct biosmem_map_entry *entry)
+{
+ return (entry->base_addr + entry->length) <= entry->base_addr;
+}
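[Editor's note: because base_addr and length are unsigned 64-bit values, this
single comparison rejects both zero-length entries and entries whose range
wraps past 2^64.]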
+
+static void __boot
+biosmem_map_filter(void)
+{
+ struct biosmem_map_entry *entry;
+ unsigned int i;
+
+ i = 0;
+
+ while (i < biosmem_map_size) {
+ entry = &biosmem_map[i];
+
+ if (biosmem_map_entry_is_invalid(entry)) {
+ biosmem_map_size--;
+ boot_memmove(entry, entry + 1,
+ (biosmem_map_size - i) * sizeof(*entry));
+ continue;
+ }
+
+ i++;
+ }
+}
+
+static void __boot
+biosmem_map_sort(void)
+{
+ struct biosmem_map_entry tmp;
+ unsigned int i, j;
+
+ /*
+ * Simple insertion sort.
+ */
+ for (i = 1; i < biosmem_map_size; i++) {
+ tmp = biosmem_map[i];
+
+ for (j = i - 1; j < i; j--) {
+ if (biosmem_map[j].base_addr < tmp.base_addr)
+ break;
+
+ biosmem_map[j + 1] = biosmem_map[j];
+ }
+
+ biosmem_map[j + 1] = tmp;
+ }
+}
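[Editor's note on the inner loop above: j is unsigned, so when the backward
scan decrements past zero, j wraps around and the (j < i) test fails; that
wraparound, rather than a signed j >= 0 condition, is what terminates the
loop at the front of the array.]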
+
+static void __boot
+biosmem_map_adjust(void)
+{
+ struct biosmem_map_entry tmp, *a, *b, *first, *second;
+ uint64_t a_end, b_end, last_end;
+ unsigned int i, j, last_type;
+
+ biosmem_map_filter();
+
+ /*
+ * Resolve overlapping areas, giving priority to most restrictive
+ * (i.e. numerically higher) types.
+ */
+ for (i = 0; i < biosmem_map_size; i++) {
+ a = &biosmem_map[i];
+ a_end = a->base_addr + a->length;
+
+ j = i + 1;
+
+ while (j < biosmem_map_size) {
+ b = &biosmem_map[j];
+ b_end = b->base_addr + b->length;
+
+ if ((a->base_addr >= b_end) || (a_end <= b->base_addr)) {
+ j++;
+ continue;
+ }
+
+ if (a->base_addr < b->base_addr) {
+ first = a;
+ second = b;
+ } else {
+ first = b;
+ second = a;
+ }
+
+ if (a_end > b_end) {
+ last_end = a_end;
+ last_type = a->type;
+ } else {
+ last_end = b_end;
+ last_type = b->type;
+ }
+
+ tmp.base_addr = second->base_addr;
+ tmp.length = MIN(a_end, b_end) - tmp.base_addr;
+ tmp.type = MAX(a->type, b->type);
+ first->length = tmp.base_addr - first->base_addr;
+ second->base_addr += tmp.length;
+ second->length = last_end - second->base_addr;
+ second->type = last_type;
+
+ /*
+ * Filter out invalid entries.
+ */
+ if (biosmem_map_entry_is_invalid(a)
+ && biosmem_map_entry_is_invalid(b)) {
+ *a = tmp;
+ biosmem_map_size--;
+ memmove(b, b + 1, (biosmem_map_size - j) * sizeof(*b));
+ continue;
+ } else if (biosmem_map_entry_is_invalid(a)) {
+ *a = tmp;
+ j++;
+ continue;
+ } else if (biosmem_map_entry_is_invalid(b)) {
+ *b = tmp;
+ j++;
+ continue;
+ }
+
+ if (tmp.type == a->type)
+ first = a;
+ else if (tmp.type == b->type)
+ first = b;
+ else {
+
+ /*
+ * If the overlapping area can't be merged with one of its
+ * neighbors, it must be added as a new entry.
+ */
+
+ if (biosmem_map_size >= ARRAY_SIZE(biosmem_map))
+ boot_panic(biosmem_panic_toobig_msg);
+
+ biosmem_map[biosmem_map_size] = tmp;
+ biosmem_map_size++;
+ j++;
+ continue;
+ }
+
+ if (first->base_addr > tmp.base_addr)
+ first->base_addr = tmp.base_addr;
+
+ first->length += tmp.length;
+ j++;
+ }
+ }
+
+ biosmem_map_sort();
+}
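[Editor's worked example, with invented values, of the overlap resolution
above:

/*
 * Input :  [0x090000, 0x0a0000) available
 *          [0x09c000, 0x0b0000) reserved
 * Output:  [0x090000, 0x09c000) available
 *          [0x09c000, 0x0b0000) reserved
 *
 * The overlap [0x09c000, 0x0a0000) takes the numerically higher (more
 * restrictive) reserved type, and is merged into its same-typed neighbor
 * instead of being appended as a third entry.
 */
]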
+
+static int __boot
+biosmem_map_find_avail(phys_addr_t *phys_start, phys_addr_t *phys_end)
+{
+ const struct biosmem_map_entry *entry, *map_end;
+ phys_addr_t seg_start, seg_end;
+ uint64_t start, end;
+
+ seg_start = (phys_addr_t)-1;
+ seg_end = (phys_addr_t)-1;
+ map_end = biosmem_map + biosmem_map_size;
+
+ for (entry = biosmem_map; entry < map_end; entry++) {
+ if (entry->type != BIOSMEM_TYPE_AVAILABLE)
+ continue;
+
+ start = vm_page_round(entry->base_addr);
+
+ if (start >= *phys_end)
+ break;
+
+ end = vm_page_trunc(entry->base_addr + entry->length);
+
+ if ((start < end) && (start < *phys_end) && (end > *phys_start)) {
+ if (seg_start == (phys_addr_t)-1)
+ seg_start = start;
+
+ seg_end = end;
+ }
+ }
+
+ if ((seg_start == (phys_addr_t)-1) || (seg_end == (phys_addr_t)-1))
+ return -1;
+
+ if (seg_start > *phys_start)
+ *phys_start = seg_start;
+
+ if (seg_end < *phys_end)
+ *phys_end = seg_end;
+
+ return 0;
+}
+
+static void __boot
+biosmem_set_segment(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ biosmem_segments[seg_index].start = start;
+ biosmem_segments[seg_index].end = end;
+}
+
+static phys_addr_t __boot
+biosmem_segment_end(unsigned int seg_index)
+{
+ return biosmem_segments[seg_index].end;
+}
+
+static phys_addr_t __boot
+biosmem_segment_size(unsigned int seg_index)
+{
+ return biosmem_segments[seg_index].end - biosmem_segments[seg_index].start;
+}
+
+static void __boot
+biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi)
+{
+ struct multiboot_raw_module *mod;
+ uint32_t i, va;
+
+ if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) {
+ va = phystokv(mbi->cmdline);
+ mbi->unused0 = boot_strlen((char *)va) + 1;
+ }
+
+ if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
+ unsigned long addr;
+
+ addr = phystokv(mbi->mods_addr);
+
+ for (i = 0; i < mbi->mods_count; i++) {
+ mod = (struct multiboot_raw_module *)addr + i;
+ va = phystokv(mod->string);
+ mod->reserved = boot_strlen((char *)va) + 1;
+ }
+ }
+}
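[Editor's note: the lengths are stashed in spare multiboot fields
(mbi->unused0 for the kernel command line, mod->reserved for each module
string) so that the information survives until the strings themselves are
remapped later in initialization, as the comment in biosmem_bootstrap()
below explains.]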
+
+static void __boot
+biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end,
+ uint32_t data_start, uint32_t data_end)
+{
+ if ((min <= data_start) && (data_start < *start)) {
+ *start = data_start;
+ *end = data_end;
+ }
+}
+
+/*
+ * Find the first boot data in the given range, and return their containing
+ * area (start address is returned directly, end address is returned in end).
+ * The following are considered boot data :
+ * - the kernel
+ * - the kernel command line
+ * - the module table
+ * - the modules
+ * - the modules command lines
+ * - the ELF section header table
+ * - the ELF .shstrtab, .symtab and .strtab sections
+ *
+ * If no boot data was found, 0 is returned, and the end address isn't set.
+ */
+static uint32_t __boot
+biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min,
+ uint32_t max, uint32_t *endp)
+{
+ struct multiboot_raw_module *mod;
+ struct elf_shdr *shdr;
+ uint32_t i, start, end = end;
+ unsigned long tmp;
+
+ start = max;
+
+ biosmem_find_boot_data_update(min, &start, &end, _kvtophys(&_start),
+ _kvtophys(&_end));
+
+ if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0))
+ biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline,
+ mbi->cmdline + mbi->unused0);
+
+ if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
+ i = mbi->mods_count * sizeof(struct multiboot_raw_module);
+ biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr,
+ mbi->mods_addr + i);
+ tmp = phystokv(mbi->mods_addr);
+
+ for (i = 0; i < mbi->mods_count; i++) {
+ mod = (struct multiboot_raw_module *)tmp + i;
+ biosmem_find_boot_data_update(min, &start, &end, mod->mod_start,
+ mod->mod_end);
+
+ if (mod->string != 0)
+ biosmem_find_boot_data_update(min, &start, &end, mod->string,
+ mod->string + mod->reserved);
+ }
+ }
+
+ if (mbi->flags & MULTIBOOT_LOADER_SHDR) {
+ tmp = mbi->shdr_num * mbi->shdr_size;
+ biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr,
+ mbi->shdr_addr + tmp);
+ tmp = phystokv(mbi->shdr_addr);
+
+ for (i = 0; i < mbi->shdr_num; i++) {
+ shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size));
+
+ if ((shdr->type != ELF_SHT_SYMTAB)
+ && (shdr->type != ELF_SHT_STRTAB))
+ continue;
+
+ biosmem_find_boot_data_update(min, &start, &end, shdr->addr,
+ shdr->addr + shdr->size);
+ }
+ }
+
+ if (start == max)
+ return 0;
+
+ *endp = end;
+ return start;
+}
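[Editor's note: the "end = end" self-initialization at the top of this
function is a common GCC idiom to silence a spurious uninitialized-variable
warning; end is in fact always assigned before use, since the function
returns early when start is still equal to max.]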
+
+static void __boot
+biosmem_setup_allocator(struct multiboot_raw_info *mbi)
+{
+ uint32_t heap_start, heap_end, max_heap_start, max_heap_end;
+ uint32_t mem_end, next;
+
+ /*
+ * Find some memory for the heap. Look for the largest unused area in
+ * upper memory, carefully avoiding all boot data.
+ */
+ mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
+
+#ifndef __LP64__
+ if (mem_end > VM_PAGE_DIRECTMAP_LIMIT)
+ mem_end = VM_PAGE_DIRECTMAP_LIMIT;
+#endif /* __LP64__ */
+
+ max_heap_start = 0;
+ max_heap_end = 0;
+ next = BIOSMEM_END;
+
+ do {
+ heap_start = next;
+ heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next);
+
+ if (heap_end == 0) {
+ heap_end = mem_end;
+ next = 0;
+ }
+
+ if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) {
+ max_heap_start = heap_start;
+ max_heap_end = heap_end;
+ }
+ } while (next != 0);
+
+ max_heap_start = vm_page_round(max_heap_start);
+ max_heap_end = vm_page_trunc(max_heap_end);
+
+ if (max_heap_start >= max_heap_end)
+ boot_panic(biosmem_panic_setup_msg);
+
+ biosmem_heap_start = max_heap_start;
+ biosmem_heap_end = max_heap_end;
+ biosmem_heap_cur = biosmem_heap_end;
+}
+
+void __boot
+biosmem_bootstrap(struct multiboot_raw_info *mbi)
+{
+ phys_addr_t phys_start, phys_end, last_addr;
+ int error;
+
+ if (mbi->flags & MULTIBOOT_LOADER_MMAP)
+ biosmem_map_build(mbi);
+ else
+ biosmem_map_build_simple(mbi);
+
+ biosmem_map_adjust();
+
+ phys_start = BIOSMEM_BASE;
+ phys_end = VM_PAGE_DMA_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (error)
+ boot_panic(biosmem_panic_noseg_msg);
+
+ biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end);
+ last_addr = phys_end;
+
+ phys_start = VM_PAGE_DMA_LIMIT;
+#ifdef VM_PAGE_DMA32_LIMIT
+ phys_end = VM_PAGE_DMA32_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (error)
+ goto out;
+
+ biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end);
+ last_addr = phys_end;
+
+ phys_start = VM_PAGE_DMA32_LIMIT;
+#endif /* VM_PAGE_DMA32_LIMIT */
+ phys_end = VM_PAGE_DIRECTMAP_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (error)
+ goto out;
+
+ biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end);
+ last_addr = phys_end;
+
+ phys_start = VM_PAGE_DIRECTMAP_LIMIT;
+ phys_end = VM_PAGE_HIGHMEM_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (error)
+ goto out;
+
+ biosmem_set_segment(VM_PAGE_SEG_HIGHMEM, phys_start, phys_end);
+
+out:
+
+ /*
+ * The kernel and modules command lines will be memory mapped later
+ * during initialization. Their respective sizes must be saved.
+ */
+ biosmem_save_cmdline_sizes(mbi);
+ biosmem_setup_allocator(mbi);
+
+ /* XXX phys_last_addr must be part of the direct physical mapping */
+ phys_last_addr = last_addr;
+}
+
+unsigned long __boot
+biosmem_bootalloc(unsigned int nr_pages)
+{
+ unsigned long addr, size;
+
+ assert(!vm_page_ready());
+
+ size = vm_page_ptoa(nr_pages);
+
+ if (size == 0)
+ boot_panic(biosmem_panic_inval_msg);
+
+ /* Top-down allocation to avoid unnecessarily filling DMA segments */
+ addr = biosmem_heap_cur - size;
+
+ if ((addr < biosmem_heap_start) || (addr > biosmem_heap_cur))
+ boot_panic(biosmem_panic_nomem_msg);
+
+ biosmem_heap_cur = addr;
+ return addr;
+}
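[A hedged usage sketch, simplified from init_alloc_aligned() in
i386at/model_dep.c below; note that exhaustion panics rather than returning
an error:

/* one page for an early page table; the result is a physical address */
unsigned long pa = biosmem_bootalloc(1);
]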
+
+phys_addr_t __boot
+biosmem_directmap_size(void)
+{
+ if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0)
+ return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP);
+ else if (biosmem_segment_size(VM_PAGE_SEG_DMA32) != 0)
+ return biosmem_segment_end(VM_PAGE_SEG_DMA32);
+ else
+ return biosmem_segment_end(VM_PAGE_SEG_DMA);
+}
+
+static const char * __init
+biosmem_type_desc(unsigned int type)
+{
+ switch (type) {
+ case BIOSMEM_TYPE_AVAILABLE:
+ return "available";
+ case BIOSMEM_TYPE_RESERVED:
+ return "reserved";
+ case BIOSMEM_TYPE_ACPI:
+ return "ACPI";
+ case BIOSMEM_TYPE_NVS:
+ return "ACPI NVS";
+ case BIOSMEM_TYPE_UNUSABLE:
+ return "unusable";
+ default:
+ return "unknown (reserved)";
+ }
+}
+
+static void __init
+biosmem_map_show(void)
+{
+ const struct biosmem_map_entry *entry, *end;
+
+ printf("biosmem: physical memory map:\n");
+
+ for (entry = biosmem_map, end = entry + biosmem_map_size;
+ entry < end;
+ entry++)
+ printf("biosmem: %018llx:%018llx, %s\n", entry->base_addr,
+ entry->base_addr + entry->length,
+ biosmem_type_desc(entry->type));
+
+ printf("biosmem: heap: %x-%x\n", biosmem_heap_start, biosmem_heap_end);
+}
+
+static void __init
+biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end,
+ phys_addr_t phys_start, phys_addr_t phys_end,
+ phys_addr_t avail_start, phys_addr_t avail_end)
+{
+ unsigned int seg_index;
+
+ seg_index = seg - biosmem_segments;
+
+ if (phys_end > max_phys_end) {
+ if (max_phys_end <= phys_start) {
+ printf("biosmem: warning: segment %s physically unreachable, "
+ "not loaded\n", vm_page_seg_name(seg_index));
+ return;
+ }
+
+ printf("biosmem: warning: segment %s truncated to %#llx\n",
+ vm_page_seg_name(seg_index), max_phys_end);
+ phys_end = max_phys_end;
+ }
+
+ if ((avail_start < phys_start) || (avail_start >= phys_end))
+ avail_start = phys_start;
+
+ if ((avail_end <= phys_start) || (avail_end > phys_end))
+ avail_end = phys_end;
+
+ seg->avail_start = avail_start;
+ seg->avail_end = avail_end;
+ vm_page_load(seg_index, phys_start, phys_end, avail_start, avail_end);
+}
+
+void __init
+biosmem_setup(void)
+{
+ struct biosmem_segment *seg;
+ unsigned int i;
+
+ biosmem_map_show();
+
+ for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
+ if (biosmem_segment_size(i) == 0)
+ break;
+
+ seg = &biosmem_segments[i];
+ biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT, seg->start, seg->end,
+ biosmem_heap_start, biosmem_heap_cur);
+ }
+}
+
+static void __init
+biosmem_free_usable_range(phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page *page;
+
+ printf("biosmem: release to vm_page: %llx-%llx (%lluk)\n",
+ (unsigned long long)start, (unsigned long long)end,
+ (unsigned long long)((end - start) >> 10));
+
+ while (start < end) {
+ page = vm_page_lookup_pa(start);
+ assert(page != NULL);
+ vm_page_manage(page);
+ start += PAGE_SIZE;
+ }
+}
+
+static void __init
+biosmem_free_usable_update_start(phys_addr_t *start, phys_addr_t res_start,
+ phys_addr_t res_end)
+{
+ if ((*start >= res_start) && (*start < res_end))
+ *start = res_end;
+}
+
+static phys_addr_t __init
+biosmem_free_usable_start(phys_addr_t start)
+{
+ const struct biosmem_segment *seg;
+ unsigned int i;
+
+ biosmem_free_usable_update_start(&start, _kvtophys(&_start),
+ _kvtophys(&_end));
+ biosmem_free_usable_update_start(&start, biosmem_heap_start,
+ biosmem_heap_end);
+
+ for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
+ seg = &biosmem_segments[i];
+ biosmem_free_usable_update_start(&start, seg->avail_start,
+ seg->avail_end);
+ }
+
+ return start;
+}
+
+static int __init
+biosmem_free_usable_reserved(phys_addr_t addr)
+{
+ const struct biosmem_segment *seg;
+ unsigned int i;
+
+ if ((addr >= _kvtophys(&_start))
+ && (addr < _kvtophys(&_end)))
+ return 1;
+
+ if ((addr >= biosmem_heap_start) && (addr < biosmem_heap_end))
+ return 1;
+
+ for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
+ seg = &biosmem_segments[i];
+
+ if ((addr >= seg->avail_start) && (addr < seg->avail_end))
+ return 1;
+ }
+
+ return 0;
+}
+
+static phys_addr_t __init
+biosmem_free_usable_end(phys_addr_t start, phys_addr_t entry_end)
+{
+ while (start < entry_end) {
+ if (biosmem_free_usable_reserved(start))
+ break;
+
+ start += PAGE_SIZE;
+ }
+
+ return start;
+}
+
+static void __init
+biosmem_free_usable_entry(phys_addr_t start, phys_addr_t end)
+{
+ phys_addr_t entry_end;
+
+ entry_end = end;
+
+ for (;;) {
+ start = biosmem_free_usable_start(start);
+
+ if (start >= entry_end)
+ return;
+
+ end = biosmem_free_usable_end(start, entry_end);
+ biosmem_free_usable_range(start, end);
+ start = end;
+ }
+}
+
+void __init
+biosmem_free_usable(void)
+{
+ struct biosmem_map_entry *entry;
+ uint64_t start, end;
+ unsigned int i;
+
+ for (i = 0; i < biosmem_map_size; i++) {
+ entry = &biosmem_map[i];
+
+ if (entry->type != BIOSMEM_TYPE_AVAILABLE)
+ continue;
+
+ start = vm_page_round(entry->base_addr);
+
+ if (start >= VM_PAGE_HIGHMEM_LIMIT)
+ break;
+
+ end = vm_page_trunc(entry->base_addr + entry->length);
+
+ if (start < BIOSMEM_BASE)
+ start = BIOSMEM_BASE;
+
+ biosmem_free_usable_entry(start, end);
+ }
+}
diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h
new file mode 100644
index 00000000..0cc4f8a6
--- /dev/null
+++ b/i386/i386at/biosmem.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _X86_BIOSMEM_H
+#define _X86_BIOSMEM_H
+
+#include <mach/machine/vm_types.h>
+#include <mach/machine/multiboot.h>
+
+/*
+ * Address where the address of the Extended BIOS Data Area segment can be
+ * found.
+ */
+#define BIOSMEM_EBDA_PTR 0x40e
+
+/*
+ * Significant low memory addresses.
+ *
+ * The first 64 KiB are reserved for various reasons (e.g. to preserve BIOS
+ * data and to work around data corruption on some hardware).
+ */
+#define BIOSMEM_BASE 0x010000
+#define BIOSMEM_BASE_END 0x0a0000
+#define BIOSMEM_EXT_ROM 0x0e0000
+#define BIOSMEM_ROM 0x0f0000
+#define BIOSMEM_END 0x100000
+
+/*
+ * Early initialization of the biosmem module.
+ *
+ * This function processes the given multiboot data for BIOS-provided
+ * memory information, and sets up a bootstrap physical page allocator.
+ *
+ * It is called before paging is enabled.
+ */
+void biosmem_bootstrap(struct multiboot_raw_info *mbi);
+
+/*
+ * Allocate contiguous physical pages during bootstrap.
+ *
+ * This function is called before paging is enabled. It should only be used
+ * to allocate initial page table pages. Those pages are later loaded into
+ * the VM system (as reserved pages) which means they can be freed like other
+ * regular pages. Users should fix up the type of those pages once the VM
+ * system is initialized.
+ */
+unsigned long biosmem_bootalloc(unsigned int nr_pages);
+
+/*
+ * Return the amount of physical memory that can be directly mapped.
+ *
+ * This includes the size of both the DMA/DMA32 and DIRECTMAP segments.
+ */
+phys_addr_t biosmem_directmap_size(void);
+
+/*
+ * Set up physical memory based on the information obtained during bootstrap
+ * and load it in the VM system.
+ */
+void biosmem_setup(void);
+
+/*
+ * Free all usable memory.
+ *
+ * This includes ranges that weren't part of the bootstrap allocator initial
+ * heap, e.g. because they contained boot data.
+ */
+void biosmem_free_usable(void);
+
+#endif /* _X86_BIOSMEM_H */
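[Editor's sketch of the boot-time call order, as wired up in
i386at/model_dep.c below; the biosmem_free_usable() call site is outside
this diff:

biosmem_bootstrap(mbi);  /* parse/adjust the BIOS map, set up the heap */
/* ... pmap_bootstrap() may call biosmem_bootalloc() for page tables ... */
biosmem_setup();         /* hand the physical segments to the VM system */
/* ... once boot data is no longer needed ... */
biosmem_free_usable();   /* release the remaining usable ranges */
]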
diff --git a/i386/i386at/elf.h b/i386/i386at/elf.h
new file mode 100644
index 00000000..26f4d87b
--- /dev/null
+++ b/i386/i386at/elf.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2013 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _X86_ELF_H
+#define _X86_ELF_H
+
+#define ELF_SHT_SYMTAB 2
+#define ELF_SHT_STRTAB 3
+
+struct elf_shdr {
+ unsigned int name;
+ unsigned int type;
+ unsigned int flags;
+ unsigned long addr;
+ unsigned long offset;
+ unsigned int size;
+ unsigned int link;
+ unsigned int info;
+ unsigned int addralign;
+ unsigned int entsize;
+};
+
+#ifdef __LP64__
+
+struct elf_sym {
+ unsigned int name;
+ unsigned char info;
+ unsigned char other;
+ unsigned short shndx;
+ unsigned long value;
+ unsigned long size;
+};
+
+#else /* __LP64__ */
+
+struct elf_sym {
+ unsigned int name;
+ unsigned long value;
+ unsigned long size;
+ unsigned char info;
+ unsigned char other;
+ unsigned short shndx;
+};
+
+#endif /* __LP64__ */
+
+#endif /* _X86_ELF_H */
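[Editor's note: elf_shdr follows the standard Elf32_Shdr field order
(sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, sh_info,
sh_addralign, sh_entsize), and the two elf_sym variants follow Elf64_Sym and
Elf32_Sym, whose field orders differ between the two widths; hence the
#ifdef __LP64__ split above.]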
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index 04cf6958..dbb9d8b6 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -64,6 +64,7 @@
#include <i386/locore.h>
#include <i386/model_dep.h>
#include <i386at/autoconf.h>
+#include <i386at/biosmem.h>
#include <i386at/idt.h>
#include <i386at/int_init.h>
#include <i386at/kd.h>
@@ -127,20 +128,6 @@ struct multiboot_info boot_info;
/* Command line supplied to kernel. */
char *kernel_cmdline = "";
-/* This is used for memory initialization:
- it gets bumped up through physical memory
- that exists and is not occupied by boot gunk.
- It is not necessarily page-aligned. */
-static vm_offset_t avail_next
-#ifndef MACH_HYP
- = RESERVED_BIOS /* XX end of BIOS data area */
-#endif /* MACH_HYP */
- ;
-
-/* Possibly overestimated amount of available memory
- still remaining to be handed to the VM system. */
-static vm_size_t avail_remaining;
-
extern char version[];
/* If set, reboot the system on ctrl-alt-delete. */
@@ -275,91 +262,6 @@ void db_reset_cpu(void)
halt_all_cpus(1);
}
-
-/*
- * Compute physical memory size and other parameters.
- */
-void
-mem_size_init(void)
-{
- vm_offset_t max_phys_size;
-
- /* Physical memory on all PCs starts at physical address 0.
- XX make it a constant. */
- phys_first_addr = 0;
-
-#ifdef MACH_HYP
- if (boot_info.nr_pages >= 0x100000) {
- printf("Truncating memory size to 4GiB\n");
- phys_last_addr = 0xffffffffU;
- } else
- phys_last_addr = boot_info.nr_pages * 0x1000;
-#else /* MACH_HYP */
- vm_size_t phys_last_kb;
-
- if (boot_info.flags & MULTIBOOT_MEM_MAP) {
- struct multiboot_mmap *map, *map_end;
-
- map = (void*) phystokv(boot_info.mmap_addr);
- map_end = (void*) map + boot_info.mmap_count;
-
- while (map + 1 <= map_end) {
- if (map->Type == MB_ARD_MEMORY) {
- unsigned long long start = map->BaseAddr, end = map->BaseAddr + map->Length;;
-
- if (start >= 0x100000000ULL) {
- printf("Ignoring %luMiB RAM region above 4GiB\n", (unsigned long) (map->Length >> 20));
- } else {
- if (end >= 0x100000000ULL) {
- printf("Truncating memory region to 4GiB\n");
- end = 0x0ffffffffU;
- }
- if (end > phys_last_addr)
- phys_last_addr = end;
-
- printf("AT386 boot: physical memory map from 0x%lx to 0x%lx\n",
- (unsigned long) start,
- (unsigned long) end);
- }
- }
- map = (void*) map + map->size + sizeof(map->size);
- }
- } else {
- phys_last_kb = 0x400 + boot_info.mem_upper;
- /* Avoid 4GiB overflow. */
- if (phys_last_kb < 0x400 || phys_last_kb >= 0x400000) {
- printf("Truncating memory size to 4GiB\n");
- phys_last_addr = 0xffffffffU;
- } else
- phys_last_addr = phys_last_kb * 0x400;
- }
-#endif /* MACH_HYP */
-
- printf("AT386 boot: physical memory from 0x%lx to 0x%lx\n",
- phys_first_addr, phys_last_addr);
-
- /* Reserve room for virtual mappings.
- * Yes, this loses memory. Blame i386. */
- max_phys_size = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS - VM_KERNEL_MAP_SIZE;
- if (phys_last_addr - phys_first_addr > max_phys_size) {
- phys_last_addr = phys_first_addr + max_phys_size;
- printf("Truncating memory to %luMiB\n", (phys_last_addr - phys_first_addr) / (1024 * 1024));
- /* TODO Xen: be nice, free lost memory */
- }
-
- phys_first_addr = round_page(phys_first_addr);
- phys_last_addr = trunc_page(phys_last_addr);
-
-#ifdef MACH_HYP
- /* Memory is just contiguous */
- avail_remaining = phys_last_addr;
-#else /* MACH_HYP */
- avail_remaining
- = phys_last_addr - (0x100000 - (boot_info.mem_lower * 0x400)
- - RESERVED_BIOS);
-#endif /* MACH_HYP */
-}
-
/*
* Basic PC VM initialization.
* Turns on paging and changes the kernel segments to use high linear addresses.
@@ -382,9 +284,9 @@ i386at_init(void)
#endif /* MACH_HYP */
/*
- * Find memory size parameters.
+ * Read memory map and load it into the physical page allocator.
*/
- mem_size_init();
+ biosmem_bootstrap((struct multiboot_raw_info *) &boot_info);
#ifdef MACH_XEN
kernel_cmdline = (char*) boot_info.cmd_line;
@@ -432,6 +334,13 @@ i386at_init(void)
pmap_bootstrap();
/*
+ * Load physical segments into the VM system.
+ * The early allocation functions become unusable after
+ * this point.
+ */
+ biosmem_setup();
+
+ /*
* We'll have to temporarily install a direct mapping
* between physical memory and low linear memory,
* until we start using our new kernel segment descriptors.
@@ -706,187 +615,20 @@ resettodr(void)
unsigned int pmap_free_pages(void)
{
- return atop(avail_remaining);
+ return vm_page_atop(phys_last_addr); /* XXX */
}
-/* Always returns page-aligned regions. */
boolean_t
init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
{
- vm_offset_t addr;
+ *addrp = biosmem_bootalloc(vm_page_atop(vm_page_round(size)));
-#ifdef MACH_HYP
- /* There is none */
- if (!avail_next)
- avail_next = _kvtophys(boot_info.pt_base) + (boot_info.nr_pt_frames + 3) * 0x1000;
-#else /* MACH_HYP */
- extern char start[], end[];
- int i;
- static int wrapped = 0;
-
- /* Memory regions to skip. */
- vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE
- ? boot_info.cmdline : 0;
- vm_offset_t cmdline_end_pa = cmdline_start_pa
- ? cmdline_start_pa+strlen((char*)phystokv(cmdline_start_pa))+1
- : 0;
- vm_offset_t mods_start_pa = boot_info.flags & MULTIBOOT_MODS
- ? boot_info.mods_addr : 0;
- vm_offset_t mods_end_pa = mods_start_pa
- ? mods_start_pa
- + boot_info.mods_count * sizeof(struct multiboot_module)
- : 0;
-
- retry:
-#endif /* MACH_HYP */
-
- /* Page-align the start address. */
- avail_next = round_page(avail_next);
-
-#ifndef MACH_HYP
- /* Start with memory above 16MB, reserving the low memory for later. */
- /* Don't care on Xen */
- if (!wrapped && phys_last_addr > 16 * 1024*1024)
- {
- if (avail_next < 16 * 1024*1024)
- avail_next = 16 * 1024*1024;
- else if (avail_next == phys_last_addr)
- {
- /* We have used all the memory above 16MB, so now start on
- the low memory. This will wind up at the end of the list
- of free pages, so it should not have been allocated to any
- other use in early initialization before the Linux driver
- glue initialization needs to allocate low memory. */
- avail_next = RESERVED_BIOS;
- wrapped = 1;
- }
- }
-#endif /* MACH_HYP */
-
- /* Check if we have reached the end of memory. */
- if (avail_next ==
- (
-#ifndef MACH_HYP
- wrapped ? 16 * 1024*1024 :
-#endif /* MACH_HYP */
- phys_last_addr))
+ if (*addrp == 0)
return FALSE;
- /* Tentatively assign the current location to the caller. */
- addr = avail_next;
-
- /* Bump the pointer past the newly allocated region
- and see where that puts us. */
- avail_next += size;
-
-#ifndef MACH_HYP
- /* Skip past the I/O and ROM area. */
- if (boot_info.flags & MULTIBOOT_MEM_MAP)
- {
- struct multiboot_mmap *map, *map_end, *current = NULL, *next = NULL;
- unsigned long long minimum_next = ~0ULL;
-
- map = (void*) phystokv(boot_info.mmap_addr);
- map_end = (void*) map + boot_info.mmap_count;
-
- /* Find both our current map, and the next one */
- while (map + 1 <= map_end)
- {
- if (map->Type == MB_ARD_MEMORY)
- {
- unsigned long long start = map->BaseAddr;
- unsigned long long end = start + map->Length;;
-
- if (start <= addr && avail_next <= end)
- {
- /* Ok, fits in the current map */
- current = map;
- break;
- }
- else if (avail_next <= start && start < minimum_next)
- {
- /* This map is not far from avail_next */
- next = map;
- minimum_next = start;
- }
- }
- map = (void*) map + map->size + sizeof(map->size);
- }
-
- if (!current) {
- /* Area does not fit in the current map, switch to next
- * map if any */
- if (!next || next->BaseAddr >= phys_last_addr)
- {
- /* No further reachable map, we have reached
- * the end of memory, but possibly wrap around
- * 16MiB. */
- avail_next = phys_last_addr;
- goto retry;
- }
-
- /* Start from next map */
- avail_next = next->BaseAddr;
- goto retry;
- }
- }
- else if ((avail_next > (boot_info.mem_lower * 0x400)) && (addr < 0x100000))
- {
- avail_next = 0x100000;
- goto retry;
- }
-
- /* Skip our own kernel code, data, and bss. */
- if ((phystokv(avail_next) > (vm_offset_t)start) && (phystokv(addr) < (vm_offset_t)end))
- {
- avail_next = _kvtophys(end);
- goto retry;
- }
-
- /* Skip any areas occupied by valuable boot_info data. */
- if ((avail_next > cmdline_start_pa) && (addr < cmdline_end_pa))
- {
- avail_next = cmdline_end_pa;
- goto retry;
- }
- if ((avail_next > mods_start_pa) && (addr < mods_end_pa))
- {
- avail_next = mods_end_pa;
- goto retry;
- }
- if ((phystokv(avail_next) > kern_sym_start) && (phystokv(addr) < kern_sym_end))
- {
- avail_next = _kvtophys(kern_sym_end);
- goto retry;
- }
- if (boot_info.flags & MULTIBOOT_MODS)
- {
- struct multiboot_module *m = (struct multiboot_module *)
- phystokv(boot_info.mods_addr);
- for (i = 0; i < boot_info.mods_count; i++)
- {
- if ((avail_next > m[i].mod_start)
- && (addr < m[i].mod_end))
- {
- avail_next = m[i].mod_end;
- goto retry;
- }
- /* XXX string */
- }
- }
-#endif /* MACH_HYP */
-
- avail_remaining -= size;
-
- *addrp = addr;
return TRUE;
}
-boolean_t pmap_next_page(vm_offset_t *addrp)
-{
- return init_alloc_aligned(PAGE_SIZE, addrp);
-}
-
/* Grab a physical page:
the standard memory allocation mechanism
during system initialization. */
@@ -894,7 +636,7 @@ vm_offset_t
pmap_grab_page(void)
{
vm_offset_t addr;
- if (!pmap_next_page(&addr))
+ if (!init_alloc_aligned(PAGE_SIZE, &addr))
panic("Not enough memory to initialize Mach");
return addr;
}
diff --git a/i386/include/mach/i386/multiboot.h b/i386/include/mach/i386/multiboot.h
index 8f1c47b0..c66ca032 100644
--- a/i386/include/mach/i386/multiboot.h
+++ b/i386/include/mach/i386/multiboot.h
@@ -188,5 +188,110 @@ struct multiboot_mmap
/* usable memory "Type", all others are reserved. */
#define MB_ARD_MEMORY 1
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Versions used by the biosmem module.
+ */
+
+#include <kern/macros.h>
+
+/*
+ * Magic number provided by the OS to the boot loader.
+ */
+#define MULTIBOOT_OS_MAGIC 0x1badb002
+
+/*
+ * Multiboot flags requesting services from the boot loader.
+ */
+#define MULTIBOOT_OS_MEMORY_INFO 0x2
+
+#define MULTIBOOT_OS_FLAGS MULTIBOOT_OS_MEMORY_INFO
+
+/*
+ * Magic number to identify a multiboot compliant boot loader.
+ */
+#define MULTIBOOT_LOADER_MAGIC 0x2badb002
+
+/*
+ * Multiboot flags set by the boot loader.
+ */
+#define MULTIBOOT_LOADER_MEMORY 0x01
+#define MULTIBOOT_LOADER_CMDLINE 0x04
+#define MULTIBOOT_LOADER_MODULES 0x08
+#define MULTIBOOT_LOADER_SHDR 0x20
+#define MULTIBOOT_LOADER_MMAP 0x40
+
+/*
+ * A multiboot module.
+ */
+struct multiboot_raw_module {
+ uint32_t mod_start;
+ uint32_t mod_end;
+ uint32_t string;
+ uint32_t reserved;
+} __packed;
+
+/*
+ * Memory map entry.
+ */
+struct multiboot_raw_mmap_entry {
+ uint32_t size;
+ uint64_t base_addr;
+ uint64_t length;
+ uint32_t type;
+} __packed;
+
+/*
+ * Multiboot information structure as passed by the boot loader.
+ */
+struct multiboot_raw_info {
+ uint32_t flags;
+ uint32_t mem_lower;
+ uint32_t mem_upper;
+ uint32_t unused0;
+ uint32_t cmdline;
+ uint32_t mods_count;
+ uint32_t mods_addr;
+ uint32_t shdr_num;
+ uint32_t shdr_size;
+ uint32_t shdr_addr;
+ uint32_t shdr_strndx;
+ uint32_t mmap_length;
+ uint32_t mmap_addr;
+ uint32_t unused1[9];
+} __packed;
+
+/*
+ * Versions of the multiboot structures suitable for use with 64-bit pointers.
+ */
+
+struct multiboot_os_module {
+ void *mod_start;
+ void *mod_end;
+ char *string;
+};
+
+struct multiboot_os_info {
+ uint32_t flags;
+ char *cmdline;
+ struct multiboot_module *mods_addr;
+ uint32_t mods_count;
+};
#endif /* _MACH_I386_MULTIBOOT_H_ */
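[Editor's note: each field group is only meaningful when the matching
MULTIBOOT_LOADER_* flag is set, which is how biosmem_bootstrap() above
chooses between the full memory map and the simple mem_lower/mem_upper
fallback:

if (mbi->flags & MULTIBOOT_LOADER_MMAP)
    biosmem_map_build(mbi);          /* walk mmap_addr..mmap_length */
else
    biosmem_map_build_simple(mbi);   /* mem_lower/mem_upper only */
]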
diff --git a/i386/include/mach/i386/vm_types.h b/i386/include/mach/i386/vm_types.h
index 1439940b..41272e39 100644
--- a/i386/include/mach/i386/vm_types.h
+++ b/i386/include/mach/i386/vm_types.h
@@ -77,6 +77,11 @@ typedef unsigned long vm_offset_t;
typedef vm_offset_t * vm_offset_array_t;
/*
+ * A type for physical addresses.
+ */
+typedef unsigned long phys_addr_t;
+
+/*
* A vm_size_t is the proper type for e.g.
* expressing the difference between two
* vm_offset_t entities.
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
index 249c605c..08362767 100644
--- a/kern/bootstrap.c
+++ b/kern/bootstrap.c
@@ -107,6 +107,20 @@ task_insert_send_right(
return name;
}
+static void
+free_bootstrap_pages(phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page *page;
+
+ while (start < end)
+ {
+ page = vm_page_lookup_pa(start);
+ assert(page != NULL);
+ vm_page_manage(page);
+ start += PAGE_SIZE;
+ }
+}
+
void bootstrap_create(void)
{
int compat;
@@ -265,7 +279,7 @@ void bootstrap_create(void)
/* XXX we could free the memory used
by the boot loader's descriptors and such. */
for (n = 0; n < boot_info.mods_count; n++)
- vm_page_create(bmods[n].mod_start, bmods[n].mod_end);
+ free_bootstrap_pages(bmods[n].mod_start, bmods[n].mod_end);
}
static void
diff --git a/kern/cpu_number.h b/kern/cpu_number.h
index 44bbd641..650f4042 100644
--- a/kern/cpu_number.h
+++ b/kern/cpu_number.h
@@ -37,5 +37,7 @@ int master_cpu; /* 'master' processor - keeps time */
/* cpu number is always 0 on a single processor system */
#define cpu_number() (0)
+#define CPU_L1_SIZE (1 << CPU_L1_SHIFT)
+
#endif /* NCPUS == 1 */
#endif /* _KERN_CPU_NUMBER_H_ */
diff --git a/kern/log2.h b/kern/log2.h
new file mode 100644
index 00000000..0e67701c
--- /dev/null
+++ b/kern/log2.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Integer base 2 logarithm operations.
+ */
+
+#ifndef _KERN_LOG2_H
+#define _KERN_LOG2_H
+
+#include <kern/assert.h>
+
+#ifdef __LP64__
+#define LONG_BIT 64
+#else /* __LP64__ */
+#define LONG_BIT 32
+#endif /* __LP64__ */
+
+static inline unsigned int
+ilog2(unsigned long x)
+{
+ assert(x != 0);
+ return LONG_BIT - __builtin_clzl(x) - 1;
+}
+
+static inline unsigned int
+iorder2(unsigned long size)
+{
+ assert(size != 0);
+
+ if (size == 1)
+ return 0;
+
+ return ilog2(size - 1) + 1;
+}
+
+#endif /* _KERN_LOG2_H */
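[Editor's example values: ilog2 rounds down, while iorder2 rounds up to the
smallest order whose block covers the size:

/* ilog2(1) == 0    iorder2(1) == 0
 * ilog2(8) == 3    iorder2(8) == 3
 * ilog2(9) == 3    iorder2(9) == 4   (9 pages need a 16-page block)
 */
]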
diff --git a/kern/slab.h b/kern/slab.h
index 77db7c1b..5ff3960e 100644
--- a/kern/slab.h
+++ b/kern/slab.h
@@ -48,6 +48,7 @@
#define _KERN_SLAB_H
#include <cache.h>
+#include <kern/cpu_number.h>
#include <kern/lock.h>
#include <kern/list.h>
#include <kern/rbtree.h>
@@ -56,10 +57,6 @@
#include <vm/vm_types.h>
#if SLAB_USE_CPU_POOLS
-/*
- * L1 cache line size.
- */
-#define CPU_L1_SIZE (1 << CPU_L1_SHIFT)
/*
* Per-processor cache of pre-constructed objects.
diff --git a/kern/startup.c b/kern/startup.c
index 30cff5c0..bd296943 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -136,7 +136,7 @@ void setup_main(void)
mapable_time_init();
machine_info.max_cpus = NCPUS;
- machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */
+ machine_info.memory_size = vm_page_mem_size(); /* XXX phys_addr_t -> vm_size_t */
machine_info.avail_cpus = 0;
machine_info.major_version = KERNEL_MAJOR_VERSION;
machine_info.minor_version = KERNEL_MINOR_VERSION;
diff --git a/linux/dev/glue/glue.h b/linux/dev/glue/glue.h
index 5d4f6d88..8cb118cc 100644
--- a/linux/dev/glue/glue.h
+++ b/linux/dev/glue/glue.h
@@ -25,8 +25,8 @@
extern int linux_auto_config;
extern int linux_intr_pri;
-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
-extern void free_contig_mem (vm_page_t);
+extern unsigned long alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
+extern void free_contig_mem (vm_page_t, unsigned);
extern void init_IRQ (void);
extern void restore_IRQ (void);
extern void linux_kmem_init (void);
diff --git a/linux/dev/glue/kmem.c b/linux/dev/glue/kmem.c
index ff052ffc..ed576105 100644
--- a/linux/dev/glue/kmem.c
+++ b/linux/dev/glue/kmem.c
@@ -111,10 +111,8 @@ linux_kmem_init ()
for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE)
{
assert (p->phys_addr < MEM_DMA_LIMIT);
- assert (p->phys_addr + PAGE_SIZE
- == ((vm_page_t) p->pageq.next)->phys_addr);
-
- p = (vm_page_t) p->pageq.next;
+ assert (p->phys_addr + PAGE_SIZE == (p + 1)->phys_addr);
+ p++;
}
pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE;
diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c
index 8737b62c..d69b3fc7 100644
--- a/linux/dev/init/main.c
+++ b/linux/dev/init/main.c
@@ -98,7 +98,7 @@ void
linux_init (void)
{
int addr;
- unsigned memory_start, memory_end;
+ unsigned long memory_start, memory_end;
vm_page_t pages;
/*
@@ -131,9 +131,7 @@ linux_init (void)
/*
* Allocate contiguous memory below 16 MB.
*/
- memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC,
- 16 * 1024 * 1024,
- 0, &pages);
+ memory_start = alloc_contig_mem (CONTIG_ALLOC, 16 * 1024 * 1024, 0, &pages);
if (memory_start == 0)
panic ("linux_init: alloc_contig_mem failed");
memory_end = memory_start + CONTIG_ALLOC;
@@ -147,14 +145,6 @@ linux_init (void)
panic ("linux_init: ran out memory");
/*
- * Free unused memory.
- */
- while (pages && phystokv(pages->phys_addr) < round_page (memory_start))
- pages = (vm_page_t) pages->pageq.next;
- if (pages)
- free_contig_mem (pages);
-
- /*
* Initialize devices.
*/
#ifdef CONFIG_INET
@@ -182,140 +172,31 @@ linux_init (void)
/*
* Allocate contiguous memory with the given constraints.
- * This routine is horribly inefficient but it is presently
- * only used during initialization so it's not that bad.
*/
-void *
+unsigned long
alloc_contig_mem (unsigned size, unsigned limit,
unsigned mask, vm_page_t * pages)
{
- int i, j, bits_len;
- unsigned *bits, len;
- void *m;
- vm_page_t p, page_list, tail, prev;
- vm_offset_t addr = 0, max_addr;
-
- if (size == 0)
- return (NULL);
- size = round_page (size);
- if ((size >> PAGE_SHIFT) > vm_page_free_count)
- return (NULL);
-
- /* Allocate bit array. */
- max_addr = phys_last_addr;
- if (max_addr > limit)
- max_addr = limit;
- bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW)
- * sizeof (unsigned));
- bits = (unsigned *) kalloc (bits_len);
- if (!bits)
- return (NULL);
- memset (bits, 0, bits_len);
+ vm_page_t p;
- /*
- * Walk the page free list and set a bit for every usable page.
- */
- simple_lock (&vm_page_queue_free_lock);
- p = vm_page_queue_free;
- while (p)
- {
- if (p->phys_addr < limit)
- (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW]
- |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW));
- p = (vm_page_t) p->pageq.next;
- }
+ p = vm_page_grab_contig(size, VM_PAGE_SEL_DMA);
- /*
- * Scan bit array for contiguous pages.
- */
- len = 0;
- m = NULL;
- for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++)
- for (j = 0; len < size && j < NBPW; j++)
- if (!(bits[i] & (1 << j)))
- {
- len = 0;
- m = NULL;
- }
- else
- {
- if (len == 0)
- {
- addr = ((vm_offset_t) (i * NBPW + j)
- << PAGE_SHIFT);
- if ((addr & mask) == 0)
- {
- len += PAGE_SIZE;
- m = (void *) addr;
- }
- }
- else
- len += PAGE_SIZE;
- }
-
- if (len != size)
- {
- simple_unlock (&vm_page_queue_free_lock);
- kfree ((vm_offset_t) bits, bits_len);
- return (NULL);
- }
-
- /*
- * Remove pages from free list
- * and construct list to return to caller.
- */
- page_list = NULL;
- for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE)
- {
- prev = NULL;
- for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next)
- {
- if (p->phys_addr == addr)
- break;
- prev = p;
- }
- if (!p)
- panic ("alloc_contig_mem: page not on free list");
- if (prev)
- prev->pageq.next = p->pageq.next;
- else
- vm_page_queue_free = (vm_page_t) p->pageq.next;
- p->free = FALSE;
- p->pageq.next = NULL;
- if (!page_list)
- page_list = tail = p;
- else
- {
- tail->pageq.next = (queue_entry_t) p;
- tail = p;
- }
- vm_page_free_count--;
- }
+ if (p == NULL)
+ return 0;
- simple_unlock (&vm_page_queue_free_lock);
- kfree ((vm_offset_t) bits, bits_len);
if (pages)
- *pages = page_list;
- return (void *) phystokv(m);
+ *pages = p;
+
+ return phystokv(vm_page_to_pa(p));
}
/*
* Free memory allocated by alloc_contig_mem.
*/
void
-free_contig_mem (vm_page_t pages)
+free_contig_mem (vm_page_t pages, unsigned size)
{
- int i;
- vm_page_t p;
-
- for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++)
- p->free = TRUE;
- p->free = TRUE;
- simple_lock (&vm_page_queue_free_lock);
- vm_page_free_count += i + 1;
- p->pageq.next = (queue_entry_t) vm_page_queue_free;
- vm_page_queue_free = pages;
- simple_unlock (&vm_page_queue_free_lock);
+ vm_page_free_contig(pages, size);
}
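[Editor's sketch of the revised contract, mirroring the linux_init() call
above; callers must now remember the allocation size in order to free it,
and the panic message here is invented:

vm_page_t pages;
unsigned long va;

va = alloc_contig_mem (CONTIG_ALLOC, 16 * 1024 * 1024, 0, &pages);
if (va == 0)
    panic ("out of contiguous DMA memory");
/* ... use the direct mapping at va ... */
free_contig_mem (pages, CONTIG_ALLOC);
]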
/* This is the number of bits of precision for the loops_per_second. Each
diff --git a/linux/pcmcia-cs/glue/ds.c b/linux/pcmcia-cs/glue/ds.c
index 8f88b553..cc4b92b5 100644
--- a/linux/pcmcia-cs/glue/ds.c
+++ b/linux/pcmcia-cs/glue/ds.c
@@ -24,12 +24,6 @@
/* This file is included from linux/pcmcia-cs/modules/ds.c. */
/*
- * Prepare the namespace for inclusion of Mach header files.
- */
-
-#undef PAGE_SHIFT
-
-/*
* This is really ugly. But this is glue code, so... It's about the `kfree'
* symbols in <linux/malloc.h> and <kern/kalloc.h>.
*/
diff --git a/vm/pmap.h b/vm/pmap.h
index 134f9c64..9bbcdc32 100644
--- a/vm/pmap.h
+++ b/vm/pmap.h
@@ -67,9 +67,6 @@
extern vm_offset_t pmap_steal_memory(vm_size_t);
/* During VM initialization, report remaining unused physical pages. */
extern unsigned int pmap_free_pages(void);
-/* During VM initialization, use remaining physical pages to allocate page
- * frames. */
-extern void pmap_startup(vm_offset_t *, vm_offset_t *);
/* Initialization, after kernel runs in virtual memory. */
extern void pmap_init(void);
@@ -80,18 +77,14 @@ extern void pmap_init(void);
* Otherwise, it must implement
* pmap_free_pages
* pmap_virtual_space
- * pmap_next_page
* pmap_init
- * and vm/vm_resident.c implements pmap_steal_memory and pmap_startup
- * using pmap_free_pages, pmap_next_page, pmap_virtual_space,
- * and pmap_enter. pmap_free_pages may over-estimate the number
- * of unused physical pages, and pmap_next_page may return FALSE
- * to indicate that there are no more unused pages to return.
+ * and vm/vm_resident.c implements pmap_steal_memory using
+ * pmap_free_pages, pmap_virtual_space, and pmap_enter.
+ *
+ * pmap_free_pages may over-estimate the number of unused physical pages.
* However, for best performance pmap_free_pages should be accurate.
*/
-/* During VM initialization, return the next unused physical page. */
-extern boolean_t pmap_next_page(vm_offset_t *);
/* During VM initialization, report virtual space available for the kernel. */
extern void pmap_virtual_space(vm_offset_t *, vm_offset_t *);
#endif /* MACHINE_PAGES */
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index 46779f63..4d674174 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -607,7 +607,7 @@ vm_fault_return_t vm_fault_page(
* won't block for pages.
*/
- if (m->fictitious && !vm_page_convert(m, FALSE)) {
+ if (m->fictitious && !vm_page_convert(&m, FALSE)) {
VM_PAGE_FREE(m);
vm_fault_cleanup(object, first_m);
return(VM_FAULT_MEMORY_SHORTAGE);
@@ -725,7 +725,7 @@ vm_fault_return_t vm_fault_page(
assert(m->object == object);
first_m = VM_PAGE_NULL;
- if (m->fictitious && !vm_page_convert(m, !object->internal)) {
+ if (m->fictitious && !vm_page_convert(&m, !object->internal)) {
VM_PAGE_FREE(m);
vm_fault_cleanup(object, VM_PAGE_NULL);
return(VM_FAULT_MEMORY_SHORTAGE);
diff --git a/vm/vm_init.c b/vm/vm_init.c
index 3d1081cc..23d5d46e 100644
--- a/vm/vm_init.c
+++ b/vm/vm_init.c
@@ -83,4 +83,5 @@ void vm_mem_init(void)
{
vm_object_init();
memory_object_proxy_init();
+ vm_page_info_all();
}
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 6666fcba..eda03c65 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -2891,7 +2891,8 @@ vm_object_page_map(
VM_PAGE_FREE(old_page);
}
- vm_page_init(m, addr);
+ vm_page_init(m);
+ m->phys_addr = addr;
m->private = TRUE; /* don`t free page */
m->wire_count = 1;
vm_page_lock_queues();
diff --git a/vm/vm_page.c b/vm/vm_page.c
new file mode 100644
index 00000000..a539ab41
--- /dev/null
+++ b/vm/vm_page.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works :
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-CPU pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ */
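[Editor's example of a buddy split, with invented sizes: with 11 free lists,
orders run 0 through 10, i.e. blocks of 1 to 1024 pages. Allocating an
order-2 block (4 pages) when the smallest free block is order 4 (16 pages)
splits it twice: an order-3 buddy goes back on free_lists[3], an order-2
buddy on free_lists[2], and the remaining order-2 block is returned to the
caller; see vm_page_seg_alloc_from_buddy() below.]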
+
+#include <string.h>
+#include <kern/assert.h>
+#include <kern/cpu_number.h>
+#include <kern/debug.h>
+#include <kern/list.h>
+#include <kern/lock.h>
+#include <kern/macros.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <mach/vm_param.h>
+#include <machine/pmap.h>
+#include <sys/types.h>
+#include <vm/vm_page.h>
+
+#define __init
+#define __initdata
+#define __read_mostly
+
+#define thread_pin()
+#define thread_unpin()
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PAGE_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PAGE_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PAGE_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2
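[Editor's note, to make the sizing concrete with an invented segment size: a
512 MiB segment holds 131072 four-KiB pages, so each CPU pool is sized
131072 / 1024 = 128 pages, exactly the VM_PAGE_CPU_POOL_MAX_SIZE cap, and
each refill or drain transfers 128 / 2 = 64 pages between the pool and the
buddy free lists.]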
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_page_cpu_pool {
+ simple_lock_data_t lock;
+ int size;
+ int transfer_size;
+ int nr_pages;
+ struct list pages;
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Special order value for pages that aren't in a free list. Such pages are
+ * either allocated, or part of a free block of pages but not the head page.
+ */
+#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1)
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_page_free_list {
+ unsigned long size;
+ struct list blocks;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PAGE_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ */
+struct vm_page_seg {
+ struct vm_page_cpu_pool cpu_pools[NCPUS];
+
+ phys_addr_t start;
+ phys_addr_t end;
+ struct vm_page *pages;
+ struct vm_page *pages_end;
+ simple_lock_data_t lock;
+ struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
+ unsigned long nr_free_pages;
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_page_boot_seg {
+ phys_addr_t start;
+ phys_addr_t end;
+ phys_addr_t avail_start;
+ phys_addr_t avail_end;
+};
+
+static int vm_page_is_ready __read_mostly;
+
+/*
+ * Segment table.
+ *
+ * The system supports a maximum of 4 segments:
+ * - DMA: suitable for DMA
+ * - DMA32: suitable for DMA when devices support 32-bit addressing
+ * - DIRECTMAP: direct physical mapping, allows direct access from
+ * the kernel with a simple offset translation
+ * - HIGHMEM: must be mapped before it can be accessed
+ *
+ * Segments are ordered by priority, 0 being the lowest priority. Their
+ * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments
+ * may actually be aliases for others, e.g. if DMA is always possible from
+ * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
+ * in which case the segment table contains DIRECTMAP and HIGHMEM only.
+ */
+static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
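For illustration only (all addresses made up), a 32-bit machine with 2 GiB of RAM, on which DMA32 aliases DIRECTMAP, might end up with the following table:

    /*
     * Hypothetical segment table (sketch):
     *
     *   index  name       start        end
     *   0      DMA        0x00000000   0x01000000  (16 MiB ISA DMA limit)
     *   1      DIRECTMAP  0x01000000   0x38000000  (also serves DMA32)
     *   2      HIGHMEM    0x38000000   0x80000000
     *
     * vm_page_segs_size == 3 in this scenario.
     */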
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_page_segs_size __read_mostly;
+
+static void __init
+vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
+{
+ memset(page, 0, sizeof(*page));
+ vm_page_init(page); /* vm_resident members */
+ page->type = VM_PT_RESERVED;
+ page->seg_index = seg_index;
+ page->order = VM_PAGE_ORDER_UNLISTED;
+ page->phys_addr = pa;
+}
+
+void
+vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++)
+ page[i].type = type;
+}
+
+static void __init
+vm_page_free_list_init(struct vm_page_free_list *free_list)
+{
+ free_list->size = 0;
+ list_init(&free_list->blocks);
+}
+
+static inline void
+vm_page_free_list_insert(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size++;
+ list_insert_head(&free_list->blocks, &page->node);
+}
+
+static inline void
+vm_page_free_list_remove(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order != VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size--;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
+{
+    struct vm_page_free_list *free_list = free_list; /* silence -Wuninitialized */
+ struct vm_page *page, *buddy;
+ unsigned int i;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
+ free_list = &seg->free_lists[i];
+
+ if (free_list->size != 0)
+ break;
+ }
+
+ if (i == VM_PAGE_NR_FREE_LISTS)
+ return NULL;
+
+ page = list_first_entry(&free_list->blocks, struct vm_page, node);
+ vm_page_free_list_remove(free_list, page);
+ page->order = VM_PAGE_ORDER_UNLISTED;
+
+ while (i > order) {
+ i--;
+ buddy = &page[1 << i];
+ vm_page_free_list_insert(&seg->free_lists[i], buddy);
+ buddy->order = i;
+ }
+
+ seg->nr_free_pages -= (1 << order);
+ return page;
+}
+
+static void
+vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page *buddy;
+ phys_addr_t pa, buddy_pa;
+ unsigned int nr_pages;
+
+ assert(page >= seg->pages);
+ assert(page < seg->pages_end);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ nr_pages = (1 << order);
+ pa = page->phys_addr;
+
+    /*
+     * Coalesce the block with its buddy while the buddy lies within
+     * the segment and is itself a whole free block of the same order.
+     */
+    while (order < (VM_PAGE_NR_FREE_LISTS - 1)) {
+ buddy_pa = pa ^ vm_page_ptoa(1 << order);
+
+ if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+ break;
+
+ buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+ if (buddy->order != order)
+ break;
+
+ vm_page_free_list_remove(&seg->free_lists[order], buddy);
+ buddy->order = VM_PAGE_ORDER_UNLISTED;
+ order++;
+ pa &= -vm_page_ptoa(1 << order);
+ page = &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ vm_page_free_list_insert(&seg->free_lists[order], page);
+ page->order = order;
+ seg->nr_free_pages += nr_pages;
+}
+
+static void __init
+vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size)
+{
+ simple_lock_init(&cpu_pool->lock);
+ cpu_pool->size = size;
+ cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1)
+ / VM_PAGE_CPU_POOL_TRANSFER_RATIO;
+ cpu_pool->nr_pages = 0;
+ list_init(&cpu_pool->pages);
+}
+
+static inline struct vm_page_cpu_pool *
+vm_page_cpu_pool_get(struct vm_page_seg *seg)
+{
+ return &seg->cpu_pools[cpu_number()];
+}
+
+static inline struct vm_page *
+vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool)
+{
+ struct vm_page *page;
+
+ assert(cpu_pool->nr_pages != 0);
+ cpu_pool->nr_pages--;
+ page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+ list_remove(&page->node);
+ return page;
+}
+
+static inline void
+vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
+{
+ assert(cpu_pool->nr_pages < cpu_pool->size);
+ cpu_pool->nr_pages++;
+ list_insert_head(&cpu_pool->pages, &page->node);
+}
+
+static int
+vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == 0);
+
+ simple_lock(&seg->lock);
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ page = vm_page_seg_alloc_from_buddy(seg, 0);
+
+ if (page == NULL)
+ break;
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ }
+
+ simple_unlock(&seg->lock);
+
+ return i;
+}
+
+static void
+vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == cpu_pool->size);
+
+ simple_lock(&seg->lock);
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+static phys_addr_t __init
+vm_page_seg_size(struct vm_page_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static int __init
+vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
+{
+ phys_addr_t size;
+
+ size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO;
+
+ if (size == 0)
+ size = 1;
+ else if (size > VM_PAGE_CPU_POOL_MAX_SIZE)
+ size = VM_PAGE_CPU_POOL_MAX_SIZE;
+
+ return size;
+}
+
+static void __init
+vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
+ struct vm_page *pages)
+{
+ phys_addr_t pa;
+ int pool_size;
+ unsigned int i;
+
+ seg->start = start;
+ seg->end = end;
+ pool_size = vm_page_seg_compute_pool_size(seg);
+
+ for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+ vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+ seg->pages = pages;
+ seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg));
+ simple_lock_init(&seg->lock);
+
+ for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+ vm_page_free_list_init(&seg->free_lists[i]);
+
+ seg->nr_free_pages = 0;
+ i = seg - vm_page_segs;
+
+ for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+ vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
+}
+
+static struct vm_page *
+vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order,
+ unsigned short type)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+ struct vm_page *page;
+ int filled;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == 0) {
+ filled = vm_page_cpu_pool_fill(cpu_pool, seg);
+
+ if (!filled) {
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ return NULL;
+ }
+ }
+
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+    } else {
+        simple_lock(&seg->lock);
+        page = vm_page_seg_alloc_from_buddy(seg, order);
+        simple_unlock(&seg->lock);
+
+        if (page == NULL)
+            return NULL;
+    }
+
+ assert(page->type == VM_PT_FREE);
+ vm_page_set_type(page, order, type);
+ return page;
+}
+
+static void
+vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+
+ assert(page->type != VM_PT_FREE);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ vm_page_set_type(page, order, VM_PT_FREE);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == cpu_pool->size)
+ vm_page_cpu_pool_drain(cpu_pool, seg);
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ } else {
+ simple_lock(&seg->lock);
+ vm_page_seg_free_to_buddy(seg, page, order);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
+ phys_addr_t avail_start, phys_addr_t avail_end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(vm_page_aligned(avail_start));
+ assert(vm_page_aligned(avail_end));
+ assert(start < end);
+ assert(start <= avail_start);
+ assert(avail_end <= end);
+ assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+ seg = &vm_page_boot_segs[seg_index];
+ seg->start = start;
+ seg->end = end;
+ seg->avail_start = avail_start;
+ seg->avail_end = avail_end;
+ vm_page_segs_size++;
+}
+
+int
+vm_page_ready(void)
+{
+ return vm_page_is_ready;
+}
+
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+ unsigned int seg_index;
+
+ switch (selector) {
+ case VM_PAGE_SEL_DMA:
+ seg_index = VM_PAGE_SEG_DMA;
+ break;
+ case VM_PAGE_SEL_DMA32:
+ seg_index = VM_PAGE_SEG_DMA32;
+ break;
+ case VM_PAGE_SEL_DIRECTMAP:
+ seg_index = VM_PAGE_SEG_DIRECTMAP;
+ break;
+ case VM_PAGE_SEL_HIGHMEM:
+ seg_index = VM_PAGE_SEG_HIGHMEM;
+ break;
+ default:
+ panic("vm_page: invalid selector");
+ }
+
+ return MIN(vm_page_segs_size - 1, seg_index);
+}
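The MIN() clamp is what makes high selectors degrade gracefully when fewer segments are loaded. A sketch of the effect on a configuration where only two segments are loaded (vm_page_segs_size == 2, DMA32 aliasing DIRECTMAP):

    /*
     *   vm_page_select_alloc_seg(VM_PAGE_SEL_DMA)       == 0
     *   vm_page_select_alloc_seg(VM_PAGE_SEL_DMA32)     == 1
     *   vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP) == 1
     *   vm_page_select_alloc_seg(VM_PAGE_SEL_HIGHMEM)   == 1  (clamped)
     */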
+
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+ return (seg->end != 0);
+}
+
+static void __init
+vm_page_check_boot_segs(void)
+{
+ unsigned int i;
+ int expect_loaded;
+
+ if (vm_page_segs_size == 0)
+ panic("vm_page: no physical memory loaded");
+
+ for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+ expect_loaded = (i < vm_page_segs_size);
+
+ if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
+ continue;
+
+ panic("vm_page: invalid boot segment table");
+ }
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+ return seg->avail_end - seg->avail_start;
+}
+
+unsigned long __init
+vm_page_bootalloc(size_t size)
+{
+ struct vm_page_boot_seg *seg;
+ phys_addr_t pa;
+ unsigned int i;
+
+    /*
+     * Walk the segments in decreasing priority order; the unsigned
+     * index wraps around past zero, which terminates the loop.
+     */
+    for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+         i < vm_page_segs_size;
+         i--) {
+ seg = &vm_page_boot_segs[i];
+
+ if (size <= vm_page_boot_seg_avail_size(seg)) {
+ pa = seg->avail_start;
+ seg->avail_start += vm_page_round(size);
+ return pa;
+ }
+ }
+
+ panic("vm_page: no physical memory available");
+}
+
+void __init
+vm_page_setup(void)
+{
+ struct vm_page_boot_seg *boot_seg;
+ struct vm_page_seg *seg;
+ struct vm_page *table, *page, *end;
+ size_t nr_pages, table_size;
+ unsigned long va;
+ unsigned int i;
+ phys_addr_t pa;
+
+ vm_page_check_boot_segs();
+
+ /*
+ * Compute the page table size.
+ */
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++)
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+
+ table_size = vm_page_round(nr_pages * sizeof(struct vm_page));
+ printf("vm_page: page table size: %lu entries (%luk)\n", nr_pages,
+ table_size >> 10);
+ table = (struct vm_page *)pmap_steal_memory(table_size);
+ va = (unsigned long)table;
+
+    /*
+     * Initialize the segments, associating them to the page table. When
+     * a segment is initialized, all its pages are initially marked as
+     * allocated (reserved). The available pages are then released,
+     * which populates the free lists.
+     */
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ boot_seg = &vm_page_boot_segs[i];
+ vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table);
+ page = seg->pages + vm_page_atop(boot_seg->avail_start
+ - boot_seg->start);
+ end = seg->pages + vm_page_atop(boot_seg->avail_end
+ - boot_seg->start);
+
+ while (page < end) {
+ page->type = VM_PT_FREE;
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ page++;
+
+            /* XXX Only direct-mapped pages are accounted as free for now */
+            if (i <= VM_PAGE_SEG_DIRECTMAP)
+                vm_page_free_count++;
+ }
+
+ table += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ while (va < (unsigned long)table) {
+ pa = pmap_extract(kernel_pmap, va);
+ page = vm_page_lookup_pa(pa);
+ assert((page != NULL) && (page->type == VM_PT_RESERVED));
+ page->type = VM_PT_TABLE;
+ va += PAGE_SIZE;
+ }
+
+ vm_page_is_ready = 1;
+}
+
+void __init
+vm_page_manage(struct vm_page *page)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+ assert(page->type == VM_PT_RESERVED);
+
+ vm_page_set_type(page, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0);
+}
+
+struct vm_page *
+vm_page_lookup_pa(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end))
+ return &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ return NULL;
+}
+
+struct vm_page *
+vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+{
+ struct vm_page *page;
+ unsigned int i;
+
+    /* Try segments in decreasing priority order (unsigned wrap ends the loop) */
+    for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+ page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+ if (page != NULL)
+ return page;
+ }
+
+ if (type == VM_PT_PMAP)
+ panic("vm_page: unable to allocate pmap page");
+
+ return NULL;
+}
+
+void
+vm_page_free_pa(struct vm_page *page, unsigned int order)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+
+ vm_page_seg_free(&vm_page_segs[page->seg_index], page, order);
+}
+
+const char *
+vm_page_seg_name(unsigned int seg_index)
+{
+ /* Don't use a switch statement since segments can be aliased */
+ if (seg_index == VM_PAGE_SEG_HIGHMEM)
+ return "HIGHMEM";
+ else if (seg_index == VM_PAGE_SEG_DIRECTMAP)
+ return "DIRECTMAP";
+ else if (seg_index == VM_PAGE_SEG_DMA32)
+ return "DMA32";
+ else if (seg_index == VM_PAGE_SEG_DMA)
+ return "DMA";
+ else
+ panic("vm_page: invalid segment index");
+}
+
+void
+vm_page_info_all(void)
+{
+ struct vm_page_seg *seg;
+ unsigned long pages;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ pages = (unsigned long)(seg->pages_end - seg->pages);
+ printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
+ vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
+ seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ }
+}
+
+phys_addr_t
+vm_page_mem_size(void)
+{
+ phys_addr_t total;
+ unsigned int i;
+
+ total = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+        /* XXX Highmem segments are not accounted for yet */
+        if (i > VM_PAGE_SEG_DIRECTMAP)
+            continue;
+
+ total += vm_page_seg_size(&vm_page_segs[i]);
+ }
+
+ return total;
+}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index e6a8c497..7607aad0 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -36,11 +36,12 @@
#include <mach/boolean.h>
#include <mach/vm_prot.h>
-#include <mach/vm_param.h>
+#include <machine/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_types.h>
#include <kern/queue.h>
#include <kern/lock.h>
+#include <kern/log2.h>
#include <kern/macros.h>
#include <kern/sched_prim.h> /* definitions of wait/wakeup */
@@ -76,6 +77,22 @@
*/
struct vm_page {
+ /* Members used in the vm_page module only */
+ struct list node;
+ unsigned short type;
+ unsigned short seg_index;
+ unsigned short order;
+
+ /*
+ * This member is used throughout the code and may only change for
+ * fictitious pages.
+ */
+ phys_addr_t phys_addr;
+
+    /*
+     * An empty struct delimits the members private to the vm_page
+     * module (above) from the generic page state (below).
+     */
+    struct {} vm_page_header;
+#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header)
+
queue_chain_t pageq; /* queue info for FIFO
* queue or free list (P) */
queue_chain_t listq; /* all pages in same object (O) */
@@ -110,8 +127,6 @@ struct vm_page {
* without having data. (O)
* [See vm_object_overwrite] */
- vm_offset_t phys_addr; /* Physical address of page, passed
- * to pmap_enter (read-only) */
vm_prot_t page_lock; /* Uses prohibited by data manager (O) */
vm_prot_t unlock_request; /* Outstanding unlock request (O) */
};
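The empty vm_page_header member splits the descriptor in two: allocator-private state before it, generic Mach page state after it. vm_page_convert() in vm_resident.c relies on this layout to transplant the generic state between two descriptors; a sketch of the idiom, with dst and src as illustrative names:

    /* Copy the generic page state of src into dst, preserving dst's
     * allocator-private members (node, type, seg_index, order,
     * phys_addr). */
    memcpy(&dst->vm_page_header, &src->vm_page_header,
           sizeof(*src) - VM_PAGE_HEADER_SIZE);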
@@ -140,8 +155,6 @@ struct vm_page {
*/
extern
-vm_page_t vm_page_queue_free; /* memory free queue */
-extern
vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
extern
queue_head_t vm_page_queue_active; /* active memory queue */
@@ -196,25 +209,21 @@ extern void vm_page_bootstrap(
vm_offset_t *endp);
extern void vm_page_module_init(void);
-extern void vm_page_create(
- vm_offset_t start,
- vm_offset_t end);
extern vm_page_t vm_page_lookup(
vm_object_t object,
vm_offset_t offset);
extern vm_page_t vm_page_grab_fictitious(void);
-extern void vm_page_release_fictitious(vm_page_t);
-extern boolean_t vm_page_convert(vm_page_t, boolean_t);
+extern boolean_t vm_page_convert(vm_page_t *, boolean_t);
extern void vm_page_more_fictitious(void);
extern vm_page_t vm_page_grab(boolean_t);
-extern void vm_page_release(vm_page_t, boolean_t);
+extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int);
+extern void vm_page_free_contig(vm_page_t, vm_size_t);
extern void vm_page_wait(void (*)(void));
extern vm_page_t vm_page_alloc(
vm_object_t object,
vm_offset_t offset);
extern void vm_page_init(
- vm_page_t mem,
- vm_offset_t phys_addr);
+ vm_page_t mem);
extern void vm_page_free(vm_page_t);
extern void vm_page_activate(vm_page_t);
extern void vm_page_deactivate(vm_page_t);
@@ -312,4 +321,189 @@ extern unsigned int vm_page_info(
} \
MACRO_END
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Physical page management.
+ */
+
+/*
+ * Address/page conversion and rounding macros (macros rather than inline
+ * functions so they can be used on both virtual and physical addresses,
+ * which may not have the same type size).
+ */
+#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
+#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
+#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
+#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
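Worked examples of these macros, assuming 4 KiB pages (PAGE_SHIFT == 12):

    vm_page_atop(0x5fff)     /* == 5, the containing page frame number */
    vm_page_ptoa(5)          /* == 0x5000 */
    vm_page_trunc(0x5fff)    /* == 0x5000 */
    vm_page_round(0x5001)    /* == 0x6000 */
    vm_page_aligned(0x5000)  /* true */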
+
+/*
+ * Segment selectors.
+ *
+ * Selector-to-segment-list translation table:
+ * DMA DMA
+ * DMA32 DMA32 DMA
+ * DIRECTMAP DIRECTMAP DMA32 DMA
+ * HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA
+ */
+#define VM_PAGE_SEL_DMA 0
+#define VM_PAGE_SEL_DMA32 1
+#define VM_PAGE_SEL_DIRECTMAP 2
+#define VM_PAGE_SEL_HIGHMEM 3
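Each row of the table, read left to right, is the order in which vm_page_alloc_pa() tries the segments for that selector; for example:

    /*
     * vm_page_alloc_pa(order, VM_PAGE_SEL_DIRECTMAP, type) tries the
     * DIRECTMAP segment first, then falls back to DMA32, then DMA,
     * so the more constrained segments are consumed only when the
     * preferred ones are exhausted.
     */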
+
+/*
+ * Page usage types.
+ *
+ * Failing to allocate pmap pages will cause a kernel panic.
+ * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
+ * pages.
+ */
+#define VM_PT_FREE 0 /* Page unused */
+#define VM_PT_RESERVED 1 /* Page reserved at boot time */
+#define VM_PT_TABLE 2 /* Page is part of the page table */
+#define VM_PT_PMAP 3 /* Page stores pmap-specific data */
+#define VM_PT_KMEM 4 /* Page is part of a kmem slab */
+#define VM_PT_KERNEL 5 /* Type for generic kernel allocations */
+
+static inline unsigned short
+vm_page_type(const struct vm_page *page)
+{
+ return page->type;
+}
+
+void vm_page_set_type(struct vm_page *page, unsigned int order,
+ unsigned short type);
+
+static inline unsigned int
+vm_page_order(size_t size)
+{
+ return iorder2(vm_page_atop(vm_page_round(size)));
+}
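For example, assuming 4 KiB pages and iorder2() from kern/log2.h returning the ceiling of the base-2 logarithm:

    vm_page_order(4096)   /* == 0: one page */
    vm_page_order(8192)   /* == 1: two pages */
    vm_page_order(9000)   /* == 2: rounds up to 3 pages, then to 4 */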
+
+static inline phys_addr_t
+vm_page_to_pa(const struct vm_page *page)
+{
+ return page->phys_addr;
+}
+
+#if 0
+static inline unsigned long
+vm_page_direct_va(phys_addr_t pa)
+{
+ assert(pa < VM_PAGE_DIRECTMAP_LIMIT);
+ return ((unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS);
+}
+
+static inline phys_addr_t
+vm_page_direct_pa(unsigned long va)
+{
+ assert(va >= VM_MIN_DIRECTMAP_ADDRESS);
+ assert(va < VM_MAX_DIRECTMAP_ADDRESS);
+ return (va - VM_MIN_DIRECTMAP_ADDRESS);
+}
+
+static inline void *
+vm_page_direct_ptr(const struct vm_page *page)
+{
+ return (void *)vm_page_direct_va(vm_page_to_pa(page));
+}
+#endif
+
+/*
+ * Load physical memory into the vm_page module at boot time.
+ *
+ * The avail_start and avail_end parameters are used to maintain a simple
+ * heap for bootstrap allocations.
+ *
+ * All addresses must be page-aligned. Segments can be loaded in any order.
+ */
+void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
+ phys_addr_t avail_start, phys_addr_t avail_end);
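A sketch of how architecture-specific boot code might call this, with made-up addresses, on a 32-bit configuration where DMA32 aliases DIRECTMAP; the gap between start and avail_start stands for memory already claimed by the kernel image:

    /* Illustrative only: a 16 MiB DMA segment whose first MiB is
     * reserved, then a fully available direct-mapped segment. */
    vm_page_load(VM_PAGE_SEG_DMA, 0x0, 0x1000000, 0x100000, 0x1000000);
    vm_page_load(VM_PAGE_SEG_DIRECTMAP, 0x1000000, 0x38000000,
                 0x1000000, 0x38000000);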
+
+/*
+ * Return true if the vm_page module is completely initialized, false
+ * otherwise, in which case only vm_page_bootalloc() can be used for
+ * allocations.
+ */
+int vm_page_ready(void);
+
+/*
+ * Early allocation function.
+ *
+ * This function is used by the vm_resident module to implement
+ * pmap_steal_memory. It can be used after physical segments have been loaded
+ * and before the vm_page module is initialized.
+ */
+unsigned long vm_page_bootalloc(size_t size);
+
+/*
+ * Set up the vm_page module.
+ *
+ * Architecture-specific code must have loaded segments before calling this
+ * function. Segments must comply with the selector-to-segment-list table,
+ * e.g. HIGHMEM is loaded if and only if DIRECTMAP, DMA32 and DMA are loaded,
+ * notwithstanding segment aliasing.
+ *
+ * Once this function returns, the vm_page module is ready, and normal
+ * allocation functions can be used.
+ */
+void vm_page_setup(void);
+
+/*
+ * Make the given page managed by the vm_page module.
+ *
+ * If additional memory can be made usable after the VM system is initialized,
+ * it should be reported through this function.
+ */
+void vm_page_manage(struct vm_page *page);
+
+/*
+ * Return the page descriptor for the given physical address.
+ */
+struct vm_page * vm_page_lookup_pa(phys_addr_t pa);
+
+/*
+ * Allocate a block of 2^order physical pages.
+ *
+ * The selector is used to determine the segments from which allocation can
+ * be attempted.
+ */
+struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector,
+ unsigned short type);
+
+/*
+ * Release a block of 2^order physical pages.
+ */
+void vm_page_free_pa(struct vm_page *page, unsigned int order);
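A minimal usage sketch of this pair: grab a block of four physically contiguous pages (order 2) from the direct-mapped segments, then release it:

    struct vm_page *pages;

    pages = vm_page_alloc_pa(2, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);

    if (pages != NULL) {
        /* pages[0] .. pages[3] are physically contiguous;
         * vm_page_to_pa(&pages[0]) is the base address. */
        vm_page_free_pa(pages, 2);
    }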
+
+/*
+ * Return the name of the given segment.
+ */
+const char * vm_page_seg_name(unsigned int seg_index);
+
+/*
+ * Display internal information about the module.
+ */
+void vm_page_info_all(void);
+
+/*
+ * Return the total amount of physical memory.
+ */
+phys_addr_t vm_page_mem_size(void);
+
#endif /* _VM_VM_PAGE_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index c70fa734..9fd64918 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -72,7 +72,7 @@
/*
* These variables record the values returned by vm_page_bootstrap,
* for debugging purposes. The implementation of pmap_steal_memory
- * and pmap_startup here also uses them internally.
+ * here also uses them internally.
*/
vm_offset_t virtual_space_start;
@@ -95,21 +95,6 @@ vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
unsigned int vm_page_bucket_count = 0; /* How big is array? */
unsigned int vm_page_hash_mask; /* Mask for hash function */
-/*
- * Resident page structures are initialized from
- * a template (see vm_page_alloc).
- *
- * When adding a new field to the virtual memory
- * object structure, be sure to add initialization
- * (see vm_page_bootstrap).
- */
-struct vm_page vm_page_template;
-
-/*
- * Resident pages that represent real memory
- * are allocated from a free list.
- */
-vm_page_t vm_page_queue_free;
vm_page_t vm_page_queue_fictitious;
decl_simple_lock_data(,vm_page_queue_free_lock)
unsigned int vm_page_free_wanted;
@@ -192,48 +177,15 @@ void vm_page_bootstrap(
vm_offset_t *startp,
vm_offset_t *endp)
{
- vm_page_t m;
int i;
/*
- * Initialize the vm_page template.
- */
-
- m = &vm_page_template;
- m->object = VM_OBJECT_NULL; /* reset later */
- m->offset = 0; /* reset later */
- m->wire_count = 0;
-
- m->inactive = FALSE;
- m->active = FALSE;
- m->laundry = FALSE;
- m->free = FALSE;
- m->external = FALSE;
-
- m->busy = TRUE;
- m->wanted = FALSE;
- m->tabled = FALSE;
- m->fictitious = FALSE;
- m->private = FALSE;
- m->absent = FALSE;
- m->error = FALSE;
- m->dirty = FALSE;
- m->precious = FALSE;
- m->reference = FALSE;
-
- m->phys_addr = 0; /* reset later */
-
- m->page_lock = VM_PROT_NONE;
- m->unlock_request = VM_PROT_NONE;
-
- /*
* Initialize the page queues.
*/
simple_lock_init(&vm_page_queue_free_lock);
simple_lock_init(&vm_page_queue_lock);
- vm_page_queue_free = VM_PAGE_NULL;
vm_page_queue_fictitious = VM_PAGE_NULL;
queue_init(&vm_page_queue_active);
queue_init(&vm_page_queue_inactive);
@@ -280,15 +232,8 @@ void vm_page_bootstrap(
simple_lock_init(&bucket->lock);
}
- /*
- * Machine-dependent code allocates the resident page table.
- * It uses vm_page_init to initialize the page frames.
- * The code also returns to us the virtual space available
- * to the kernel. We don't trust the pmap module
- * to get the alignment right.
- */
+ vm_page_setup();
- pmap_startup(&virtual_space_start, &virtual_space_end);
virtual_space_start = round_page(virtual_space_start);
virtual_space_end = trunc_page(virtual_space_end);
@@ -301,8 +246,8 @@ void vm_page_bootstrap(
#ifndef MACHINE_PAGES
/*
- * We implement pmap_steal_memory and pmap_startup with the help
- * of two simpler functions, pmap_virtual_space and pmap_next_page.
+ * We implement pmap_steal_memory with the help
+ * of two simpler functions, pmap_virtual_space and vm_page_bootalloc.
*/
vm_offset_t pmap_steal_memory(
@@ -310,11 +255,7 @@ vm_offset_t pmap_steal_memory(
{
vm_offset_t addr, vaddr, paddr;
- /*
- * We round the size to an integer multiple.
- */
-
- size = (size + 3) &~ 3;
+ size = round_page(size);
/*
* If this is the first call to pmap_steal_memory,
@@ -347,8 +288,7 @@ vm_offset_t pmap_steal_memory(
for (vaddr = round_page(addr);
vaddr < addr + size;
vaddr += PAGE_SIZE) {
- if (!pmap_next_page(&paddr))
- panic("pmap_steal_memory");
+ paddr = vm_page_bootalloc(PAGE_SIZE);
/*
* XXX Logically, these mappings should be wired,
@@ -361,64 +301,6 @@ vm_offset_t pmap_steal_memory(
return addr;
}
-
-void pmap_startup(
- vm_offset_t *startp,
- vm_offset_t *endp)
-{
- unsigned int i, npages, pages_initialized;
- vm_page_t pages;
- vm_offset_t paddr;
-
- /*
- * We calculate how many page frames we will have
- * and then allocate the page structures in one chunk.
- */
-
- npages = ((PAGE_SIZE * pmap_free_pages() +
- (round_page(virtual_space_start) - virtual_space_start)) /
- (PAGE_SIZE + sizeof *pages));
-
- pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
-
- /*
- * Initialize the page frames.
- */
-
- for (i = 0, pages_initialized = 0; i < npages; i++) {
- if (!pmap_next_page(&paddr))
- break;
-
- vm_page_init(&pages[i], paddr);
- pages_initialized++;
- }
- i = 0;
- while (pmap_next_page(&paddr))
- i++;
- if (i)
- printf("%u memory page(s) left away\n", i);
-
- /*
- * Release pages in reverse order so that physical pages
- * initially get allocated in ascending addresses. This keeps
- * the devices (which must address physical memory) happy if
- * they require several consecutive pages.
- */
-
- for (i = pages_initialized; i > 0; i--) {
- vm_page_release(&pages[i - 1], FALSE);
- }
-
- /*
- * We have to re-align virtual_space_start,
- * because pmap_steal_memory has been using it.
- */
-
- virtual_space_start = round_page(virtual_space_start);
-
- *startp = virtual_space_start;
- *endp = virtual_space_end;
-}
#endif /* MACHINE_PAGES */
/*
@@ -434,34 +316,6 @@ void vm_page_module_init(void)
}
/*
- * Routine: vm_page_create
- * Purpose:
- * After the VM system is up, machine-dependent code
- * may stumble across more physical memory. For example,
- * memory that it was reserving for a frame buffer.
- * vm_page_create turns this memory into available pages.
- */
-
-void vm_page_create(
- vm_offset_t start,
- vm_offset_t end)
-{
- vm_offset_t paddr;
- vm_page_t m;
-
- for (paddr = round_page(start);
- paddr < trunc_page(end);
- paddr += PAGE_SIZE) {
- m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
- if (m == VM_PAGE_NULL)
- panic("vm_page_create");
-
- vm_page_init(m, paddr);
- vm_page_release(m, FALSE);
- }
-}
-
-/*
* vm_page_hash:
*
* Distributes the object/offset key pair among hash buckets.
@@ -750,6 +604,33 @@ void vm_page_rename(
vm_page_unlock_queues();
}
+static void vm_page_init_template(vm_page_t m)
+{
+ m->object = VM_OBJECT_NULL; /* reset later */
+ m->offset = 0; /* reset later */
+ m->wire_count = 0;
+
+ m->inactive = FALSE;
+ m->active = FALSE;
+ m->laundry = FALSE;
+ m->free = FALSE;
+ m->external = FALSE;
+
+ m->busy = TRUE;
+ m->wanted = FALSE;
+ m->tabled = FALSE;
+ m->fictitious = FALSE;
+ m->private = FALSE;
+ m->absent = FALSE;
+ m->error = FALSE;
+ m->dirty = FALSE;
+ m->precious = FALSE;
+ m->reference = FALSE;
+
+ m->page_lock = VM_PROT_NONE;
+ m->unlock_request = VM_PROT_NONE;
+}
+
/*
* vm_page_init:
*
@@ -758,11 +639,9 @@ void vm_page_rename(
* so that it can be given to vm_page_release or vm_page_insert.
*/
void vm_page_init(
- vm_page_t mem,
- vm_offset_t phys_addr)
+ vm_page_t mem)
{
- *mem = vm_page_template;
- mem->phys_addr = phys_addr;
+ vm_page_init_template(mem);
}
/*
@@ -794,7 +673,7 @@ vm_page_t vm_page_grab_fictitious(void)
* Release a fictitious page to the free list.
*/
-void vm_page_release_fictitious(
+static void vm_page_release_fictitious(
vm_page_t m)
{
simple_lock(&vm_page_queue_free_lock);
@@ -826,7 +705,8 @@ void vm_page_more_fictitious(void)
if (m == VM_PAGE_NULL)
panic("vm_page_more_fictitious");
- vm_page_init(m, vm_page_fictitious_addr);
+ vm_page_init(m);
+ m->phys_addr = vm_page_fictitious_addr;
m->fictitious = TRUE;
vm_page_release_fictitious(m);
}
@@ -836,25 +716,46 @@ void vm_page_more_fictitious(void)
* vm_page_convert:
*
* Attempt to convert a fictitious page into a real page.
+ *
+ * The object referenced by *MP must be locked.
*/
boolean_t vm_page_convert(
- vm_page_t m,
+ struct vm_page **mp,
boolean_t external)
{
- vm_page_t real_m;
+ struct vm_page *real_m, *fict_m;
+ vm_object_t object;
+ vm_offset_t offset;
+
+ fict_m = *mp;
+
+ assert(fict_m->fictitious);
+ assert(fict_m->phys_addr == vm_page_fictitious_addr);
+ assert(!fict_m->active);
+ assert(!fict_m->inactive);
real_m = vm_page_grab(external);
if (real_m == VM_PAGE_NULL)
return FALSE;
- m->phys_addr = real_m->phys_addr;
- m->fictitious = FALSE;
+ object = fict_m->object;
+ offset = fict_m->offset;
+ vm_page_remove(fict_m);
+
+ memcpy(&real_m->vm_page_header,
+ &fict_m->vm_page_header,
+ sizeof(*fict_m) - VM_PAGE_HEADER_SIZE);
+ real_m->fictitious = FALSE;
- real_m->phys_addr = vm_page_fictitious_addr;
- real_m->fictitious = TRUE;
+ vm_page_insert(real_m, object, offset);
- vm_page_release_fictitious(real_m);
+ assert(real_m->phys_addr != vm_page_fictitious_addr);
+ assert(fict_m->fictitious);
+ assert(fict_m->phys_addr == vm_page_fictitious_addr);
+
+ vm_page_release_fictitious(fict_m);
+ *mp = real_m;
return TRUE;
}
@@ -886,15 +787,16 @@ vm_page_t vm_page_grab(
return VM_PAGE_NULL;
}
- if (vm_page_queue_free == VM_PAGE_NULL)
+ mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
+
+ if (mem == NULL)
panic("vm_page_grab");
if (--vm_page_free_count < vm_page_free_count_minimum)
vm_page_free_count_minimum = vm_page_free_count;
if (external)
vm_page_external_count++;
- mem = vm_page_queue_free;
- vm_page_queue_free = (vm_page_t) mem->pageq.next;
+
mem->free = FALSE;
mem->extcounted = mem->external = external;
simple_unlock(&vm_page_queue_free_lock);
@@ -928,208 +830,97 @@ vm_offset_t vm_page_grab_phys_addr(void)
}
/*
- * vm_page_grab_contiguous_pages:
- *
- * Take N pages off the free list, the pages should
- * cover a contiguous range of physical addresses.
- * [Used by device drivers to cope with DMA limitations]
+ * vm_page_release:
*
- * Returns the page descriptors in ascending order, or
- * Returns KERN_RESOURCE_SHORTAGE if it could not.
+ * Return a page to the free list.
*/
-/* Biggest phys page number for the pages we handle in VM */
-
-vm_size_t vm_page_big_pagenum = 0; /* Set this before call! */
-
-kern_return_t
-vm_page_grab_contiguous_pages(
- int npages,
- vm_page_t pages[],
- natural_t *bits,
- boolean_t external)
+static void vm_page_release(
+ vm_page_t mem,
+ boolean_t external)
{
- int first_set;
- int size, alloc_size;
- kern_return_t ret;
- vm_page_t mem, *prevmemp;
+ simple_lock(&vm_page_queue_free_lock);
+ if (mem->free)
+ panic("vm_page_release");
+ mem->free = TRUE;
+ vm_page_free_pa(mem, 0);
+ vm_page_free_count++;
+ if (external)
+ vm_page_external_count--;
-#ifndef NBBY
-#define NBBY 8 /* size in bits of sizeof()`s unity */
-#endif
+ /*
+	 * Check if we should wake up someone waiting for a page.
+	 * But don't bother waking them unless they can allocate.
+	 *
+	 * We wake up only one thread, to prevent starvation.
+	 * Because the scheduling system handles wait queues FIFO,
+	 * if we woke up all waiting threads, one greedy thread
+	 * could starve multiple well-behaved threads: when they all
+	 * wake up, the greedy thread runs first, grabs the page,
+	 * and waits for another page. It will be the first to run
+	 * when the next page is freed.
+ *
+ * However, there is a slight danger here.
+ * The thread we wake might not use the free page.
+ * Then the other threads could wait indefinitely
+ * while the page goes unused. To forestall this,
+ * the pageout daemon will keep making free pages
+ * as long as vm_page_free_wanted is non-zero.
+ */
-#define NBPEL (sizeof(natural_t)*NBBY)
+ if ((vm_page_free_wanted > 0) &&
+ (vm_page_free_count >= vm_page_free_reserved)) {
+ vm_page_free_wanted--;
+ thread_wakeup_one((event_t) &vm_page_free_count);
+ }
- size = (vm_page_big_pagenum + NBPEL - 1)
- & ~(NBPEL - 1); /* in bits */
+ simple_unlock(&vm_page_queue_free_lock);
+}
- size = size / NBBY; /* in bytes */
+/*
+ * vm_page_grab_contig:
+ *
+ * Remove a block of contiguous pages from the free list.
+ * Returns VM_PAGE_NULL if the request fails.
+ */
- /*
- * If we are called before the VM system is fully functional
- * the invoker must provide us with the work space. [one bit
- * per page starting at phys 0 and up to vm_page_big_pagenum]
- */
- if (bits == 0) {
- alloc_size = round_page(size);
- if (kmem_alloc_wired(kernel_map,
- (vm_offset_t *)&bits,
- alloc_size)
- != KERN_SUCCESS)
- return KERN_RESOURCE_SHORTAGE;
- } else
- alloc_size = 0;
+vm_page_t vm_page_grab_contig(
+ vm_size_t size,
+ unsigned int selector)
+{
+ unsigned int i, order, nr_pages;
+ vm_page_t mem;
- memset(bits, 0, size);
+ order = vm_page_order(size);
+ nr_pages = 1 << order;
- /*
- * A very large granularity call, its rare so that is ok
- */
simple_lock(&vm_page_queue_free_lock);
/*
- * Do not dip into the reserved pool.
+ * Only let privileged threads (involved in pageout)
+ * dip into the reserved pool or exceed the limit
+ * for externally-managed pages.
*/
- if ((vm_page_free_count < vm_page_free_reserved)
- || (vm_page_external_count >= vm_page_external_limit)) {
- printf_once("no more room for vm_page_grab_contiguous_pages");
+	/* Written to avoid unsigned underflow when nr_pages is large */
+	if ((vm_page_free_count <= vm_page_free_reserved + nr_pages)
+	    && !current_thread()->vm_privilege) {
simple_unlock(&vm_page_queue_free_lock);
- return KERN_RESOURCE_SHORTAGE;
- }
-
- /*
- * First pass through, build a big bit-array of
- * the pages that are free. It is not going to
- * be too large anyways, in 4k we can fit info
- * for 32k pages.
- */
- mem = vm_page_queue_free;
- while (mem) {
- int word_index, bit_index;
-
- bit_index = (mem->phys_addr >> PAGE_SHIFT);
- word_index = bit_index / NBPEL;
- bit_index = bit_index - (word_index * NBPEL);
- bits[word_index] |= 1 << bit_index;
-
- mem = (vm_page_t) mem->pageq.next;
+ return VM_PAGE_NULL;
}
- /*
- * Second loop. Scan the bit array for NPAGES
- * contiguous bits. That gives us, if any,
- * the range of pages we will be grabbing off
- * the free list.
- */
- {
- int bits_so_far = 0, i;
-
- first_set = 0;
-
- for (i = 0; i < size; i += sizeof(natural_t)) {
-
- natural_t v = bits[i / sizeof(natural_t)];
- int bitpos;
-
- /*
- * Bitscan this one word
- */
- if (v) {
- /*
- * keep counting them beans ?
- */
- bitpos = 0;
-
- if (bits_so_far) {
-count_ones:
- while (v & 1) {
- bitpos++;
- /*
- * got enough beans ?
- */
- if (++bits_so_far == npages)
- goto found_em;
- v >>= 1;
- }
- /* if we are being lucky, roll again */
- if (bitpos == NBPEL)
- continue;
- }
-
- /*
- * search for beans here
- */
- bits_so_far = 0;
- while ((bitpos < NBPEL) && ((v & 1) == 0)) {
- bitpos++;
- v >>= 1;
- }
- if (v & 1) {
- first_set = (i * NBBY) + bitpos;
- goto count_ones;
- }
- }
- /*
- * No luck
- */
- bits_so_far = 0;
- }
- }
+ mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
- /*
- * We could not find enough contiguous pages.
- */
- simple_unlock(&vm_page_queue_free_lock);
+ if (mem == NULL)
+ panic("vm_page_grab_contig");
- printf_once("no contiguous room for vm_page_grab_contiguous_pages");
- ret = KERN_RESOURCE_SHORTAGE;
- goto out;
+ vm_page_free_count -= nr_pages;
- /*
- * Final pass. Now we know which pages we want.
- * Scan the list until we find them all, grab
- * pages as we go. FIRST_SET tells us where
- * in the bit-array our pages start.
- */
-found_em:
- vm_page_free_count -= npages;
if (vm_page_free_count < vm_page_free_count_minimum)
vm_page_free_count_minimum = vm_page_free_count;
- if (external)
- vm_page_external_count += npages;
- {
- vm_offset_t first_phys, last_phys;
-
- /* cache values for compare */
- first_phys = first_set << PAGE_SHIFT;
- last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */
-
- /* running pointers */
- mem = vm_page_queue_free;
- prevmemp = &vm_page_queue_free;
-
- while (mem) {
-
- vm_offset_t addr;
-
- addr = mem->phys_addr;
-
- if ((addr >= first_phys) &&
- (addr < last_phys)) {
- *prevmemp = (vm_page_t) mem->pageq.next;
- pages[(addr - first_phys) >> PAGE_SHIFT] = mem;
- mem->free = FALSE;
- mem->extcounted = mem->external = external;
- /*
- * Got them all ?
- */
- if (--npages == 0) break;
- } else
- prevmemp = (vm_page_t *) &mem->pageq.next;
-
- mem = (vm_page_t) mem->pageq.next;
- }
+
+ for (i = 0; i < nr_pages; i++) {
+ mem[i].free = FALSE;
+ mem[i].extcounted = mem[i].external = 0;
}
simple_unlock(&vm_page_queue_free_lock);
@@ -1148,55 +939,35 @@ found_em:
if ((vm_page_free_count < vm_page_free_min) ||
((vm_page_free_count < vm_page_free_target) &&
(vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup(&vm_page_free_wanted);
-
- ret = KERN_SUCCESS;
-out:
- if (alloc_size)
- kmem_free(kernel_map, (vm_offset_t) bits, alloc_size);
+ thread_wakeup((event_t) &vm_page_free_wanted);
- return ret;
+ return mem;
}
/*
- * vm_page_release:
+ * vm_page_free_contig:
*
- * Return a page to the free list.
+ * Return a block of contiguous pages to the free list.
*/
-void vm_page_release(
- vm_page_t mem,
- boolean_t external)
+void vm_page_free_contig(vm_page_t mem, vm_size_t size)
{
+ unsigned int i, order, nr_pages;
+
+ order = vm_page_order(size);
+ nr_pages = 1 << order;
+
simple_lock(&vm_page_queue_free_lock);
- if (mem->free)
- panic("vm_page_release");
- mem->free = TRUE;
- mem->pageq.next = (queue_entry_t) vm_page_queue_free;
- vm_page_queue_free = mem;
- vm_page_free_count++;
- if (external)
- vm_page_external_count--;
- /*
- * Check if we should wake up someone waiting for page.
- * But don't bother waking them unless they can allocate.
- *
- * We wakeup only one thread, to prevent starvation.
- * Because the scheduling system handles wait queues FIFO,
- * if we wakeup all waiting threads, one greedy thread
- * can starve multiple niceguy threads. When the threads
- * all wakeup, the greedy threads runs first, grabs the page,
- * and waits for another page. It will be the first to run
- * when the next page is freed.
- *
- * However, there is a slight danger here.
- * The thread we wake might not use the free page.
- * Then the other threads could wait indefinitely
- * while the page goes unused. To forestall this,
- * the pageout daemon will keep making free pages
- * as long as vm_page_free_wanted is non-zero.
- */
+ for (i = 0; i < nr_pages; i++) {
+ if (mem[i].free)
+ panic("vm_page_free_contig");
+
+ mem[i].free = TRUE;
+ }
+
+ vm_page_free_pa(mem, order);
+ vm_page_free_count += nr_pages;
if ((vm_page_free_wanted > 0) &&
(vm_page_free_count >= vm_page_free_reserved)) {
@@ -1310,12 +1081,13 @@ void vm_page_free(
*/
if (mem->private || mem->fictitious) {
- vm_page_init(mem, vm_page_fictitious_addr);
+ vm_page_init(mem);
+ mem->phys_addr = vm_page_fictitious_addr;
mem->fictitious = TRUE;
vm_page_release_fictitious(mem);
} else {
int external = mem->external && mem->extcounted;
- vm_page_init(mem, mem->phys_addr);
+ vm_page_init(mem);
vm_page_release(mem, external);
}
}