-rw-r--r-- Makefile.am | 32
-rw-r--r-- Makefrag.am | 29
-rw-r--r-- chips/busses.h | 6
-rw-r--r-- configfrag.ac | 15
-rw-r--r-- configure.ac | 31
-rw-r--r-- ddb/db_aout.c | 508
-rw-r--r-- ddb/db_aout.h | 52
-rw-r--r-- ddb/db_break.c | 89
-rw-r--r-- ddb/db_break.h | 12
-rw-r--r-- ddb/db_command.c | 82
-rw-r--r-- ddb/db_command.h | 4
-rw-r--r-- ddb/db_cond.c | 9
-rw-r--r-- ddb/db_cond.h | 6
-rw-r--r-- ddb/db_elf.c | 26
-rw-r--r-- ddb/db_examine.c | 206
-rw-r--r-- ddb/db_examine.h | 12
-rw-r--r-- ddb/db_expr.c | 21
-rw-r--r-- ddb/db_ext_symtab.c | 3
-rw-r--r-- ddb/db_input.c | 58
-rw-r--r-- ddb/db_input.h | 3
-rw-r--r-- ddb/db_lex.c | 6
-rw-r--r-- ddb/db_macro.c | 24
-rw-r--r-- ddb/db_macro.h | 22
-rw-r--r-- ddb/db_mp.c | 29
-rw-r--r-- ddb/db_mp.h | 6
-rw-r--r-- ddb/db_output.c | 2
-rw-r--r-- ddb/db_print.c | 119
-rw-r--r-- ddb/db_print.h | 11
-rw-r--r-- ddb/db_run.c | 48
-rw-r--r-- ddb/db_sym.c | 46
-rw-r--r-- ddb/db_sym.h | 22
-rw-r--r-- ddb/db_task_thread.c | 25
-rw-r--r-- ddb/db_variables.c | 36
-rw-r--r-- ddb/db_watch.c | 50
-rw-r--r-- ddb/db_watch.h | 6
-rw-r--r-- ddb/db_write_cmd.c | 11
-rw-r--r-- device/blkio.c | 46
-rw-r--r-- device/blkio.h | 2
-rw-r--r-- device/buf.h | 6
-rw-r--r-- device/chario.c | 109
-rw-r--r-- device/cirbuf.c | 29
-rw-r--r-- device/conf.h | 12
-rw-r--r-- device/cons.c | 3
-rw-r--r-- device/dev_hdr.h | 8
-rw-r--r-- device/dev_lookup.c | 26
-rw-r--r-- device/dev_name.c | 43
-rw-r--r-- device/dev_pager.c | 33
-rw-r--r-- device/device_emul.h | 8
-rw-r--r-- device/device_init.c | 1
-rw-r--r-- device/ds_routines.c | 88
-rw-r--r-- device/ds_routines.h | 12
-rw-r--r-- device/intr.c | 79
-rw-r--r-- device/io_req.h | 3
-rw-r--r-- device/kmsg.c | 40
-rw-r--r-- device/net_io.c | 96
-rw-r--r-- device/net_io.h | 4
-rw-r--r-- device/subrs.c | 45
-rw-r--r-- device/tty.h | 2
-rw-r--r-- doc/mach.texi | 139
-rw-r--r-- gensym.awk | 3
-rw-r--r-- i386/Makefrag.am | 76
-rw-r--r-- i386/Makefrag_x86.am | 84
-rw-r--r-- i386/configfrag.ac | 2
-rw-r--r-- i386/i386/apic.c | 239
-rw-r--r-- i386/i386/apic.h | 146
-rw-r--r-- i386/i386/ast_check.c | 11
-rw-r--r-- i386/i386/copy_user.h | 100
-rw-r--r-- i386/i386/cpu_number.h | 77
-rw-r--r-- i386/i386/cpuboot.S | 245
-rw-r--r-- i386/i386/cswitch.S | 19
-rw-r--r-- i386/i386/db_disasm.c | 15
-rw-r--r-- i386/i386/db_interface.c | 23
-rw-r--r-- i386/i386/db_interface.h | 13
-rw-r--r-- i386/i386/db_trace.c | 192
-rw-r--r-- i386/i386/debug_i386.c | 13
-rw-r--r-- i386/i386/fpu.c | 46
-rw-r--r-- i386/i386/gdt.c | 62
-rw-r--r-- i386/i386/gdt.h | 12
-rw-r--r-- i386/i386/hardclock.c | 2
-rw-r--r-- i386/i386/hardclock.h | 1
-rw-r--r-- i386/i386/i386asm.sym | 44
-rw-r--r-- i386/i386/idt-gen.h | 4
-rw-r--r-- i386/i386/idt.c | 31
-rw-r--r-- i386/i386/idt_inittab.S | 1
-rw-r--r-- i386/i386/io_map.c | 92
-rw-r--r-- i386/i386/io_perm.c | 7
-rw-r--r-- i386/i386/ipl.h | 6
-rw-r--r-- i386/i386/ktss.c | 38
-rw-r--r-- i386/i386/ktss.h | 1
-rw-r--r-- i386/i386/kttd_interface.c | 3
-rw-r--r-- i386/i386/ldt.c | 66
-rw-r--r-- i386/i386/ldt.h | 18
-rw-r--r-- i386/i386/lock.h | 28
-rw-r--r-- i386/i386/locore.S | 260
-rw-r--r-- i386/i386/locore.h | 7
-rw-r--r-- i386/i386/loose_ends.c | 16
-rw-r--r-- i386/i386/machine_task.c | 1
-rw-r--r-- i386/i386/model_dep.h | 10
-rw-r--r-- i386/i386/mp_desc.c | 307
-rw-r--r-- i386/i386/mp_desc.h | 9
-rw-r--r-- i386/i386/msr.h | 56
-rw-r--r-- i386/i386/pcb.c | 175
-rw-r--r-- i386/i386/pcb.h | 2
-rw-r--r-- i386/i386/percpu.c | 33
-rw-r--r-- i386/i386/percpu.h | 98
-rw-r--r-- i386/i386/phys.c | 4
-rw-r--r-- i386/i386/pic.c | 11
-rw-r--r-- i386/i386/pic.h | 4
-rw-r--r-- i386/i386/pit.c | 36
-rw-r--r-- i386/i386/pit.h | 7
-rw-r--r-- i386/i386/proc_reg.h | 12
-rw-r--r-- i386/i386/seg.c | 5
-rw-r--r-- i386/i386/seg.h | 20
-rw-r--r-- i386/i386/smp.c | 168
-rw-r--r-- i386/i386/smp.h | 13
-rw-r--r-- i386/i386/spl.S | 35
-rw-r--r-- i386/i386/spl.h | 3
-rw-r--r-- i386/i386/strings.c | 54
-rw-r--r-- i386/i386/thread.h | 42
-rw-r--r-- i386/i386/trap.c | 58
-rw-r--r-- i386/i386/trap.h | 23
-rw-r--r-- i386/i386/tss.h | 26
-rw-r--r-- i386/i386/user_ldt.c | 29
-rw-r--r-- i386/i386/vm_param.h | 62
-rw-r--r-- i386/i386/xen.h | 42
-rw-r--r-- i386/i386at/acpi_parse_apic.c | 302
-rw-r--r-- i386/i386at/acpi_parse_apic.h | 44
-rw-r--r-- i386/i386at/autoconf.c | 24
-rw-r--r-- i386/i386at/autoconf.h | 4
-rw-r--r-- i386/i386at/biosmem.c | 37
-rw-r--r-- i386/i386at/boothdr.S | 89
-rw-r--r-- i386/i386at/com.c | 59
-rw-r--r-- i386/i386at/com.h | 2
-rw-r--r-- i386/i386at/conf.c | 48
-rw-r--r-- i386/i386at/cram.h | 5
-rw-r--r-- i386/i386at/idt.h | 7
-rw-r--r-- i386/i386at/immc.c | 8
-rw-r--r-- i386/i386at/int_init.c | 50
-rw-r--r-- i386/i386at/int_init.h | 1
-rw-r--r-- i386/i386at/interrupt.S | 84
-rw-r--r-- i386/i386at/ioapic.c | 272
-rw-r--r-- i386/i386at/kd.c | 181
-rw-r--r-- i386/i386at/kd.h | 65
-rw-r--r-- i386/i386at/kd_event.c | 21
-rw-r--r-- i386/i386at/kd_mouse.c | 31
-rw-r--r-- i386/i386at/kd_mouse.h | 8
-rw-r--r-- i386/i386at/kd_queue.c | 12
-rw-r--r-- i386/i386at/kdsoft.h | 13
-rw-r--r-- i386/i386at/lpr.c | 9
-rw-r--r-- i386/i386at/mem.c | 2
-rw-r--r-- i386/i386at/model_dep.c | 216
-rw-r--r-- i386/i386at/model_dep.h | 4
-rw-r--r-- i386/i386at/pic_isa.c | 4
-rw-r--r-- i386/i386at/rtc.c | 19
-rw-r--r-- i386/include/mach/i386/cthreads.h | 56
-rw-r--r-- i386/include/mach/i386/exec/elf.h | 19
-rw-r--r-- i386/include/mach/i386/mach_i386.defs | 2
-rw-r--r-- i386/include/mach/i386/mach_i386_types.h | 9
-rwxr-xr-x i386/include/mach/i386/machine_types.defs | 57
-rw-r--r-- i386/include/mach/i386/multiboot.h | 110
-rw-r--r-- i386/include/mach/i386/syscall_sw.h | 12
-rw-r--r-- i386/include/mach/i386/thread_status.h | 36
-rw-r--r-- i386/include/mach/i386/vm_param.h | 34
-rw-r--r-- i386/include/mach/i386/vm_types.h | 101
-rw-r--r-- i386/intel/pmap.c | 1139
-rw-r--r-- i386/intel/pmap.h | 64
-rw-r--r-- i386/intel/read_fault.c | 4
-rw-r--r-- i386/xen/xen.c | 13
-rw-r--r-- include/device/device.defs | 21
-rw-r--r-- include/device/device_request.defs | 8
-rw-r--r-- include/device/device_types.defs | 13
-rw-r--r-- include/device/device_types.h | 7
-rw-r--r-- include/device/input.h | 106
-rw-r--r-- include/inttypes.h | 20
-rw-r--r-- include/mach/bootstrap.defs | 49
-rw-r--r-- include/mach/default_pager_helper.defs | 53
-rw-r--r-- include/mach/default_pager_types.defs | 17
-rw-r--r-- include/mach/error.h | 2
-rw-r--r-- include/mach/exc.defs | 2
-rw-r--r-- include/mach/exec/elf.h | 72
-rw-r--r-- include/mach/gnumach.defs | 26
-rw-r--r-- include/mach/host_info.h | 5
-rw-r--r-- include/mach/kern_return.h | 8
-rw-r--r-- include/mach/mach4.defs | 18
-rw-r--r-- include/mach/mach_host.defs | 40
-rw-r--r-- include/mach/mach_port.defs | 8
-rw-r--r-- include/mach/mach_traps.h | 18
-rw-r--r-- include/mach/mach_types.defs | 89
-rw-r--r-- include/mach/mach_types.h | 13
-rw-r--r-- include/mach/machine.h | 1
-rw-r--r-- include/mach/message.h | 151
-rw-r--r-- include/mach/mig_errors.h | 2
-rw-r--r-- include/mach/mig_support.h | 6
-rw-r--r-- include/mach/msg_type.h | 42
-rw-r--r-- include/mach/multiboot.h | 82
-rw-r--r-- include/mach/notify.h | 6
-rw-r--r-- include/mach/pc_sample.h | 6
-rw-r--r-- include/mach/port.h | 43
-rw-r--r-- include/mach/rpc.h | 34
-rw-r--r-- include/mach/std_types.defs | 9
-rw-r--r-- include/mach/std_types.h | 4
-rw-r--r-- include/mach/task_info.h | 48
-rw-r--r-- include/mach/thread_info.h | 12
-rw-r--r-- include/mach/time_value.h | 108
-rw-r--r-- include/mach_debug/hash_info.h | 2
-rw-r--r-- include/mach_debug/ipc_info.h | 77
-rw-r--r-- include/mach_debug/mach_debug.defs | 44
-rw-r--r-- include/mach_debug/mach_debug_types.defs | 80
-rw-r--r-- include/mach_debug/mach_debug_types.h | 1
-rw-r--r-- include/mach_debug/slab_info.h | 20
-rw-r--r-- include/mach_debug/vm_info.h | 74
-rw-r--r-- include/stdint.h | 55
-rw-r--r-- include/string.h | 4
-rw-r--r-- include/sys/ioctl.h | 52
-rw-r--r-- include/sys/time.h | 53
-rw-r--r-- ipc/.gitignore | 2
-rw-r--r-- ipc/ipc_entry.c | 13
-rw-r--r-- ipc/ipc_entry.h | 10
-rw-r--r-- ipc/ipc_kmsg.c | 387
-rw-r--r-- ipc/ipc_kmsg.h | 77
-rwxr-xr-x ipc/ipc_machdep.h | 13
-rw-r--r-- ipc/ipc_marequest.c | 22
-rw-r--r-- ipc/ipc_marequest.h | 8
-rw-r--r-- ipc/ipc_mqueue.c | 11
-rw-r--r-- ipc/ipc_mqueue.h | 4
-rw-r--r-- ipc/ipc_notify.c | 38
-rw-r--r-- ipc/ipc_notify.h | 6
-rw-r--r-- ipc/ipc_object.c | 88
-rw-r--r-- ipc/ipc_object.h | 20
-rw-r--r-- ipc/ipc_port.c | 43
-rw-r--r-- ipc/ipc_port.h | 21
-rw-r--r-- ipc/ipc_pset.c | 7
-rw-r--r-- ipc/ipc_pset.h | 4
-rw-r--r-- ipc/ipc_right.c | 109
-rw-r--r-- ipc/ipc_right.h | 36
-rw-r--r-- ipc/ipc_space.c | 2
-rw-r--r-- ipc/ipc_space.h | 24
-rw-r--r-- ipc/ipc_target.c | 2
-rw-r--r-- ipc/ipc_target.h | 5
-rw-r--r-- ipc/mach_debug.c | 8
-rw-r--r-- ipc/mach_msg.c | 108
-rw-r--r-- ipc/mach_msg.h | 10
-rw-r--r-- ipc/mach_port.c | 120
-rw-r--r-- ipc/mach_port.h | 35
-rw-r--r-- ipc/mach_rpc.c | 150
-rw-r--r-- ipc/port.h | 29
-rw-r--r-- kern/.gitignore | 2
-rw-r--r-- kern/ast.c | 4
-rw-r--r-- kern/ast.h | 1
-rw-r--r-- kern/boot_script.c | 5
-rw-r--r-- kern/boot_script.h | 4
-rw-r--r-- kern/bootstrap.c | 74
-rw-r--r-- kern/bootstrap.h | 4
-rw-r--r-- kern/cpu_number.h | 3
-rw-r--r-- kern/debug.c | 29
-rw-r--r-- kern/elf-load.c | 10
-rw-r--r-- kern/eventcount.c | 8
-rw-r--r-- kern/eventcount.h | 2
-rw-r--r-- kern/exception.c | 130
-rw-r--r-- kern/exception.h | 6
-rw-r--r-- kern/gsync.c | 19
-rw-r--r-- kern/host.c | 13
-rw-r--r-- kern/ipc_host.c | 4
-rw-r--r-- kern/ipc_kobject.c | 36
-rw-r--r-- kern/ipc_mig.c | 204
-rw-r--r-- kern/ipc_mig.h | 83
-rw-r--r-- kern/ipc_sched.c | 2
-rw-r--r-- kern/ipc_tt.c | 14
-rw-r--r-- kern/ipc_tt.h | 2
-rw-r--r-- kern/lock.c | 28
-rw-r--r-- kern/lock.h | 110
-rw-r--r-- kern/lock_mon.c | 38
-rw-r--r-- kern/mach_clock.c | 217
-rw-r--r-- kern/mach_clock.h | 18
-rw-r--r-- kern/machine.c | 168
-rw-r--r-- kern/pc_sample.c | 43
-rw-r--r-- kern/printf.c | 4
-rw-r--r-- kern/priority.c | 6
-rw-r--r-- kern/processor.c | 19
-rw-r--r-- kern/processor.h | 43
-rw-r--r-- kern/profile.c | 15
-rw-r--r-- kern/queue.c | 10
-rw-r--r-- kern/sched.h | 19
-rw-r--r-- kern/sched_prim.c | 118
-rw-r--r-- kern/sched_prim.h | 1
-rw-r--r-- kern/slab.c | 104
-rw-r--r-- kern/slab.h | 11
-rw-r--r-- kern/startup.c | 15
-rw-r--r-- kern/strings.c | 21
-rw-r--r-- kern/syscall_emulation.c | 6
-rw-r--r-- kern/syscall_subr.c | 10
-rw-r--r-- kern/syscall_subr.h | 2
-rw-r--r-- kern/syscall_sw.c | 6
-rw-r--r-- kern/syscall_sw.h | 10
-rw-r--r-- kern/task.c | 97
-rw-r--r-- kern/task.h | 25
-rw-r--r-- kern/thread.c | 102
-rw-r--r-- kern/thread.h | 54
-rw-r--r-- kern/thread_swap.c | 4
-rw-r--r-- kern/time_stamp.c | 61
-rw-r--r-- kern/time_stamp.h | 68
-rw-r--r-- kern/timer.c | 112
-rw-r--r-- kern/timer.h | 16
-rw-r--r-- kern/xpr.c | 2
-rw-r--r-- linux/Makefrag.am | 4
-rw-r--r-- linux/dev/arch/i386/kernel/irq.c | 13
-rw-r--r-- linux/dev/glue/block.c | 6
-rw-r--r-- linux/dev/glue/misc.c | 6
-rw-r--r-- linux/dev/include/asm-i386/system.h | 5
-rw-r--r-- linux/dev/kernel/sched.c | 3
-rw-r--r-- linux/src/arch/i386/kernel/bios32.c | 10
-rw-r--r-- linux/src/drivers/block/ide.c | 7
-rw-r--r-- tests/Makefrag.am | 12
-rw-r--r-- tests/README | 37
-rw-r--r-- tests/configfrag.ac | 18
-rw-r--r-- tests/grub.cfg.single.template | 4
-rw-r--r-- tests/include/device/cons.h | 27
l--------- tests/include/kern/printf.h | 1
-rw-r--r-- tests/include/mach/mig_support.h | 71
-rw-r--r-- tests/include/syscalls.h | 83
-rw-r--r-- tests/include/testlib.h | 75
l--------- tests/include/util/atoi.h | 1
-rw-r--r-- tests/run-qemu.sh.template | 38
-rw-r--r-- tests/start.S | 28
-rw-r--r-- tests/syscalls.S | 4
-rw-r--r-- tests/test-gsync.c | 122
-rw-r--r-- tests/test-hello.c | 26
-rw-r--r-- tests/test-mach_host.c | 81
-rw-r--r-- tests/test-mach_port.c | 121
-rw-r--r-- tests/test-machmsg.c | 405
-rw-r--r-- tests/test-multiboot.in (renamed from tests/test-mbchk.in) | 8
-rw-r--r-- tests/test-syscalls.c | 166
-rw-r--r-- tests/test-task.c | 171
-rw-r--r-- tests/test-threads.c | 104
-rw-r--r-- tests/test-vm.c | 85
-rw-r--r-- tests/testlib.c | 114
-rw-r--r-- tests/testlib_thread_start.c | 86
-rw-r--r-- tests/user-qemu.mk | 221
-rw-r--r-- util/byteorder.c | 53
-rw-r--r-- util/byteorder.h (renamed from include/stddef.h) | 23
-rw-r--r-- util/putchar.c | 31
-rw-r--r-- util/putchar.h | 32
-rw-r--r-- util/puts.c | 40
-rw-r--r-- vm/memory_object.c | 17
-rw-r--r-- vm/memory_object_proxy.c | 6
-rw-r--r-- vm/memory_object_proxy.h | 8
-rw-r--r-- vm/pmap.h | 5
-rw-r--r-- vm/vm_debug.c | 99
-rw-r--r-- vm/vm_external.c | 5
-rw-r--r-- vm/vm_fault.c | 28
-rw-r--r-- vm/vm_fault.h | 7
-rw-r--r-- vm/vm_init.c | 1
-rw-r--r-- vm/vm_kern.c | 2
-rw-r--r-- vm/vm_map.c | 353
-rw-r--r-- vm/vm_map.h | 18
-rw-r--r-- vm/vm_object.c | 145
-rw-r--r-- vm/vm_object.h | 4
-rw-r--r-- vm/vm_page.c | 31
-rw-r--r-- vm/vm_page.h | 36
-rw-r--r-- vm/vm_pageout.c | 2
-rw-r--r-- vm/vm_resident.c | 19
-rw-r--r-- vm/vm_user.c | 148
-rw-r--r-- x86_64/Makefrag.am | 133
-rw-r--r-- x86_64/boothdr.S | 222
-rw-r--r-- x86_64/configfrag.ac | 4
-rw-r--r-- x86_64/copy_user.c | 613
-rw-r--r-- x86_64/cswitch.S | 20
-rw-r--r-- x86_64/idt_inittab.S | 30
-rw-r--r-- x86_64/include/syscall_sw.h (renamed from include/mach_debug/pc_info.h) | 33
-rw-r--r-- x86_64/interrupt.S | 82
-rw-r--r-- x86_64/ldscript | 26
-rw-r--r-- x86_64/locore.S | 824
-rw-r--r-- x86_64/spl.S | 40
-rw-r--r-- xen/block.c | 4
-rw-r--r-- xen/console.c | 54
-rw-r--r-- xen/console.h | 5
-rw-r--r-- xen/evt.c | 16
-rw-r--r-- xen/evt.h | 2
-rw-r--r-- xen/grant.c | 2
-rw-r--r-- xen/net.c | 7
-rw-r--r-- xen/public/elfstructs.h | 45
-rw-r--r-- xen/store.c | 2
-rw-r--r-- xen/time.c | 6
-rw-r--r-- xen/xen.c | 15
-rw-r--r-- xen/xen.h | 12
385 files changed, 12560 insertions, 7304 deletions
diff --git a/Makefile.am b/Makefile.am
index 2bfdcee9..ad38249b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -41,8 +41,9 @@ AM_LDFLAGS =
# Compilation flags
#
+GCC_INSTALL = $(shell LANG=C $(CC) -print-search-dirs | sed -n -e 's/install: \(.*\)/\1/p')
AM_CPPFLAGS += \
- -ffreestanding -nostdinc -imacros config.h
+ -nostdinc -imacros config.h -I $(GCC_INSTALL)/include
AM_CPPFLAGS += \
-I$(systype) \
@@ -60,7 +61,7 @@ AM_CCASFLAGS += \
# Yes, this makes the eyes hurt. But perhaps someone will finally take care of
# all that scruffy Mach code... Also see <http://savannah.gnu.org/task/?5726>.
AM_CFLAGS += \
- -Wall
+ -Wall -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes
# We need the GNU-style inline
AM_CFLAGS += \
@@ -80,7 +81,10 @@ endif
# We do not support or need position-independent
AM_CFLAGS += \
- -no-pie -fno-pic
+ -no-pie -fno-PIE -fno-pie -fno-pic
+
+# This must be the same size as port names, see e.g. ipc/ipc_entry.c
+AM_CFLAGS += -DRDXTREE_KEY_32
#
# Silent build support.
@@ -167,31 +171,27 @@ gnumach_o_LINK = $(LD) $(LDFLAGS) -u _start -r -o $@
noinst_PROGRAMS += \
gnumach.o
-# This is the list of routines we decide is OK to steal from the C library.
-clib_routines := htonl htons ntohl ntohs \
- udivdi3 __udivdi3 __udivmoddi4 __umoddi3 \
- __divdi3 __moddi3 \
- __rel_iplt_start __rel_iplt_end \
- __rela_iplt_start __rela_iplt_end \
- __ffsdi2 ffs \
- _START _start etext _edata end _end # actually ld magic, not libc.
+# This is the list of routines we use from libgcc.
+libgcc_routines := udivdi3 __udivdi3 __udivmoddi4 __umoddi3 __divdi3 __divmoddi4 __moddi3 __ffsdi2
+# References generated by ld.
+ld_magic_routines := __rel_iplt_start __rel_iplt_end __rela_iplt_start __rela_iplt_end _START etext _edata _end
gnumach-undef: gnumach.$(OBJEXT)
$(NM_V) $(NM) -u $< | sed 's/ *U *//' | sort -u > $@
MOSTLYCLEANFILES += gnumach-undef
gnumach-undef-bad: gnumach-undef Makefile
- $(AM_V_GEN) sed '$(foreach r,$(clib_routines),/^$r$$/d;)' $< > $@
+ $(AM_V_GEN) sed '$(foreach r,$(libgcc_routines) $(ld_magic_routines),/^$r$$/d;)' $< > $@
MOSTLYCLEANFILES += gnumach-undef-bad
-clib-routines.o: gnumach-undef gnumach-undef-bad
+libgcc-routines.o: gnumach-undef gnumach-undef-bad
$(AM_V_at) if test -s gnumach-undef-bad; \
then cat gnumach-undef-bad; exit 2; else true; fi
- $(AM_V_CCLD) $(CCLD) $(LDFLAGS) -nostdlib -nostartfiles -r -static \
- -o $@ `sed 's/^/-Wl,-u,/' < $<` -x c /dev/null -lc -lgcc
+ $(AM_V_CCLD) $(CCLD) $(LDFLAGS) -r -static \
+ -o $@ `sed 's/^/-Wl,-u,/' < $<` -x c /dev/null -lgcc
@if nm $@ | grep __init_cpu_features; \
then echo "Please install a 32bit libc without multiarch support (on Debian systems, the libc6-dev:i386 package containing /usr/lib/i386-linux-gnu/libc.a)". ; \
false ; fi
gnumach_LINK = $(LD) $(LDFLAGS) $(LINKFLAGS) $(gnumach_LINKFLAGS) -o $@
-gnumach_LDADD = gnumach.o clib-routines.o
+gnumach_LDADD = gnumach.o libgcc-routines.o
#
# Installation.
diff --git a/Makefrag.am b/Makefrag.am
index 50e9b24b..5b61a1d6 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -23,8 +23,6 @@ if enable_kdb
libkernel_a_SOURCES += \
ddb/db_access.c \
ddb/db_access.h \
- ddb/db_aout.c \
- ddb/db_aout.h \
ddb/db_elf.c \
ddb/db_elf.h \
ddb/db_break.c \
@@ -114,7 +112,6 @@ libkernel_a_SOURCES += \
ipc/mach_msg.h \
ipc/mach_port.c \
ipc/mach_port.h \
- ipc/mach_rpc.c \
ipc/mach_debug.c \
ipc/port.h
EXTRA_DIST += \
@@ -216,8 +213,6 @@ libkernel_a_SOURCES += \
kern/thread.h \
kern/thread_swap.c \
kern/thread_swap.h \
- kern/time_stamp.c \
- kern/time_stamp.h \
kern/timer.c \
kern/timer.h \
kern/xpr.c \
@@ -240,11 +235,10 @@ EXTRA_DIST += \
#
libkernel_a_SOURCES += \
- util/putchar.c \
- util/putchar.h \
- util/puts.c \
util/atoi.c \
- util/atoi.h
+ util/atoi.h \
+ util/byteorder.h \
+ util/byteorder.c
#
# Virtual memory implementation.
@@ -364,6 +358,7 @@ include_device_HEADERS = \
include/device/device_types.defs \
include/device/device_types.h \
include/device/disk_status.h \
+ include/device/input.h \
include/device/net_status.h \
include/device/notify.defs \
include/device/notify.h \
@@ -372,9 +367,7 @@ include_device_HEADERS = \
include_machdir = $(includedir)/mach
include_mach_HEADERS = \
- include/mach/bootstrap.defs \
include/mach/default_pager.defs \
- include/mach/default_pager_helper.defs \
include/mach/default_pager_types.defs \
include/mach/exc.defs \
include/mach/mach.defs \
@@ -403,8 +396,6 @@ include_mach_HEADERS = \
include/mach/memory_object.h \
include/mach/message.h \
include/mach/mig_errors.h \
- include/mach/msg_type.h \
- include/mach/multiboot.h \
include/mach/notify.h \
include/mach/pc_sample.h \
include/mach/policy.h \
@@ -412,7 +403,6 @@ include_mach_HEADERS = \
include/mach/processor_info.h \
include/mach/profil.h \
include/mach/profilparam.h \
- include/mach/rpc.h \
include/mach/std_types.h \
include/mach/syscall_sw.h \
include/mach/task_info.h \
@@ -445,18 +435,16 @@ include_mach_debugdir = $(includedir)/mach_debug
include_mach_debug_HEADERS = \
$(addprefix include/mach_debug/, \
hash_info.h \
- ipc_info.h \
mach_debug.defs \
mach_debug_types.defs \
mach_debug_types.h \
- pc_info.h \
vm_info.h \
slab_info.h \
)
# Other headers for the distribution. We don't install these, because the
# GNU C library has correct versions for users to use.
-# other-sys-headers := types.h time.h reboot.h ioctl.h
+# other-sys-headers := types.h reboot.h ioctl.h
# other-mach-headers := mig_support.h mach_traps.h error.h
# other-headers := alloca.h
@@ -609,6 +597,13 @@ endif
# Architecture specific parts.
#
+if HOST_ix86
+include i386/Makefrag_x86.am
+endif
+if HOST_x86_64
+include i386/Makefrag_x86.am
+endif
+
# ix86.
include i386/Makefrag.am
diff --git a/chips/busses.h b/chips/busses.h
index f728add0..90eebc67 100644
--- a/chips/busses.h
+++ b/chips/busses.h
@@ -73,7 +73,7 @@ struct bus_ctlr {
struct bus_driver *driver; /* myself, as a device */
char *name; /* readability */
int unit; /* index in driver */
- void (*intr)(); /* interrupt handler(s) */
+ void (*intr)(int); /* interrupt handler(s) */
vm_offset_t address; /* device virtual address */
int am; /* address modifier */
vm_offset_t phys_address;/* device phys address */
@@ -93,7 +93,7 @@ struct bus_device {
struct bus_driver *driver; /* autoconf info */
char *name; /* my name */
int unit;
- void (*intr)();
+ void (*intr)(int);
vm_offset_t address; /* device address */
int am; /* address modifier */
vm_offset_t phys_address;/* device phys address */
@@ -131,7 +131,7 @@ struct bus_driver {
vm_offset_t);
void (*attach)( /* setup driver after probe */
struct bus_device *);
- int (*dgo)(); /* start transfer */
+ int (*dgo)(struct bus_device *); /* start transfer */
vm_offset_t *addr; /* device csr addresses */
char *dname; /* name of a device */
struct bus_device **dinfo; /* backpointers to init structs */
diff --git a/configfrag.ac b/configfrag.ac
index 3c3ba3aa..b8b41261 100644
--- a/configfrag.ac
+++ b/configfrag.ac
@@ -70,8 +70,12 @@ AC_DEFINE([MACH_DEBUG], [1], [MACH_DEBUG])
# Fixed priority threads.
AC_DEFINE([MACH_FIXPRI], [1], [MACH_FIXPRI])
-# Mach host (resource alloc.).
-AC_DEFINE([MACH_HOST], [0], [MACH_HOST])
+# Mach host (cpu resource alloc.).
+[if [ $mach_ncpus -gt 1 ]; then]
+ AC_DEFINE([MACH_HOST], [1], [MACH_HOST])
+[else]
+ AC_DEFINE([MACH_HOST], [0], [MACH_HOST])
+[fi]
# IPC debugging calls.
AC_DEFINE([MACH_IPC_DEBUG], [1], [MACH_IPC_DEBUG])
@@ -99,7 +103,12 @@ AC_DEFINE([MACH_MP_DEBUG], [0], [MACH_MP_DEBUG])
AC_DEFINE([MACH_PAGEMAP], [1], [MACH_PAGEMAP])
# Do pc sample histogram.
-AC_DEFINE([MACH_PCSAMPLE], [1], [MACH_PCSAMPLE])
+[if [ $mach_ncpus -gt 1 ]; then]
+ # Apparently not MP-safe yet.
+ AC_DEFINE([MACH_PCSAMPLE], [0], [MACH_PCSAMPLE])
+[else]
+ AC_DEFINE([MACH_PCSAMPLE], [1], [MACH_PCSAMPLE])
+[fi]
# Sample kernel too.
AC_ARG_ENABLE([kernsample],
diff --git a/configure.ac b/configure.ac
index 019842db..69f75cf2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -21,6 +21,13 @@ AC_INIT([AC_PACKAGE_NAME], [AC_PACKAGE_VERSION], [AC_PACKAGE_BUGREPORT],
[AC_PACKAGE_TARNAME])
AC_CONFIG_SRCDIR([kern/ipc_kobject.c])
+if test -z "${CFLAGS+set}"; then
+ # Use these CFLAGS by default if nothing is set.
+ CFLAGS="-g -O2"
+fi
+# We don't need glibc to compile gnumach.
+CFLAGS="$CFLAGS -ffreestanding -nostdlib"
+
AC_CONFIG_AUX_DIR([build-aux])
AM_INIT_AUTOMAKE(
@@ -56,8 +63,7 @@ case $host_platform:$host_cpu in
default:i?86)
host_platform=at;;
default:x86_64)]
- AC_MSG_WARN([Platform set to Xen by default, this can not boot on non-Xen systems, you currently need a 32bit build for that.])
- [host_platform=xen;;
+ [host_platform=at;;
at:i?86 | xen:i?86 | at:x86_64 | xen:x86_64)
:;;
*)]
@@ -80,7 +86,12 @@ AC_SUBST([systype])
#
AC_PROG_AWK
+# Temporarily force cross compiling mode to make sure the configure script
+# does not try to run compiled binaries.
+save_cross_compiling=$cross_compiling
+cross_compiling=yes
AM_PROG_AS
+cross_compiling=$save_cross_compiling
AC_PROG_CC
AC_PROG_CPP
AC_PROG_INSTALL
@@ -112,9 +123,6 @@ AC_CHECK_PROG([PATCH], [patch], [patch], [patch-not-found])
# configure fragments.
#
-# The test suite.
-m4_include([tests/configfrag.ac])
-
# Default set of device drivers.
AC_ARG_ENABLE([device-drivers],
AS_HELP_STRING([--enable-device-drivers=WHICH], [specify WHICH (on `ix86-at'
@@ -136,6 +144,16 @@ AC_ARG_ENABLE([device-drivers],
[;;
esac]
+AC_ARG_ENABLE([user32],
+AS_HELP_STRING([--enable-user32], [enable 32-bit user space on a 64-bit kernel]))
+[if [ x"$enable_user32" = xyes ]; then]
+ AC_DEFINE([USER32], [], [enable 32-bit user on 64-bit kernel])
+ AM_CONDITIONAL([enable_user32], [true])
+[else]
+ AM_CONDITIONAL([enable_user32], [false])
+[fi]
+
+
# Platform-specific configuration.
# PC AT.
@@ -160,6 +178,9 @@ m4_include([configfrag.ac])
# Linux code snarfed into GNU Mach.
m4_include([linux/configfrag.ac])
+
+# The test suite.
+m4_include([tests/configfrag.ac])
#
# Compiler features.
diff --git a/ddb/db_aout.c b/ddb/db_aout.c
deleted file mode 100644
index d3f2e31e..00000000
--- a/ddb/db_aout.c
+++ /dev/null
@@ -1,508 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * Author: David B. Golub, Carnegie Mellon University
- * Date: 7/90
- */
-
-#if MACH_KDB
-
-/*
- * Symbol table routines for a.out format files.
- */
-
-#include <string.h>
-#include <mach/std_types.h>
-#include <machine/db_machdep.h> /* data types */
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_aout.h>
-
-#ifndef DB_NO_AOUT
-
-#include <ddb/nlist.h> /* a.out symbol table */
-#include <ddb/stab.h>
-
-#define private static
-
-/*
- * An a.out symbol table as loaded into the kernel debugger:
- *
- * symtab -> size of symbol entries, in bytes
- * sp -> first symbol entry
- * ...
- * ep -> last symbol entry + 1
- * strtab == start of string table
- * size of string table in bytes,
- * including this word
- * -> strings
- */
-
-/*
- * Find pointers to the start and end of the symbol entries,
- * given a pointer to the start of the symbol table.
- */
-#define db_get_aout_symtab(symtab, sp, ep) \
- (sp = (struct nlist *)((vm_offset_t *)(symtab) + 1), \
- ep = (struct nlist *)((char *)sp + *((int*)symtab)))
-
-boolean_t
-aout_db_sym_init(
- char * symtab, /* pointer to start of symbol table */
- char * esymtab, /* pointer to end of string table,
- for checking - may be rounded up to
- integer boundary */
- char * name,
- char * task_addr) /* use for this task only */
-{
- struct nlist *sym_start, *sym_end;
- struct nlist *sp;
- char * strtab;
- int strlen;
- char * estrtab;
-
- db_get_aout_symtab(symtab, sym_start, sym_end);
-
- strtab = (char *)sym_end;
- strlen = *(int *)strtab;
- estrtab = strtab + strlen;
-
-#define round_to_size(x) \
- (((vm_offset_t)(x) + sizeof(vm_size_t) - 1) & ~(sizeof(vm_size_t) - 1))
-
- if (round_to_size(estrtab) != round_to_size(esymtab)) {
- db_printf("[ %s symbol table not valid ]\n", name);
- return (FALSE);
- }
-
-#undef round_to_size
-
- for (sp = sym_start; sp < sym_end; sp++) {
- long strx;
- strx = sp->n_un.n_strx;
- if (strx != 0) {
- if (strx > strlen) {
- db_printf("Bad string table index (%#x)\n", strx);
- sp->n_un.n_name = 0;
- continue;
- }
- sp->n_un.n_name = strtab + strx;
- }
- }
-
- if (db_add_symbol_table(SYMTAB_AOUT,
- (char *)sym_start,
- (char *)sym_end,
- name,
- symtab,
- task_addr))
- {
- /* Successfully added symbol table */
- db_printf("[ preserving %d bytes of %s symbol table ]\n",
- esymtab - (char *)symtab, name);
- return TRUE;
- }
- else
- return FALSE;
-}
-
-/*
- * check file name or not (check xxxx.x pattern)
- */
-private boolean_t __attribute__ ((pure))
-aout_db_is_filename(name)
- const char *name;
-{
- while (*name) {
- if (*name == '.') {
- if (name[1])
- return(TRUE);
- }
- name++;
- }
- return(FALSE);
-}
-
-/*
- * special name comparison routine with a name in the symbol table entry
- */
-private boolean_t __attribute__ ((pure))
-aout_db_eq_name(sp, name)
- const struct nlist *sp;
- const char *name;
-{
- const char *s1, *s2;
-
- s1 = sp->n_un.n_name;
- s2 = name;
- if (*s1 == '_' && *s2 && *s2 != '_')
- s1++;
- while (*s2) {
- if (*s1++ != *s2++) {
- /*
- * check .c .o file name comparison case
- */
- if (*s2 == 0 && sp->n_un.n_name <= s1 - 2
- && s1[-2] == '.' && s1[-1] == 'o')
- return(TRUE);
- return(FALSE);
- }
- }
- /*
- * do special check for
- * xxx:yyy for N_FUN
- * xxx.ttt for N_DATA and N_BSS
- */
- return(*s1 == 0 || (*s1 == ':' && sp->n_type == N_FUN) ||
- (*s1 == '.' && (sp->n_type == N_DATA || sp->n_type == N_BSS)));
-}
-
-/*
- * search a symbol table with name and type
- * fp(in,out): last found text file name symbol entry
- */
-private struct nlist *
-aout_db_search_name(sp, ep, name, type, fp)
- struct nlist *sp;
- const struct nlist *ep;
- const char *name;
- int type;
- struct nlist **fp;
-{
- struct nlist *file_sp = *fp;
- struct nlist *found_sp = 0;
-
- for ( ; sp < ep; sp++) {
- if (sp->n_type == N_TEXT && aout_db_is_filename(sp->n_un.n_name))
- *fp = sp;
- if (type) {
- if (sp->n_type == type) {
- if (aout_db_eq_name(sp, name))
- return(sp);
- }
- if (sp->n_type == N_SO)
- *fp = sp;
- continue;
- }
- if (sp->n_type & N_STAB)
- continue;
- if (sp->n_un.n_name && aout_db_eq_name(sp, name)) {
- /*
- * In case of qaulified search by a file,
- * return it immediately with some check.
- * Otherwise, search external one
- */
- if (file_sp) {
- if ((file_sp == *fp) || (sp->n_type & N_EXT))
- return(sp);
- } else if (sp->n_type & N_EXT)
- return(sp);
- else
- found_sp = sp;
- }
- }
- return(found_sp);
-}
-
-/*
- * search a symbol with file, func and line qualification
- */
-private db_sym_t
-aout_db_qualified_search(stab, file, sym, line)
- db_symtab_t *stab;
- const char *file;
- const char *sym;
- int line;
-{
- struct nlist *sp = (struct nlist *)stab->start;
- struct nlist *ep = (struct nlist *)stab->end;
- struct nlist *fp = 0;
- struct nlist *found_sp;
- unsigned long func_top;
- boolean_t in_file;
-
- if (file == 0 && sym == 0)
- return(DB_SYM_NULL);
- if (file) {
- if ((sp = aout_db_search_name(sp, ep, file, N_TEXT, &fp)) == 0)
- return(DB_SYM_NULL);
- }
- if (sym) {
- sp = aout_db_search_name(sp, ep, sym, (line > 0)? N_FUN: 0, &fp);
- if (sp == 0)
- return(DB_SYM_NULL);
- }
- if (line > 0) {
- if (file && !aout_db_eq_name(fp, file))
- return(DB_SYM_NULL);
- found_sp = 0;
- if (sp->n_type == N_FUN) {
- /*
- * qualified by function name
- * search backward because line number entries
- * for the function are above it in this case.
- */
- func_top = sp->n_value;
- for (sp--; sp >= (struct nlist *)stab->start; sp--) {
- if (sp->n_type != N_SLINE)
- continue;
- if (sp->n_value < func_top)
- break;
- if (sp->n_desc <= line) {
- if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
- found_sp = sp;
- if (sp->n_desc == line)
- break;
- }
- }
- if (sp->n_type != N_SLINE || sp->n_value < func_top)
- return(DB_SYM_NULL);
- } else {
- /*
- * qualified by only file name
- * search forward in this case
- */
- in_file = TRUE;
- for (sp++; sp < ep; sp++) {
- if (sp->n_type == N_TEXT
- && aout_db_is_filename(sp->n_un.n_name))
- break; /* enter into another file */
- if (sp->n_type == N_SOL) {
- in_file = aout_db_eq_name(sp, file);
- continue;
- }
- if (!in_file || sp->n_type != N_SLINE)
- continue;
- if (sp->n_desc <= line) {
- if (found_sp == 0 || found_sp->n_desc < sp->n_desc)
- found_sp = sp;
- if (sp->n_desc == line)
- break;
- }
- }
- }
- sp = found_sp;
- }
- return((db_sym_t) sp);
-}
-
-/*
- * lookup symbol by name
- */
-db_sym_t
-aout_db_lookup(
- db_symtab_t *stab,
- char * symstr)
-{
- return(db_sym_parse_and_lookup(aout_db_qualified_search, stab, symstr));
-}
-
-db_sym_t
-aout_db_search_symbol(
- db_symtab_t * symtab,
- db_addr_t off,
- db_strategy_t strategy,
- db_expr_t *diffp) /* in/out */
-{
- unsigned long diff = *diffp;
- struct nlist *symp = 0;
- struct nlist *sp, *ep;
-
- sp = (struct nlist *)symtab->start;
- ep = (struct nlist *)symtab->end;
-
- for (; sp < ep; sp++) {
- if (sp->n_un.n_name == 0)
- continue;
- if ((sp->n_type & N_STAB) != 0)
- continue;
- if (strategy == DB_STGY_XTRN && (sp->n_type & N_EXT) == 0)
- continue;
- if (off >= sp->n_value) {
-
- unsigned int type = sp->n_type;
-
- if (type == N_FN) continue;
- if (off - sp->n_value < diff) {
- diff = off - sp->n_value;
- symp = sp;
- if (diff == 0 && (type & N_EXT))
- break;
- }
- else if (off - sp->n_value == diff) {
- if (symp == 0)
- symp = sp;
- else if ((symp->n_type & N_EXT) == 0 &&
- (type & N_EXT) != 0)
- symp = sp; /* pick the external symbol */
- }
- }
- }
- if (symp == 0) {
- *diffp = off;
- }
- else {
- *diffp = diff;
- }
- return ((db_sym_t)symp);
-}
-
-/*
- * Return the name and value for a symbol.
- */
-void
-aout_db_symbol_values(
- db_symtab_t *stab,
- db_sym_t sym,
- char **namep,
- db_expr_t *valuep)
-{
- struct nlist *sp;
-
- sp = (struct nlist *)sym;
- if (namep)
- *namep = sp->n_un.n_name;
- if (valuep)
- *valuep = sp->n_value;
-}
-
-#define X_DB_MAX_DIFF 8 /* maximum allowable diff at the end of line */
-
-/*
- * search symbol by value
- */
-private boolean_t
-aout_db_search_by_addr(stab, addr, file, func, line, diff)
- const db_symtab_t *stab;
- vm_offset_t addr;
- char **file;
- char **func;
- int *line;
- unsigned long *diff;
-{
- struct nlist *sp;
- struct nlist *line_sp, *func_sp, *file_sp, *line_func;
- vm_size_t func_diff, line_diff;
- boolean_t found_line = FALSE;
- struct nlist *ep = (struct nlist *)stab->end;
-
- line_sp = func_sp = file_sp = line_func = 0;
- *file = *func = 0;
- *line = 0;
- func_diff = line_diff = ~0;
- for (sp = (struct nlist *)stab->start; sp < ep; sp++) {
- switch(sp->n_type) {
- case N_SLINE:
- if (sp->n_value <= addr) {
- if (line_sp == 0 || line_diff >= addr - sp->n_value) {
- if (line_func)
- line_func = 0;
- line_sp = sp;
- line_diff = addr - sp->n_value;
- }
- }
- if (sp->n_value >= addr && line_sp)
- found_line = TRUE;
- continue;
- case N_FUN:
- if ((found_line || (line_sp && line_diff < X_DB_MAX_DIFF))
- && line_func == 0)
- line_func = sp;
- continue;
- case N_SO:
- if (sp->n_value > addr)
- continue;
- if (file_sp == 0 || file_sp->n_value <= sp->n_value)
- file_sp = sp;
- continue;
- case N_TEXT:
- if (aout_db_is_filename(sp->n_un.n_name)) {
- if (sp->n_value > addr)
- continue;
- if (file_sp == 0 || file_sp->n_value <= sp->n_value)
- file_sp = sp;
- } else if (sp->n_value <= addr &&
- (func_sp == 0 || func_diff > addr - sp->n_value)) {
- func_sp = sp;
- func_diff = addr - sp->n_value;
- }
- continue;
- case N_TEXT|N_EXT:
- if (sp->n_value <= addr &&
- (func_sp == 0 || func_diff >= addr - sp->n_value)) {
- func_sp = sp;
- func_diff = addr - sp->n_value;
- if (func_diff == 0 && file_sp && func_sp)
- break;
- }
- default:
- continue;
- }
- break;
- }
- if (line_sp) {
- if (line_func == 0 || func_sp == 0
- || line_func->n_value != func_sp->n_value)
- line_sp = 0;
- }
- if (file_sp) {
- *diff = addr - file_sp->n_value;
- *file = file_sp->n_un.n_name;
- }
- if (func_sp) {
- *diff = addr - func_sp->n_value;
- *func = (func_sp->n_un.n_name[0] == '_')?
- func_sp->n_un.n_name + 1: func_sp->n_un.n_name;
- }
- if (line_sp) {
- *diff = addr - line_sp->n_value;
- *line = line_sp->n_desc;
- }
- return(file_sp || func_sp || line_sp);
-}
-
-/*
- * Find filename and lineno within, given the current pc.
- */
-boolean_t
-aout_db_line_at_pc(stab, sym, file, line, pc)
- db_symtab_t *stab;
- db_sym_t sym;
- char **file;
- int *line;
- db_addr_t pc;
-{
- char *func;
- unsigned long diff;
- boolean_t found;
-
- found = aout_db_search_by_addr(stab, pc, file, &func, line, &diff);
- return(found && func && *file);
-}
-
-#endif /* DB_NO_AOUT */
-
-#endif /* MACH_KDB */
diff --git a/ddb/db_aout.h b/ddb/db_aout.h
deleted file mode 100644
index 7c03d36d..00000000
--- a/ddb/db_aout.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2013 Free Software Foundation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _DDB_DB_AOUT_H_
-#define _DDB_DB_AOUT_H_
-
-#include <ddb/db_sym.h>
-#include <machine/db_machdep.h>
-
-extern boolean_t
-aout_db_line_at_pc(
- db_symtab_t *stab,
- db_sym_t sym,
- char **file,
- int *line,
- db_addr_t pc);
-
-extern db_sym_t
-aout_db_lookup(
- db_symtab_t *stab,
- char * symstr);
-
-extern db_sym_t
-aout_db_search_symbol(
- db_symtab_t * symtab,
- db_addr_t off,
- db_strategy_t strategy,
- db_expr_t *diffp);
-
-extern void
-aout_db_symbol_values(
- db_symtab_t *stab,
- db_sym_t sym,
- char **namep,
- db_expr_t *valuep);
-
-#endif /* _DDB_DB_AOUT_H_ */
diff --git a/ddb/db_break.c b/ddb/db_break.c
index c3a9e181..374dc6ac 100644
--- a/ddb/db_break.c
+++ b/ddb/db_break.c
@@ -61,8 +61,8 @@ static db_thread_breakpoint_t db_free_thread_break_list = 0;
static boolean_t db_thread_break_init = FALSE;
static int db_breakpoint_number = 0;
-db_breakpoint_t
-db_breakpoint_alloc()
+static db_breakpoint_t
+db_breakpoint_alloc(void)
{
db_breakpoint_t bkpt;
@@ -80,20 +80,19 @@ db_breakpoint_alloc()
return (bkpt);
}
-void
-db_breakpoint_free(bkpt)
- db_breakpoint_t bkpt;
+static void
+db_breakpoint_free(db_breakpoint_t bkpt)
{
bkpt->link = db_free_breakpoints;
db_free_breakpoints = bkpt;
}
static int
-db_add_thread_breakpoint(bkpt, task_thd, count, task_bpt)
- const db_breakpoint_t bkpt;
- vm_offset_t task_thd;
- int count;
- boolean_t task_bpt;
+db_add_thread_breakpoint(
+ const db_breakpoint_t bkpt,
+ vm_offset_t task_thd,
+ int count,
+ boolean_t task_bpt)
{
db_thread_breakpoint_t tp;
@@ -155,9 +154,9 @@ db_delete_thread_breakpoint(
}
static db_thread_breakpoint_t __attribute__ ((pure))
-db_find_thread_breakpoint(bkpt, thread)
- const db_breakpoint_t bkpt;
- const thread_t thread;
+db_find_thread_breakpoint(
+ const db_breakpoint_t bkpt,
+ const thread_t thread)
{
db_thread_breakpoint_t tp;
task_t task = (thread == THREAD_NULL)? TASK_NULL: thread->task;
@@ -175,9 +174,9 @@ db_find_thread_breakpoint(bkpt, thread)
}
db_thread_breakpoint_t
-db_find_thread_breakpoint_here(task, addr)
- const task_t task;
- db_addr_t addr;
+db_find_thread_breakpoint_here(
+ const task_t task,
+ db_addr_t addr)
{
db_breakpoint_t bkpt;
@@ -267,12 +266,12 @@ db_check_breakpoint_valid(void)
}
db_breakpoint_t
-db_set_breakpoint(task, addr, count, thread, task_bpt)
- const task_t task;
- db_addr_t addr;
- int count;
- const thread_t thread;
- boolean_t task_bpt;
+db_set_breakpoint(
+ const task_t task,
+ db_addr_t addr,
+ int count,
+ const thread_t thread,
+ boolean_t task_bpt)
{
db_breakpoint_t bkpt;
db_breakpoint_t alloc_bkpt = 0;
@@ -319,11 +318,11 @@ db_set_breakpoint(task, addr, count, thread, task_bpt)
}
}
-void
-db_delete_breakpoint(task, addr, task_thd)
- const task_t task;
- db_addr_t addr;
- vm_offset_t task_thd;
+static void
+db_delete_breakpoint(
+ const task_t task,
+ db_addr_t addr,
+ vm_offset_t task_thd)
{
db_breakpoint_t bkpt;
db_breakpoint_t *prev;
@@ -351,9 +350,9 @@ db_delete_breakpoint(task, addr, task_thd)
}
db_breakpoint_t __attribute__ ((pure))
-db_find_breakpoint(task, addr)
- const task_t task;
- db_addr_t addr;
+db_find_breakpoint(
+ const task_t task,
+ db_addr_t addr)
{
db_breakpoint_t bkpt;
@@ -367,9 +366,9 @@ db_find_breakpoint(task, addr)
}
boolean_t
-db_find_breakpoint_here(task, addr)
- const task_t task;
- db_addr_t addr;
+db_find_breakpoint_here(
+ const task_t task,
+ db_addr_t addr)
{
db_breakpoint_t bkpt;
@@ -523,7 +522,7 @@ db_delete_temp_breakpoint(
/*
* List breakpoints.
*/
-void
+static void
db_list_breakpoints(void)
{
db_breakpoint_t bkpt;
@@ -598,7 +597,11 @@ db_list_breakpoints(void)
/* Delete breakpoint */
/*ARGSUSED*/
void
-db_delete_cmd(void)
+db_delete_cmd(
+ db_expr_t addr_,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
int n;
thread_t thread;
@@ -677,11 +680,11 @@ db_delete_cmd(void)
/* Set breakpoint with skip count */
/*ARGSUSED*/
void
-db_breakpoint_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_breakpoint_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
int n;
thread_t thread;
@@ -731,7 +734,11 @@ db_breakpoint_cmd(addr, have_addr, count, modif)
/* list breakpoints */
void
-db_listbreak_cmd(void)
+db_listbreak_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
db_list_breakpoints();
}
diff --git a/ddb/db_break.h b/ddb/db_break.h
index 610af2f8..9f0ee95b 100644
--- a/ddb/db_break.h
+++ b/ddb/db_break.h
@@ -88,9 +88,17 @@ extern db_breakpoint_t db_set_breakpoint(const task_t task, db_addr_t addr,
int count, const thread_t thread,
boolean_t task_bpt);
-void db_listbreak_cmd(void);
+void db_listbreak_cmd(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char *modif);
-void db_delete_cmd(void);
+void db_delete_cmd(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
void db_breakpoint_cmd(
db_expr_t addr,
diff --git a/ddb/db_command.c b/ddb/db_command.c
index 1d4052cd..4671fe8d 100644
--- a/ddb/db_command.c
+++ b/ddb/db_command.c
@@ -96,11 +96,11 @@ boolean_t db_ed_style = TRUE;
/*
* Search for command prefix.
*/
-int
-db_cmd_search(name, table, cmdp)
- const char * name;
- const struct db_command *table;
- const struct db_command **cmdp; /* out */
+static int
+db_cmd_search(
+ const char * name,
+ const struct db_command *table,
+ const struct db_command **cmdp /* out */)
{
const struct db_command *cmd;
int result = CMD_NONE;
@@ -143,9 +143,8 @@ db_cmd_search(name, table, cmdp)
return (result);
}
-void
-db_cmd_list(table)
- const struct db_command *table;
+static void
+db_cmd_list(const struct db_command *table)
{
const struct db_command *cmd;
@@ -155,12 +154,12 @@ db_cmd_list(table)
}
}
-void
+static void
db_command(
- struct db_command **last_cmdp, /* IN_OUT */
+ const struct db_command **last_cmdp, /* IN_OUT */
struct db_command *cmd_table)
{
- struct db_command *cmd;
+ const struct db_command *cmd = NULL;
int t;
char modif[TOK_STRING_SIZE];
db_expr_t addr, count;
@@ -267,7 +266,7 @@ db_command(
}
}
*last_cmdp = cmd;
- if (cmd != 0) {
+ if (cmd != NULL) {
/*
* Execute the command.
*/
@@ -295,9 +294,9 @@ db_command(
}
}
-void
+static void
db_command_list(
- struct db_command **last_cmdp, /* IN_OUT */
+ const struct db_command **last_cmdp, /* IN_OUT */
struct db_command *cmd_table)
{
do {
@@ -309,29 +308,30 @@ db_command_list(
struct db_command db_show_all_cmds[] = {
{ "tasks", db_show_all_tasks, 0, 0 },
{ "threads", db_show_all_threads, 0, 0 },
- { "slocks", db_show_all_slocks, 0, 0 },
+ { "slocks", (db_command_fun_t)db_show_all_slocks, 0, 0 },
+ { "runqs", (db_command_fun_t)db_show_all_runqs, 0, 0 },
{ (char *)0 }
};
struct db_command db_show_cmds[] = {
{ "all", 0, 0, db_show_all_cmds },
- { "registers", db_show_regs, 0, 0 },
+ { "registers", (db_command_fun_t)db_show_regs, 0, 0 },
{ "breaks", db_listbreak_cmd, 0, 0 },
{ "watches", db_listwatch_cmd, 0, 0 },
{ "thread", db_show_one_thread, 0, 0 },
{ "task", db_show_one_task, 0, 0 },
{ "macro", db_show_macro, CS_OWN, 0 },
{ "map", vm_map_print, 0, 0 },
- { "object", vm_object_print, 0, 0 },
- { "page", vm_page_print, 0, 0 },
- { "copy", vm_map_copy_print, 0, 0 },
- { "port", ipc_port_print, 0, 0 },
- { "pset", ipc_pset_print, 0, 0 },
- { "kmsg", ipc_kmsg_print, 0, 0 },
- { "msg", ipc_msg_print, 0, 0 },
+ { "object", (db_command_fun_t)vm_object_print, 0, 0 },
+ { "page", (db_command_fun_t)vm_page_print, 0, 0 },
+ { "copy", (db_command_fun_t)vm_map_copy_print, 0, 0 },
+ { "port", (db_command_fun_t)ipc_port_print, 0, 0 },
+ { "pset", (db_command_fun_t)ipc_pset_print, 0, 0 },
+ { "kmsg", (db_command_fun_t)ipc_kmsg_print, 0, 0 },
+ { "msg", (db_command_fun_t)ipc_msg_print, 0, 0 },
{ "ipc_port", db_show_port_id, 0, 0 },
- { "slabinfo", db_show_slab_info, 0, 0 },
- { "vmstat", db_show_vmstat, 0, 0 },
+ { "slabinfo", (db_command_fun_t)db_show_slab_info, 0, 0 },
+ { "vmstat", (db_command_fun_t)db_show_vmstat, 0, 0 },
{ (char *)0, }
};
@@ -357,13 +357,14 @@ struct db_command db_command_table[] = {
/* this must be the first entry, if it exists */
{ "machine", 0, 0, 0},
#endif
- { "print", db_print_cmd, CS_OWN, 0 },
+ { "print", (db_command_fun_t)db_print_cmd, CS_OWN, 0 },
{ "examine", db_examine_cmd, CS_MORE|CS_SET_DOT, 0 },
{ "x", db_examine_cmd, CS_MORE|CS_SET_DOT, 0 },
{ "xf", db_examine_forward, CS_SET_DOT, 0 },
{ "xb", db_examine_backward, CS_SET_DOT, 0 },
+ { "whatis", db_whatis_cmd, CS_MORE, 0 },
{ "search", db_search_cmd, CS_OWN|CS_SET_DOT, 0 },
- { "set", db_set_cmd, CS_OWN, 0 },
+ { "set", (db_command_fun_t)db_set_cmd, CS_OWN, 0 },
{ "write", db_write_cmd, CS_MORE|CS_SET_DOT, 0 },
{ "w", db_write_cmd, CS_MORE|CS_SET_DOT, 0 },
{ "delete", db_delete_cmd, CS_OWN, 0 },
@@ -380,14 +381,14 @@ struct db_command db_command_table[] = {
{ "match", db_trace_until_matching_cmd,0, 0 },
{ "trace", db_stack_trace_cmd, 0, 0 },
{ "cond", db_cond_cmd, CS_OWN, 0 },
- { "call", db_fncall, CS_OWN, 0 },
+ { "call", (db_command_fun_t)db_fncall, CS_OWN, 0 },
{ "macro", db_def_macro_cmd, CS_OWN, 0 },
{ "dmacro", db_del_macro_cmd, CS_OWN, 0 },
{ "show", 0, 0, db_show_cmds },
{ "debug", 0, 0, db_debug_cmds },
- { "reset", db_reset_cpu, 0, 0 },
- { "reboot", db_reset_cpu, 0, 0 },
- { "halt", db_halt_cpu, 0, 0 },
+ { "reset", (db_command_fun_t)db_reset_cpu, 0, 0 },
+ { "reboot", (db_command_fun_t)db_reset_cpu, 0, 0 },
+ { "halt", (db_command_fun_t)db_halt_cpu, 0, 0 },
{ (char *)0, }
};
@@ -404,7 +405,7 @@ void db_machine_commands_install(struct db_command *ptr)
#endif /* DB_MACHINE_COMMANDS */
-struct db_command *db_last_command = 0;
+const struct db_command *db_last_command = 0;
void
db_help_cmd(void)
@@ -470,8 +471,7 @@ db_exec_cmd_nest(
return(db_cmd_loop_done == FALSE);
}
-void db_error(s)
- const char *s;
+void db_error(const char *s)
{
extern int db_macro_level;
@@ -502,7 +502,11 @@ db_fncall(void)
db_expr_t args[MAXARGS];
int nargs = 0;
db_expr_t retval;
- db_expr_t (*func)();
+ typedef db_expr_t(*function_t)(db_expr_t, db_expr_t, db_expr_t,
+ db_expr_t, db_expr_t, db_expr_t,
+ db_expr_t, db_expr_t, db_expr_t,
+ db_expr_t);
+ function_t func;
int t;
if (!db_expression(&fn_addr)) {
@@ -510,7 +514,7 @@ db_fncall(void)
db_flush_lex();
return;
}
- func = (db_expr_t (*) ()) fn_addr;
+ func = (function_t) fn_addr;
t = db_read_token();
if (t == tLPAREN) {
@@ -547,9 +551,9 @@ db_fncall(void)
}
boolean_t __attribute__ ((pure))
-db_option(modif, option)
- const char *modif;
- int option;
+db_option(
+ const char *modif,
+ int option)
{
const char *p;
diff --git a/ddb/db_command.h b/ddb/db_command.h
index 4208bda8..73690a48 100644
--- a/ddb/db_command.h
+++ b/ddb/db_command.h
@@ -53,12 +53,14 @@ extern db_addr_t db_next; /* next address to be examined
or written */
extern jmp_buf_t * db_recover; /* error recovery */
+typedef void (*db_command_fun_t)(db_expr_t, boolean_t, db_expr_t, const char *);
+
/*
* Command table
*/
struct db_command {
char * name; /* command name */
- void (*fcn)(); /* function to call */
+ db_command_fun_t fcn; /* function to call */
int flag; /* extra info: */
#define CS_OWN 0x1 /* non-standard syntax */
#define CS_MORE 0x2 /* standard syntax, but may have other
diff --git a/ddb/db_cond.c b/ddb/db_cond.c
index 31e1d241..d45d9b8a 100644
--- a/ddb/db_cond.c
+++ b/ddb/db_cond.c
@@ -102,8 +102,7 @@ db_cond_check(db_thread_breakpoint_t bkpt)
}
void
-db_cond_print(bkpt)
- const db_thread_breakpoint_t bkpt;
+db_cond_print(const db_thread_breakpoint_t bkpt)
{
char *p, *ep;
struct db_cond *cp;
@@ -121,7 +120,11 @@ db_cond_print(bkpt)
}
void
-db_cond_cmd(void)
+db_cond_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
int c;
struct db_cond *cp;
diff --git a/ddb/db_cond.h b/ddb/db_cond.h
index 6b9c3a5b..c867c6ee 100644
--- a/ddb/db_cond.h
+++ b/ddb/db_cond.h
@@ -30,6 +30,10 @@ extern boolean_t db_cond_check (db_thread_breakpoint_t bkpt);
extern void db_cond_print (db_thread_breakpoint_t bkpt);
-extern void db_cond_cmd (void);
+extern void db_cond_cmd (
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
#endif /* _DDB_DB_COND_H_ */
diff --git a/ddb/db_elf.c b/ddb/db_elf.c
index 7b7eed73..5ccfdd59 100644
--- a/ddb/db_elf.c
+++ b/ddb/db_elf.c
@@ -63,8 +63,8 @@
struct db_symtab_elf {
int type;
- Elf32_Sym *start;
- Elf32_Sym *end;
+ Elf_Sym *start;
+ Elf_Sym *end;
char *strings;
char *map_pointer; /* symbols are for this map only,
if not null */
@@ -80,7 +80,7 @@ elf_db_sym_init (unsigned shdr_num,
char *name,
char *task_addr)
{
- Elf32_Shdr *shdr, *symtab, *strtab;
+ Elf_Shdr *shdr, *symtab, *strtab;
const char *shstrtab;
unsigned i;
@@ -90,7 +90,7 @@ elf_db_sym_init (unsigned shdr_num,
if (shdr_size != sizeof *shdr)
return FALSE;
- shdr = (Elf32_Shdr *) shdr_addr;
+ shdr = (Elf_Shdr *) shdr_addr;
if (shdr[shdr_shndx].sh_type != SHT_STRTAB)
return FALSE;
@@ -127,7 +127,7 @@ elf_db_sym_init (unsigned shdr_num,
(char *) phystokv (strtab->sh_addr),
task_addr)) {
db_printf ("Loaded ELF symbol table for %s (%d symbols)\n",
- name, symtab->sh_size / sizeof (Elf32_Sym));
+ name, symtab->sh_size / sizeof (Elf_Sym));
return TRUE;
}
@@ -142,7 +142,7 @@ elf_db_lookup (db_symtab_t *stab,
char *symstr)
{
struct db_symtab_elf *self = (struct db_symtab_elf *) stab;
- Elf32_Sym *s;
+ Elf_Sym *s;
for (s = self->start; s < self->end; s++)
if (strcmp (symstr, &self->strings[s->st_name]) == 0)
@@ -159,29 +159,29 @@ elf_db_search_symbol (db_symtab_t *stab,
{
struct db_symtab_elf *self = (struct db_symtab_elf *) stab;
unsigned long diff = *diffp;
- Elf32_Sym *s, *symp = NULL;
+ Elf_Sym *s, *symp = NULL;
for (s = self->start; s < self->end; s++) {
if (s->st_name == 0)
continue;
- if (strategy == DB_STGY_XTRN && (ELF32_ST_BIND(s->st_info) != STB_GLOBAL))
+ if (strategy == DB_STGY_XTRN && (ELF_ST_BIND(s->st_info) != STB_GLOBAL))
continue;
if (off >= s->st_value) {
- if (ELF32_ST_TYPE(s->st_info) != STT_FUNC)
+ if (ELF_ST_TYPE(s->st_info) != STT_FUNC)
continue;
if (off - s->st_value < diff) {
diff = off - s->st_value;
symp = s;
- if (diff == 0 && (ELF32_ST_BIND(s->st_info) == STB_GLOBAL))
+ if (diff == 0 && (ELF_ST_BIND(s->st_info) == STB_GLOBAL))
break;
} else if (off - s->st_value == diff) {
if (symp == NULL)
symp = s;
- else if ((ELF32_ST_BIND(symp->st_info) != STB_GLOBAL)
- && (ELF32_ST_BIND(s->st_info) == STB_GLOBAL))
+ else if ((ELF_ST_BIND(symp->st_info) != STB_GLOBAL)
+ && (ELF_ST_BIND(s->st_info) == STB_GLOBAL))
symp = s; /* pick the external symbol */
}
}
@@ -205,7 +205,7 @@ elf_db_symbol_values (db_symtab_t *stab,
db_expr_t *valuep)
{
struct db_symtab_elf *self = (struct db_symtab_elf *) stab;
- Elf32_Sym *s = (Elf32_Sym *) sym;
+ Elf_Sym *s = (Elf_Sym *) sym;
if (namep)
*namep = &self->strings[s->st_name];
diff --git a/ddb/db_examine.c b/ddb/db_examine.c
index 6509a538..1941fc38 100644
--- a/ddb/db_examine.c
+++ b/ddb/db_examine.c
@@ -42,9 +42,12 @@
#include <ddb/db_task_thread.h>
#include <ddb/db_examine.h>
#include <ddb/db_expr.h>
+#include <ddb/db_print.h>
#include <kern/thread.h>
#include <kern/task.h>
+#include <kern/smp.h>
#include <mach/vm_param.h>
+#include <vm/vm_map.h>
#define db_thread_to_task(thread) ((thread)? thread->task: TASK_NULL)
@@ -58,11 +61,11 @@ thread_t db_examine_thread = THREAD_NULL;
*/
/*ARGSUSED*/
void
-db_examine_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_examine_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
thread_t thread;
@@ -90,11 +93,11 @@ db_examine_cmd(addr, have_addr, count, modif)
/* ARGSUSED */
void
-db_examine_forward(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_examine_forward(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
db_examine(db_next, db_examine_format, db_examine_count,
db_thread_to_task(db_examine_thread));
@@ -102,11 +105,11 @@ db_examine_forward(addr, have_addr, count, modif)
/* ARGSUSED */
void
-db_examine_backward(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_examine_backward(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
db_examine(db_examine_prev_addr - (db_next - db_examine_prev_addr),
@@ -115,11 +118,11 @@ db_examine_backward(addr, have_addr, count, modif)
}
void
-db_examine(addr, fmt, count, task)
- db_addr_t addr;
- const char * fmt; /* format string */
- int count; /* repeat count */
- task_t task;
+db_examine(
+ db_addr_t addr,
+ const char * fmt, /* format string */
+ int count, /* repeat count */
+ task_t task)
{
int c;
db_expr_t value;
@@ -253,6 +256,159 @@ db_examine(addr, fmt, count, task)
}
/*
+ * Find out what this address may be
+ */
+/*ARGSUSED*/
+void
+db_whatis_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
+{
+ /* TODO: Add whatever you can think of */
+
+ int i;
+
+ {
+ /* tasks */
+
+ task_t task;
+ int task_id = 0;
+ processor_set_t pset;
+ thread_t thread;
+ int thread_id;
+ vm_map_entry_t entry;
+
+ queue_iterate(&all_psets, pset, processor_set_t, all_psets)
+ queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
+ if (addr >= (vm_offset_t) task
+ && addr < (vm_offset_t) task + sizeof(*task))
+ db_printf("%3d %0*X %s [%d]\n",
+ task_id,
+ 2*sizeof(vm_offset_t),
+ task,
+ task->name,
+ task->thread_count);
+
+ if (addr >= (vm_offset_t) task->map
+ && addr < (vm_offset_t) task->map + sizeof(*(task->map)))
+ db_printf("$map%d %X for $task%d %s\n",
+ task_id, (vm_offset_t) task->map, task_id, task->name);
+
+ for (entry = vm_map_first_entry(task->map);
+ entry != vm_map_to_entry(task->map);
+ entry = entry->vme_next)
+ if (addr >= (vm_offset_t) entry
+ && addr < (vm_offset_t) entry + sizeof(*entry))
+ db_printf("$map%d %X for $task%d %s entry 0x%X: ",
+ task_id, (vm_offset_t) task->map, task_id, task->name,
+ (vm_offset_t) entry);
+
+ if (pmap_whatis(task->map->pmap, addr))
+ db_printf(" in $task%d %s\n", task_id, task->name);
+
+ if ((task == current_task() || task == kernel_task)
+ && addr >= vm_map_min(task->map)
+ && addr < vm_map_max(task->map)) {
+ db_printf("inside $map%d of $task%d %s\n", task_id, task_id, task->name);
+
+ for (entry = vm_map_first_entry(task->map);
+ entry != vm_map_to_entry(task->map);
+ entry = entry->vme_next)
+ if (addr >= entry->vme_start
+ && addr < entry->vme_end) {
+ db_printf(" entry 0x%X: ", (vm_offset_t) entry);
+ if (entry->is_sub_map)
+ db_printf("submap=0x%X, offset=0x%X\n",
+ (vm_offset_t) entry->object.sub_map,
+ (vm_offset_t) entry->offset);
+ else
+ db_printf("object=0x%X, offset=0x%X\n",
+ (vm_offset_t) entry->object.vm_object,
+ (vm_offset_t) entry->offset);
+ }
+ }
+
+ thread_id = 0;
+ queue_iterate(&task->thread_list, thread, thread_t, thread_list) {
+ if (addr >= (vm_offset_t) thread
+ && addr < (vm_offset_t) thread + sizeof(*thread)) {
+ db_printf("In $task%d %s\n", task_id, task->name);
+ db_print_thread(thread, thread_id, 0);
+ }
+ if (addr >= thread->kernel_stack
+ && addr < thread->kernel_stack + KERNEL_STACK_SIZE) {
+ db_printf("In $task%d %s\n", task_id, task->name);
+ db_printf(" on stack of $thread%d.%d\n", task_id, thread_id);
+ db_print_thread(thread, thread_id, 0);
+ }
+ thread_id++;
+ }
+ task_id++;
+ }
+ }
+
+ pmap_whatis(kernel_pmap, addr);
+
+ {
+ /* runqs */
+ if (addr >= (vm_offset_t) &default_pset.runq
+ && addr < (vm_offset_t) &default_pset.runq + sizeof(default_pset.runq))
+ db_printf("default runq %p\n", &default_pset.runq);
+ for (i = 0; i < smp_get_numcpus(); i++) {
+ processor_t proc = cpu_to_processor(i);
+ if (addr >= (vm_offset_t) &proc->runq
+ && addr < (vm_offset_t) &proc->runq + sizeof(proc->runq))
+ db_printf("Processor #%d runq %p\n", &proc->runq);
+ }
+ }
+
+ {
+ /* stacks */
+ for (i = 0; i < smp_get_numcpus(); i++) {
+ if (addr >= percpu_array[i].active_stack
+ && addr < percpu_array[i].active_stack + KERNEL_STACK_SIZE)
+ db_printf("Processor #%d active stack\n", i);
+ }
+ }
+
+ db_whatis_slab(addr);
+
+ {
+ /* page */
+ phys_addr_t pa;
+ if (DB_VALID_KERN_ADDR(addr))
+ pa = kvtophys(addr);
+ else
+ pa = pmap_extract(current_task()->map->pmap, addr);
+
+ if (pa) {
+ struct vm_page *page = vm_page_lookup_pa(pa);
+ db_printf("phys %llx, page %p\n", (unsigned long long) pa, page);
+ if (page) {
+ const char *types[] = {
+ [VM_PT_FREE] = "free",
+ [VM_PT_RESERVED] = "reserved",
+ [VM_PT_TABLE] = "table",
+ [VM_PT_KERNEL] = "kernel",
+ };
+ db_printf(" %s\n", types[page->type]);
+ db_printf(" free %u\n", page->free);
+ db_printf(" external %u\n", page->external);
+ db_printf(" busy %u\n", page->busy);
+ db_printf(" private %u\n", page->private);
+ db_printf(" object %lx\n", page->object);
+ db_printf(" offset %lx\n", page->offset);
+ db_printf(" wired %u\n", page->wire_count);
+ db_printf(" segment %u\n", page->seg_index);
+ db_printf(" order %u\n", page->order);
+ }
+ }
+ }
+}
+
+/*
* Print value.
*/
char db_print_format = 'x';
@@ -335,9 +491,7 @@ db_print_loc_and_inst(
}
void
-db_strcpy(dst, src)
- char *dst;
- const char *src;
+db_strcpy(char *dst, const char *src)
{
while ((*dst++ = *src++))
;
@@ -348,7 +502,11 @@ db_strcpy(dst, src)
* Syntax: search [/bhl] addr value [mask] [,count] [thread]
*/
void
-db_search_cmd(void)
+db_search_cmd(
+ db_expr_t e,
+ boolean_t b,
+ db_expr_t e2,
+ const char * cc)
{
int t;
db_addr_t addr;
diff --git a/ddb/db_examine.h b/ddb/db_examine.h
index df578a02..c76fa2a2 100644
--- a/ddb/db_examine.h
+++ b/ddb/db_examine.h
@@ -61,9 +61,19 @@ int db_xcdump(
int count,
task_t task);
+extern void db_whatis_cmd (
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char *modif);
+
void db_print_cmd(void);
-void db_search_cmd(void);
+void db_search_cmd(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
void db_search(
db_addr_t addr,
diff --git a/ddb/db_expr.c b/ddb/db_expr.c
index c9e6752a..90edb6fb 100644
--- a/ddb/db_expr.c
+++ b/ddb/db_expr.c
@@ -41,7 +41,7 @@
#include <ddb/db_variables.h>
#include <kern/task.h>
-boolean_t
+static boolean_t
db_term(db_expr_t *valuep)
{
int t;
@@ -92,10 +92,7 @@ db_term(db_expr_t *valuep)
}
int
-db_size_option(modif, u_option, t_option)
- const char *modif;
- boolean_t *u_option;
- boolean_t *t_option;
+db_size_option(const char *modif, boolean_t *u_option, boolean_t *t_option)
{
const char *p;
int size = sizeof(int);
@@ -124,7 +121,7 @@ db_size_option(modif, u_option, t_option)
return(size);
}
-boolean_t
+static boolean_t
db_unary(db_expr_t *valuep)
{
int t;
@@ -173,7 +170,7 @@ db_unary(db_expr_t *valuep)
return (db_term(valuep));
}
-boolean_t
+static boolean_t
db_mult_expr(db_expr_t *valuep)
{
db_expr_t lhs = 0, rhs;
@@ -218,7 +215,7 @@ db_mult_expr(db_expr_t *valuep)
return (TRUE);
}
-boolean_t
+static boolean_t
db_add_expr(db_expr_t *valuep)
{
db_expr_t lhs, rhs;
@@ -249,7 +246,7 @@ db_add_expr(db_expr_t *valuep)
return (TRUE);
}
-boolean_t
+static boolean_t
db_shift_expr(db_expr_t *valuep)
{
db_expr_t lhs, rhs;
@@ -283,7 +280,7 @@ db_shift_expr(db_expr_t *valuep)
return (TRUE);
}
-boolean_t
+static boolean_t
db_logical_relation_expr(db_expr_t *valuep)
{
db_expr_t lhs, rhs;
@@ -332,7 +329,7 @@ db_logical_relation_expr(db_expr_t *valuep)
return (TRUE);
}
-boolean_t
+static boolean_t
db_logical_and_expr(db_expr_t *valuep)
{
db_expr_t lhs, rhs;
@@ -354,7 +351,7 @@ db_logical_and_expr(db_expr_t *valuep)
return (TRUE);
}
-boolean_t
+static boolean_t
db_logical_or_expr(db_expr_t *valuep)
{
db_expr_t lhs, rhs;
diff --git a/ddb/db_ext_symtab.c b/ddb/db_ext_symtab.c
index e1bdfd8b..db7bec25 100644
--- a/ddb/db_ext_symtab.c
+++ b/ddb/db_ext_symtab.c
@@ -35,6 +35,7 @@
#include <vm/vm_kern.h>
#include <vm/vm_user.h>
#include <kern/host.h>
+#include <kern/mach_debug.server.h>
#include <kern/task.h>
#include <ddb/db_sym.h>
@@ -49,7 +50,7 @@ kern_return_t
host_load_symbol_table(
host_t host,
task_t task,
- char * name,
+ const char * name,
pointer_t symtab,
unsigned int symtab_count)
{
diff --git a/ddb/db_input.c b/ddb/db_input.c
index 6b6db764..357474b7 100644
--- a/ddb/db_input.c
+++ b/ddb/db_input.c
@@ -67,19 +67,15 @@ char * db_history_prev = (char *) 0; /* start of previous line */
#define BLANK ' '
#define BACKUP '\b'
-void
-db_putstring(s, count)
- const char *s;
- int count;
+static void
+db_putstring(const char *s, int count)
{
while (--count >= 0)
cnputc(*s++);
}
-void
-db_putnchars(c, count)
- int c;
- int count;
+static void
+db_putnchars(int c, int count)
{
while (--count >= 0)
cnputc(c);
@@ -90,7 +86,7 @@ db_putnchars(c, count)
*/
#define DEL_FWD 0
#define DEL_BWD 1
-void
+static void
db_delete(
int n,
int bwd)
@@ -110,7 +106,7 @@ db_delete(
db_le -= n;
}
-void
+static void
db_delete_line(void)
{
db_delete(db_le - db_lc, DEL_FWD);
@@ -136,11 +132,17 @@ db_delete_line(void)
#endif /* DB_HISTORY_SIZE */
/* returns TRUE at end-of-line */
-boolean_t
+static boolean_t
db_inputchar(int c)
{
+ static int escaped, csi;
+ int was_escaped = escaped, was_csi = csi;
+ escaped = 0;
+ csi = 0;
+
switch (c) {
case CTRL('b'):
+ left:
/* back up one character */
if (db_lc > db_lbuf_start) {
cnputc(BACKUP);
@@ -148,6 +150,7 @@ db_inputchar(int c)
}
break;
case CTRL('f'):
+ right:
/* forward one character */
if (db_lc < db_le) {
cnputc(*db_lc);
@@ -202,6 +205,7 @@ db_inputchar(int c)
break;
#if DB_HISTORY_SIZE != 0
case CTRL('p'):
+ up:
DEC_DB_CURR();
while (db_history_curr != db_history_last) {
DEC_DB_CURR();
@@ -227,6 +231,7 @@ db_inputchar(int c)
db_putstring(db_lbuf_start, db_le - db_lbuf_start);
break;
case CTRL('n'):
+ down:
while (db_history_curr != db_history_last) {
if (*db_history_curr == '\0')
break;
@@ -306,7 +311,38 @@ db_inputchar(int c)
#endif /* DB_HISTORY_SIZE */
*db_le++ = c;
return (TRUE);
+ case '\033':
+ escaped = 1;
+ break;
+ case '[':
+ if (was_escaped)
+ csi = 1;
+ else
+ goto plain;
+ break;
+ case 'A':
+ if (was_csi)
+ goto up;
+ else
+ goto plain;
+ case 'B':
+ if (was_csi)
+ goto down;
+ else
+ goto plain;
+ case 'C':
+ if (was_csi)
+ goto right;
+ else
+ goto plain;
+ case 'D':
+ if (was_csi)
+ goto left;
+ else
+ goto plain;
+
default:
+ plain:
if (db_le == db_lbuf_end) {
cnputc('\007');
}
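
The db_inputchar() hunk above teaches the kernel debugger's line editor to decode ANSI arrow-key sequences (ESC '[' followed by 'A'..'D') with a small two-flag state machine, reusing the existing Ctrl-P/Ctrl-N/Ctrl-B/Ctrl-F code paths via the new labels. A standalone sketch of that decoder, with hypothetical names and not part of the patch itself:

/* Illustrative sketch only -- mirrors the escaped/csi flags the patch
 * adds to db_inputchar(). */
enum arrow { ARROW_NONE, ARROW_UP, ARROW_DOWN, ARROW_RIGHT, ARROW_LEFT };

static enum arrow
decode_arrow(int c)
{
	static int escaped, csi;
	int was_escaped = escaped, was_csi = csi;

	escaped = csi = 0;
	if (c == '\033') {			/* ESC opens a sequence */
		escaped = 1;
	} else if (c == '[' && was_escaped) {
		csi = 1;			/* "ESC [" seen, expect final byte */
	} else if (was_csi) {
		switch (c) {
		case 'A': return ARROW_UP;	/* history previous */
		case 'B': return ARROW_DOWN;	/* history next */
		case 'C': return ARROW_RIGHT;	/* cursor forward */
		case 'D': return ARROW_LEFT;	/* cursor back */
		}
	}
	return ARROW_NONE;			/* treat as a plain character */
}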
diff --git a/ddb/db_input.h b/ddb/db_input.h
index 77f07bb6..352f035e 100644
--- a/ddb/db_input.h
+++ b/ddb/db_input.h
@@ -23,6 +23,9 @@
#include <sys/types.h>
+/* Needs to be implemented by each arch. */
+extern void kdb_kintr(void);
+
extern int db_readline (char *lstart, int lsize);
extern void db_check_interrupt(void);
diff --git a/ddb/db_lex.c b/ddb/db_lex.c
index 8ab69106..49063e1b 100644
--- a/ddb/db_lex.c
+++ b/ddb/db_lex.c
@@ -49,8 +49,7 @@ int db_look_char = 0;
db_expr_t db_look_token = 0;
int
-db_read_line(repeat_last)
- const char *repeat_last;
+db_read_line(const char *repeat_last)
{
int i;
@@ -103,8 +102,7 @@ db_save_lex_context(struct db_lex_context *lp)
}
void
-db_restore_lex_context(lp)
- const struct db_lex_context *lp;
+db_restore_lex_context(const struct db_lex_context *lp)
{
db_lp = lp->l_ptr;
db_last_lp = db_lp;
diff --git a/ddb/db_macro.c b/ddb/db_macro.c
index 307b7c59..63159d72 100644
--- a/ddb/db_macro.c
+++ b/ddb/db_macro.c
@@ -58,8 +58,7 @@ int db_macro_level = 0;
db_expr_t db_macro_args[DB_MACRO_LEVEL][DB_NARGS];
static struct db_user_macro *
-db_lookup_macro(name)
- const char *name;
+db_lookup_macro(const char *name)
{
struct db_user_macro *mp;
@@ -73,7 +72,11 @@ db_lookup_macro(name)
}
void
-db_def_macro_cmd(void)
+db_def_macro_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
char *p;
int c;
@@ -104,7 +107,11 @@ db_def_macro_cmd(void)
}
void
-db_del_macro_cmd(void)
+db_del_macro_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
struct db_user_macro *mp;
@@ -120,7 +127,11 @@ db_del_macro_cmd(void)
}
void
-db_show_macro(void)
+db_show_macro(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
struct db_user_macro *mp;
int t;
@@ -140,8 +151,7 @@ db_show_macro(void)
}
int
-db_exec_macro(name)
- const char *name;
+db_exec_macro(const char *name)
{
struct db_user_macro *mp;
int n;
diff --git a/ddb/db_macro.h b/ddb/db_macro.h
index 2c0a599b..91882470 100644
--- a/ddb/db_macro.h
+++ b/ddb/db_macro.h
@@ -24,11 +24,23 @@
#include <sys/types.h>
#include <ddb/db_variables.h>
-extern void db_def_macro_cmd (void);
-
-extern void db_del_macro_cmd (void);
-
-extern void db_show_macro (void);
+extern void db_def_macro_cmd (
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
+
+extern void db_del_macro_cmd (
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
+
+extern void db_show_macro (
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
extern int db_exec_macro (const char *name);
diff --git a/ddb/db_mp.c b/ddb/db_mp.c
index 7c0f6f26..5cf800c9 100644
--- a/ddb/db_mp.c
+++ b/ddb/db_mp.c
@@ -38,6 +38,7 @@
#include <machine/db_interface.h>
#include <ddb/db_command.h>
+#include <ddb/db_input.h>
#include <ddb/db_run.h>
#include <ddb/db_mp.h>
#include <ddb/db_output.h>
@@ -47,7 +48,8 @@
* multiprocessors.
*/
-decl_simple_lock_data(,db_lock) /* lock to enter debugger */
+int db_spl;
+def_simple_lock_irq_data(static,db_lock) /* lock to enter debugger */
volatile int db_cpu = -1; /* CPU currently in debugger */
/* -1 if none */
int db_active[NCPUS] = { 0 }; /* count recursive entries
@@ -77,7 +79,7 @@ db_enter(void)
/*
* Wait for other CPUS to leave debugger.
*/
- lock_db();
+ db_spl = lock_db();
if (db_enter_debug)
db_printf(
@@ -131,7 +133,7 @@ db_leave(void)
/*
* Unlock debugger.
*/
- unlock_db();
+ unlock_db(db_spl);
/*
* Drop recursive entry count.
@@ -230,12 +232,12 @@ db_on(int cpu)
* Give debugger to that CPU
*/
db_cpu = cpu;
- unlock_db();
+ unlock_db(db_spl);
/*
* Wait for it to come back again
*/
- lock_db();
+ db_spl = lock_db();
/*
* Restore ddb globals
@@ -267,10 +269,11 @@ remote_db_enter(void)
* As long as db_cpu is not -1 or cpu_number(), we know that debugger
* is active on another cpu.
*/
-void
+int
lock_db(void)
{
int my_cpu = cpu_number();
+ int s;
for (;;) {
#if CONSOLE_ON_MASTER
@@ -283,25 +286,27 @@ lock_db(void)
#if CONSOLE_ON_MASTER
if (my_cpu == master_cpu) {
- if (!simple_lock_try(&db_lock))
+ if (!(s = simple_lock_try_irq(&db_lock)))
continue;
}
else {
- simple_lock(&db_lock);
+ s = simple_lock_irq(&db_lock);
}
#else /* CONSOLE_ON_MASTER */
- simple_lock(&db_lock);
+ s = simple_lock_irq(&db_lock);
#endif /* CONSOLE_ON_MASTER */
if (db_cpu == -1 || db_cpu == my_cpu)
break;
- simple_unlock(&db_lock);
+ unlock_db(s);
}
+
+ return s;
}
void
-unlock_db(void)
+unlock_db(int s)
{
- simple_unlock(&db_lock);
+ simple_unlock_irq(s, &db_lock);
}
#if CONSOLE_ON_MASTER
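
The db_mp.c hunks above convert the debugger entry lock into an interrupt-disabling lock: lock_db() now returns the spl value saved by simple_lock_irq(), the caller stashes it in db_spl, and unlock_db() hands it back to simple_unlock_irq(). A minimal sketch of that save/restore pattern, using the generic splhigh()/splx() primitives and made-up names rather than the patch's lock macros:

/* Illustrative sketch only.  The real code uses
 * def_simple_lock_irq_data()/simple_lock_irq()/simple_unlock_irq(). */
static volatile int sketch_db_lock;	/* notional lock word */

static int
sketch_lock_db(void)
{
	int s = splhigh();		/* block interrupts on this CPU */
	while (__sync_lock_test_and_set(&sketch_db_lock, 1))
		;			/* spin until the lock is free */
	return s;			/* caller keeps the saved spl */
}

static void
sketch_unlock_db(int s)
{
	__sync_lock_release(&sketch_db_lock);
	splx(s);			/* restore the saved interrupt level */
}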
diff --git a/ddb/db_mp.h b/ddb/db_mp.h
index b0d221ea..8a0a9e13 100644
--- a/ddb/db_mp.h
+++ b/ddb/db_mp.h
@@ -20,14 +20,16 @@
#define _DDB_DB_MP_H_
void remote_db(void);
-void lock_db(void);
-void unlock_db(void);
+int lock_db(void);
+void unlock_db(int);
+void db_on(int i);
#if CONSOLE_ON_MASTER
void db_console(void);
#endif /* CONSOLE_ON_MASTER */
boolean_t db_enter(void);
+void remote_db_enter(void);
void db_leave(void);
#endif /* _DDB_DB_MP_H_ */
diff --git a/ddb/db_output.c b/ddb/db_output.c
index ded9fa96..9a76f545 100644
--- a/ddb/db_output.c
+++ b/ddb/db_output.c
@@ -178,7 +178,7 @@ db_putchar(int c) /* character to output */
/* other characters are assumed non-printing */
}
-void
+static void
db_id_putc(char c, vm_offset_t dummy)
{
db_putchar(c);
diff --git a/ddb/db_print.c b/ddb/db_print.c
index 832faf57..f08dd6ce 100644
--- a/ddb/db_print.c
+++ b/ddb/db_print.c
@@ -39,6 +39,9 @@
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/processor.h>
+#include <kern/smp.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
@@ -127,10 +130,10 @@ db_show_regs(
#define db_thread_fp_used(thread) FALSE
#endif
-char *
-db_thread_stat(thread, status)
- const thread_t thread;
- char *status;
+static char *
+db_thread_stat(
+ const thread_t thread,
+ char *status)
{
char *p = status;
@@ -219,10 +222,11 @@ db_print_thread(
}
} else {
if (flag & OPTION_INDENT)
- db_printf(" %3d (%0*X) ", thread_id,
- 2*sizeof(vm_offset_t), thread);
- else
- db_printf("(%0*X) ", 2*sizeof(vm_offset_t), thread);
+ db_printf(" %3d ", thread_id);
+ if (thread->name[0] &&
+ strncmp (thread->name, thread->task->name, THREAD_NAME_SIZE))
+ db_printf("%s ", thread->name);
+ db_printf("(%0*X) ", 2*sizeof(vm_offset_t), thread);
char status[8];
db_printf("%s", db_thread_stat(thread, status));
if (thread->state & TH_SWAPPED) {
@@ -244,7 +248,7 @@ db_print_thread(
}
}
-void
+static void
db_print_task(
task_t task,
int task_id,
@@ -329,13 +333,41 @@ db_show_all_tasks(db_expr_t addr,
}
}
+static void showrq(run_queue_t rq)
+{
+ db_printf("count(%d) low(%d)\n", rq->count, rq->low);
+}
+
+/*ARGSUSED*/
+void
+db_show_all_runqs(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
+{
+ int i = 0;
+ processor_set_t pset;
+
+ queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
+ db_printf("Processor set #%d runq:\t", i);
+ showrq(&pset->runq);
+ i++;
+ }
+ for (i = 0; i < smp_get_numcpus(); i++) {
+ db_printf("Processor #%d runq:\t", i);
+ showrq(&cpu_to_processor(i)->runq);
+ }
+ db_printf("Stuck threads:\t%d", stuck_count);
+}
+
/*ARGSUSED*/
void
-db_show_all_threads(addr, have_addr, count, modif)
- db_expr_t addr;
- boolean_t have_addr;
- db_expr_t count;
- const char * modif;
+db_show_all_threads(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
{
task_t task;
int task_id;
@@ -384,11 +416,11 @@ db_task_from_space(
/*ARGSUSED*/
void
-db_show_one_thread(addr, have_addr, count, modif)
- db_expr_t addr;
- boolean_t have_addr;
- db_expr_t count;
- const char * modif;
+db_show_one_thread(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
{
int flag;
int thread_id;
@@ -432,11 +464,11 @@ db_show_one_thread(addr, have_addr, count, modif)
/*ARGSUSED*/
void
-db_show_one_task(addr, have_addr, count, modif)
- db_expr_t addr;
- boolean_t have_addr;
- db_expr_t count;
- const char * modif;
+db_show_one_task(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
{
int flag;
int task_id;
@@ -466,10 +498,8 @@ db_show_one_task(addr, have_addr, count, modif)
db_print_task(task, task_id, flag);
}
-int
-db_port_iterate(thread, func)
- const thread_t thread;
- void (*func)();
+static int
+db_port_iterate(const thread_t thread, void (*func)(int, const ipc_port_t, unsigned, int))
{
ipc_entry_t entry;
int n = 0;
@@ -482,29 +512,8 @@ db_port_iterate(thread, func)
return(n);
}
-ipc_port_t
-db_lookup_port(
- thread_t thread,
- int id)
-{
- ipc_entry_t entry;
-
- if (thread == THREAD_NULL)
- return(0);
- if (id < 0)
- return(0);
- entry = ipc_entry_lookup(thread->task->itk_space, (mach_port_t) id);
- if (entry && entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS)
- return((ipc_port_t)entry->ie_object);
- return(0);
-}
-
static void
-db_print_port_id(id, port, bits, n)
- int id;
- const ipc_port_t port;
- unsigned bits;
- int n;
+db_print_port_id(int id, const ipc_port_t port, unsigned bits, int n)
{
if (n != 0 && n % 3 == 0)
db_printf("\n");
@@ -530,11 +539,11 @@ db_print_port_id_long(
/* ARGSUSED */
void
-db_show_port_id(addr, have_addr, count, modif)
- db_expr_t addr;
- boolean_t have_addr;
- db_expr_t count;
- const char * modif;
+db_show_port_id(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
{
thread_t thread;
diff --git a/ddb/db_print.h b/ddb/db_print.h
index 87db97be..b86c6966 100644
--- a/ddb/db_print.h
+++ b/ddb/db_print.h
@@ -50,8 +50,19 @@ void db_show_all_threads(
db_expr_t count,
const char * modif);
+void db_show_all_runqs(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif);
+
db_addr_t db_task_from_space(
ipc_space_t space,
int *task_id);
+void db_print_thread(
+ thread_t thread,
+ int thread_id,
+ int flag);
+
#endif /* !_DDB_DB_PRINT_H_ */
diff --git a/ddb/db_run.c b/ddb/db_run.c
index 9b467fc4..0c8c12f4 100644
--- a/ddb/db_run.c
+++ b/ddb/db_run.c
@@ -250,9 +250,7 @@ db_breakpoint_t db_not_taken_bkpt = 0;
db_breakpoint_t db_taken_bkpt = 0;
db_breakpoint_t __attribute__ ((pure))
-db_find_temp_breakpoint(task, addr)
- const task_t task;
- db_addr_t addr;
+db_find_temp_breakpoint(const task_t task, db_addr_t addr)
{
if (db_taken_bkpt && (db_taken_bkpt->address == addr) &&
db_taken_bkpt->task == task)
@@ -309,9 +307,7 @@ db_set_task_single_step(
}
void
-db_clear_task_single_step(regs, task)
- const db_regs_t *regs;
- task_t task;
+db_clear_task_single_step(const db_regs_t *regs, task_t task)
{
if (db_taken_bkpt != 0) {
db_delete_temp_breakpoint(task, db_taken_bkpt);
@@ -331,11 +327,11 @@ extern int db_cmd_loop_done;
/* single-step */
/*ARGSUSED*/
void
-db_single_step_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_single_step_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
boolean_t print = FALSE;
@@ -359,11 +355,11 @@ db_single_step_cmd(addr, have_addr, count, modif)
/* trace and print until call/return */
/*ARGSUSED*/
void
-db_trace_until_call_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_trace_until_call_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
boolean_t print = FALSE;
@@ -382,11 +378,11 @@ db_trace_until_call_cmd(addr, have_addr, count, modif)
/*ARGSUSED*/
void
-db_trace_until_matching_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_trace_until_matching_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
boolean_t print = FALSE;
@@ -407,11 +403,11 @@ db_trace_until_matching_cmd(addr, have_addr, count, modif)
/* continue */
/*ARGSUSED*/
void
-db_continue_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_continue_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
if (modif[0] == 'c')
db_run_mode = STEP_COUNT;
diff --git a/ddb/db_sym.c b/ddb/db_sym.c
index 2abd5746..f0adb0c2 100644
--- a/ddb/db_sym.c
+++ b/ddb/db_sym.c
@@ -37,7 +37,6 @@
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_task_thread.h>
-#include <ddb/db_aout.h>
#include <ddb/db_elf.h>
#include <vm/vm_map.h> /* vm_map_t */
@@ -60,7 +59,7 @@ db_add_symbol_table(
int type,
char *start,
char *end,
- char *name,
+ const char *name,
char *ref,
char *map_pointer)
{
@@ -91,9 +90,7 @@ db_add_symbol_table(
* overwritten by each call... but in practice this seems okay.
*/
static char * __attribute__ ((pure))
-db_qualify(symname, symtabname)
- const char *symname;
- const char *symtabname;
+db_qualify(const char *symname, const char *symtabname)
{
static char tmp[256];
char *s;
@@ -194,7 +191,7 @@ db_lookup(char *symstr)
*/
db_sym_t
db_sym_parse_and_lookup(
- db_sym_t (*func)(),
+ db_sym_t (*func) (db_symtab_t *, const char*, const char*, int),
db_symtab_t *symtab,
char *symstr)
{
@@ -264,7 +261,7 @@ out:
*/
boolean_t db_qualify_ambiguous_names = FALSE;
-boolean_t
+static boolean_t
db_name_is_ambiguous(char *sym_name)
{
int i;
@@ -443,10 +440,10 @@ db_symbol_values(
unsigned long db_maxoff = 0x4000;
void
-db_task_printsym(off, strategy, task)
- db_addr_t off;
- db_strategy_t strategy;
- task_t task;
+db_task_printsym(
+ db_addr_t off,
+ db_strategy_t strategy,
+ task_t task)
{
db_addr_t d;
char *filename;
@@ -477,19 +474,19 @@ db_task_printsym(off, strategy, task)
}
void
-db_printsym(off, strategy)
- db_expr_t off;
- db_strategy_t strategy;
+db_printsym(
+ db_expr_t off,
+ db_strategy_t strategy)
{
db_task_printsym(off, strategy, TASK_NULL);
}
boolean_t
-db_line_at_pc( sym, filename, linenum, pc)
- db_sym_t sym;
- char **filename;
- int *linenum;
- db_addr_t pc;
+db_line_at_pc(
+ db_sym_t sym,
+ char **filename,
+ int *linenum,
+ db_addr_t pc)
{
return (db_last_symtab) ?
X_db_line_at_pc( db_last_symtab, sym, filename, linenum, pc) :
@@ -507,21 +504,16 @@ void db_free_symbol(db_sym_t s)
* Switch into symbol-table specific routines
*/
-void dummy_db_free_symbol(db_sym_t symbol) { }
-boolean_t dummy_db_sym_init(char *a, char *b, char *c, char *d) {
+static void dummy_db_free_symbol(db_sym_t symbol) { }
+static boolean_t dummy_db_sym_init(char *a, char *b, const char *c, char *d) {
return FALSE;
}
struct db_sym_switch x_db[] = {
- /* BSD a.out format (really, sdb/dbx(1) symtabs) */
-#ifdef DB_NO_AOUT
+ /* BSD a.out format (really, sdb/dbx(1) symtabs) not supported */
{ 0,},
-#else /* DB_NO_AOUT */
- { aout_db_sym_init, aout_db_lookup, aout_db_search_symbol,
- aout_db_line_at_pc, aout_db_symbol_values, dummy_db_free_symbol },
-#endif /* DB_NO_AOUT */
{ 0,},
diff --git a/ddb/db_sym.h b/ddb/db_sym.h
index d8f33874..f4fb5284 100644
--- a/ddb/db_sym.h
+++ b/ddb/db_sym.h
@@ -94,7 +94,7 @@ extern boolean_t db_qualify_ambiguous_names;
extern boolean_t db_add_symbol_table( int type,
char * start,
char * end,
- char *name,
+ const char *name,
char *ref,
char *map_pointer );
@@ -116,6 +116,10 @@ extern void db_symbol_values( db_symtab_t *stab,
char** namep,
db_expr_t* valuep);
+/* find symbol in current task */
+#define db_search_symbol(val,strgy,offp) \
+ db_search_task_symbol(val,strgy,offp,0)
+
/* find name&value given approx val */
#define db_find_sym_and_offset(val,namep,offp) \
@@ -157,10 +161,6 @@ extern void db_symbol_values( db_symtab_t *stab,
db_free_symbol(s); \
} while(0);
-/* find symbol in current task */
-#define db_search_symbol(val,strgy,offp) \
- db_search_task_symbol(val,strgy,offp,0)
-
/* strcmp, modulo leading char */
extern boolean_t db_eqname( const char* src, const char* dst, char c );
@@ -186,7 +186,7 @@ extern struct db_sym_switch {
boolean_t (*init)(
char *start,
char *end,
- char *name,
+ const char *name,
char *task_addr
);
@@ -222,7 +222,7 @@ extern struct db_sym_switch {
} x_db[];
#ifndef symtab_type
-#define symtab_type(s) SYMTAB_AOUT
+#define symtab_type(s) SYMTAB_ELF
#endif
#define X_db_sym_init(s,e,n,t) x_db[symtab_type(s)].init(s,e,n,t)
@@ -238,12 +238,6 @@ extern boolean_t db_line_at_pc(
int *linenum,
db_addr_t pc);
-extern boolean_t aout_db_sym_init(
- char *symtab,
- char *esymtab,
- char *name,
- char *task_addr);
-
extern boolean_t elf_db_sym_init (
unsigned shdr_num,
vm_size_t shdr_size,
@@ -263,7 +257,7 @@ db_search_in_task_symbol(
extern db_sym_t
db_sym_parse_and_lookup(
- db_sym_t (*func)(),
+ db_sym_t (*func) (db_symtab_t *, const char*, const char*, int),
db_symtab_t *symtab,
char *symstr);
diff --git a/ddb/db_task_thread.c b/ddb/db_task_thread.c
index f7fbb805..fe742c26 100644
--- a/ddb/db_task_thread.c
+++ b/ddb/db_task_thread.c
@@ -51,8 +51,7 @@ thread_t db_default_thread; /* default target thread */
* search valid task queue, and return the queue position as the task id
*/
int
-db_lookup_task(target_task)
- const task_t target_task;
+db_lookup_task(const task_t target_task)
{
task_t task;
int task_id;
@@ -81,9 +80,7 @@ db_lookup_task(target_task)
* search thread queue of the task, and return the queue position
*/
int
-db_lookup_task_thread(task, target_thread)
- const task_t task;
- const thread_t target_thread;
+db_lookup_task_thread(const task_t task, const thread_t target_thread)
{
thread_t thread;
int thread_id;
@@ -105,8 +102,7 @@ db_lookup_task_thread(task, target_thread)
* as the thread id.
*/
int
-db_lookup_thread(target_thread)
- const thread_t target_thread;
+db_lookup_thread(const thread_t target_thread)
{
int thread_id;
task_t task;
@@ -138,8 +134,7 @@ db_lookup_thread(target_thread)
* check the address is a valid thread address
*/
boolean_t
-db_check_thread_address_valid(thread)
- const thread_t thread;
+db_check_thread_address_valid(const thread_t thread)
{
if (db_lookup_thread(thread) < 0) {
db_printf("Bad thread address 0x%x\n", thread);
@@ -152,7 +147,7 @@ db_check_thread_address_valid(thread)
/*
* convert task_id(queue position) to task address
*/
-task_t
+static task_t
db_lookup_task_id(int task_id)
{
task_t task;
@@ -245,11 +240,11 @@ db_init_default_thread(void)
*/
/* ARGSUSED */
void
-db_set_default_thread(vp, valuep, flag, ap)
- struct db_variable *vp;
- db_expr_t *valuep;
- int flag;
- db_var_aux_param_t ap;
+db_set_default_thread(
+ struct db_variable *vp,
+ db_expr_t *valuep,
+ int flag,
+ db_var_aux_param_t ap)
{
thread_t thread;
diff --git a/ddb/db_variables.c b/ddb/db_variables.c
index 0fd9bad0..40f2d4d3 100644
--- a/ddb/db_variables.c
+++ b/ddb/db_variables.c
@@ -70,10 +70,10 @@ struct db_variable db_vars[] = {
};
struct db_variable *db_evars = db_vars + sizeof(db_vars)/sizeof(db_vars[0]);
-const char *
-db_get_suffix(suffix, suffix_value)
- const char *suffix;
- short *suffix_value;
+static const char *
+db_get_suffix(
+ const char *suffix,
+ short *suffix_value)
{
int value;
@@ -89,10 +89,10 @@ db_get_suffix(suffix, suffix_value)
}
static boolean_t
-db_cmp_variable_name(vp, name, ap)
- struct db_variable *vp;
- char *name;
- const db_var_aux_param_t ap;
+db_cmp_variable_name(
+ struct db_variable *vp,
+ char *name,
+ const db_var_aux_param_t ap)
{
char *var_np;
const char *np;
@@ -116,7 +116,7 @@ db_cmp_variable_name(vp, name, ap)
return(TRUE);
}
-int
+static int
db_find_variable(
struct db_variable **varp,
db_var_aux_param_t ap)
@@ -160,22 +160,6 @@ db_get_variable(db_expr_t *valuep)
return (1);
}
-int
-db_set_variable(db_expr_t value)
-{
- struct db_variable *vp;
- struct db_var_aux_param aux_param;
- char modif[TOK_STRING_SIZE];
-
- aux_param.modif = modif;
- if (!db_find_variable(&vp, &aux_param))
- return (0);
-
- db_read_write_variable(vp, &value, DB_VAR_SET, &aux_param);
-
- return (1);
-}
-
void
db_read_write_variable(
struct db_variable *vp,
@@ -183,7 +167,7 @@ db_read_write_variable(
int rw_flag,
db_var_aux_param_t ap)
{
- void (*func)() = vp->fcn;
+ void (*func)(struct db_variable *, db_expr_t *, int, db_var_aux_param_t) = vp->fcn;
struct db_var_aux_param aux_param;
if (ap == 0) {
diff --git a/ddb/db_watch.c b/ddb/db_watch.c
index f0d0443f..c3d28354 100644
--- a/ddb/db_watch.c
+++ b/ddb/db_watch.c
@@ -64,7 +64,7 @@ db_watchpoint_t db_watchpoint_list = 0;
extern vm_map_t kernel_map;
-db_watchpoint_t
+static db_watchpoint_t
db_watchpoint_alloc(void)
{
db_watchpoint_t watch;
@@ -83,19 +83,18 @@ db_watchpoint_alloc(void)
return (watch);
}
-void
-db_watchpoint_free(watch)
- db_watchpoint_t watch;
+static void
+db_watchpoint_free(db_watchpoint_t watch)
{
watch->link = db_free_watchpoints;
db_free_watchpoints = watch;
}
void
-db_set_watchpoint(task, addr, size)
- const task_t task;
- db_addr_t addr;
- vm_size_t size;
+db_set_watchpoint(
+ const task_t task,
+ db_addr_t addr,
+ vm_size_t size)
{
db_watchpoint_t watch;
@@ -129,9 +128,7 @@ db_set_watchpoint(task, addr, size)
}
void
-db_delete_watchpoint(task, addr)
- const task_t task;
- db_addr_t addr;
+db_delete_watchpoint(const task_t task, db_addr_t addr)
{
db_watchpoint_t watch;
db_watchpoint_t *prev;
@@ -178,10 +175,7 @@ db_list_watchpoints(void)
}
static int
-db_get_task(modif, taskp, addr)
- const char *modif;
- task_t *taskp;
- db_addr_t addr;
+db_get_task(const char *modif, task_t *taskp, db_addr_t addr)
{
task_t task = TASK_NULL;
db_expr_t value;
@@ -217,11 +211,11 @@ db_get_task(modif, taskp, addr)
/* Delete watchpoint */
/*ARGSUSED*/
void
-db_deletewatch_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_deletewatch_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
task_t task;
@@ -233,11 +227,11 @@ db_deletewatch_cmd(addr, have_addr, count, modif)
/* Set watchpoint */
/*ARGSUSED*/
void
-db_watchpoint_cmd(addr, have_addr, count, modif)
- db_expr_t addr;
- int have_addr;
- db_expr_t count;
- const char * modif;
+db_watchpoint_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
vm_size_t size;
db_expr_t value;
@@ -254,7 +248,11 @@ db_watchpoint_cmd(addr, have_addr, count, modif)
/* list watchpoints */
void
-db_listwatch_cmd(void)
+db_listwatch_cmd(
+ db_expr_t addr,
+ int have_addr,
+ db_expr_t count,
+ const char * modif)
{
db_list_watchpoints();
}
diff --git a/ddb/db_watch.h b/ddb/db_watch.h
index 7ef1a207..86f07fb1 100644
--- a/ddb/db_watch.h
+++ b/ddb/db_watch.h
@@ -57,7 +57,11 @@ extern void db_set_watchpoint(const task_t task, db_addr_t addr, vm_size_t size)
extern void db_delete_watchpoint(const task_t task, db_addr_t addr);
extern void db_list_watchpoints(void);
-void db_listwatch_cmd(void);
+void db_listwatch_cmd(
+ db_expr_t addr,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif);
void db_deletewatch_cmd(
db_expr_t addr,
diff --git a/ddb/db_write_cmd.c b/ddb/db_write_cmd.c
index 46a2ee32..cfc2b703 100644
--- a/ddb/db_write_cmd.c
+++ b/ddb/db_write_cmd.c
@@ -43,6 +43,7 @@
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
#include <ddb/db_task_thread.h>
+#include <ddb/db_write_cmd.h>
@@ -51,11 +52,11 @@
*/
/*ARGSUSED*/
void
-db_write_cmd(address, have_addr, count, modif)
- db_expr_t address;
- boolean_t have_addr;
- db_expr_t count;
- const char * modif;
+db_write_cmd(
+ db_expr_t address,
+ boolean_t have_addr,
+ db_expr_t count,
+ const char * modif)
{
db_addr_t addr;
db_expr_t old_value;
diff --git a/device/blkio.c b/device/blkio.c
index 7ec1f2cf..0dfa33c4 100644
--- a/device/blkio.c
+++ b/device/blkio.c
@@ -31,56 +31,14 @@
*/
#include <mach/kern_return.h>
+#include <device/blkio.h>
+#include <device/buf.h>
#include <device/param.h>
#include <device/device_types.h>
#include <device/io_req.h>
#include <device/ds_routines.h>
-
-io_return_t block_io(
- void (*strat)(),
- void (*max_count)(),
- io_req_t ior)
-{
- kern_return_t rc;
- boolean_t wait = FALSE;
-
- /*
- * Make sure the size is not too large by letting max_count
- * change io_count. If we are doing a write, then io_alloc_size
- * preserves the original io_count.
- */
- (*max_count)(ior);
-
- /*
- * If reading, allocate memory. If writing, wire
- * down the incoming memory.
- */
- if (ior->io_op & IO_READ)
- rc = device_read_alloc(ior, (vm_size_t)ior->io_count);
- else
- rc = device_write_get(ior, &wait);
-
- if (rc != KERN_SUCCESS)
- return (rc);
-
- /*
- * Queue the operation for the device.
- */
- (*strat)(ior);
-
- /*
- * The io is now queued. Wait for it if needed.
- */
- if (wait) {
- iowait(ior);
- return(D_SUCCESS);
- }
-
- return (D_IO_QUEUED);
-}
-
/*
* 'standard' max_count routine. VM continuations mean that this
* code can cope with arbitrarily-sized write operations (they won't be
diff --git a/device/blkio.h b/device/blkio.h
index aaff9f8a..b188f388 100644
--- a/device/blkio.h
+++ b/device/blkio.h
@@ -19,6 +19,8 @@
#ifndef _DEVICE_BLKIO_H_
#define _DEVICE_BLKIO_H_
+#include <sys/types.h>
+
extern vm_offset_t block_io_mmap(dev_t dev, vm_offset_t off, int prot);
#endif /* _DEVICE_BLKIO_H_ */
diff --git a/device/buf.h b/device/buf.h
index a79ed8e4..7c8a4362 100644
--- a/device/buf.h
+++ b/device/buf.h
@@ -83,12 +83,6 @@
#define B_MD1 IO_SPARE_START
/*
- * Redefine physio routine
- */
-#define physio(strat, xbuf, dev, ops, minphys, ior) \
- block_io(strat, minphys, ior)
-
-/*
* Export standard minphys routine.
*/
extern void minphys(io_req_t);
diff --git a/device/chario.c b/device/chario.c
index 0e9dd70b..3fe93ccb 100644
--- a/device/chario.c
+++ b/device/chario.c
@@ -157,8 +157,7 @@ io_return_t char_open(
spl_t s;
io_return_t rc = D_SUCCESS;
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
tp->t_dev = dev;
@@ -192,8 +191,7 @@ io_return_t char_open(
if (tp->t_mctl)
(*tp->t_mctl)(tp, TM_RTS, DMBIS);
out:
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
return rc;
}
@@ -206,13 +204,12 @@ boolean_t char_open_done(
io_req_t ior)
{
struct tty *tp = (struct tty *)ior->io_dev_ptr;
- spl_t s = spltty();
+ spl_t s;
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if ((tp->t_state & TS_ISOPEN) == 0) {
queue_delayed_reply(&tp->t_delayed_open, ior, char_open_done);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
return FALSE;
}
@@ -222,15 +219,14 @@ boolean_t char_open_done(
if (tp->t_mctl)
(*tp->t_mctl)(tp, TM_RTS, DMBIS);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
ior->io_error = D_SUCCESS;
(void) ds_open_done(ior);
return TRUE;
}
-boolean_t tty_close_open_reply(
+static boolean_t tty_close_open_reply(
io_req_t ior)
{
ior->io_error = D_DEVICE_DOWN;
@@ -277,8 +273,7 @@ io_return_t char_write(
/*
* Check for tty operating.
*/
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if ((tp->t_state & TS_CARR_ON) == 0) {
@@ -322,8 +317,7 @@ io_return_t char_write(
rc = D_IO_QUEUED;
}
out:
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
if (!(ior->io_op & IO_INBAND))
(void) vm_deallocate(device_io_map, addr, ior->io_count);
@@ -339,19 +333,17 @@ boolean_t char_write_done(
io_req_t ior)
{
struct tty *tp = (struct tty *)ior->io_dev_ptr;
- spl_t s = spltty();
+ spl_t s;
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if (tp->t_outq.c_cc > TTHIWAT(tp) ||
(tp->t_state & TS_CARR_ON) == 0) {
queue_delayed_reply(&tp->t_delayed_write, ior, char_write_done);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
return FALSE;
}
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
if (IP_VALID(ior->io_reply_port)) {
(void) (*((ior->io_op & IO_INBAND) ?
@@ -366,7 +358,7 @@ boolean_t char_write_done(
return TRUE;
}
-boolean_t tty_close_write_reply(
+static boolean_t tty_close_write_reply(
io_req_t ior)
{
ior->io_residual = ior->io_count;
@@ -394,8 +386,7 @@ io_return_t char_read(
if (rc != KERN_SUCCESS)
return rc;
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if ((tp->t_state & TS_CARR_ON) == 0) {
if ((tp->t_state & TS_ONDELAY) == 0) {
@@ -431,8 +422,7 @@ io_return_t char_read(
}
out:
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
return rc;
}
@@ -445,16 +435,15 @@ boolean_t char_read_done(
io_req_t ior)
{
struct tty *tp = (struct tty *)ior->io_dev_ptr;
- spl_t s = spltty();
+ spl_t s;
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if (tp->t_inq.c_cc <= 0 ||
(tp->t_state & TS_CARR_ON) == 0) {
queue_delayed_reply(&tp->t_delayed_read, ior, char_read_done);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
return FALSE;
}
@@ -466,14 +455,13 @@ boolean_t char_read_done(
tp->t_state &= ~TS_RTS_DOWN;
}
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
(void) ds_read_done(ior);
return TRUE;
}
-boolean_t tty_close_read_reply(
+static boolean_t tty_close_read_reply(
io_req_t ior)
{
ior->io_residual = ior->io_count;
@@ -524,7 +512,7 @@ void ttyclose(
/*
* Port-death routine to clean up reply messages.
*/
-boolean_t
+static boolean_t
tty_queue_clean(
queue_t q,
const ipc_port_t port,
@@ -555,10 +543,10 @@ tty_portdeath(
struct tty * tp,
const ipc_port_t port)
{
- spl_t spl = spltty();
+ spl_t spl;
boolean_t result;
- simple_lock(&tp->t_lock);
+ spl = simple_lock_irq(&tp->t_lock);
/*
* The queues may never have been initialized
@@ -575,8 +563,7 @@ tty_portdeath(
|| tty_queue_clean(&tp->t_delayed_open, port,
tty_close_open_reply);
}
- simple_unlock(&tp->t_lock);
- splx(spl);
+ simple_unlock_irq(spl, &tp->t_lock);
return result;
}
@@ -603,8 +590,7 @@ io_return_t tty_get_status(
if (*count < TTY_STATUS_COUNT)
return (D_INVALID_OPERATION);
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
tsp->tt_ispeed = tp->t_ispeed;
tsp->tt_ospeed = tp->t_ospeed;
@@ -613,8 +599,7 @@ io_return_t tty_get_status(
if (tp->t_state & TS_HUPCLS)
tsp->tt_flags |= TF_HUPCLS;
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
*count = TTY_STATUS_COUNT;
break;
@@ -651,36 +636,30 @@ io_return_t tty_set_status(
if (flags == 0)
flags = D_READ | D_WRITE;
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
tty_flush(tp, flags);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
break;
}
case TTY_STOP:
/* stop output */
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if ((tp->t_state & TS_TTSTOP) == 0) {
tp->t_state |= TS_TTSTOP;
(*tp->t_stop)(tp, 0);
}
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
break;
case TTY_START:
/* start output */
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
if (tp->t_state & TS_TTSTOP) {
tp->t_state &= ~TS_TTSTOP;
tty_output(tp);
}
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
break;
case TTY_STATUS:
@@ -701,8 +680,7 @@ io_return_t tty_set_status(
return D_INVALID_OPERATION;
}
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
tp->t_ispeed = tsp->tt_ispeed;
tp->t_ospeed = tsp->tt_ospeed;
@@ -711,8 +689,7 @@ io_return_t tty_set_status(
if (tsp->tt_flags & TF_HUPCLS)
tp->t_state |= TS_HUPCLS;
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
break;
}
default:
@@ -820,14 +797,12 @@ void ttrstrt(
{
spl_t s;
- s = spltty();
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
tp->t_state &= ~TS_TIMEOUT;
ttstart (tp);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
}
/*
@@ -882,14 +857,13 @@ void tty_output(
/*
* Send any buffered recvd chars up to user
*/
-void ttypush(
- void * _tp)
+static void ttypush(void * _tp)
{
struct tty *tp = _tp;
- spl_t s = spltty();
+ spl_t s;
int state;
- simple_lock(&tp->t_lock);
+ s = simple_lock_irq(&tp->t_lock);
/*
The pdma timeout has gone off.
@@ -920,8 +894,7 @@ void ttypush(
tp->t_state = state & ~TS_MIN_TO_RCV;/* sanity */
}
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
}
/*
diff --git a/device/cirbuf.c b/device/cirbuf.c
index 391297ce..ed09f3d1 100644
--- a/device/cirbuf.c
+++ b/device/cirbuf.c
@@ -51,10 +51,10 @@ void
cb_check(struct cirbuf *cb)
{
if (!(cb->c_cf >= cb->c_start && cb->c_cf < cb->c_end))
- panic("cf %x out of range [%x..%x)",
+ panic("cf %p out of range [%p..%p)",
cb->c_cf, cb->c_start, cb->c_end);
if (!(cb->c_cl >= cb->c_start && cb->c_cl < cb->c_end))
- panic("cl %x out of range [%x..%x)",
+ panic("cl %p out of range [%p..%p)",
cb->c_cl, cb->c_start, cb->c_end);
if (cb->c_cf <= cb->c_cl) {
if (!(cb->c_cc == cb->c_cl - cb->c_cf))
@@ -203,31 +203,6 @@ b_to_q( char *cp,
}
/*
- * Return number of contiguous characters up to a character
- * that matches the mask.
- */
-int
-ndqb( struct cirbuf *cb,
- int mask)
-{
- char *cp, *lim;
-
- if (cb->c_cl < cb->c_cf)
- lim = cb->c_end;
- else
- lim = cb->c_cl;
- if (mask == 0)
- return (lim - cb->c_cf);
- cp = cb->c_cf;
- while (cp < lim) {
- if (*cp & mask)
- break;
- cp++;
- }
- return (cp - cb->c_cf);
-}
-
-/*
* Flush characters from circular buffer.
*/
void
diff --git a/device/conf.h b/device/conf.h
index 1af00285..8177966b 100644
--- a/device/conf.h
+++ b/device/conf.h
@@ -36,6 +36,7 @@
#include <mach/port.h>
#include <mach/vm_prot.h>
#include <device/device_types.h>
+#include <device/net_status.h>
struct io_req;
typedef struct io_req *io_req_t;
@@ -54,20 +55,20 @@ struct dev_ops {
int (*d_getstat)(dev_t, dev_flavor_t, dev_status_t, mach_msg_type_number_t *); /* get status/control */
int (*d_setstat)(dev_t, dev_flavor_t, dev_status_t, mach_msg_type_number_t); /* set status/control */
vm_offset_t (*d_mmap)(dev_t, vm_offset_t, vm_prot_t); /* map memory */
- int (*d_async_in)(); /* asynchronous input setup */
- int (*d_reset)(); /* reset device */
+ int (*d_async_in)(dev_t, const ipc_port_t, int, filter_t*, unsigned int); /* asynchronous input setup */
+ int (*d_reset)(dev_t); /* reset device */
int (*d_port_death)(dev_t, mach_port_t);
/* clean up reply ports */
int d_subdev; /* number of sub-devices per
unit */
- int (*d_dev_info)(); /* driver info for kernel */
+ int (*d_dev_info)(dev_t, int, int*); /* driver info for kernel */
};
typedef struct dev_ops *dev_ops_t;
/*
* Routines for null entries.
*/
-extern int nulldev(void); /* no operation - OK */
+extern int nulldev_reset(dev_t dev);
extern int nulldev_open(dev_t dev, int flag, io_req_t ior);
extern void nulldev_close(dev_t dev, int flags);
extern int nulldev_read(dev_t dev, io_req_t ior);
@@ -75,7 +76,8 @@ extern int nulldev_write(dev_t dev, io_req_t ior);
extern io_return_t nulldev_getstat(dev_t dev, dev_flavor_t flavor, dev_status_t data, mach_msg_type_number_t *count);
extern io_return_t nulldev_setstat(dev_t dev, dev_flavor_t flavor, dev_status_t data, mach_msg_type_number_t count);
extern io_return_t nulldev_portdeath(dev_t dev, mach_port_t port);
-extern int nodev(void); /* no operation - error */
+extern int nodev_async_in(dev_t, const ipc_port_t, int, filter_t*, unsigned int); /* no operation - error */
+extern int nodev_info(dev_t, int, int*); /* no operation - error */
extern vm_offset_t nomap(dev_t dev, vm_offset_t off, int prot); /* no operation - error */
/*
diff --git a/device/cons.c b/device/cons.c
index b04621ae..3f7cb9d1 100644
--- a/device/cons.c
+++ b/device/cons.c
@@ -140,8 +140,7 @@ cnmaygetc(void)
}
void
-cnputc(c)
- char c;
+cnputc(char c)
{
if (c == 0)
return;
diff --git a/device/dev_hdr.h b/device/dev_hdr.h
index 4bd12c1c..ac6ce7e1 100644
--- a/device/dev_hdr.h
+++ b/device/dev_hdr.h
@@ -107,7 +107,7 @@ typedef struct mach_device *mach_device_t;
/*
* To find and remove device entries
*/
-mach_device_t device_lookup(char *); /* by name */
+mach_device_t device_lookup(const char *); /* by name */
void mach_device_reference(mach_device_t);
void mach_device_deallocate(mach_device_t);
@@ -119,10 +119,12 @@ device_t dev_port_lookup(ipc_port_t);
void dev_port_enter(mach_device_t);
void dev_port_remove(mach_device_t);
+typedef boolean_t (*dev_map_fn)(mach_device_t, mach_port_t);
+
/*
* To call a routine on each device
*/
-boolean_t dev_map(boolean_t (*)(), mach_port_t);
+boolean_t dev_map(dev_map_fn, mach_port_t);
/*
* To lock and unlock state and open-count
@@ -134,7 +136,7 @@ boolean_t dev_map(boolean_t (*)(), mach_port_t);
* device name lookup
*/
extern boolean_t dev_name_lookup(
- char * name,
+ const char * name,
dev_ops_t *ops, /* out */
int *unit); /* out */
diff --git a/device/dev_lookup.c b/device/dev_lookup.c
index 9af7508c..c9c39f88 100644
--- a/device/dev_lookup.c
+++ b/device/dev_lookup.c
@@ -60,8 +60,7 @@ queue_head_t dev_number_hash_table[NDEVHASH];
* Lock for device-number to device lookup.
* Must be held before device-ref_count lock.
*/
-decl_simple_lock_data(,
- dev_number_lock)
+def_simple_lock_data(static, dev_number_lock)
struct kmem_cache dev_hdr_cache;
@@ -69,9 +68,8 @@ struct kmem_cache dev_hdr_cache;
* Enter device in the number lookup table.
* The number table lock must be held.
*/
-void
-dev_number_enter(device)
- const mach_device_t device;
+static void
+dev_number_enter(const mach_device_t device)
{
queue_t q;
@@ -83,9 +81,8 @@ dev_number_enter(device)
* Remove device from the device-number lookup table.
* The device-number table lock must be held.
*/
-void
-dev_number_remove(device)
- const mach_device_t device;
+static void
+dev_number_remove(const mach_device_t device)
{
queue_t q;
@@ -97,10 +94,8 @@ dev_number_remove(device)
* Lookup a device by device operations and minor number.
* The number table lock must be held.
*/
-mach_device_t
-dev_number_lookup(ops, devnum)
- const dev_ops_t ops;
- int devnum;
+static mach_device_t
+dev_number_lookup(const dev_ops_t ops, int devnum)
{
queue_t q;
mach_device_t device;
@@ -120,7 +115,7 @@ dev_number_lookup(ops, devnum)
* table.
*/
mach_device_t
-device_lookup(char *name)
+device_lookup(const char *name)
{
dev_ops_t dev_ops;
int dev_minor;
@@ -299,8 +294,7 @@ dev_port_lookup(ipc_port_t port)
* Consumes a device reference; produces a naked send right.
*/
ipc_port_t
-convert_device_to_port(device)
- const device_t device;
+convert_device_to_port(const device_t device)
{
if (device == DEVICE_NULL)
return IP_NULL;
@@ -316,7 +310,7 @@ convert_device_to_port(device)
*/
boolean_t
dev_map(
- boolean_t (*routine)(),
+ dev_map_fn routine,
mach_port_t port)
{
int i;
diff --git a/device/dev_name.c b/device/dev_name.c
index 59ea961b..abd525ca 100644
--- a/device/dev_name.c
+++ b/device/dev_name.c
@@ -39,7 +39,7 @@
/*
* Routines placed in empty entries in the device tables
*/
-int nulldev(void)
+int nulldev_reset(dev_t dev)
{
return (D_SUCCESS);
}
@@ -78,7 +78,12 @@ int nulldev_portdeath(dev_t dev, mach_port_t port)
return (D_SUCCESS);
}
-int nodev(void)
+int nodev_async_in(dev_t dev, const ipc_port_t port, int x, filter_t* filter, unsigned int j)
+{
+ return (D_INVALID_OPERATION);
+}
+
+int nodev_info(dev_t dev, int a, int* b)
{
return (D_INVALID_OPERATION);
}
@@ -98,10 +103,9 @@ nomap(dev_t dev, vm_offset_t off, int prot)
* next character of target is 0 (end of string).
*/
boolean_t __attribute__ ((pure))
-name_equal(src, len, target)
- const char *src;
- int len;
- const char *target;
+name_equal(const char *src,
+ int len,
+ const char *target)
{
while (--len >= 0)
if (*src++ != *target++)
@@ -113,7 +117,7 @@ name_equal(src, len, target)
* device name lookup
*/
boolean_t dev_name_lookup(
- char *name,
+ const char *name,
dev_ops_t *ops, /* out */
int *unit) /* out */
{
@@ -129,7 +133,7 @@ boolean_t dev_name_lookup(
* <partition> is a letter in [a-h] (disks only?)
*/
- char *cp = name;
+ const char *cp = name;
int len;
int j = 0;
int c;
@@ -236,26 +240,3 @@ dev_set_indirection(const char *name, dev_ops_t ops, int unit)
}
}
}
-
-boolean_t dev_change_indirect(const char *iname, const char *dname, int unit)
-{
- struct dev_ops *dp;
- struct dev_indirect *di;
- boolean_t found = FALSE;
-
- dev_search(dp) {
- if (!strcmp(dp->d_name, dname)) {
- found = TRUE;
- break;
- }
- }
- if (!found) return FALSE;
- dev_indirect_search(di) {
- if (!strcmp(di->d_name, iname)) {
- di->d_ops = dp;
- di->d_unit = unit;
- return TRUE;
- }
- }
- return FALSE;
-}
diff --git a/device/dev_pager.c b/device/dev_pager.c
index 38f0f8d4..1cd74064 100644
--- a/device/dev_pager.c
+++ b/device/dev_pager.c
@@ -50,6 +50,7 @@
#include <vm/vm_kern.h>
#include <vm/vm_user.h>
+#include <device/device_pager.server.h>
#include <device/device_types.h>
#include <device/ds_routines.h>
#include <device/dev_hdr.h>
@@ -128,14 +129,14 @@ typedef struct dev_pager *dev_pager_t;
struct kmem_cache dev_pager_cache;
-void dev_pager_reference(dev_pager_t ds)
+static void dev_pager_reference(dev_pager_t ds)
{
simple_lock(&ds->lock);
ds->ref_count++;
simple_unlock(&ds->lock);
}
-void dev_pager_deallocate(dev_pager_t ds)
+static void dev_pager_deallocate(dev_pager_t ds)
{
simple_lock(&ds->lock);
if (--ds->ref_count > 0) {
@@ -166,8 +167,7 @@ typedef struct dev_pager_entry *dev_pager_entry_t;
*/
queue_head_t dev_pager_hashtable[DEV_HASH_COUNT];
struct kmem_cache dev_pager_hash_cache;
-decl_simple_lock_data(,
- dev_pager_hash_lock)
+def_simple_lock_data(static, dev_pager_hash_lock)
struct dev_device_entry {
queue_chain_t links;
@@ -183,13 +183,12 @@ typedef struct dev_device_entry *dev_device_entry_t;
*/
queue_head_t dev_device_hashtable[DEV_HASH_COUNT];
struct kmem_cache dev_device_hash_cache;
-decl_simple_lock_data(,
- dev_device_hash_lock)
+def_simple_lock_data(static, dev_device_hash_lock)
#define dev_hash(name_port) \
(((vm_offset_t)(name_port) & 0xffffff) % DEV_HASH_COUNT)
-void dev_pager_hash_init(void)
+static void dev_pager_hash_init(void)
{
int i;
vm_size_t size;
@@ -202,7 +201,7 @@ void dev_pager_hash_init(void)
simple_lock_init(&dev_pager_hash_lock);
}
-void dev_pager_hash_insert(
+static void dev_pager_hash_insert(
const ipc_port_t name_port,
const dev_pager_t rec)
{
@@ -218,7 +217,7 @@ void dev_pager_hash_insert(
simple_unlock(&dev_pager_hash_lock);
}
-void dev_pager_hash_delete(const ipc_port_t name_port)
+static void dev_pager_hash_delete(const ipc_port_t name_port)
{
queue_t bucket;
dev_pager_entry_t entry;
@@ -235,11 +234,11 @@ void dev_pager_hash_delete(const ipc_port_t name_port)
}
}
simple_unlock(&dev_pager_hash_lock);
- if (entry)
+ if (!queue_end(bucket, &entry->links))
kmem_cache_free(&dev_pager_hash_cache, (vm_offset_t)entry);
}
-dev_pager_t dev_pager_hash_lookup(const ipc_port_t name_port)
+static dev_pager_t dev_pager_hash_lookup(const ipc_port_t name_port)
{
queue_t bucket;
dev_pager_entry_t entry;
@@ -262,7 +261,7 @@ dev_pager_t dev_pager_hash_lookup(const ipc_port_t name_port)
return (DEV_PAGER_NULL);
}
-void dev_device_hash_init(void)
+static void dev_device_hash_init(void)
{
int i;
vm_size_t size;
@@ -276,7 +275,7 @@ void dev_device_hash_init(void)
simple_lock_init(&dev_device_hash_lock);
}
-void dev_device_hash_insert(
+static void dev_device_hash_insert(
const mach_device_t device,
const vm_offset_t offset,
const dev_pager_t rec)
@@ -294,7 +293,7 @@ void dev_device_hash_insert(
simple_unlock(&dev_device_hash_lock);
}
-void dev_device_hash_delete(
+static void dev_device_hash_delete(
const mach_device_t device,
const vm_offset_t offset)
{
@@ -313,11 +312,11 @@ void dev_device_hash_delete(
}
}
simple_unlock(&dev_device_hash_lock);
- if (entry)
+ if (!queue_end(bucket, &entry->links))
kmem_cache_free(&dev_device_hash_cache, (vm_offset_t)entry);
}
-dev_pager_t dev_device_hash_lookup(
+static dev_pager_t dev_device_hash_lookup(
const mach_device_t device,
const vm_offset_t offset)
{
@@ -493,7 +492,7 @@ device_pager_data_return(
const ipc_port_t pager_request,
vm_offset_t offset,
pointer_t addr,
- vm_size_t data_cnt,
+ mach_msg_type_number_t data_cnt,
boolean_t dirty,
boolean_t kernel_copy)
{
diff --git a/device/device_emul.h b/device/device_emul.h
index 957bd505..873d7f5b 100644
--- a/device/device_emul.h
+++ b/device/device_emul.h
@@ -36,12 +36,12 @@ struct device_emulation_ops
void (*dealloc) (void *);
ipc_port_t (*dev_to_port) (void *);
io_return_t (*open) (ipc_port_t, mach_msg_type_name_t,
- dev_mode_t, char *, device_t *);
+ dev_mode_t, const char *, device_t *);
io_return_t (*close) (void *);
io_return_t (*write) (void *, ipc_port_t, mach_msg_type_name_t,
dev_mode_t, recnum_t, io_buf_ptr_t, unsigned, int *);
io_return_t (*write_inband) (void *, ipc_port_t, mach_msg_type_name_t,
- dev_mode_t, recnum_t, io_buf_ptr_inband_t,
+ dev_mode_t, recnum_t, const io_buf_ptr_inband_t,
unsigned, int *);
io_return_t (*read) (void *, ipc_port_t, mach_msg_type_name_t,
dev_mode_t, recnum_t, int, io_buf_ptr_t *, unsigned *);
@@ -56,9 +56,9 @@ struct device_emulation_ops
vm_size_t, ipc_port_t *, boolean_t);
void (*no_senders) (mach_no_senders_notification_t *);
io_return_t (*write_trap) (void *, dev_mode_t,
- recnum_t, vm_offset_t, vm_size_t);
+ rpc_recnum_t, rpc_vm_offset_t, rpc_vm_size_t);
io_return_t (*writev_trap) (void *, dev_mode_t,
- recnum_t, io_buf_vec_t *, vm_size_t);
+ rpc_recnum_t, rpc_io_buf_vec_t *, rpc_vm_size_t);
};
#endif /* _I386AT_DEVICE_EMUL_H_ */
diff --git a/device/device_init.c b/device/device_init.c
index 794186ee..287d0a20 100644
--- a/device/device_init.c
+++ b/device/device_init.c
@@ -38,6 +38,7 @@
#include <device/device_types.h>
#include <device/device_port.h>
#include <device/tty.h>
+#include <device/device_init.h>
#include <device/ds_routines.h>
#include <device/net_io.h>
#include <device/chario.h>
diff --git a/device/ds_routines.c b/device/ds_routines.c
index c883aec7..d97d229e 100644
--- a/device/ds_routines.c
+++ b/device/ds_routines.c
@@ -84,6 +84,7 @@
#include <vm/vm_user.h>
#include <device/device_types.h>
+#include <device/device.server.h>
#include <device/dev_hdr.h>
#include <device/conf.h>
#include <device/io_req.h>
@@ -140,7 +141,7 @@ struct kmem_cache io_inband_cache;
io_return_t
ds_device_open (ipc_port_t open_port, ipc_port_t reply_port,
mach_msg_type_name_t reply_port_type, dev_mode_t mode,
- char *name, device_t *devp)
+ const_dev_name_t name, device_t *devp)
{
unsigned i;
io_return_t err;
@@ -170,6 +171,14 @@ ds_device_open (ipc_port_t open_port, ipc_port_t reply_port,
}
io_return_t
+ds_device_open_new (ipc_port_t open_port, ipc_port_t reply_port,
+ mach_msg_type_name_t reply_port_type, dev_mode_t mode,
+ const_dev_name_t name, device_t *devp)
+{
+ return ds_device_open (open_port, reply_port, reply_port_type, mode, name, devp);
+}
+
+io_return_t
ds_device_close (device_t dev)
{
/* Refuse if device is dead or not completely open. */
@@ -206,7 +215,7 @@ io_return_t
ds_device_write_inband (device_t dev, ipc_port_t reply_port,
mach_msg_type_name_t reply_port_type,
dev_mode_t mode, recnum_t recnum,
- io_buf_ptr_inband_t data, unsigned count,
+ const io_buf_ptr_inband_t data, unsigned count,
int *bytes_written)
{
/* Refuse if device is dead or not completely open. */
@@ -245,7 +254,7 @@ ds_device_read (device_t dev, ipc_port_t reply_port,
io_return_t
ds_device_read_inband (device_t dev, ipc_port_t reply_port,
mach_msg_type_name_t reply_port_type, dev_mode_t mode,
- recnum_t recnum, int count, char *data,
+ recnum_t recnum, int count, io_buf_ptr_inband_t data,
unsigned *bytes_read)
{
/* Refuse if device is dead or not completely open. */
@@ -325,9 +334,9 @@ io_return_t
ds_device_intr_register (device_t dev, int id,
int flags, ipc_port_t receive_port)
{
-#if defined(MACH_XEN) || defined(__x86_64__)
+#if defined(MACH_XEN)
return D_INVALID_OPERATION;
-#else /* MACH_XEN || __x86_64__ */
+#else /* MACH_XEN */
kern_return_t err;
mach_device_t mdev;
@@ -359,15 +368,15 @@ ds_device_intr_register (device_t dev, int id,
ip_reference (receive_port);
}
return err;
-#endif /* MACH_XEN || __x86_64__ */
+#endif /* MACH_XEN */
}
kern_return_t
ds_device_intr_ack (device_t dev, ipc_port_t receive_port)
{
-#if defined(MACH_XEN) || defined(__x86_64__)
+#if defined(MACH_XEN)
return D_INVALID_OPERATION;
-#else /* MACH_XEN || __x86_64__ */
+#else /* MACH_XEN */
mach_device_t mdev;
kern_return_t ret;
@@ -387,7 +396,7 @@ ds_device_intr_ack (device_t dev, ipc_port_t receive_port)
ipc_port_release_send(receive_port);
return ret;
-#endif /* MACH_XEN || __x86_64__ */
+#endif /* MACH_XEN */
}
boolean_t
@@ -412,7 +421,7 @@ ds_notify (mach_msg_header_t *msg)
io_return_t
ds_device_write_trap (device_t dev, dev_mode_t mode,
- recnum_t recnum, vm_offset_t data, vm_size_t count)
+ rpc_recnum_t recnum, rpc_vm_offset_t data, rpc_vm_size_t count)
{
/* Refuse if device is dead or not completely open. */
if (dev == DEVICE_NULL)
@@ -427,7 +436,7 @@ ds_device_write_trap (device_t dev, dev_mode_t mode,
io_return_t
ds_device_writev_trap (device_t dev, dev_mode_t mode,
- recnum_t recnum, io_buf_vec_t *iovec, vm_size_t count)
+ rpc_recnum_t recnum, rpc_io_buf_vec_t *iovec, rpc_vm_size_t count)
{
/* Refuse if device is dead or not completely open. */
if (dev == DEVICE_NULL)
@@ -466,7 +475,7 @@ device_deallocate (device_t dev)
* What follows is the interface for the native Mach devices.
*/
-ipc_port_t
+static ipc_port_t
mach_convert_device_to_port (mach_device_t device)
{
ipc_port_t port;
@@ -492,7 +501,7 @@ static io_return_t
device_open(const ipc_port_t reply_port,
mach_msg_type_name_t reply_port_type,
dev_mode_t mode,
- char * name,
+ const char * name,
device_t *device_p)
{
mach_device_t device;
@@ -826,7 +835,7 @@ device_write_inband(void *dev,
mach_msg_type_name_t reply_port_type,
dev_mode_t mode,
recnum_t recnum,
- io_buf_ptr_inband_t data,
+ const io_buf_ptr_inband_t data,
unsigned int data_count,
int *bytes_written)
{
@@ -849,7 +858,7 @@ device_write_inband(void *dev,
ior->io_op = IO_WRITE | IO_CALL | IO_INBAND;
ior->io_mode = mode;
ior->io_recnum = recnum;
- ior->io_data = data;
+ ior->io_data = (io_buf_ptr_t)data;
ior->io_count = data_count;
ior->io_total = data_count;
ior->io_alloc_size = 0;
@@ -1428,7 +1437,7 @@ device_set_status(
status_count));
}
-io_return_t
+static io_return_t
mach_device_get_status(
void *dev,
dev_flavor_t flavor,
@@ -1501,13 +1510,14 @@ device_map(
static void
ds_no_senders(mach_no_senders_notification_t *notification)
{
- printf("ds_no_senders called! device_port=0x%lx count=%d\n",
+ printf("ds_no_senders called! device_port=0x%zx count=%d\n",
notification->not_header.msgh_remote_port,
notification->not_count);
}
+/* Shall be taken at splio only */
+def_simple_lock_irq_data(static, io_done_list_lock) /* Lock for... */
queue_head_t io_done_list;
-decl_simple_lock_data(, io_done_list_lock)
#define splio splsched /* XXX must block ALL io devices */
@@ -1536,15 +1546,15 @@ void iodone(io_req_t ior)
thread_wakeup((event_t)ior);
} else {
ior->io_op |= IO_DONE;
- simple_lock(&io_done_list_lock);
+ simple_lock_nocheck(&io_done_list_lock.slock);
enqueue_tail(&io_done_list, (queue_entry_t)ior);
thread_wakeup((event_t)&io_done_list);
- simple_unlock(&io_done_list_lock);
+ simple_unlock_nocheck(&io_done_list_lock.slock);
}
splx(s);
}
-void __attribute__ ((noreturn)) io_done_thread_continue(void)
+static void __attribute__ ((noreturn)) io_done_thread_continue(void)
{
for (;;) {
spl_t s;
@@ -1553,11 +1563,9 @@ void __attribute__ ((noreturn)) io_done_thread_continue(void)
#if defined (LINUX_DEV) && defined (CONFIG_INET)
free_skbuffs ();
#endif
- s = splio();
- simple_lock(&io_done_list_lock);
+ s = simple_lock_irq(&io_done_list_lock);
while ((ior = (io_req_t)dequeue_head(&io_done_list)) != 0) {
- simple_unlock(&io_done_list_lock);
- (void) splx(s);
+ simple_unlock_irq(s, &io_done_list_lock);
if ((*ior->io_done)(ior)) {
/*
@@ -1567,13 +1575,11 @@ void __attribute__ ((noreturn)) io_done_thread_continue(void)
}
/* else routine has re-queued it somewhere */
- s = splio();
- simple_lock(&io_done_list_lock);
+ s = simple_lock_irq(&io_done_list_lock);
}
assert_wait(&io_done_list, FALSE);
- simple_unlock(&io_done_list_lock);
- (void) splx(s);
+ simple_unlock_irq(s, &io_done_list_lock);
counter(c_io_done_thread_block++);
thread_block(io_done_thread_continue);
}
@@ -1601,7 +1607,7 @@ void mach_device_init(void)
vm_offset_t device_io_min, device_io_max;
queue_init(&io_done_list);
- simple_lock_init(&io_done_list_lock);
+ simple_lock_init_irq(&io_done_list_lock);
kmem_submap(device_io_map, kernel_map, &device_io_min, &device_io_max,
DEVICE_IO_MAP_SIZE);
@@ -1680,7 +1686,7 @@ mach_device_trap_init(void)
* Could have lists of different size caches.
* Could call a device-specific routine.
*/
-io_req_t
+static io_req_t
ds_trap_req_alloc(const mach_device_t device, vm_size_t data_size)
{
return (io_req_t) kmem_cache_alloc(&io_trap_cache);
@@ -1689,7 +1695,7 @@ ds_trap_req_alloc(const mach_device_t device, vm_size_t data_size)
/*
* Called by iodone to release ior.
*/
-boolean_t
+static boolean_t
ds_trap_write_done(const io_req_t ior)
{
mach_device_t dev;
@@ -1713,7 +1719,7 @@ ds_trap_write_done(const io_req_t ior)
*/
static io_return_t
device_write_trap (mach_device_t device, dev_mode_t mode,
- recnum_t recnum, vm_offset_t data, vm_size_t data_count)
+ rpc_recnum_t recnum, rpc_vm_offset_t data, rpc_vm_size_t data_count)
{
io_req_t ior;
io_return_t result;
@@ -1752,7 +1758,7 @@ device_write_trap (mach_device_t device, dev_mode_t mode,
* Copy the data from user space.
*/
if (data_count > 0)
- copyin((void *)data, ior->io_data, data_count);
+ copyin((void*)(vm_offset_t)data, ior->io_data, data_count);
/*
* The ior keeps an extra reference for the device.
@@ -1781,7 +1787,7 @@ device_write_trap (mach_device_t device, dev_mode_t mode,
static io_return_t
device_writev_trap (mach_device_t device, dev_mode_t mode,
- recnum_t recnum, io_buf_vec_t *iovec, vm_size_t iocount)
+ rpc_recnum_t recnum, rpc_io_buf_vec_t *iovec, rpc_vm_size_t iocount)
{
io_req_t ior;
io_return_t result;
@@ -1799,11 +1805,15 @@ device_writev_trap (mach_device_t device, dev_mode_t mode,
*/
if (iocount > 16)
return KERN_INVALID_VALUE; /* lame */
- copyin(iovec,
- stack_iovec,
- iocount * sizeof(io_buf_vec_t));
- for (data_count = 0, i = 0; i < iocount; i++)
+
+ for (data_count = 0, i=0; i<iocount; i++) {
+ rpc_io_buf_vec_t riov;
+ if (copyin(iovec + i, &riov, sizeof(riov)))
+ return KERN_INVALID_ARGUMENT;
+ stack_iovec[i].data = riov.data;
+ stack_iovec[i].count = riov.count;
data_count += stack_iovec[i].count;
+ }
/*
* Get a buffer to hold the ioreq.
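For reference, the io_done_list hunks above converge on the combined irq-lock helpers rather than open-coded splio()/simple_lock() pairs. A minimal sketch of the resulting pattern, using only the names introduced in this change (def_simple_lock_irq_data declares the lock, simple_lock_irq raises spl and takes it, simple_unlock_irq releases and restores; the _nocheck variant is used where the caller has already raised spl itself, as in iodone() above):

    /* Sketch only: the usage pattern the hunks above convert to. */
    def_simple_lock_irq_data(static, example_lock)  /* lock + spl bookkeeping */

    static void example_enqueue(void)
    {
        spl_t s;

        s = simple_lock_irq(&example_lock);     /* raises spl and takes the lock */
        /* ... manipulate the protected queue ... */
        simple_unlock_irq(s, &example_lock);    /* drops the lock, restores spl */
    }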
diff --git a/device/ds_routines.h b/device/ds_routines.h
index c0543cbc..48d85dd0 100644
--- a/device/ds_routines.h
+++ b/device/ds_routines.h
@@ -72,15 +72,15 @@ extern void io_done_thread(void) __attribute__ ((noreturn));
io_return_t ds_device_write_trap(
device_t dev,
dev_mode_t mode,
- recnum_t recnum,
- vm_offset_t data,
- vm_size_t count);
+ rpc_recnum_t recnum,
+ rpc_vm_offset_t data,
+ rpc_vm_size_t count);
io_return_t ds_device_writev_trap(
device_t dev,
dev_mode_t mode,
- recnum_t recnum,
- io_buf_vec_t *iovec,
- vm_size_t count);
+ rpc_recnum_t recnum,
+ rpc_io_buf_vec_t *iovec,
+ rpc_vm_size_t count);
#endif /* DS_ROUTINES_H */
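The prototypes above trade the native vm_offset_t/vm_size_t for their rpc_* counterparts so that trap arguments keep the width the caller used, even when a 32-bit userland runs on a 64-bit kernel. The actual typedefs live in the Mach type headers; the idea is roughly the following (illustrative only, not the real definitions):

    /* Hypothetical sketch of the width split. */
    #if defined(__x86_64__) && defined(USER32)
    typedef uint32_t rpc_vm_offset_t;    /* what a 32-bit caller hands in */
    typedef uint32_t rpc_vm_size_t;
    #else
    typedef vm_offset_t rpc_vm_offset_t; /* same width as the kernel types */
    typedef vm_size_t   rpc_vm_size_t;
    #endif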
diff --git a/device/intr.c b/device/intr.c
index 2298d1bb..9035c036 100644
--- a/device/intr.c
+++ b/device/intr.c
@@ -50,6 +50,20 @@ search_intr (struct irqdev *dev, ipc_port_t dst_port)
return NULL;
}
+
+/*
+ * Interrupt handling logic:
+ *
+ * interrupt.S raises spl (thus IF cleared)
+ * interrupt.S EOI
+ * interrupt.S calls the handler
+ * - for pure in-kernel handlers, they do whatever they want with IF cleared.
+ * - when a userland handler is registered, queue_intr masks the irq.
+ * interrupt.S lowers spl with splx_cli, thus IF still cleared
+ * iret, that also sets IF
+ *
+ * later on, userland acks the IRQ (irq_acknowledge), which unmasks the irq
+ */
kern_return_t
irq_acknowledge (ipc_port_t receive_port)
{
@@ -76,8 +90,6 @@ irq_acknowledge (ipc_port_t receive_port)
if (ret)
return ret;
- (*(irqtab.irqdev_ack)) (&irqtab, e->id);
-
__enable_irq (irqtab.irq[e->id]);
return D_SUCCESS;
@@ -104,15 +116,10 @@ int
deliver_user_intr (struct irqdev *dev, int id, user_intr_t *e)
{
/* The reference of the port was increased
- * when the port was installed.
- * If the reference is 1, it means the port should
- * have been destroyed and I destroy it now. */
- if (e->dst_port
- && e->dst_port->ip_references == 1)
+ * when the port was installed. If the reference is 1, it means
+ * the port was deallocated and we should clean up after it. */
+ if (!e->dst_port || e->dst_port->ip_references == 1)
{
- printf ("irq handler [%d]: release a dead delivery port %p entry %p\n", id, e->dst_port, e);
- ipc_port_release (e->dst_port);
- e->dst_port = MACH_PORT_NULL;
thread_wakeup ((event_t) &intr_thread);
return 0;
}
@@ -246,37 +253,18 @@ intr_thread (void)
thread_set_timeout (hz);
spl_t s = splhigh ();
- /* Check for aborted processes */
- queue_iterate (&main_intr_queue, e, user_intr_t *, chain)
- {
- if ((!e->dst_port || e->dst_port->ip_references == 1) && e->n_unacked)
- {
- printf ("irq handler [%d]: release dead delivery %d unacked irqs port %p entry %p\n", e->id, e->n_unacked, e->dst_port, e);
- /* The reference of the port was increased
- * when the port was installed.
- * If the reference is 1, it means the port should
- * have been destroyed and I clear unacked irqs now, so the Linux
- * handling can trigger, and we will cleanup later after the Linux
- * handler is cleared. */
- /* TODO: rather immediately remove from Linux handler */
- while (e->n_unacked)
- {
- __enable_irq (irqtab.irq[e->id]);
- e->n_unacked--;
- }
- }
- }
-
/* Now check for interrupts */
- while (irqtab.tot_num_intr)
+ int del;
+ do
{
- int del = 0;
+ del = 0;
queue_iterate (&main_intr_queue, e, user_intr_t *, chain)
{
- /* if an entry doesn't have dest port,
- * we should remove it. */
- if (e->dst_port == MACH_PORT_NULL)
+ /* The reference of the port was increased
+ * when the port was installed. If the reference is 1, it means
+ * the port was deallocated and we should clean up after it. */
+ if (e->dst_port->ip_references == 1)
{
clear_wait (current_thread (), 0, 0);
del = 1;
@@ -300,8 +288,17 @@ intr_thread (void)
/* remove the entry without dest port from the queue and free it. */
if (del)
{
+ /*
+ * We clear unacked irqs now, so the Linux handling can trigger,
+ * and we will cleanup later after the Linux handler is cleared.
+ */
assert (!queue_empty (&main_intr_queue));
queue_remove (&main_intr_queue, e, user_intr_t *, chain);
+
+ printf ("irq handler [%d]: release a dead delivery port %p entry %p\n", e->id, e->dst_port, e);
+ ipc_port_release (e->dst_port);
+ e->dst_port = MACH_PORT_NULL;
+
if (e->n_unacked)
printf("irq handler [%d]: still %d unacked irqs in entry %p\n", e->id, e->n_unacked, e);
while (e->n_unacked)
@@ -309,12 +306,20 @@ intr_thread (void)
__enable_irq (irqtab.irq[e->id]);
e->n_unacked--;
}
- printf("irq handler [%d]: removed entry %p\n", e->id, e);
+
+#if 0
+#ifndef LINUX_DEV
+ // TODO: remove from the action list
+#else
+ // FIXME: with the Linux irq handler we don't actually control the action list
+#endif
splx (s);
kfree ((vm_offset_t) e, sizeof (*e));
s = splhigh ();
+#endif
}
}
+ while (del || irqtab.tot_num_intr);
splx (s);
thread_block (NULL);
}
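The comment block added at the top of intr.c describes the protocol these hunks rely on: the line stays masked from the moment the interrupt is queued to userland until the handler acknowledges it. Schematically, from the user side (wait_for_interrupt() and ack_interrupt() are placeholder names, not the actual RPC interface, which is outside this excerpt):

    /* Placeholder names; only the ordering matters. */
    for (;;) {
        wait_for_interrupt(port);   /* kernel has already masked the line */
        handle_device();            /* device work runs with the line masked */
        ack_interrupt(port);        /* ends up in irq_acknowledge(), which
                                       unmasks the line via __enable_irq() */
    }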
diff --git a/device/io_req.h b/device/io_req.h
index b80b3dde..fb636969 100644
--- a/device/io_req.h
+++ b/device/io_req.h
@@ -38,6 +38,7 @@
#include <kern/slab.h>
#include <kern/kalloc.h>
#include <kern/lock.h>
+#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <device/device_types.h>
#include <device/dev_hdr.h>
@@ -95,6 +96,8 @@ struct io_req {
* locking is needed in this sequence. Unfortunately, a synchronous wait
* for a buffer requires a lock to avoid problems if the wait and interrupt
* happen simultaneously on different processors.
+ *
+ * Shall be taken at splio only
*/
#define ior_lock(ior) simple_lock(&(ior)->io_req_lock)
diff --git a/device/kmsg.c b/device/kmsg.c
index e49eb3d3..e5b518e6 100644
--- a/device/kmsg.c
+++ b/device/kmsg.c
@@ -44,7 +44,7 @@ static queue_head_t kmsg_read_queue;
/* Used for exclusive access to the device */
static boolean_t kmsg_in_use;
/* Used for exclusive access to the routines */
-decl_simple_lock_data (static, kmsg_lock);
+def_simple_lock_irq_data (static, kmsg_lock);
/* If already initialized or not */
static boolean_t kmsg_init_done = FALSE;
@@ -56,23 +56,23 @@ kmsginit (void)
kmsg_read_offset = 0;
queue_init (&kmsg_read_queue);
kmsg_in_use = FALSE;
- simple_lock_init (&kmsg_lock);
+ simple_lock_init_irq (&kmsg_lock);
}
/* Kernel Message Open Handler */
io_return_t
kmsgopen (dev_t dev, int flag, const io_req_t ior)
{
- simple_lock (&kmsg_lock);
+ spl_t s = simple_lock_irq (&kmsg_lock);
if (kmsg_in_use)
{
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return D_ALREADY_OPEN;
}
kmsg_in_use = TRUE;
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return D_SUCCESS;
}
@@ -80,10 +80,10 @@ kmsgopen (dev_t dev, int flag, const io_req_t ior)
void
kmsgclose (dev_t dev, int flag)
{
- simple_lock (&kmsg_lock);
+ spl_t s = simple_lock_irq (&kmsg_lock);
kmsg_in_use = FALSE;
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
}
static boolean_t kmsg_read_done (io_req_t ior);
@@ -99,19 +99,19 @@ kmsgread (dev_t dev, io_req_t ior)
if (err != KERN_SUCCESS)
return err;
- simple_lock (&kmsg_lock);
+ spl_t s = simple_lock_irq (&kmsg_lock);
if (kmsg_read_offset == kmsg_write_offset)
{
/* The queue is empty. */
if (ior->io_mode & D_NOWAIT)
{
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return D_WOULD_BLOCK;
}
ior->io_done = kmsg_read_done;
enqueue_tail (&kmsg_read_queue, (queue_entry_t) ior);
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return D_IO_QUEUED;
}
@@ -142,7 +142,7 @@ kmsgread (dev_t dev, io_req_t ior)
ior->io_residual = ior->io_count - amt;
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return D_SUCCESS;
}
@@ -151,13 +151,13 @@ kmsg_read_done (io_req_t ior)
{
int amt, len;
- simple_lock (&kmsg_lock);
+ spl_t s = simple_lock_irq (&kmsg_lock);
if (kmsg_read_offset == kmsg_write_offset)
{
/* The queue is empty. */
ior->io_done = kmsg_read_done;
enqueue_tail (&kmsg_read_queue, (queue_entry_t) ior);
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
return FALSE;
}
@@ -188,7 +188,7 @@ kmsg_read_done (io_req_t ior)
ior->io_residual = ior->io_count - amt;
- simple_unlock (&kmsg_lock);
+ simple_unlock_irq (s, &kmsg_lock);
ds_read_done (ior);
return TRUE;
@@ -218,6 +218,7 @@ kmsg_putchar (int c)
{
io_req_t ior;
int offset;
+ spl_t s = -1;
/* XXX: cninit is not called before cnputc is used. So call kmsginit
here if not initialized yet. */
@@ -226,8 +227,9 @@ kmsg_putchar (int c)
kmsginit ();
kmsg_init_done = TRUE;
}
-
- simple_lock (&kmsg_lock);
+
+ if (spl_init)
+ s = simple_lock_irq (&kmsg_lock);
offset = kmsg_write_offset + 1;
if (offset == KMSGBUFSIZE)
offset = 0;
@@ -235,7 +237,8 @@ kmsg_putchar (int c)
if (offset == kmsg_read_offset)
{
/* Discard C. */
- simple_unlock (&kmsg_lock);
+ if (spl_init)
+ simple_unlock_irq (s, &kmsg_lock);
return;
}
@@ -246,5 +249,6 @@ kmsg_putchar (int c)
while ((ior = (io_req_t) dequeue_head (&kmsg_read_queue)) != NULL)
iodone (ior);
- simple_unlock (&kmsg_lock);
+ if (spl_init)
+ simple_unlock_irq (s, &kmsg_lock);
}
diff --git a/device/net_io.c b/device/net_io.c
index 72b040a0..ee9435d7 100644
--- a/device/net_io.c
+++ b/device/net_io.c
@@ -86,7 +86,7 @@ int kttd_async_counter= 0;
* Messages can be high priority or low priority.
* The network thread processes high priority messages first.
*/
-decl_simple_lock_data(,net_queue_lock)
+def_simple_lock_data(static,net_queue_lock)
boolean_t net_thread_awake = FALSE;
struct ipc_kmsg_queue net_queue_high;
int net_queue_high_size = 0;
@@ -99,7 +99,7 @@ int net_queue_low_max = 0; /* for debugging */
* List of net kmsgs that can be touched at interrupt level.
* If it is empty, we will also steal low priority messages.
*/
-decl_simple_lock_data(,net_queue_free_lock)
+def_simple_lock_data(static,net_queue_free_lock)
struct ipc_kmsg_queue net_queue_free;
int net_queue_free_size = 0; /* on free list */
int net_queue_free_max = 0; /* for debugging */
@@ -125,7 +125,7 @@ int net_kmsg_send_low_misses = 0; /* for debugging */
int net_thread_awaken = 0; /* for debugging */
int net_ast_taken = 0; /* for debugging */
-decl_simple_lock_data(,net_kmsg_total_lock)
+def_simple_lock_data(static,net_kmsg_total_lock)
int net_kmsg_total = 0; /* total allocated */
int net_kmsg_max; /* initialized below */
@@ -235,7 +235,7 @@ net_kmsg_collect(void)
(void) splx(s);
}
-void
+static void
net_kmsg_more(void)
{
ipc_kmsg_t kmsg;
@@ -337,7 +337,7 @@ struct net_hash_header {
net_hash_entry_t table[NET_HASH_SIZE];
} filter_hash_header[N_NET_HASH];
-decl_simple_lock_data(,net_hash_header_lock)
+def_simple_lock_data(static,net_hash_header_lock)
#define HASH_ITERATE(head, elt) (elt) = (net_hash_entry_t) (head); do {
#define HASH_ITERATE_END(head, elt) \
@@ -368,8 +368,7 @@ decl_simple_lock_data(,net_hash_header_lock)
* Returns TRUE for high-priority packets.
*/
-boolean_t ethernet_priority(kmsg)
- const ipc_kmsg_t kmsg;
+boolean_t ethernet_priority(const ipc_kmsg_t kmsg)
{
unsigned char *addr =
(unsigned char *) net_kmsg(kmsg)->header;
@@ -387,22 +386,23 @@ boolean_t ethernet_priority(kmsg)
}
mach_msg_type_t header_type = {
- MACH_MSG_TYPE_BYTE,
- 8,
- NET_HDW_HDR_MAX,
- TRUE,
- FALSE,
- FALSE,
- 0
+ .msgt_name = MACH_MSG_TYPE_BYTE,
+ .msgt_size = 8,
+ .msgt_number = NET_HDW_HDR_MAX,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
mach_msg_type_t packet_type = {
- MACH_MSG_TYPE_BYTE, /* name */
- 8, /* size */
- 0, /* number */
- TRUE, /* inline */
- FALSE, /* longform */
- FALSE /* deallocate */
+ .msgt_name = MACH_MSG_TYPE_BYTE,
+ .msgt_size = 8,
+ .msgt_number = 0,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
/*
@@ -412,7 +412,7 @@ mach_msg_type_t packet_type = {
* Dequeues a message and delivers it at spl0.
* Returns FALSE if no messages.
*/
-boolean_t net_deliver(boolean_t nonblocking)
+static boolean_t net_deliver(boolean_t nonblocking)
{
ipc_kmsg_t kmsg;
boolean_t high_priority;
@@ -466,9 +466,10 @@ boolean_t net_deliver(boolean_t nonblocking)
MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0);
/* remember message sizes must be rounded up */
kmsg->ikm_header.msgh_size =
- (((mach_msg_size_t) (sizeof(struct net_rcv_msg)
+ (mach_msg_size_t) P2ROUND(sizeof(struct net_rcv_msg)
- sizeof net_kmsg(kmsg)->sent
- - NET_RCV_MAX + count)) + 3) &~ 3;
+ - NET_RCV_MAX + count,
+ __alignof__ (uintptr_t));
kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL;
kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL;
kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID;
@@ -549,7 +550,7 @@ void net_ast(void)
(void) splx(s);
}
-void __attribute__ ((noreturn)) net_thread_continue(void)
+static void __attribute__ ((noreturn)) net_thread_continue(void)
{
for (;;) {
spl_t s;
@@ -602,7 +603,7 @@ void net_thread(void)
/*NOTREACHED*/
}
-void
+static void
reorder_queue(
queue_t first,
queue_t last)
@@ -690,9 +691,8 @@ int net_filter_queue_reorder = 0; /* non-zero to enable reordering */
* We are *not* called at interrupt level.
*/
void
-net_filter(kmsg, send_list)
- const ipc_kmsg_t kmsg;
- ipc_kmsg_queue_t send_list;
+net_filter(const ipc_kmsg_t kmsg,
+ ipc_kmsg_queue_t send_list)
{
struct ifnet *ifp;
net_rcv_port_t infp, nextfp;
@@ -871,11 +871,10 @@ net_filter(kmsg, send_list)
}
boolean_t
-net_do_filter(infp, data, data_count, header)
- net_rcv_port_t infp;
- const char * data;
- unsigned int data_count;
- const char * header;
+net_do_filter(net_rcv_port_t infp,
+ const char * data,
+ unsigned int data_count,
+ const char * header)
{
int stack[NET_FILTER_STACK_DEPTH+1];
int *sp;
@@ -1010,7 +1009,7 @@ net_do_filter(infp, data, data_count, header)
/*
* Check filter for invalid operations or stack over/under-flow.
*/
-boolean_t
+static boolean_t
parse_net_filter(
filter_t *filter,
unsigned int count)
@@ -1437,7 +1436,7 @@ printf ("net_getstat: count: %d, addr_int_count: %d\n",
io_return_t
net_write(
struct ifnet *ifp,
- int (*start)(),
+ net_write_start_device_fn start,
io_req_t ior)
{
spl_t s;
@@ -1571,17 +1570,17 @@ net_io_init(void)
#ifndef BPF_ALIGN
#define EXTRACT_SHORT(p) ((u_short)ntohs(*(u_short *)p))
-#define EXTRACT_LONG(p) (ntohl(*(u_long *)p))
+#define EXTRACT_LONG(p) (ntohl(*(u_int *)p))
#else
#define EXTRACT_SHORT(p)\
((u_short)\
((u_short)*((u_char *)p+0)<<8|\
(u_short)*((u_char *)p+1)<<0))
#define EXTRACT_LONG(p)\
- ((u_long)*((u_char *)p+0)<<24|\
- (u_long)*((u_char *)p+1)<<16|\
- (u_long)*((u_char *)p+2)<<8|\
- (u_long)*((u_char *)p+3)<<0)
+ ((u_int)*((u_char *)p+0)<<24|\
+ (u_int)*((u_char *)p+1)<<16|\
+ (u_int)*((u_char *)p+2)<<8|\
+ (u_int)*((u_char *)p+3)<<0)
#endif
/*
@@ -1983,9 +1982,8 @@ bpf_eq(
}
unsigned int
-bpf_hash (n, keys)
- int n;
- const unsigned int *keys;
+bpf_hash (int n,
+ const unsigned int *keys)
{
unsigned int hval = 0;
@@ -1997,11 +1995,11 @@ bpf_hash (n, keys)
int
-bpf_match (hash, n_keys, keys, hash_headpp, entpp)
- net_hash_header_t hash;
- int n_keys;
- const unsigned int *keys;
- net_hash_entry_t **hash_headpp, *entpp;
+bpf_match (net_hash_header_t hash,
+ int n_keys,
+ const unsigned int *keys,
+ net_hash_entry_t **hash_headpp,
+ net_hash_entry_t *entpp)
{
net_hash_entry_t head, entp;
int i;
@@ -2100,7 +2098,7 @@ net_add_q_info(ipc_port_t rcv_port)
return (int)qlimit;
}
-void
+static void
net_del_q_info(int qlimit)
{
simple_lock(&net_kmsg_total_lock);
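In the msgh_size computation above, the hand-rolled "(x + 3) &~ 3" becomes P2ROUND(x, __alignof__(uintptr_t)): the same round-up-to-a-power-of-two multiple, except the alignment is now 4 on i386 and 8 on x86_64 rather than a hard-coded 4. A small self-contained check of the equivalence for 4-byte alignment (P2ROUND_DEMO is written out here only for illustration; the kernel has its own P2ROUND):

    #include <assert.h>
    #include <stdint.h>

    /* Round x up to the next multiple of the power-of-two 'align'. */
    #define P2ROUND_DEMO(x, align) (((x) + (align) - 1) & ~((uintptr_t)(align) - 1))

    int main(void)
    {
        for (uintptr_t x = 0; x < 64; x++)
            assert(P2ROUND_DEMO(x, 4) == ((x + 3) & ~(uintptr_t)3));
        return 0;
    }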
diff --git a/device/net_io.h b/device/net_io.h
index 9468e34b..c9af85ee 100644
--- a/device/net_io.h
+++ b/device/net_io.h
@@ -79,7 +79,9 @@ extern void net_packet(struct ifnet *, ipc_kmsg_t, unsigned int, boolean_t);
extern void net_filter(ipc_kmsg_t, ipc_kmsg_queue_t);
extern io_return_t net_getstat(struct ifnet *, dev_flavor_t, dev_status_t,
mach_msg_type_number_t *);
-extern io_return_t net_write(struct ifnet *, int (*)(), io_req_t);
+
+typedef int (*net_write_start_device_fn)(short);
+extern io_return_t net_write(struct ifnet *, net_write_start_device_fn, io_req_t);
/*
* Non-interrupt code may allocate and free net_kmsgs with these functions.
diff --git a/device/subrs.c b/device/subrs.c
index be710132..6e90a810 100644
--- a/device/subrs.c
+++ b/device/subrs.c
@@ -34,22 +34,11 @@
#include <device/buf.h>
#include <device/if_hdr.h>
#include <device/if_ether.h>
+#include <device/subrs.h>
/*
- * Print out disk name and block number for hard disk errors.
- */
-void harderr(const io_req_t ior, const char *cp)
-{
- printf("%s%d%c: hard error sn%d ",
- cp,
- minor(ior->io_unit) >> 3,
- 'a' + (minor(ior->io_unit) & 0x7),
- ior->io_recnum);
-}
-
-/*
* Convert Ethernet address to printable (loggable) representation.
*/
char *
@@ -95,35 +84,3 @@ void wakeup(vm_offset_t channel)
{
thread_wakeup((event_t) channel);
}
-
-io_req_t
-geteblk(int size)
-{
- io_req_t ior;
-
- io_req_alloc(ior, 0);
- ior->io_device = (mach_device_t)0;
- ior->io_unit = 0;
- ior->io_op = 0;
- ior->io_mode = 0;
- ior->io_recnum = 0;
- ior->io_count = size;
- ior->io_residual = 0;
- ior->io_error = 0;
-
- size = round_page(size);
- ior->io_alloc_size = size;
- if (kmem_alloc(kernel_map, (vm_offset_t *)&ior->io_data, size)
- != KERN_SUCCESS)
- panic("geteblk");
-
- return (ior);
-}
-
-void brelse(io_req_t ior)
-{
- (void) vm_deallocate(kernel_map,
- (vm_offset_t) ior->io_data,
- ior->io_alloc_size);
- io_req_free(ior);
-}
diff --git a/device/tty.h b/device/tty.h
index 0bdb2db9..3f8b2f63 100644
--- a/device/tty.h
+++ b/device/tty.h
@@ -43,7 +43,7 @@
#include <device/io_req.h>
struct tty {
- decl_simple_lock_data(,t_lock)
+ decl_simple_lock_irq_data(,t_lock) /* Shall be taken at spltty only */
struct cirbuf t_inq; /* input buffer */
struct cirbuf t_outq; /* output buffer */
char * t_addr; /* device pointer */
diff --git a/doc/mach.texi b/doc/mach.texi
index fc74b384..f85288e0 100644
--- a/doc/mach.texi
+++ b/doc/mach.texi
@@ -3010,6 +3010,7 @@ the kernel.
* Memory Attributes:: Tweaking memory regions.
* Mapping Memory Objects:: How to map memory objects.
* Memory Statistics:: How to get statistics about memory usage.
+* Memory physical addresses:: How to get physical addresses of memory.
@end menu
@node Memory Allocation
@@ -3514,6 +3515,23 @@ constant for the life of the task.
@end deftypefun
+@node Memory physical addresses
+@section Memory physical addresses
+
+@deftypefun kern_return_t vm_pages_phys (@w{host_t @var{host}}, @w{vm_task_t @var{target_task}}, @w{vm_address_t @var{address}}, @w{vm_size_t @var{size}}, @w{rpc_phys_addr_array_t *@var{pages}}, @w{mach_msg_type_number_t *@var{pagesCnt}})
+The function @code{vm_pages_phys} retrieves the physical addresses of the
+specified region (@var{size} bytes starting from @var{address}) of
+@var{target_task}'s virtual address space.
+
+Both @var{address} and @var{size} have to be aligned on @code{vm_page_size}.
+
+@var{pages} is an @code{rpc_phys_addr_array_t} array supplied by the
+caller and returned filled with the physical page numbers. @var{pagesCnt} is
+supplied as the maximum number of elements in the @var{pages} array. On
+return, it contains the actual number of entries filled in @var{pages}.
+@end deftypefun
+
+
@node External Memory Management
@chapter External Memory Management
@@ -4409,6 +4427,17 @@ The last processor used by the thread.
This is a pointer to a @code{struct thread_sched_info}.
@end deftp
+@deftypefun kern_return_t thread_set_name (@w{thread_t @var{target_thread}}, @w{const_kernel_debug_name_t @var{name}})
+
+The function @code{thread_set_name} sets the name of @var{target_thread}
+to @var{name}, truncating it if necessary.
+
+This is a debugging aid. The name is used in diagnostic messages
+printed by the kernel.
+
+The function returns @code{KERN_SUCCESS} if the call succeeded.
+@end deftypefun
+
@node Thread Settings
@subsection Thread Settings
@@ -4830,11 +4859,11 @@ argument set to @code{THREAD_EXCEPTION_PORT}.
@node Exceptions
@subsection Exceptions
-@deftypefun kern_return_t catch_exception_raise (@w{mach_port_t @var{exception_port}}, @w{thread_t @var{thread}}, @w{task_t @var{task}}, @w{int @var{exception}}, @w{int @var{code}}, @w{int @var{subcode}})
+@deftypefun kern_return_t catch_exception_raise (@w{mach_port_t @var{exception_port}}, @w{thread_t @var{thread}}, @w{task_t @var{task}}, @w{int @var{exception}}, @w{int @var{code}}, @w{long @var{subcode}})
XXX Fixme
@end deftypefun
-@deftypefun kern_return_t exception_raise (@w{mach_port_t @var{exception_port}}, @w{mach_port_t @var{thread}}, @w{mach_port_t @var{task}}, @w{integer_t @var{exception}}, @w{integer_t @var{code}}, @w{integer_t @var{subcode}})
+@deftypefun kern_return_t exception_raise (@w{mach_port_t @var{exception_port}}, @w{mach_port_t @var{thread}}, @w{mach_port_t @var{task}}, @w{integer_t @var{exception}}, @w{integer_t @var{code}}, @w{long_integer_t @var{subcode}})
XXX Fixme
@end deftypefun
@@ -5011,10 +5040,10 @@ suspend count for task
@item integer_t base_priority
base scheduling priority
-@item vm_size_t virtual_size
+@item rpc_vm_size_t virtual_size
number of virtual pages
-@item vm_size_t resident_size
+@item rpc_vm_size_t resident_size
number of resident pages
@item time_value_t user_time
@@ -5041,25 +5070,25 @@ provided it as the @var{task_info} parameter for the
following members:
@table @code
-@item natural_t faults
+@item rpc_long_natural_t faults
number of page faults
-@item natural_t zero_fills
+@item rpc_long_natural_t zero_fills
number of zero fill pages
-@item natural_t reactivations
+@item rpc_long_natural_t reactivations
number of reactivated pages
-@item natural_t pageins
+@item rpc_long_natural_t pageins
number of actual pageins
-@item natural_t cow_faults
+@item rpc_long_natural_t cow_faults
number of copy-on-write faults
-@item natural_t messages_sent
+@item rpc_long_natural_t messages_sent
number of messages sent
-@item natural_t messages_received
+@item rpc_long_natural_t messages_received
number of messages received
@end table
@end deftp
@@ -5100,6 +5129,17 @@ printed by the kernel.
The function returns @code{KERN_SUCCESS} if the call succeeded.
@end deftypefun
+@deftypefun kern_return_t task_set_essential (@w{task_t @var{target_task}}, @w{boolean_t @var{essential}})
+
+The function @code{task_set_essential} sets whether @var{target_task} is
+essential for the system, i.e. the system will completely crash and reboot if
+that task crashes. This means that when the debugger is enabled, it should be
+triggered on the crash, so as to get the opportunity to debug the issue instead
+of just rebooting.
+
+The function returns @code{KERN_SUCCESS} if the call succeeded.
+@end deftypefun
+
@node Task Execution
@subsection Task Execution
@@ -5566,8 +5606,8 @@ The minimum quantum and unit of quantum in milliseconds.
This is a pointer to a @code{struct host_sched_info}.
@end deftp
-@deftypefun kern_return_t host_kernel_version (@w{host_t @var{host}}, @w{kernel_version_t *@var{version}})
-The @code{host_kernel_version} function returns the version string
+@deftypefun kern_return_t host_get_kernel_version (@w{host_t @var{host}}, @w{kernel_version_t *@var{version}})
+The @code{host_get_kernel_version} function returns the version string
compiled into the kernel executing on @var{host} at the time it was
built in the character string @var{version}. This string describes the
version of the kernel. The constant @code{KERNEL_VERSION_MAX} should be
@@ -5584,65 +5624,51 @@ inaccessible memory, it returns @code{KERN_INVALID_ADDRESS}, and
@code{KERN_SUCCESS} otherwise.
@end deftypefun
-@deftypefun kern_return_t host_get_boot_info (@w{host_priv_t @var{host_priv}}, @w{kernel_boot_info_t @var{boot_info}})
-The @code{host_get_boot_info} function returns the boot-time information
-string supplied by the operator to the kernel executing on
-@var{host_priv} in the character string @var{boot_info}. The constant
-@code{KERNEL_BOOT_INFO_MAX} should be used to dimension storage for the
-returned string if the @code{kernel_boot_info_t} declaration is not
-used.
-
-If the boot-time information string supplied by the operator is longer
-than @code{KERNEL_BOOT_INFO_MAX}, the result is truncated and not
-necessarily null-terminated.
-@end deftypefun
-
-
@node Host Time
@section Host Time
-@deftp {Data type} time_value_t
+@deftp {Data type} time_value64_t
This is the representation of a time in Mach. It is a @code{struct
-time_value} and consists of the following members:
+time_value64} and consists of the following members:
@table @code
-@item integer_t seconds
+@item int64_t seconds
The number of seconds.
-@item integer_t microseconds
-The number of microseconds.
+@item int64_t nanoseconds
+The number of nanoseconds.
@end table
@end deftp
-The number of microseconds should always be smaller than
-@code{TIME_MICROS_MAX} (1000000). A time with this property is
+The number of nanoseconds should always be smaller than
+@code{TIME_NANOS_MAX} (1000000000). A time with this property is
@dfn{normalized}. Normalized time values can be manipulated with the
following macros:
-@defmac time_value_add_usec (@w{time_value_t *@var{val}}, @w{integer_t *@var{micros}})
-Add @var{micros} microseconds to @var{val}. If @var{val} is normalized
-and @var{micros} smaller than @code{TIME_MICROS_MAX}, @var{val} will be
+@defmac time_value64_add_nanos (@w{time_value64_t *@var{val}}, @w{int64_t *@var{nanos}})
+Add @var{nanos} nanoseconds to @var{val}. If @var{val} is normalized
+and @var{nanos} smaller than @code{TIME_NANOS_MAX}, @var{val} will be
normalized afterwards.
@end defmac
-@defmac time_value_add (@w{time_value_t *@var{result}}, @w{time_value_t *@var{addend}})
+@defmac time_value64_add (@w{time_value64_t *@var{result}}, @w{time_value64_t *@var{addend}})
Add the values in @var{addend} to @var{result}. If both are normalized,
@var{result} will be normalized afterwards.
@end defmac
-A variable of type @code{time_value_t} can either represent a duration
+A variable of type @code{time_value64_t} can either represent a duration
or a fixed point in time. In the latter case, it shall be interpreted as
-the number of seconds and microseconds after the epoch 1. Jan 1970.
+the number of seconds and nanoseconds after the epoch, 1 January 1970.
-@deftypefun kern_return_t host_get_time (@w{host_t @var{host}}, @w{time_value_t *@var{current_time}})
+@deftypefun kern_return_t host_get_time64 (@w{host_t @var{host}}, @w{time_value64_t *@var{current_time}})
Get the current time as seen by @var{host}. On success, the time passed
since the epoch is returned in @var{current_time}.
@end deftypefun
-@deftypefun kern_return_t host_set_time (@w{host_priv_t @var{host_priv}}, @w{time_value_t @var{new_time}})
+@deftypefun kern_return_t host_set_time64 (@w{host_priv_t @var{host_priv}}, @w{time_value64_t @var{new_time}})
Set the time of @var{host_priv} to @var{new_time}.
@end deftypefun
-@deftypefun kern_return_t host_adjust_time (@w{host_priv_t @var{host_priv}}, @w{time_value_t @var{new_adjustment}}, @w{time_value_t *@var{old_adjustment}})
+@deftypefun kern_return_t host_adjust_time64 (@w{host_priv_t @var{host_priv}}, @w{time_value64_t @var{new_adjustment}}, @w{time_value64_t *@var{old_adjustment}})
Arrange for the current time as seen by @var{host_priv} to be gradually
changed by the adjustment value @var{new_adjustment}, and return the old
adjustment value in @var{old_adjustment}.
@@ -5664,6 +5690,14 @@ The number of microseconds.
@item integer_t check_seconds
This is a copy of the seconds value, which must be checked to protect
+against a race condition when reading out the two time values. This
+should only be used when getting the 32 bit version of @code{time_value64_t}.
+
+@item time_value64_t time_value
+The current time.
+
+@item int64_t check_seconds64
+This is a copy of the seconds value in @var{time_value}, which must be checked to protect
against a race condition when reading out the two time values.
@end table
@end deftp
@@ -5675,12 +5709,12 @@ mapped-time interface:
@example
do
@{
- secs = mtime->seconds;
+ secs = mtime->time_value.seconds;
__sync_synchronize();
- usecs = mtime->microseconds;
+ nanos = mtime->time_value.nanoseconds;
__sync_synchronize();
@}
-while (secs != mtime->check_seconds);
+while (secs != mtime->check_seconds64);
@end example
@@ -6913,6 +6947,10 @@ Examine backward. It executes an examine command with the last
specified parameters to it except that the last start address subtracted
by the size displayed by it is used as the start address.
+@item whatis @var{addr}
+Try to find what this address is. It looks through the various tasks, threads,
+maps, caches, etc. to give an idea of what is behind this address.
+
@item print[/axzodurc] @var{addr1} [ @var{addr2} @dots{} ]
Print @var{addr}'s according to the modifier character. Valid formats
are: @code{a} @code{x} @code{z} @code{o} @code{d} @code{u} @code{r}
@@ -7122,10 +7160,16 @@ information about the individual threads.
Display the information of a task specified by @var{addr}. If
@var{addr} is omitted, current task information is displayed.
+@code{show task $taskxx} can notably be used to show task number
+@var{xx}.
+
@item show thread [ @var{addr} ]
Display the information of a thread specified by @var{addr}. If
@var{addr} is omitted, current thread information is displayed.
+@code{show thread $taskxx.yy} can notably be used to show thread
+number @var{yy} of task number @var{xx}.
+
@item show registers[/tu [ @var{thread} ]]
Display the register set. Target thread can be specified with @code{t}
option and @var{thread} parameter. If @code{u} option is specified, it
@@ -7137,6 +7181,9 @@ machine. If not supported, incorrect information will be displayed.
@item show map @var{addr}
Prints the @code{vm_map} at @var{addr}.
+@code{show map $mapxx} can notably be used to show the map of task
+number @var{xx}.
+
@item show object @var{addr}
Prints the @code{vm_object} at @var{addr}.
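To make the vm_pages_phys description above concrete, a user-space call would look roughly like the following. This is a sketch under the assumption that the MIG-generated stub matches the @deftypefun line and fills a caller-supplied buffer; the element type is written here as rpc_phys_addr_t and addr stands for a page-aligned, mapped vm_address_t, both of which are assumptions of this example:

    rpc_phys_addr_t pages[4];
    mach_msg_type_number_t count = 4;    /* capacity in, entries filled on return */
    kern_return_t kr;

    kr = vm_pages_phys(mach_host_self(), mach_task_self(),
                       addr, 4 * vm_page_size,   /* both vm_page_size aligned */
                       pages, &count);
    /* On KERN_SUCCESS, pages[0..count-1] hold the physical addresses
       backing the region starting at addr. */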
diff --git a/gensym.awk b/gensym.awk
index f5eabae5..609d9273 100644
--- a/gensym.awk
+++ b/gensym.awk
@@ -27,7 +27,8 @@ BEGIN {
/^[a-z]/ {
if (bogus_printed == "no")
{
- print "void bogus() {";
+ print "void bogus(void);"
+ print "void bogus(void) {";
bogus_printed = "yes";
}
}
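The gensym.awk tweak only changes the generated C: the dummy function that carries the asm symbol definitions is now preceded by its own prototype (presumably to keep missing-prototype warnings quiet), so the emitted file starts out along these lines:

    void bogus(void);
    void bogus(void) {
        /* one asm() statement per exported symbol follows */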
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index c1043c4f..58ee3273 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -30,6 +30,8 @@ if HOST_ix86
#
libkernel_a_SOURCES += \
+ i386/i386at/acpi_parse_apic.h \
+ i386/i386at/acpi_parse_apic.c \
i386/i386at/autoconf.c \
i386/i386at/autoconf.h \
i386/i386at/biosmem.c \
@@ -89,79 +91,15 @@ endif
#
libkernel_a_SOURCES += \
- i386/i386/ast.h \
- i386/i386/ast_check.c \
- i386/i386/ast_types.h \
- i386/i386/cpu.h \
- i386/i386/cpu_number.h \
+ i386/i386/copy_user.h \
i386/i386/cswitch.S \
- i386/i386/db_disasm.c \
- i386/i386/db_interface.c \
- i386/i386/db_interface.h \
- i386/i386/db_machdep.h \
- i386/i386/db_trace.c \
- i386/i386/db_trace.h \
- i386/i386/debug.h \
- i386/i386/debug_i386.c \
i386/i386/debug_trace.S \
- i386/i386/eflags.h \
- i386/i386/fpu.c \
- i386/i386/fpu.h \
- i386/i386/gdt.c \
- i386/i386/gdt.h \
- i386/i386/idt-gen.h \
- i386/i386/idt.c \
i386/i386/idt_inittab.S \
- i386/i386/io_perm.c \
- i386/i386/io_perm.h \
- i386/i386/ipl.h \
- i386/i386/ktss.c \
- i386/i386/ktss.h \
- i386/i386/kttd_interface.c \
- i386/i386/kttd_machdep.h \
- i386/i386/ldt.c \
- i386/i386/ldt.h \
- i386/i386/lock.h \
i386/i386/locore.S \
- i386/i386/locore.h \
- i386/i386/loose_ends.c \
- i386/i386/loose_ends.h \
- i386/i386/mach_param.h \
- i386/i386/machine_routines.h \
- i386/i386/machine_task.c \
- i386/i386/machspl.h \
- i386/i386/model_dep.h \
- i386/i386/mp_desc.c \
- i386/i386/mp_desc.h \
- i386/i386/pcb.c \
- i386/i386/pcb.h \
- i386/i386/phys.c \
- i386/i386/pio.h \
- i386/i386/pmap.h \
- i386/i386/proc_reg.h \
- i386/i386/sched_param.h \
- i386/i386/seg.c \
- i386/i386/seg.h \
- i386/i386/setjmp.h \
- i386/i386/smp.h \
- i386/i386/smp.c \
+ i386/i386/percpu.c \
+ i386/i386/percpu.h \
i386/i386/spl.S \
- i386/i386/spl.h \
- i386/i386/strings.c \
- i386/i386/task.h \
- i386/i386/thread.h \
- i386/i386/time_stamp.h \
- i386/i386/trap.c \
- i386/i386/trap.h \
- i386/i386/tss.h \
- i386/i386/user_ldt.c \
- i386/i386/user_ldt.h \
- i386/i386/vm_param.h \
- i386/i386/xpr.h \
- i386/intel/pmap.c \
- i386/intel/pmap.h \
- i386/intel/read_fault.c \
- i386/intel/read_fault.h
+ i386/i386/cpuboot.S
if PLATFORM_at
libkernel_a_SOURCES += \
@@ -169,9 +107,9 @@ libkernel_a_SOURCES += \
i386/i386/apic.c \
i386/i386/hardclock.c \
i386/i386/hardclock.h \
- i386/i386/io_map.c \
i386/i386/irq.c \
i386/i386/irq.h \
+ i386/i386/msr.h \
i386/i386/pit.c \
i386/i386/pit.h
diff --git a/i386/Makefrag_x86.am b/i386/Makefrag_x86.am
new file mode 100644
index 00000000..272de023
--- /dev/null
+++ b/i386/Makefrag_x86.am
@@ -0,0 +1,84 @@
+# Copyright (C) 2023 Free Software Foundation, Inc.
+
+# Permission to use, copy, modify and distribute this software and its
+# documentation is hereby granted, provided that both the copyright
+# notice and this permission notice appear in all copies of the
+# software, derivative works or modified versions, and any portions
+# thereof, and that both notices appear in supporting documentation.
+#
+# THE FREE SOFTWARE FOUNDATION ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+# "AS IS" CONDITION. THE FREE SOFTWARE FOUNDATION DISCLAIMS ANY
+# LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE
+# USE OF THIS SOFTWARE.
+
+# Shared files for all x86.
+
+libkernel_a_SOURCES += \
+ i386/i386/ast.h \
+ i386/i386/ast_check.c \
+ i386/i386/ast_types.h \
+ i386/i386/cpu.h \
+ i386/i386/cpu_number.h \
+ i386/i386/db_disasm.c \
+ i386/i386/db_interface.c \
+ i386/i386/db_interface.h \
+ i386/i386/db_machdep.h \
+ i386/i386/db_trace.c \
+ i386/i386/db_trace.h \
+ i386/i386/debug.h \
+ i386/i386/debug_i386.c \
+ i386/i386/eflags.h \
+ i386/i386/fpu.c \
+ i386/i386/fpu.h \
+ i386/i386/gdt.c \
+ i386/i386/gdt.h \
+ i386/i386/idt-gen.h \
+ i386/i386/idt.c \
+ i386/i386/io_perm.c \
+ i386/i386/io_perm.h \
+ i386/i386/ipl.h \
+ i386/i386/ktss.c \
+ i386/i386/ktss.h \
+ i386/i386/kttd_interface.c \
+ i386/i386/kttd_machdep.h \
+ i386/i386/ldt.c \
+ i386/i386/ldt.h \
+ i386/i386/lock.h \
+ i386/i386/locore.h \
+ i386/i386/loose_ends.c \
+ i386/i386/loose_ends.h \
+ i386/i386/mach_param.h \
+ i386/i386/machine_routines.h \
+ i386/i386/machine_task.c \
+ i386/i386/machspl.h \
+ i386/i386/model_dep.h \
+ i386/i386/mp_desc.c \
+ i386/i386/mp_desc.h \
+ i386/i386/pcb.c \
+ i386/i386/pcb.h \
+ i386/i386/phys.c \
+ i386/i386/pio.h \
+ i386/i386/pmap.h \
+ i386/i386/proc_reg.h \
+ i386/i386/sched_param.h \
+ i386/i386/seg.h \
+ i386/i386/setjmp.h \
+ i386/i386/smp.c \
+ i386/i386/smp.h \
+ i386/i386/spl.h \
+ i386/i386/strings.c \
+ i386/i386/task.h \
+ i386/i386/thread.h \
+ i386/i386/time_stamp.h \
+ i386/i386/trap.c \
+ i386/i386/trap.h \
+ i386/i386/tss.h \
+ i386/i386/user_ldt.c \
+ i386/i386/user_ldt.h \
+ i386/i386/vm_param.h \
+ i386/i386/xpr.h \
+ i386/intel/pmap.c \
+ i386/intel/pmap.h \
+ i386/intel/read_fault.c \
+ i386/intel/read_fault.h
+
diff --git a/i386/configfrag.ac b/i386/configfrag.ac
index f697e277..f07a98ca 100644
--- a/i386/configfrag.ac
+++ b/i386/configfrag.ac
@@ -106,6 +106,8 @@ AC_ARG_ENABLE([apic],
enable_pae=${enable_pae-yes};;
*:i?86)
:;;
+ *:x86_64)
+ enable_pae=${enable_pae-yes};;
*)
if [ x"$enable_pae" = xyes ]; then]
AC_MSG_ERROR([can only enable the `PAE' feature on ix86.])
diff --git a/i386/i386/apic.c b/i386/i386/apic.c
index 2e0c1776..0b5be50f 100644
--- a/i386/i386/apic.c
+++ b/i386/i386/apic.c
@@ -19,13 +19,30 @@
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
#include <i386/apic.h>
+#include <i386/cpu.h>
+#include <i386at/idt.h>
#include <string.h>
#include <vm/vm_kern.h>
#include <kern/printf.h>
#include <kern/kalloc.h>
+/*
+ * Period of HPET timer in nanoseconds
+ */
+uint32_t hpet_period_nsec;
+
+/*
+ * This dummy structure is needed so that CPU_NUMBER can be called
+ * before the lapic pointer is initialized to point to the real Local Apic.
+ * It causes the apic_id to be faked as 0, which is the master processor.
+ */
+static ApicLocalUnit dummy_lapic = {0};
+volatile ApicLocalUnit* lapic = &dummy_lapic;
-volatile ApicLocalUnit* lapic = NULL;
+/* This lookup table of [apic_id] -> kernel_id is initially populated with zeroes
+ * so every lookup results in the master processor until the real kernel ids are populated.
+ */
+int cpu_id_lut[UINT8_MAX + 1] = {0};
ApicInfo apic_data;
@@ -112,7 +129,7 @@ acpi_get_irq_override(uint8_t pin)
* apic_get_cpu_apic_id: returns the apic_id of a cpu.
* Receives as input the kernel ID of a CPU.
*/
-uint16_t
+int
apic_get_cpu_apic_id(int kernel_id)
{
if (kernel_id >= NCPUS)
@@ -121,6 +138,17 @@ apic_get_cpu_apic_id(int kernel_id)
return apic_data.cpu_lapic_list[kernel_id];
}
+
+/*
+ * apic_get_cpu_kernel_id: returns the kernel_id of a cpu.
+ * Receives as input the APIC ID of a CPU.
+ */
+int
+apic_get_cpu_kernel_id(uint16_t apic_id)
+{
+ return cpu_id_lut[apic_id];
+}
+
/* apic_get_lapic: returns a reference to the common memory address for Local APIC. */
volatile ApicLocalUnit*
apic_get_lapic(void)
@@ -155,20 +183,30 @@ apic_get_num_ioapics(void)
return apic_data.nioapics;
}
+/* apic_get_total_gsis: returns the total number of GSIs in the system. */
+int
+apic_get_total_gsis(void)
+{
+ int id;
+ int gsis = 0;
+
+ for (id = 0; id < apic_get_num_ioapics(); id++)
+ gsis += apic_get_ioapic(id)->ngsis;
+
+ return gsis;
+}
+
/*
* apic_get_current_cpu: returns the apic_id of current cpu.
*/
-uint16_t
+int
apic_get_current_cpu(void)
{
- uint16_t apic_id;
-
- if(lapic == NULL)
- apic_id = 0;
- else
- apic_id = lapic->apic_id.r;
-
- return apic_id;
+ unsigned int eax, ebx, ecx, edx;
+ eax = 1;
+ ecx = 0;
+ cpuid(eax, ebx, ecx, edx);
+ return (ebx >> 24);
}
@@ -201,6 +239,22 @@ int apic_refit_cpulist(void)
}
/*
+ * apic_generate_cpu_id_lut: Generate lookup table of cpu kernel ids from apic ids
+ */
+void apic_generate_cpu_id_lut(void)
+{
+ int i, apic_id;
+
+ for (i = 0; i < apic_data.ncpus; i++) {
+ apic_id = apic_get_cpu_apic_id(i);
+ if (apic_id >= 0)
+ cpu_id_lut[apic_id] = i;
+ else
+ printf("apic_get_cpu_apic_id(%d) failed...\n", i);
+ }
+}
+
+/*
* apic_print_info: shows the list of Local APIC and IOAPIC.
* Shows each CPU and IOAPIC, with Its Kernel ID and APIC ID.
*/
@@ -234,3 +288,166 @@ void apic_print_info(void)
}
}
}
+
+void apic_send_ipi(unsigned dest_shorthand, unsigned deliv_mode, unsigned dest_mode, unsigned level, unsigned trig_mode, unsigned vector, unsigned dest_id)
+{
+ IcrLReg icrl_values;
+ IcrHReg icrh_values;
+
+ /* Keep previous values and only overwrite known fields */
+ icrl_values.r = lapic->icr_low.r;
+ icrh_values.r = lapic->icr_high.r;
+
+ icrl_values.destination_shorthand = dest_shorthand;
+ icrl_values.delivery_mode = deliv_mode;
+ icrl_values.destination_mode = dest_mode;
+ icrl_values.level = level;
+ icrl_values.trigger_mode = trig_mode;
+ icrl_values.vector = vector;
+ icrh_values.destination_field = dest_id;
+
+ lapic->icr_high.r = icrh_values.r;
+ lapic->icr_low.r = icrl_values.r;
+}
+
+void
+lapic_enable(void)
+{
+ lapic->spurious_vector.r |= LAPIC_ENABLE;
+}
+
+void
+lapic_disable(void)
+{
+ lapic->spurious_vector.r &= ~LAPIC_ENABLE;
+}
+
+void
+lapic_setup(void)
+{
+ unsigned long flags;
+ int apic_id;
+ volatile uint32_t dummy;
+
+ cpu_intr_save(&flags);
+
+ apic_id = apic_get_current_cpu();
+
+ dummy = lapic->dest_format.r;
+ lapic->dest_format.r = 0xffffffff; /* flat model */
+ dummy = lapic->logical_dest.r;
+ lapic->logical_dest.r = lapic->apic_id.r; /* target self */
+ dummy = lapic->lvt_lint0.r;
+ lapic->lvt_lint0.r = dummy | LAPIC_DISABLE;
+ dummy = lapic->lvt_lint1.r;
+ lapic->lvt_lint1.r = dummy | LAPIC_DISABLE;
+ dummy = lapic->lvt_performance_monitor.r;
+ lapic->lvt_performance_monitor.r = dummy | LAPIC_DISABLE;
+ if (apic_id != 0)
+ {
+ dummy = lapic->lvt_timer.r;
+ lapic->lvt_timer.r = dummy | LAPIC_DISABLE;
+ }
+ dummy = lapic->task_pri.r;
+ lapic->task_pri.r = 0;
+
+ /* Enable LAPIC to send or receive IPI/SIPIs */
+ dummy = lapic->spurious_vector.r;
+ lapic->spurious_vector.r = IOAPIC_SPURIOUS_BASE
+ | LAPIC_ENABLE_DIRECTED_EOI;
+
+ lapic->error_status.r = 0;
+
+ cpu_intr_restore(flags);
+}
+
+void
+lapic_eoi(void)
+{
+ lapic->eoi.r = 0;
+}
+
+#define HPET32(x) *((volatile uint32_t *)((uint8_t *)hpet_addr + x))
+#define HPET_CAP_PERIOD 0x04
+#define HPET_CFG 0x10
+# define HPET_CFG_ENABLE (1 << 0)
+# define HPET_LEGACY_ROUTE (1 << 1)
+#define HPET_COUNTER 0xf0
+#define HPET_T0_CFG 0x100
+# define HPET_T0_32BIT_MODE (1 << 8)
+# define HPET_T0_VAL_SET (1 << 6)
+# define HPET_T0_TYPE_PERIODIC (1 << 3)
+# define HPET_T0_INT_ENABLE (1 << 2)
+#define HPET_T0_COMPARATOR 0x108
+
+#define FSEC_PER_NSEC 1000000
+#define NSEC_PER_USEC 1000
+
+/* This function sets up the HPET timer in 32 bit periodic mode,
+ * generating no interrupts.
+ * The timer counts upwards and when it reaches 0xffffffff it
+ * wraps to zero. The timer ticks with a constant period in nanoseconds,
+ * which is stored in the hpet_period_nsec variable.
+ */
+void
+hpet_init(void)
+{
+ uint32_t period;
+ uint32_t val;
+
+ assert(hpet_addr != 0);
+
+ /* Find out how often the HPET ticks in nanoseconds */
+ period = HPET32(HPET_CAP_PERIOD);
+ hpet_period_nsec = period / FSEC_PER_NSEC;
+ printf("HPET ticks every %d nanoseconds\n", hpet_period_nsec);
+
+ /* Disable HPET and legacy interrupt routing mode */
+ val = HPET32(HPET_CFG);
+ val = val & ~(HPET_LEGACY_ROUTE | HPET_CFG_ENABLE);
+ HPET32(HPET_CFG) = val;
+
+ /* Clear the counter */
+ HPET32(HPET_COUNTER) = 0;
+
+ /* Set up 32 bit periodic timer with no interrupts */
+ val = HPET32(HPET_T0_CFG);
+ val = (val & ~HPET_T0_INT_ENABLE) | HPET_T0_32BIT_MODE | HPET_T0_TYPE_PERIODIC | HPET_T0_VAL_SET;
+ HPET32(HPET_T0_CFG) = val;
+
+ /* Set comparator to max */
+ HPET32(HPET_T0_COMPARATOR) = 0xffffffff;
+
+ /* Enable the HPET */
+ HPET32(HPET_CFG) |= HPET_CFG_ENABLE;
+
+ printf("HPET enabled\n");
+}
+
+void
+hpet_udelay(uint32_t us)
+{
+ uint32_t start, now;
+ uint32_t max_delay_us = 0xffffffff / NSEC_PER_USEC;
+
+ if (us > max_delay_us) {
+ printf("HPET ERROR: Delay too long, %d usec, truncating to %d usec\n",
+ us, max_delay_us);
+ us = max_delay_us;
+ }
+
+ /* Convert us to HPET ticks */
+ us = (us * NSEC_PER_USEC) / hpet_period_nsec;
+
+ start = HPET32(HPET_COUNTER);
+ do {
+ now = HPET32(HPET_COUNTER);
+ } while (now - start < us);
+}
+
+void
+hpet_mdelay(uint32_t ms)
+{
+ hpet_udelay(ms * 1000);
+}
+
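hpet_init() above divides the value read from HPET_CAP_PERIOD by FSEC_PER_NSEC because the HPET capabilities register reports the counter period in femtoseconds. As a worked example of the conversion hpet_udelay() then performs (the numbers are illustrative, not a claim about any particular board):

    /* A 14.318180 MHz HPET reports a period of ~69841279 fs. */
    uint32_t period_fs   = 69841279;
    uint32_t period_nsec = period_fs / FSEC_PER_NSEC;           /* = 69 ns */
    uint32_t us          = 1000;                                /* ask for 1 ms */
    uint32_t ticks       = (us * NSEC_PER_USEC) / period_nsec;  /* = 14492 ticks */

The unsigned "now - start < us" comparison in hpet_udelay() keeps the wait correct even if the 32-bit counter wraps once during the delay.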
diff --git a/i386/i386/apic.h b/i386/i386/apic.h
index add1b8cf..9eef0d8b 100644
--- a/i386/i386/apic.h
+++ b/i386/i386/apic.h
@@ -61,10 +61,99 @@ union ioapic_route_entry_union {
struct ioapic_route_entry both;
};
+
+/* Grateful to trasterlabs for this snippet */
+
+typedef union u_icr_low
+{
+ uint32_t value[4];
+ struct
+ {
+ uint32_t r; // FEE0 0300H - 4 bytes
+ unsigned :32; // FEE0 0304H
+ unsigned :32; // FEE0 0308H
+ unsigned :32; // FEE0 030CH
+ };
+ struct
+ {
+ unsigned vector: 8; /* Vector of interrupt. Lowest 8 bits of routine address */
+ unsigned delivery_mode : 3;
+ unsigned destination_mode: 1;
+ unsigned delivery_status: 1;
+ unsigned :1;
+ unsigned level: 1;
+ unsigned trigger_mode: 1;
+ unsigned remote_read_status: 2; /* Read-only field */
+ unsigned destination_shorthand: 2;
+ unsigned :12;
+ };
+} IcrLReg;
+
+typedef union u_icr_high
+{
+ uint32_t value[4];
+ struct
+ {
+ uint32_t r; // FEE0 0310H - 4 bytes
+ unsigned :32; // FEE0 0314H
+ unsigned :32; // FEE0 0318H
+ unsigned :32; // FEE0 031CH
+ };
+ struct
+ {
+ unsigned :24; // FEE0 0310H - 4 bytes
+ unsigned destination_field :8; /* APIC ID (in physical mode) or MDA (in logical) of destination processor */
+ };
+} IcrHReg;
+
+
+typedef enum e_icr_dest_shorthand
+{
+ NO_SHORTHAND = 0,
+ SELF = 1,
+ ALL_INCLUDING_SELF = 2,
+ ALL_EXCLUDING_SELF = 3
+} icr_dest_shorthand;
+
+typedef enum e_icr_deliv_mode
+{
+ FIXED = 0,
+ LOWEST_PRIORITY = 1,
+ SMI = 2,
+ NMI = 4,
+ INIT = 5,
+ STARTUP = 6,
+} icr_deliv_mode;
+
+typedef enum e_icr_dest_mode
+{
+ PHYSICAL = 0,
+ LOGICAL = 1
+} icr_dest_mode;
+
+typedef enum e_icr_deliv_status
+{
+ IDLE = 0,
+ SEND_PENDING = 1
+} icr_deliv_status;
+
+typedef enum e_icr_level
+{
+ DE_ASSERT = 0,
+ ASSERT = 1
+} icr_level;
+
+typedef enum e_irc_trigger_mode
+{
+ EDGE = 0,
+ LEVEL = 1
+} irc_trigger_mode;
+
+
typedef struct ApicLocalUnit {
ApicReg reserved0; /* 0x000 */
ApicReg reserved1; /* 0x010 */
- ApicReg apic_id; /* 0x020 */
+ ApicReg apic_id; /* 0x020. Hardware ID of current processor */
ApicReg version; /* 0x030 */
ApicReg reserved4; /* 0x040 */
ApicReg reserved5; /* 0x050 */
@@ -84,8 +173,8 @@ typedef struct ApicLocalUnit {
ApicReg error_status; /* 0x280 */
ApicReg reserved28[6]; /* 0x290 */
ApicReg lvt_cmci; /* 0x2f0 */
- ApicReg icr_low; /* 0x300 */
- ApicReg icr_high; /* 0x310 */
+ IcrLReg icr_low; /* 0x300. Store the information to send an IPI (Inter-processor Interrupt) */
+ IcrHReg icr_high; /* 0x310. Store the IPI destination */
ApicReg lvt_timer; /* 0x320 */
ApicReg lvt_thermal; /* 0x330 */
ApicReg lvt_performance_monitor; /* 0x340 */
@@ -104,6 +193,7 @@ typedef struct ApicLocalUnit {
typedef struct IoApicData {
uint8_t apic_id;
+ uint8_t ngsis;
uint32_t addr;
uint32_t gsi_base;
ApicIoUnit *ioapic;
@@ -138,33 +228,46 @@ void apic_add_cpu(uint16_t apic_id);
void apic_lapic_init(ApicLocalUnit* lapic_ptr);
void apic_add_ioapic(struct IoApicData);
void apic_add_irq_override(struct IrqOverrideData irq_over);
+void apic_send_ipi(unsigned dest_shorthand, unsigned deliv_mode, unsigned dest_mode, unsigned level, unsigned trig_mode, unsigned vector, unsigned dest_id);
IrqOverrideData *acpi_get_irq_override(uint8_t gsi);
-uint16_t apic_get_cpu_apic_id(int kernel_id);
+int apic_get_cpu_apic_id(int kernel_id);
+int apic_get_cpu_kernel_id(uint16_t apic_id);
volatile ApicLocalUnit* apic_get_lapic(void);
struct IoApicData *apic_get_ioapic(int kernel_id);
uint8_t apic_get_numcpus(void);
uint8_t apic_get_num_ioapics(void);
-uint16_t apic_get_current_cpu(void);
+int apic_get_current_cpu(void);
void apic_print_info(void);
int apic_refit_cpulist(void);
+void apic_generate_cpu_id_lut(void);
+int apic_get_total_gsis(void);
void picdisable(void);
void lapic_eoi(void);
void ioapic_irq_eoi(int pin);
+void lapic_setup(void);
+void lapic_disable(void);
+void lapic_enable(void);
void lapic_enable_timer(void);
-void ioapic_mask_irqs(void);
+void calibrate_lapic_timer(void);
void ioapic_toggle(int pin, int mask);
void ioapic_configure(void);
+void hpet_init(void);
+void hpet_udelay(uint32_t us);
+void hpet_mdelay(uint32_t ms);
+
extern int timer_pin;
extern void intnull(int unit);
extern volatile ApicLocalUnit* lapic;
-extern inline void mask_irq (unsigned int irq_nr);
-extern inline void unmask_irq (unsigned int irq_nr);
+extern int cpu_id_lut[];
+extern uint32_t *hpet_addr;
#endif
#define APIC_IO_UNIT_ID 0x00
#define APIC_IO_VERSION 0x01
+# define APIC_IO_VERSION_SHIFT 0
+# define APIC_IO_ENTRIES_SHIFT 16
#define APIC_IO_REDIR_LOW(int_pin) (0x10+(int_pin)*2)
#define APIC_IO_REDIR_HIGH(int_pin) (0x11+(int_pin)*2)
@@ -174,9 +277,13 @@ extern inline void unmask_irq (unsigned int irq_nr);
# define IMCR_USE_PIC 0
# define IMCR_USE_APIC 1
+#define LAPIC_LOW_PRIO 0x100
+#define LAPIC_NMI 0x400
+#define LAPIC_EXTINT 0x700
+#define LAPIC_LEVEL_TRIGGERED 0x8000
+
#define LAPIC_ENABLE 0x100
#define LAPIC_FOCUS 0x200
-#define LAPIC_NMI 0x400
#define LAPIC_ENABLE_DIRECTED_EOI 0x1000
#define LAPIC_DISABLE 0x10000
#define LAPIC_TIMER_PERIODIC 0x20000
@@ -187,7 +294,7 @@ extern inline void unmask_irq (unsigned int irq_nr);
#define LAPIC_TIMER_BASEDIV 0x100000
#define LAPIC_HAS_DIRECTED_EOI 0x1000000
-#define NINTR 24
+#define NINTR 64 /* Max 32 GSIs on each of two IOAPICs */
#define IOAPIC_FIXED 0
#define IOAPIC_PHYSICAL 0
#define IOAPIC_LOGICAL 1
@@ -200,6 +307,11 @@ extern inline void unmask_irq (unsigned int irq_nr);
#define IOAPIC_MASK_ENABLED 0
#define IOAPIC_MASK_DISABLED 1
+#define APIC_MSR 0x1b
+#define APIC_MSR_BSP 0x100 /* Processor is a BSP */
+#define APIC_MSR_X2APIC 0x400 /* LAPIC is in x2APIC mode */
+#define APIC_MSR_ENABLE 0x800 /* LAPIC is enabled */
+
/* Set or clear a bit in a 255-bit APIC mask register.
These registers are spread through eight 32-bit registers. */
#define APIC_SET_MASK_BIT(reg, bit) \
@@ -207,5 +319,19 @@ extern inline void unmask_irq (unsigned int irq_nr);
#define APIC_CLEAR_MASK_BIT(reg, bit) \
((reg)[(bit) >> 5].r &= ~(1 << ((bit) & 0x1f)))
+#ifndef __ASSEMBLER__
+
+#ifdef APIC
+static inline void mask_irq (unsigned int irq_nr) {
+ ioapic_toggle(irq_nr, IOAPIC_MASK_DISABLED);
+}
+
+static inline void unmask_irq (unsigned int irq_nr) {
+ ioapic_toggle(irq_nr, IOAPIC_MASK_ENABLED);
+}
+#endif
+
+#endif /* !__ASSEMBLER__ */
+
#endif /*_IMPS_APIC_*/
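The IcrLReg/IcrHReg unions and the delivery-mode/level/trigger enums above let apic_send_ipi() compose an ICR write field by field instead of or-ing magic numbers together. As a rough sketch of how they combine for the classic INIT-then-startup-IPI wakeup (the kernel's real AP bring-up lives in smp.c, which is not part of this excerpt; tramp is an assumed 4 KiB-aligned physical address of the real-mode trampoline):

    /* Sketch only: wake the AP whose LAPIC ID is apic_id. */
    apic_send_ipi(NO_SHORTHAND, INIT, PHYSICAL, ASSERT, LEVEL, 0, apic_id);
    hpet_mdelay(10);                      /* give INIT time to take effect */
    apic_send_ipi(NO_SHORTHAND, STARTUP, PHYSICAL, ASSERT, EDGE,
                  tramp >> 12, apic_id);  /* SIPI vector = trampoline page number */
    hpet_udelay(200);
    apic_send_ipi(NO_SHORTHAND, STARTUP, PHYSICAL, ASSERT, EDGE,
                  tramp >> 12, apic_id);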
diff --git a/i386/i386/ast_check.c b/i386/i386/ast_check.c
index f3e1c350..61cd5e87 100644
--- a/i386/i386/ast_check.c
+++ b/i386/i386/ast_check.c
@@ -32,22 +32,25 @@
* Initial i386 implementation does nothing.
*/
+#include <kern/ast.h>
#include <kern/processor.h>
+#include <kern/smp.h>
+#include <machine/cpu_number.h>
+#include <machine/apic.h>
/*
* Initialize for remote invocation of ast_check.
*/
-void init_ast_check(processor)
- const processor_t processor;
+void init_ast_check(const processor_t processor)
{
}
/*
* Cause remote invocation of ast_check. Caller is at splsched().
*/
-void cause_ast_check(processor)
- const processor_t processor;
+void cause_ast_check(const processor_t processor)
{
+ smp_remote_ast(apic_get_cpu_apic_id(processor->slot_num));
}
#endif /* NCPUS > 1 */
diff --git a/i386/i386/copy_user.h b/i386/i386/copy_user.h
new file mode 100644
index 00000000..3d1c7278
--- /dev/null
+++ b/i386/i386/copy_user.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef COPY_USER_H
+#define COPY_USER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <machine/locore.h>
+#include <mach/message.h>
+
+/*
+ * The copyin_32to64() and copyout_64to32() routines are meant for data types
+ * whose size differs between kernel and user space. They should be independent
+ * of endianness and can hopefully be reused on other archs in the future.
+ * Such types are e.g.:
+ * - port names vs port pointers, on a 64-bit kernel
+ * - memory addresses, on a 64-bit kernel and 32-bit user
+ */
+
+static inline int copyin_32to64(const uint32_t *uaddr, uint64_t *kaddr)
+{
+ uint32_t rkaddr;
+ int ret;
+ ret = copyin(uaddr, &rkaddr, sizeof(uint32_t));
+ if (ret)
+ return ret;
+ *kaddr = rkaddr;
+ return 0;
+}
+
+static inline int copyout_64to32(const uint64_t *kaddr, uint32_t *uaddr)
+{
+ uint32_t rkaddr=*kaddr;
+ return copyout(&rkaddr, uaddr, sizeof(uint32_t));
+}
+
+static inline int copyin_address(const rpc_vm_offset_t *uaddr, vm_offset_t *kaddr)
+{
+#ifdef USER32
+ return copyin_32to64(uaddr, kaddr);
+#else /* USER32 */
+ return copyin(uaddr, kaddr, sizeof(*uaddr));
+#endif /* USER32 */
+}
+
+static inline int copyout_address(const vm_offset_t *kaddr, rpc_vm_offset_t *uaddr)
+{
+#ifdef USER32
+ return copyout_64to32(kaddr, uaddr);
+#else /* USER32 */
+ return copyout(kaddr, uaddr, sizeof(*kaddr));
+#endif /* USER32 */
+}
+
+static inline int copyin_port(const mach_port_name_t *uaddr, mach_port_t *kaddr)
+{
+#ifdef __x86_64__
+ return copyin_32to64(uaddr, kaddr);
+#else /* __x86_64__ */
+ return copyin(uaddr, kaddr, sizeof(*uaddr));
+#endif /* __x86_64__ */
+}
+
+static inline int copyout_port(const mach_port_t *kaddr, mach_port_name_t *uaddr)
+{
+#ifdef __x86_64__
+ return copyout_64to32(kaddr, uaddr);
+#else /* __x86_64__ */
+ return copyout(kaddr, uaddr, sizeof(*kaddr));
+#endif /* __x86_64__ */
+}
+
+#if defined(__x86_64__) && defined(USER32)
+/* For 32-bit userland, kernel and userland messages are not the same size. */
+size_t msg_usize(const mach_msg_header_t *kmsg);
+#else
+static inline size_t msg_usize(const mach_msg_header_t *kmsg)
+{
+ return kmsg->msgh_size;
+}
+#endif /* __x86_64__ && USER32 */
+
+#endif /* COPY_USER_H */
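These helpers exist because, with a 64-bit kernel under USER32, the user hands in 4-byte values where the kernel works with 8-byte ones (and, as the header comment notes, port names stay 32 bits wide while in-kernel port pointers do not). A short usage sketch for copyin_address(); user_ptr is a placeholder for a user-space pointer received through a trap argument:

    vm_offset_t kaddr;                      /* kernel-width value */

    if (copyin_address(user_ptr, &kaddr))   /* user_ptr: const rpc_vm_offset_t * */
        return KERN_INVALID_ARGUMENT;       /* user memory was not readable */
    /* kaddr now holds the caller's address: zero-extended from 32 bits
       under USER32, copied verbatim otherwise. */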
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 9aef6370..67c19e9b 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -32,7 +32,8 @@
#if NCPUS > 1
-/* More-specific code must define cpu_number() and CPU_NUMBER. */
+#define MY(stm) %gs:PERCPU_##stm
+
#ifdef __i386__
#define CX(addr, reg) addr(,reg,4)
#endif
@@ -40,19 +41,79 @@
#define CX(addr, reg) addr(,reg,8)
#endif
-/* XXX For now */
-#define CPU_NUMBER(reg) movl $0,reg
-#define cpu_number() 0
+#define CPU_NUMBER_NO_STACK(reg) \
+ movl %cs:lapic, reg ;\
+ movl %cs:APIC_ID(reg), reg ;\
+ shrl $24, reg ;\
+ movl %cs:CX(cpu_id_lut, reg), reg ;\
+
+#ifdef __i386__
+/* Never call CPU_NUMBER_NO_GS(%esi) */
+#define CPU_NUMBER_NO_GS(reg) \
+ pushl %esi ;\
+ pushl %eax ;\
+ pushl %ebx ;\
+ pushl %ecx ;\
+ pushl %edx ;\
+ movl $1, %eax ;\
+ cpuid ;\
+ shrl $24, %ebx ;\
+ movl %cs:CX(cpu_id_lut, %ebx), %esi ;\
+ popl %edx ;\
+ popl %ecx ;\
+ popl %ebx ;\
+ popl %eax ;\
+ movl %esi, reg ;\
+ popl %esi
+#endif
+#ifdef __x86_64__
+/* Never call CPU_NUMBER_NO_GS(%esi) */
+#define CPU_NUMBER_NO_GS(reg) \
+ pushq %rsi ;\
+ pushq %rax ;\
+ pushq %rbx ;\
+ pushq %rcx ;\
+ pushq %rdx ;\
+ movl $1, %eax ;\
+ cpuid ;\
+ shrl $24, %ebx ;\
+ movl %cs:CX(cpu_id_lut, %ebx), %esi ;\
+ popq %rdx ;\
+ popq %rcx ;\
+ popq %rbx ;\
+ popq %rax ;\
+ movl %esi, reg ;\
+ popq %rsi
+#endif
+
+#define CPU_NUMBER(reg) \
+ movl MY(CPU_ID), reg;
+
+#ifndef __ASSEMBLER__
+#include <kern/cpu_number.h>
+#include <i386/apic.h>
+#include <i386/percpu.h>
+
+static inline int cpu_number_slow(void)
+{
+ return cpu_id_lut[apic_get_current_cpu()];
+}
+
+static inline int cpu_number(void)
+{
+ return percpu_get(int, cpu_id);
+}
+#endif
#else /* NCPUS == 1 */
+#define MY(stm) (percpu_array + PERCPU_##stm)
+
+#define CPU_NUMBER_NO_STACK(reg)
+#define CPU_NUMBER_NO_GS(reg)
#define CPU_NUMBER(reg)
#define CX(addr,reg) addr
#endif /* NCPUS == 1 */
-#ifndef __ASSEMBLER__
-#include "kern/cpu_number.h"
-#endif
-
#endif /* _I386_CPU_NUMBER_H_ */
diff --git a/i386/i386/cpuboot.S b/i386/i386/cpuboot.S
new file mode 100644
index 00000000..7e6c4770
--- /dev/null
+++ b/i386/i386/cpuboot.S
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2022 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if NCPUS > 1
+#include <mach/machine/asm.h>
+#include <i386/i386asm.h>
+#include <i386/proc_reg.h>
+#include <i386/apic.h>
+#include <i386/cpu_number.h>
+#include <i386/seg.h>
+#include <i386/gdt.h>
+
+#define M(addr) (addr - apboot)
+#define CR0_CLEAR_FLAGS_CACHE_ENABLE (CR0_CD | CR0_NW)
+#define CR0_SET_FLAGS (CR0_CLEAR_FLAGS_CACHE_ENABLE | CR0_PE)
+#define CR0_CLEAR_FLAGS (CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_TS | CR0_EM | CR0_MP)
+#define BOOT_CS 0x8
+#define BOOT_DS 0x10
+
+.data
+
+.align 16
+apboot_idt_ptr:
+ .long 0
+.align 16
+ .word 0
+apboot_gdt_descr:
+ .word 14*8-1
+ .long apboot_gdt - KERNELBASE
+.align 16
+apboot_gdt:
+ /* NULL segment = 0x0 */
+ .quad 0
+
+ /* KERNEL_CS = 0x8 */
+ .word 0xffff /* Segment limit first 0-15 bits*/
+ .word (-KERNELBASE) & 0xffff /*Base first 0-15 bits*/
+ .byte ((-KERNELBASE) >> 16) & 0xff /*Base 16-23 bits */
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P /*Access byte */
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* High 4 bits */
+ .byte ((-KERNELBASE) >> 24) & 0xff /*Base 24-31 bits */
+
+ /* KERNEL_DS = 0x10 */
+ .word 0xffff /*Segment limit */
+ .word (-KERNELBASE) & 0xffff /*Base first 0-15 bits*/
+ .byte ((-KERNELBASE) >> 16) & 0xff
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P /*Access byte*/
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* High 4 bits */
+ .byte ((-KERNELBASE) >> 24) & 0xff /*Base 24-31 bits */
+
+ /* LDT = 0x18 */
+ .quad 0
+
+ /* TSS = 0x20 */
+ .quad 0
+
+ /* USER_LDT = 0x28 */
+ .quad 0
+
+ /* USER_TSS = 0x30 */
+ .quad 0
+
+ /* LINEAR = 0x38 */
+ .quad 0
+
+ /* FPREGS = 0x40 */
+ .quad 0
+
+ /* USER_GDT = 0x48 and 0x50 */
+ .quad 0
+ .quad 0
+
+ /* USER_TSS64 = 0x58 */
+ .quad 0
+
+ /* USER_TSS64 = 0x60 */
+ .quad 0
+
+ /* boot GS = 0x68 */
+ .word 0xffff
+apboot_percpu_low:
+ .word 0
+apboot_percpu_med:
+ .byte 0
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
+apboot_percpu_high:
+ .byte 0
+
+.globl apboot, apbootend, gdt_descr_tmp, apboot_jmp_offset
+.align 16
+.code16
+
+apboot:
+_apboot:
+ /* This is now address CS:0 in real mode */
+
+ /* Set data seg same as code seg */
+ mov %cs, %dx
+ mov %dx, %ds
+
+ cli
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ mov %ax, %es
+ mov %ax, %fs
+ mov %ax, %gs
+ mov %ax, %ss
+
+ lgdt M(gdt_descr_tmp)
+
+ movl %cr0, %eax
+ andl $~CR0_CLEAR_FLAGS, %eax
+ orl $CR0_SET_FLAGS, %eax
+ movl %eax, %cr0
+
+ /* ljmpl with relocation from machine_init */
+ .byte 0x66
+ .byte 0xea
+apboot_jmp_offset:
+ .long M(0f)
+ .word BOOT_CS
+
+0:
+ .code32
+ /* Protected mode! */
+ movw $BOOT_DS, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+
+ lgdtl apboot_gdt_descr - KERNELBASE
+ ljmpl $KERNEL_CS, $1f
+1:
+ xorl %eax, %eax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+ movw $KERNEL_DS, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+ movw %ax, %ss
+
+ /* Get CPU number */
+ movl $1, %eax
+ cpuid
+ shrl $24, %ebx
+ movl %cs:CX(cpu_id_lut, %ebx), %ecx
+
+ /* Access per_cpu area */
+ movl %ecx,%eax
+ movl $PC_SIZE,%ebx
+ mul %ebx
+ addl $percpu_array - KERNELBASE, %eax
+
+ /* Record our cpu number */
+ movl %ecx, (PERCPU_CPU_ID + KERNELBASE)(%eax)
+
+ /* Set up temporary percpu descriptor */
+ movw %ax, apboot_percpu_low
+ shr $16, %eax
+ movb %al, apboot_percpu_med
+ shr $8, %ax
+ movb %al, apboot_percpu_high
+
+ movw $PERCPU_DS, %ax
+ movw %ax, %gs
+
+ /* Load null Interrupt descriptor table */
+ mov apboot_idt_ptr, %ebx
+ lidt (%ebx)
+
+ /* Enable local apic in xAPIC mode */
+ xorl %eax, %eax
+ xorl %edx, %edx
+ movl $APIC_MSR, %ecx
+ rdmsr
+ orl $APIC_MSR_ENABLE, %eax
+ andl $(~(APIC_MSR_BSP | APIC_MSR_X2APIC)), %eax
+ movl $APIC_MSR, %ecx
+ wrmsr
+
+ /* Load int_stack_top[cpu] -> esp */
+ CPU_NUMBER_NO_STACK(%edx)
+ movl CX(EXT(int_stack_top), %edx), %esp
+
+ /* Ensure stack alignment */
+ andl $0xfffffff0, %esp
+
+ /* Reset EFLAGS to a known state */
+ pushl $0
+ popfl
+
+ /* Finish the cpu configuration */
+ call EXT(cpu_ap_main)
+
+ /* NOT REACHED */
+ hlt
+
+.align 16
+ .word 0
+gdt_descr_tmp:
+ .short 3*8-1
+ .long M(gdt_tmp)
+
+.align 16
+gdt_tmp:
+ /* 0 */
+ .quad 0
+ /* BOOT_CS */
+ .word 0xffff
+ .word 0x0000
+ .byte 0x00
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
+ .byte 0x00
+ /* BOOT_DS */
+ .word 0xffff
+ .word 0x0000
+ .byte 0x00
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
+ .byte 0x00
+
+_apbootend:
+apbootend:
+#endif
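
The per-CPU descriptor patching in the trampoline above amounts to the following computation. This is only an illustrative C sketch, not code from the patch; percpu_array, cpu_id_lut and KERNELBASE are the symbols referenced in the assembly, and the addresses are physical because paging is still off at that point:

	/* Sketch only: each AP maps its APIC ID to a struct percpu slot and
	 * splices that slot's physical base into the temporary PERCPU_DS GDT
	 * entry (apboot_percpu_low/med/high) before loading it into %gs. */
	static unsigned long ap_percpu_phys_base(unsigned apic_id)
	{
		unsigned cpu = cpu_id_lut[apic_id];	/* APIC ID -> CPU number */
		return (unsigned long) &percpu_array[cpu] - KERNELBASE;	/* virtual -> physical */
	}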
diff --git a/i386/i386/cswitch.S b/i386/i386/cswitch.S
index 718c8aac..2dee309b 100644
--- a/i386/i386/cswitch.S
+++ b/i386/i386/cswitch.S
@@ -29,6 +29,7 @@
#include <i386/proc_reg.h>
#include <i386/i386asm.h>
#include <i386/cpu_number.h>
+#include <i386/gdt.h>
/*
* Context switch routines for i386.
@@ -40,7 +41,7 @@ ENTRY(Load_context)
lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%edx
/* point to stack top */
CPU_NUMBER(%eax)
- movl %ecx,CX(EXT(active_stacks),%eax) /* store stack address */
+ movl %ecx,MY(ACTIVE_STACK) /* store stack address */
movl %edx,CX(EXT(kernel_stack),%eax) /* store stack top */
movl KSS_ESP(%ecx),%esp /* switch stacks */
@@ -57,8 +58,7 @@ ENTRY(Load_context)
*/
ENTRY(Switch_context)
- CPU_NUMBER(%edx)
- movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack */
+ movl MY(ACTIVE_STACK),%ecx /* get old kernel stack */
movl %ebx,KSS_EBX(%ecx) /* save registers */
movl %ebp,KSS_EBP(%ecx)
@@ -78,8 +78,9 @@ ENTRY(Switch_context)
lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%ebx
/* point to stack top */
- movl %esi,CX(EXT(active_threads),%edx) /* new thread is active */
- movl %ecx,CX(EXT(active_stacks),%edx) /* set current stack */
+ CPU_NUMBER(%edx)
+ movl %esi,MY(ACTIVE_THREAD) /* new thread is active */
+ movl %ecx,MY(ACTIVE_STACK) /* set current stack */
movl %ebx,CX(EXT(kernel_stack),%edx) /* set stack top */
movl KSS_ESP(%ecx),%esp /* switch stacks */
@@ -109,8 +110,7 @@ ENTRY(Thread_continue)
* has no FPU state)
*/
ENTRY(switch_to_shutdown_context)
- CPU_NUMBER(%edx)
- movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack */
+ movl MY(ACTIVE_STACK),%ecx /* get old kernel stack */
movl %ebx,KSS_EBX(%ecx) /* save registers */
movl %ebp,KSS_EBP(%ecx)
movl %edi,KSS_EDI(%ecx)
@@ -124,8 +124,9 @@ ENTRY(switch_to_shutdown_context)
movl 4(%esp),%ebx /* get routine to run next */
movl 8(%esp),%esi /* get its argument */
- movl EXT(interrupt_stack)(,%edx,4),%ecx /* point to its interrupt stack */
- lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */
+ CPU_NUMBER(%edx)
+ movl CX(EXT(int_stack_base),%edx),%ecx /* point to its interrupt stack */
+ lea -4+INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */
pushl %eax /* push thread */
call EXT(thread_dispatch) /* reschedule thread */
diff --git a/i386/i386/db_disasm.c b/i386/i386/db_disasm.c
index 4afbcf3e..303b4621 100644
--- a/i386/i386/db_disasm.c
+++ b/i386/i386/db_disasm.c
@@ -33,6 +33,7 @@
#include <machine/db_machdep.h>
#include <ddb/db_access.h>
+#include <ddb/db_examine.h>
#include <ddb/db_output.h>
#include <ddb/db_sym.h>
@@ -162,7 +163,7 @@ struct inst db_inst_0f0x[] = {
/*08*/ { "invd", FALSE, NONE, 0, 0 },
/*09*/ { "wbinvd",FALSE, NONE, 0, 0 },
/*0a*/ { "", FALSE, NONE, 0, 0 },
-/*0b*/ { "", FALSE, NONE, 0, 0 },
+/*0b*/ { "ud2", FALSE, NONE, 0, 0 },
/*0c*/ { "", FALSE, NONE, 0, 0 },
/*0d*/ { "", FALSE, NONE, 0, 0 },
/*0e*/ { "", FALSE, NONE, 0, 0 },
@@ -862,7 +863,7 @@ int db_lengths[] = {
/*
* Read address at location and return updated location.
*/
-db_addr_t
+static db_addr_t
db_read_address(
db_addr_t loc,
int short_addr,
@@ -948,7 +949,7 @@ db_read_address(
return loc;
}
-void
+static void
db_print_address(
const char * seg,
int size,
@@ -980,7 +981,7 @@ db_print_address(
* Disassemble floating-point ("escape") instruction
* and return updated location.
*/
-db_addr_t
+static db_addr_t
db_disasm_esc(
db_addr_t loc,
int inst,
@@ -1090,6 +1091,12 @@ db_disasm(
int len;
struct i_addr address;
+#ifdef __x86_64__
+	/* The instruction set decoding needs an update; avoid showing bogus output. */
+ db_printf("TODO\n");
+ return loc+1;
+#endif
+
get_value_inc(inst, loc, 1, FALSE, task);
if (db_disasm_16) {
short_addr = TRUE;
diff --git a/i386/i386/db_interface.c b/i386/i386/db_interface.c
index 0ad1fde5..483991d6 100644
--- a/i386/i386/db_interface.c
+++ b/i386/i386/db_interface.c
@@ -100,7 +100,9 @@ void db_load_context(pcb_t pcb)
}
void cpu_interrupt_to_db(int i){
- printf("TODO: cpu_interrupt_to_db\n");
+#if MACH_KDB && NCPUS > 1
+ db_on(i);
+#endif
}
void db_get_debug_state(
@@ -117,8 +119,8 @@ kern_return_t db_set_debug_state(
int i;
for (i = 0; i <= 3; i++)
- if (state->dr[i] < VM_MIN_ADDRESS
- || state->dr[i] >= VM_MAX_ADDRESS)
+ if (state->dr[i] < VM_MIN_USER_ADDRESS
+ || state->dr[i] >= VM_MAX_USER_ADDRESS)
return KERN_INVALID_ARGUMENT;
pcb->ims.ids = *state;
@@ -234,7 +236,7 @@ db_clear_hw_watchpoint(
/*
* Print trap reason.
*/
-void
+static void
kdbprinttrap(
int type,
int code)
@@ -330,12 +332,13 @@ kdb_trap(
regs->ebp = ddb_regs.ebp;
regs->esi = ddb_regs.esi;
regs->edi = ddb_regs.edi;
- regs->es = ddb_regs.es & 0xffff;
regs->cs = ddb_regs.cs & 0xffff;
+#if !defined(__x86_64__) || defined(USER32)
+ regs->es = ddb_regs.es & 0xffff;
regs->ds = ddb_regs.ds & 0xffff;
regs->fs = ddb_regs.fs & 0xffff;
regs->gs = ddb_regs.gs & 0xffff;
-
+#endif
if ((type == T_INT3) &&
(db_get_task_value(regs->eip, BKPT_SIZE, FALSE, TASK_NULL)
== BKPT_INST))
@@ -399,11 +402,12 @@ kdb_kentry(
ddb_regs.esi = is->rsi;
ddb_regs.edi = is->rdi;
#endif
+#if !defined(__x86_64__) || defined(USER32)
ddb_regs.ds = is->ds;
ddb_regs.es = is->es;
ddb_regs.fs = is->fs;
ddb_regs.gs = is->gs;
-
+#endif
cnpollc(TRUE);
db_task_trap(-1, 0, (ddb_regs.cs & 0x3) != 0);
cnpollc(FALSE);
@@ -428,10 +432,12 @@ kdb_kentry(
is->rsi = ddb_regs.esi;
is->rdi = ddb_regs.edi;
#endif
+#if !defined(__x86_64__) || defined(USER32)
is->ds = ddb_regs.ds & 0xffff;
is->es = ddb_regs.es & 0xffff;
is->fs = ddb_regs.fs & 0xffff;
is->gs = ddb_regs.gs & 0xffff;
+#endif
}
#if NCPUS > 1
db_leave();
@@ -442,7 +448,7 @@ kdb_kentry(
boolean_t db_no_vm_fault = TRUE;
-int
+static int
db_user_to_phys_address(
const task_t task,
vm_offset_t addr,
@@ -535,6 +541,7 @@ db_read_bytes(
size -= n;
addr += n;
copy_from_phys(phys_addr, (vm_offset_t) data, n);
+ data += n;
}
return TRUE;
}
diff --git a/i386/i386/db_interface.h b/i386/i386/db_interface.h
index f0a748f9..69a277ae 100644
--- a/i386/i386/db_interface.h
+++ b/i386/i386/db_interface.h
@@ -32,6 +32,10 @@ extern boolean_t kdb_trap (
int code,
struct i386_saved_state *regs);
+struct int_regs;
+
+extern void kdb_kentry(struct int_regs *int_regs);
+
extern boolean_t db_read_bytes (
vm_offset_t addr,
int size,
@@ -111,6 +115,15 @@ db_i386_reg_value(
int flag,
struct db_var_aux_param *ap);
+void feep(void);
+
+/*
+ * Put a debugging character on the screen.
+ * LOC=0 means put it in the bottom right corner, LOC=1 means put it
+ * one column to the left, etc.
+ */
+void kd_debug_put(int loc, char c);
+
#endif
extern void db_get_debug_state(
diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c
index 5e2bef8b..0ef72518 100644
--- a/i386/i386/db_trace.c
+++ b/i386/i386/db_trace.c
@@ -37,6 +37,7 @@
#include <machine/machspl.h>
#include <machine/db_interface.h>
#include <machine/db_trace.h>
+#include <machine/cpu_number.h>
#include <i386at/model_dep.h>
#include <ddb/db_access.h>
@@ -53,10 +54,12 @@
*/
struct db_variable db_regs[] = {
{ "cs", (long *)&ddb_regs.cs, db_i386_reg_value },
+#if !defined(__x86_64__) || defined(USER32)
{ "ds", (long *)&ddb_regs.ds, db_i386_reg_value },
{ "es", (long *)&ddb_regs.es, db_i386_reg_value },
{ "fs", (long *)&ddb_regs.fs, db_i386_reg_value },
{ "gs", (long *)&ddb_regs.gs, db_i386_reg_value },
+#endif
{ "ss", (long *)&ddb_regs.ss, db_i386_reg_value },
{ "eax",(long *)&ddb_regs.eax, db_i386_reg_value },
{ "ecx",(long *)&ddb_regs.ecx, db_i386_reg_value },
@@ -68,6 +71,16 @@ struct db_variable db_regs[] = {
{ "edi",(long *)&ddb_regs.edi, db_i386_reg_value },
{ "eip",(long *)&ddb_regs.eip, db_i386_reg_value },
{ "efl",(long *)&ddb_regs.efl, db_i386_reg_value },
+#ifdef __x86_64__
+ { "r8", (long *)&ddb_regs.r8, db_i386_reg_value },
+ { "r9", (long *)&ddb_regs.r9, db_i386_reg_value },
+ { "r10",(long *)&ddb_regs.r10, db_i386_reg_value },
+ { "r11",(long *)&ddb_regs.r11, db_i386_reg_value },
+ { "r12",(long *)&ddb_regs.r12, db_i386_reg_value },
+ { "r13",(long *)&ddb_regs.r13, db_i386_reg_value },
+ { "r14",(long *)&ddb_regs.r14, db_i386_reg_value },
+ { "r15",(long *)&ddb_regs.r15, db_i386_reg_value },
+#endif
};
struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
@@ -114,7 +127,7 @@ struct i386_kregs {
{ 0 },
};
-long *
+static long *
db_lookup_i386_kreg(
const char *name,
const long *kregp)
@@ -147,7 +160,7 @@ db_i386_reg_value(
if (thread == current_thread()) {
if (ddb_regs.cs & 0x3)
dp = vp->valuep;
- else if (ON_INT_STACK(ddb_regs.ebp))
+ else if (ON_INT_STACK(ddb_regs.ebp, cpu_number()))
db_error("cannot get/set user registers in nested interrupt\n");
}
} else {
@@ -182,7 +195,7 @@ db_i386_reg_value(
*valuep = *dp;
}
-void
+static void
db_find_trace_symbols(void)
{
db_expr_t value;
@@ -210,7 +223,11 @@ db_find_trace_symbols(void)
*/
const int db_numargs_default = 5;
-int
+#ifdef __x86_64
+/* Args are in registers */
+#define db_numargs(fp, task) -1
+#else
+static int
db_numargs(
struct i386_frame *fp,
task_t task)
@@ -236,6 +253,7 @@ db_numargs(
}
return args;
}
+#endif
struct interrupt_frame {
struct i386_frame *if_frame; /* point to next frame */
@@ -261,7 +279,7 @@ struct interrupt_frame {
* It might be possible to dig out from the next frame up the name
* of the function that faulted, but that could get hairy.
*/
-void
+static void
db_nextframe(
struct i386_frame **lfp, /* in/out */
struct i386_frame **fp, /* in/out */
@@ -282,8 +300,15 @@ db_nextframe(
*/
saved_regs = (struct i386_saved_state *)
db_get_task_value((long)&((*fp)->f_arg0),sizeof(long),FALSE,task);
- db_printf(">>>>> %s (%d) at ",
+ db_printf(">>>>> %s (%d)",
trap_name(saved_regs->trapno), saved_regs->trapno);
+ if (saved_regs->trapno == T_PAGE_FAULT)
+ db_printf(" for %s%s%s %lx",
+ saved_regs->err & T_PF_PROT ? "P" : "",
+ saved_regs->err & T_PF_WRITE ? "W" : "",
+ saved_regs->err & T_PF_USER ? "U" : "",
+ lintokv(saved_regs->cr2));
+ db_printf(" at ");
db_task_printsym(saved_regs->eip, DB_STGY_PROC, task);
db_printf(" <<<<<\n");
*fp = (struct i386_frame *)saved_regs->ebp;
@@ -558,159 +583,4 @@ db_i386_stack_trace(
}
}
-#define CTHREADS_SUPPORT 1
-
-#if CTHREADS_SUPPORT
-
-thread_t
-db_find_kthread(
- vm_offset_t ustack_base,
- vm_size_t ustack_top,
- task_t task)
-{
- thread_t thread;
- if (task == TASK_NULL)
- task = db_current_task();
-
- queue_iterate(&task->thread_list, thread, thread_t, thread_list) {
- vm_offset_t usp = thread->pcb->iss.uesp/*ebp works*/;
- if (usp >= ustack_base && usp < ustack_top)
- return thread;
- }
- return THREAD_NULL;
-}
-
-static void db_cproc_state(
- int state,
- char s[4])
-{
- if (state == 0) {
- *s++ = 'R';
- } else {
- if (state & 1) *s++ = 'S';
- if (state & 2) *s++ = 'B';
- if (state & 4) *s++ = 'C';
- }
- *s = 0;
-}
-
-/* offsets in a cproc structure */
-/* TODO: longs? */
-const int db_cproc_next_offset = 0 * 4;
-const int db_cproc_incarnation_offset = 1 * 4;
-const int db_cproc_list_offset = 2 * 4;
-const int db_cproc_wait_offset = 3 * 4;
-const int db_cproc_context_offset = 5 * 4;
-const int db_cproc_state_offset = 7 * 4;
-const int db_cproc_stack_base_offset = 10 * 4 + sizeof(mach_msg_header_t);
-const int db_cproc_stack_size_offset = 11 * 4 + sizeof(mach_msg_header_t);
-
-/* offsets in a cproc_switch context structure */
-const int db_cprocsw_framep_offset = 3 * 4;
-const int db_cprocsw_pc_offset = 4 * 4;
-
-#include <machine/setjmp.h>
-
-extern jmp_buf_t *db_recover;
-
-void db_trace_cproc(
- vm_offset_t cproc,
- thread_t thread)
-{
- jmp_buf_t db_jmpbuf;
- jmp_buf_t *prev = db_recover;
- task_t task;
- db_addr_t pc, fp, sp;
-
- task = (thread == THREAD_NULL)? TASK_NULL: thread->task;
-
- if (!_setjmp(db_recover = &db_jmpbuf)) {
- char pstate[4];
- unsigned int s, w, n, c, cth;
-
- s = db_get_task_value(cproc + db_cproc_state_offset, 4, FALSE, task);
- w = db_get_task_value(cproc + db_cproc_wait_offset, 4, FALSE, task);
- n = db_get_task_value(cproc + db_cproc_next_offset, 4, FALSE, task);
- c = db_get_task_value(cproc + db_cproc_context_offset, 4, FALSE, task);
- cth = db_get_task_value(cproc + db_cproc_incarnation_offset, 4, FALSE, task);
-
- db_cproc_state(s, pstate);
-
- db_printf("CThread %x (cproc %x) %s", cth, cproc, pstate);
- if (w) db_printf(" awaits %x", w);
- if (n) db_printf(" next %x", n);
- db_printf("\n");
-
- if ((s != 0) && (c != 0)) {
- pc = db_get_task_value(c + db_cprocsw_pc_offset, 4, FALSE, task);
- fp = c + db_cprocsw_framep_offset;
- sp = 0; // TODO
- } else {
- db_addr_t sb;
- vm_size_t ss;
-
- sb = db_get_task_value(cproc + db_cproc_stack_base_offset, sizeof(db_expr_t), FALSE, task);
- ss = db_get_task_value(cproc + db_cproc_stack_size_offset, sizeof(db_expr_t), FALSE, task);
- db_printf(" Stack base: %x\n", sb);
- /*
- * Lessee now..
- */
- thread = db_find_kthread(sb, sb+ss, task);
- if (thread != THREAD_NULL) {
- pc = thread->pcb->iss.eip;
- fp = thread->pcb->iss.ebp;
- sp = thread->pcb->iss.uesp;
- } else {
- fp = -1;
- }
- }
-
- if (fp != -1)
- db_i386_stack_trace(thread, (struct i386_frame*)fp, sp, pc,
- -1, F_USER_TRACE);
- }
-
- db_recover = prev;
-}
-
-void db_all_cprocs(
- const task_t task,
- db_expr_t cproc_list)
-{
- jmp_buf_t db_jmpbuf;
- jmp_buf_t *prev = db_recover;
- thread_t thread;
- db_expr_t cproc, next;
-
-
- if (task != TASK_NULL) {
- thread = (thread_t) queue_first(&task->thread_list);
- } else
- thread = current_thread();
-
- if (cproc_list != 0)
- next = cproc_list;
- else
- if (!db_value_of_name("unix::cproc_list", &next)) {
- db_printf("No cprocs.\n");
- return;
- }
-
-
- while (next) {
- if (_setjmp(db_recover = &db_jmpbuf))
- break;
-
- cproc = db_get_task_value(next, 4, FALSE, TASK_NULL);
- if (cproc == 0) break;
- next = cproc + db_cproc_list_offset;
-
- db_trace_cproc(cproc, thread);
- }
-
- db_recover = prev;
-}
-
-#endif /* CTHREADS_SUPPORT */
-
#endif /* MACH_KDB */
diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index 233caa72..41d032e3 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -26,10 +26,22 @@
#include "thread.h"
#include "trap.h"
#include "debug.h"
+#include "spl.h"
void dump_ss(const struct i386_saved_state *st)
{
printf("Dump of i386_saved_state %p:\n", st);
+#if defined(__x86_64__) && ! defined(USER32)
+ printf("RAX %016lx RBX %016lx RCX %016lx RDX %016lx\n",
+ st->eax, st->ebx, st->ecx, st->edx);
+ printf("RSI %016lx RDI %016lx RBP %016lx RSP %016lx\n",
+ st->esi, st->edi, st->ebp, st->uesp);
+ printf("R8 %016lx R9 %016lx R10 %016lx R11 %016lx\n",
+ st->r8, st->r9, st->r10, st->r11);
+ printf("R12 %016lx R13 %016lx R14 %016lx R15 %016lx\n",
+ st->r12, st->r13, st->r14, st->r15);
+ printf("RIP %016lx EFLAGS %08lx\n", st->eip, st->efl);
+#else
printf("EAX %08lx EBX %08lx ECX %08lx EDX %08lx\n",
st->eax, st->ebx, st->ecx, st->edx);
printf("ESI %08lx EDI %08lx EBP %08lx ESP %08lx\n",
@@ -44,6 +56,7 @@ void dump_ss(const struct i386_saved_state *st)
st->v86_segs.v86_ds & 0xffff, st->v86_segs.v86_es & 0xffff,
st->v86_segs.v86_gs & 0xffff, st->v86_segs.v86_gs & 0xffff);
printf("EIP %08lx EFLAGS %08lx\n", st->eip, st->efl);
+#endif
printf("trapno %ld: %s, error %08lx\n",
st->trapno, trap_name(st->trapno),
st->err);
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index b47bd339..4cd31dd9 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -60,8 +60,8 @@
#include <i386/ipl.h>
#define ASSERT_IPL(L) \
{ \
- if (curr_ipl != L) { \
- printf("IPL is %d, expected %d\n", curr_ipl, L); \
+ if (curr_ipl[cpu_number()] != L) { \
+ printf("IPL is %d, expected %d\n", curr_ipl[cpu_number()], L); \
panic("fpu: wrong ipl"); \
} \
}
@@ -163,6 +163,7 @@ init_fpu(void)
if (CPU_HAS_FEATURE(CPU_FEATURE_XSAVE)) {
unsigned eax, ebx, ecx, edx;
+ unsigned xsave_cpu_features;
eax = 0xd;
ecx = 0x0;
@@ -177,29 +178,33 @@ init_fpu(void)
eax = 0xd;
ecx = 0x1;
cpuid(eax, ebx, ecx, edx);
- if (eax & CPU_FEATURE_XSAVES) {
+ xsave_cpu_features = eax;
+
+ if (xsave_cpu_features & CPU_FEATURE_XSAVES) {
+ // all states enabled by XCR0|IA32_XSS
fp_xsave_size = offsetof(struct i386_fpsave_state, xfp_save_state) + ebx;
if (fp_xsave_size < sizeof(struct i386_fpsave_state))
panic("CPU-provided xstate size %d "
"is smaller than our minimum %d!\n",
fp_xsave_size,
- sizeof(struct i386_fpsave_state));
+ (int) sizeof(struct i386_fpsave_state));
fp_save_kind = FP_XSAVES;
} else {
eax = 0xd;
ecx = 0x0;
cpuid(eax, ebx, ecx, edx);
+ // all states enabled by XCR0
fp_xsave_size = offsetof(struct i386_fpsave_state, xfp_save_state) + ebx;
if(fp_xsave_size < sizeof(struct i386_fpsave_state))
panic("CPU-provided xstate size %d "
"is smaller than our minimum %d!\n",
fp_xsave_size,
- sizeof(struct i386_fpsave_state));
+ (int) sizeof(struct i386_fpsave_state));
- if (eax & CPU_FEATURE_XSAVEOPT)
+ if (xsave_cpu_features & CPU_FEATURE_XSAVEOPT)
fp_save_kind = FP_XSAVEOPT;
- else if (eax & CPU_FEATURE_XSAVEC)
+ else if (xsave_cpu_features & CPU_FEATURE_XSAVEC)
fp_save_kind = FP_XSAVEC;
else
fp_save_kind = FP_XSAVE;
@@ -380,9 +385,8 @@ twd_fxsr_to_i387 (struct i386_xfp_save *fxsave)
* concurrent fpu_set_state or fpu_get_state.
*/
kern_return_t
-fpu_set_state(thread, state)
- const thread_t thread;
- struct i386_float_state *state;
+fpu_set_state(const thread_t thread,
+ struct i386_float_state *state)
{
pcb_t pcb = thread->pcb;
struct i386_fpsave_state *ifps;
@@ -491,9 +495,8 @@ ASSERT_IPL(SPL0);
* concurrent fpu_set_state or fpu_get_state.
*/
kern_return_t
-fpu_get_state(thread, state)
- const thread_t thread;
- struct i386_float_state *state;
+fpu_get_state(const thread_t thread,
+ struct i386_float_state *state)
{
pcb_t pcb = thread->pcb;
struct i386_fpsave_state *ifps;
@@ -901,23 +904,6 @@ ASSERT_IPL(SPL0);
ifps->fp_valid = FALSE; /* in FPU */
}
-/*
- * Allocate and initialize FP state for current thread.
- * Don't load state.
- *
- * Locking not needed; always called on the current thread.
- */
-void
-fp_state_alloc(void)
-{
- pcb_t pcb = current_thread()->pcb;
- struct i386_fpsave_state *ifps;
-
- ifps = (struct i386_fpsave_state *)kmem_cache_alloc(&ifps_cache);
- memcpy(ifps, fp_default_state, fp_xsave_size);
- pcb->ims.ifps = ifps;
-}
-
#if (defined(AT386) || defined(ATX86_64)) && !defined(MACH_XEN)
/*
* Handle a coprocessor error interrupt on the AT386.
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index fb18360e..4edd3ec5 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -35,10 +35,13 @@
#include <kern/assert.h>
#include <intel/pmap.h>
+#include <kern/cpu_number.h>
+#include <machine/percpu.h>
#include "vm_param.h"
#include "seg.h"
#include "gdt.h"
+#include "mp_desc.h"
#ifdef MACH_PV_DESCRIPTORS
/* It is actually defined in xen_boothdr.S */
@@ -46,37 +49,47 @@ extern
#endif /* MACH_PV_DESCRIPTORS */
struct real_descriptor gdt[GDTSZ];
-void
-gdt_init(void)
+static void
+gdt_fill(int cpu, struct real_descriptor *mygdt)
{
/* Initialize the kernel code and data segment descriptors. */
#ifdef __x86_64__
assert(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS == 0);
- fill_gdt_descriptor(KERNEL_CS, 0, 0, ACC_PL_K|ACC_CODE_R, SZ_64);
- fill_gdt_descriptor(KERNEL_DS, 0, 0, ACC_PL_K|ACC_DATA_W, SZ_64);
+ _fill_gdt_descriptor(mygdt, KERNEL_CS, 0, 0, ACC_PL_K|ACC_CODE_R, SZ_64);
+ _fill_gdt_descriptor(mygdt, KERNEL_DS, 0, 0, ACC_PL_K|ACC_DATA_W, SZ_64);
#ifndef MACH_PV_DESCRIPTORS
- fill_gdt_descriptor(LINEAR_DS, 0, 0, ACC_PL_K|ACC_DATA_W, SZ_64);
+ _fill_gdt_descriptor(mygdt, LINEAR_DS, 0, 0, ACC_PL_K|ACC_DATA_W, SZ_64);
#endif /* MACH_PV_DESCRIPTORS */
#else
- fill_gdt_descriptor(KERNEL_CS,
+ _fill_gdt_descriptor(mygdt, KERNEL_CS,
LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
ACC_PL_K|ACC_CODE_R, SZ_32);
- fill_gdt_descriptor(KERNEL_DS,
+ _fill_gdt_descriptor(mygdt, KERNEL_DS,
LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
ACC_PL_K|ACC_DATA_W, SZ_32);
#ifndef MACH_PV_DESCRIPTORS
- fill_gdt_descriptor(LINEAR_DS,
+ _fill_gdt_descriptor(mygdt, LINEAR_DS,
0,
0xffffffff,
ACC_PL_K|ACC_DATA_W, SZ_32);
#endif /* MACH_PV_DESCRIPTORS */
+ vm_offset_t thiscpu = kvtolin(&percpu_array[cpu]);
+ _fill_gdt_descriptor(mygdt, PERCPU_DS,
+ thiscpu,
+ thiscpu + sizeof(struct percpu) - 1,
+#ifdef __x86_64__
+ ACC_PL_K|ACC_DATA_W, SZ_64
+#else
+ ACC_PL_K|ACC_DATA_W, SZ_32
+#endif
+ );
#endif
#ifdef MACH_PV_DESCRIPTORS
- unsigned long frame = kv_to_mfn(gdt);
- pmap_set_page_readonly(gdt);
+ unsigned long frame = kv_to_mfn(mygdt);
+ pmap_set_page_readonly(mygdt);
if (hyp_set_gdt(kv_to_la(&frame), GDTSZ))
panic("couldn't set gdt\n");
#endif
@@ -94,12 +107,16 @@ gdt_init(void)
{
struct pseudo_descriptor pdesc;
- pdesc.limit = sizeof(gdt)-1;
- pdesc.linear_base = kvtolin(&gdt);
+ pdesc.limit = (GDTSZ * sizeof(struct real_descriptor))-1;
+ pdesc.linear_base = kvtolin(mygdt);
lgdt(&pdesc);
}
#endif /* MACH_PV_DESCRIPTORS */
+}
+static void
+reload_segs(void)
+{
/* Reload all the segment registers from the new GDT.
We must load ds and es with 0 before loading them with KERNEL_DS
because some processors will "optimize out" the loads
@@ -114,9 +131,19 @@ gdt_init(void)
"movw %w1,%%ds\n"
"movw %w1,%%es\n"
+ "movw %w3,%%gs\n"
"movw %w1,%%ss\n"
- : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0));
+ : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0), "r" (PERCPU_DS));
#endif
+}
+
+void
+gdt_init(void)
+{
+ gdt_fill(0, gdt);
+
+ reload_segs();
+
#ifdef MACH_PV_PAGETABLES
#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
/* things now get shifted */
@@ -128,3 +155,12 @@ gdt_init(void)
#endif /* MACH_PV_PAGETABLES */
}
+#if NCPUS > 1
+void
+ap_gdt_init(int cpu)
+{
+ gdt_fill(cpu, mp_gdt[cpu]);
+
+ reload_segs();
+}
+#endif
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index 9879ad3e..c7da012a 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -77,11 +77,11 @@
/* 0x58 used by user TSS in 64bit mode */
-#ifdef __x86_64__
-#define GDTSZ sel_idx(0x60)
-#else
-#define GDTSZ sel_idx(0x58)
-#endif
+#define PERCPU_DS 0x68 /* per-cpu data mapping */
+
+#define GDTSZ sel_idx(0x70)
+
+#ifndef __ASSEMBLER__
extern struct real_descriptor gdt[GDTSZ];
@@ -115,5 +115,7 @@ extern struct real_descriptor gdt[GDTSZ];
#endif
extern void gdt_init(void);
+extern void ap_gdt_init(int cpu);
+#endif /* __ASSEMBLER__ */
#endif /* _I386_GDT_ */
diff --git a/i386/i386/hardclock.c b/i386/i386/hardclock.c
index 57259ff3..9ac4f51d 100644
--- a/i386/i386/hardclock.c
+++ b/i386/i386/hardclock.c
@@ -34,6 +34,7 @@
#include <kern/mach_clock.h>
#include <i386/thread.h>
+#include <i386/hardclock.h>
#if defined(AT386) || defined(ATX86_64)
#include <i386/ipl.h>
@@ -48,7 +49,6 @@ extern char return_to_iret[];
void
hardclock(int iunit, /* 'unit' number */
int old_ipl, /* old interrupt level */
- int irq, /* irq number */
const char *ret_addr, /* return address in interrupt handler */
struct i386_interrupt_state *regs /* saved registers */
)
diff --git a/i386/i386/hardclock.h b/i386/i386/hardclock.h
index 96ebf52d..b326c3cb 100644
--- a/i386/i386/hardclock.h
+++ b/i386/i386/hardclock.h
@@ -22,7 +22,6 @@
void hardclock(
int iunit,
int old_ipl,
- int irq,
const char *ret_addr,
struct i386_interrupt_state *regs);
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 0662aea0..e1f5c6bb 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -45,8 +45,21 @@
#include <i386/gdt.h>
#include <i386/ldt.h>
#include <i386/mp_desc.h>
+#include <i386/apic.h>
#include <i386/xen.h>
+expr CALL_AST_CHECK
+expr CALL_PMAP_UPDATE
+
+offset ApicLocalUnit lu apic_id APIC_ID
+
+offset percpu pc cpu_id PERCPU_CPU_ID
+offset percpu pc active_thread PERCPU_ACTIVE_THREAD
+offset percpu pc active_stack PERCPU_ACTIVE_STACK
+
+offset pcb pcb iss
+
+size percpu pc
offset thread th pcb
offset thread th task
@@ -78,16 +91,31 @@ size i386_kernel_state iks
size i386_exception_link iel
+#if !defined(__x86_64__) || defined(USER32)
+offset i386_saved_state r gs
+offset i386_saved_state r fs
+#endif
offset i386_saved_state r cs
offset i386_saved_state r uesp
offset i386_saved_state r eax
+offset i386_saved_state r ebx
+offset i386_saved_state r ecx
+offset i386_saved_state r edx
+offset i386_saved_state r ebp
offset i386_saved_state r trapno
offset i386_saved_state r err
offset i386_saved_state r efl R_EFLAGS
offset i386_saved_state r eip
offset i386_saved_state r cr2
offset i386_saved_state r edi
+offset i386_saved_state r esi
#ifdef __x86_64__
+offset i386_saved_state r r8
+offset i386_saved_state r r9
+offset i386_saved_state r r10
+offset i386_saved_state r r12
+offset i386_saved_state r r13
+offset i386_saved_state r r14
offset i386_saved_state r r15
#endif
@@ -95,8 +123,12 @@ offset i386_interrupt_state i eip
offset i386_interrupt_state i cs
offset i386_interrupt_state i efl
+#ifdef __x86_64__
+offset i386_tss tss rsp0
+#else
offset i386_tss tss esp0
offset i386_tss tss ss0
+#endif
offset machine_slot sub_type cpu_type
@@ -122,23 +154,15 @@ expr sizeof(pt_entry_t) PTE_SIZE
expr INTEL_PTE_PFN PTE_PFN
expr INTEL_PTE_VALID PTE_V
expr INTEL_PTE_WRITE PTE_W
+expr INTEL_PTE_PS PTE_S
expr ~INTEL_PTE_VALID PTE_INVALID
expr NPTES PTES_PER_PAGE
expr INTEL_PTE_VALID|INTEL_PTE_WRITE INTEL_PTE_KERNEL
expr IDTSZ
-expr GDTSZ
-expr LDTSZ
expr KERNEL_RING
-expr KERNEL_CS
-expr KERNEL_DS
-expr KERNEL_TSS
-#ifndef MACH_PV_DESCRIPTORS
-expr KERNEL_LDT
-#endif /* MACH_PV_DESCRIPTORS */
-
expr (VM_MIN_KERNEL_ADDRESS>>PDESHIFT)*sizeof(pt_entry_t) KERNELBASEPDE
#if MACH_KDB
@@ -166,3 +190,5 @@ offset shared_info si evtchn_mask EVTMASK
offset shared_info si vcpu_info[0].arch.cr2 CR2
#endif /* MACH_PV_PAGETABLES */
#endif /* MACH_XEN */
+
+offset mach_msg_header msgh msgh_size
diff --git a/i386/i386/idt-gen.h b/i386/i386/idt-gen.h
index f86afb41..daa6aaf2 100644
--- a/i386/i386/idt-gen.h
+++ b/i386/i386/idt-gen.h
@@ -41,7 +41,7 @@
extern struct real_gate idt[IDTSZ];
/* Fill a gate in the IDT. */
-#define fill_idt_gate(int_num, entry, selector, access, dword_count) \
- fill_gate(&idt[int_num], entry, selector, access, dword_count)
+#define fill_idt_gate(_idt, int_num, entry, selector, access, dword_count) \
+ fill_gate(&_idt[int_num], entry, selector, access, dword_count)
#endif /* _I386_IDT_ */
diff --git a/i386/i386/idt.c b/i386/i386/idt.c
index c6a778f1..caa44d71 100644
--- a/i386/i386/idt.c
+++ b/i386/i386/idt.c
@@ -25,6 +25,7 @@
#include <i386/seg.h>
#include <i386at/idt.h>
#include <i386/gdt.h>
+#include <i386/mp_desc.h>
struct real_gate idt[IDTSZ];
@@ -33,10 +34,15 @@ struct idt_init_entry
unsigned long entrypoint;
unsigned short vector;
unsigned short type;
+#ifdef __x86_64__
+ unsigned short ist;
+ unsigned short pad_0;
+#endif
};
extern struct idt_init_entry idt_inittab[];
-void idt_init(void)
+static void
+idt_fill(struct real_gate *myidt)
{
#ifdef MACH_PV_DESCRIPTORS
if (hyp_set_trap_table(kvtolin(idt_inittab)))
@@ -47,7 +53,13 @@ void idt_init(void)
/* Initialize the exception vectors from the idt_inittab. */
while (iie->entrypoint)
{
- fill_idt_gate(iie->vector, iie->entrypoint, KERNEL_CS, iie->type, 0);
+ fill_idt_gate(myidt, iie->vector, iie->entrypoint, KERNEL_CS, iie->type,
+#ifdef __x86_64__
+ iie->ist
+#else
+ 0
+#endif
+ );
iie++;
}
@@ -55,10 +67,21 @@ void idt_init(void)
{
struct pseudo_descriptor pdesc;
- pdesc.limit = sizeof(idt)-1;
- pdesc.linear_base = kvtolin(&idt);
+ pdesc.limit = (IDTSZ * sizeof(struct real_gate))-1;
+ pdesc.linear_base = kvtolin(myidt);
lidt(&pdesc);
}
#endif /* MACH_PV_DESCRIPTORS */
}
+void idt_init(void)
+{
+ idt_fill(idt);
+}
+
+#if NCPUS > 1
+void ap_idt_init(int cpu)
+{
+ idt_fill(mp_desc_table[cpu]->idt);
+}
+#endif
diff --git a/i386/i386/idt_inittab.S b/i386/i386/idt_inittab.S
index 8e92d805..fc80e21b 100644
--- a/i386/i386/idt_inittab.S
+++ b/i386/i386/idt_inittab.S
@@ -27,6 +27,7 @@
#include <i386/seg.h>
#include <i386/i386asm.h>
+#include <i386/gdt.h>
/* We'll be using macros to fill in a table in data hunk 2
diff --git a/i386/i386/io_map.c b/i386/i386/io_map.c
deleted file mode 100644
index 96062243..00000000
--- a/i386/i386/io_map.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <kern/printf.h>
-#include <mach/vm_param.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-
-extern vm_offset_t kernel_virtual_start;
-
-/*
- * Allocate and map memory for devices that may need to be mapped before
- * Mach VM is running.
- */
-vm_offset_t
-io_map(
- phys_addr_t phys_addr,
- vm_size_t size)
-{
- vm_offset_t start;
-
- if (kernel_map == VM_MAP_NULL) {
- /*
- * VM is not initialized. Grab memory.
- */
- start = kernel_virtual_start;
- kernel_virtual_start += round_page(size);
- printf("stealing kernel virtual addresses %08lx-%08lx\n", start, kernel_virtual_start);
- }
- else {
- (void) kmem_alloc_pageable(kernel_map, &start, round_page(size));
- }
- (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size),
- VM_PROT_READ|VM_PROT_WRITE);
- return (start);
-}
-
-/*
- * Allocate and map memory for devices that may need to be mapped before
- * Mach VM is running.
- *
- * This maps the all pages containing [PHYS_ADDR:PHYS_ADDR + SIZE].
- * For contiguous requests to those pages will reuse the previously
- * established mapping.
- *
- * Warning: this leaks memory maps for now, do not use it yet for something
- * else than Mach shutdown.
- */
-vm_offset_t
-io_map_cached(
- phys_addr_t phys_addr,
- vm_size_t size)
-{
- static phys_addr_t base;
- static vm_size_t length;
- static vm_offset_t map;
-
- if (! map
- || (phys_addr < base)
- || (base + length < phys_addr + size))
- {
- base = trunc_phys(phys_addr);
- length = round_phys(phys_addr - base + size);
- map = io_map(base, length);
- }
-
- return map + (phys_addr - base);
-}
diff --git a/i386/i386/io_perm.c b/i386/i386/io_perm.c
index c966102c..aabff49b 100644
--- a/i386/i386/io_perm.c
+++ b/i386/i386/io_perm.c
@@ -64,18 +64,17 @@
#include <device/device_emul.h>
#include <device/device_port.h>
+#include <i386/i386/mach_i386.server.h>
+
#include "io_perm.h"
#include "gdt.h"
#include "pcb.h"
#define PCI_CFG1_START 0xcf8
#define PCI_CFG1_END 0xcff
-#define PCI_CFG2_START 0xc000
-#define PCI_CFG2_END 0xcfff
#define CONTAINS_PCI_CFG(from, to) \
- ( ( ( from <= PCI_CFG1_END ) && ( to >= PCI_CFG1_START ) ) || \
- ( ( from <= PCI_CFG2_END ) && ( to >= PCI_CFG2_START ) ) )
+ ( ( from <= PCI_CFG1_END ) && ( to >= PCI_CFG1_START ) )
/* Our device emulation ops. See below, at the bottom of this file. */
diff --git a/i386/i386/ipl.h b/i386/i386/ipl.h
index fb939789..6e59b368 100644
--- a/i386/i386/ipl.h
+++ b/i386/i386/ipl.h
@@ -72,9 +72,11 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifdef KERNEL
#ifndef __ASSEMBLER__
#include <machine/machspl.h>
-extern void (*ivect[])();
+/* Note that interrupt handlers have varying signatures */
+typedef void (*interrupt_handler_fn)(int);
+extern interrupt_handler_fn ivect[];
extern int iunit[];
-extern spl_t curr_ipl;
+extern spl_t curr_ipl[NCPUS];
#endif /* __ASSEMBLER__ */
#endif /* KERNEL */
diff --git a/i386/i386/ktss.c b/i386/i386/ktss.c
index 917e6305..34cb6df2 100644
--- a/i386/i386/ktss.c
+++ b/i386/i386/ktss.c
@@ -35,15 +35,18 @@
#include "seg.h"
#include "gdt.h"
#include "ktss.h"
+#include "mp_desc.h"
/* A kernel TSS with a complete I/O bitmap. */
struct task_tss ktss;
void
-ktss_init(void)
+ktss_fill(struct task_tss *myktss, struct real_descriptor *mygdt)
{
- /* XXX temporary exception stack */
+ /* XXX temporary exception stacks */
+ /* FIXME: make it per-processor */
static int exception_stack[1024];
+ static int double_fault_stack[1024];
#ifdef MACH_RING1
/* Xen won't allow us to do any I/O by default anyway, just register
@@ -52,19 +55,38 @@ ktss_init(void)
panic("couldn't register exception stack\n");
#else /* MACH_RING1 */
/* Initialize the master TSS descriptor. */
- fill_gdt_sys_descriptor(KERNEL_TSS,
- kvtolin(&ktss), sizeof(struct task_tss) - 1,
+ _fill_gdt_sys_descriptor(mygdt, KERNEL_TSS,
+ kvtolin(myktss), sizeof(struct task_tss) - 1,
ACC_PL_K|ACC_TSS, 0);
/* Initialize the master TSS. */
- ktss.tss.ss0 = KERNEL_DS;
- ktss.tss.esp0 = (unsigned long)(exception_stack+1024);
- ktss.tss.io_bit_map_offset = IOPB_INVAL;
+#ifdef __x86_64__
+ myktss->tss.rsp0 = (unsigned long)(exception_stack+1024);
+ myktss->tss.ist1 = (unsigned long)(double_fault_stack+1024);
+#else /* ! __x86_64__ */
+ myktss->tss.ss0 = KERNEL_DS;
+ myktss->tss.esp0 = (unsigned long)(exception_stack+1024);
+#endif /* __x86_64__ */
+
+ myktss->tss.io_bit_map_offset = IOPB_INVAL;
/* Set the last byte in the I/O bitmap to all 1's. */
- ktss.barrier = 0xff;
+ myktss->barrier = 0xff;
/* Load the TSS. */
ltr(KERNEL_TSS);
#endif /* MACH_RING1 */
}
+void
+ktss_init(void)
+{
+ ktss_fill(&ktss, gdt);
+}
+
+#if NCPUS > 1
+void
+ap_ktss_init(int cpu)
+{
+ ktss_fill(&mp_desc_table[cpu]->ktss, mp_gdt[cpu]);
+}
+#endif
diff --git a/i386/i386/ktss.h b/i386/i386/ktss.h
index 304a877a..171332da 100644
--- a/i386/i386/ktss.h
+++ b/i386/i386/ktss.h
@@ -28,5 +28,6 @@
extern struct task_tss ktss;
extern void ktss_init(void);
+extern void ap_ktss_init(int cpu);
#endif /* _I386_KTSS_ */
diff --git a/i386/i386/kttd_interface.c b/i386/i386/kttd_interface.c
index c6caa76d..f48fe8eb 100644
--- a/i386/i386/kttd_interface.c
+++ b/i386/i386/kttd_interface.c
@@ -499,8 +499,7 @@ struct int_regs {
};
void
-kttd_netentry(int_regs)
- struct int_regs *int_regs;
+kttd_netentry(struct int_regs *int_regs)
{
struct i386_interrupt_state *is = int_regs->is;
int s;
diff --git a/i386/i386/ldt.c b/i386/i386/ldt.c
index 261df93a..5db36426 100644
--- a/i386/i386/ldt.c
+++ b/i386/i386/ldt.c
@@ -27,16 +27,20 @@
* "Local" descriptor table. At the moment, all tasks use the
* same LDT.
*/
+#include <mach/machine/eflags.h>
#include <mach/machine/vm_types.h>
#include <mach/xen.h>
#include <intel/pmap.h>
+#include <kern/debug.h>
#include "vm_param.h"
#include "seg.h"
#include "gdt.h"
#include "ldt.h"
#include "locore.h"
+#include "mp_desc.h"
+#include "msr.h"
#ifdef MACH_PV_DESCRIPTORS
/* It is actually defined in xen_boothdr.S */
@@ -44,38 +48,70 @@ extern
#endif /* MACH_PV_DESCRIPTORS */
struct real_descriptor ldt[LDTSZ];
+#if defined(__x86_64__) && ! defined(USER32)
+#define USER_SEGMENT_SIZEBITS SZ_64
+#else
+#define USER_SEGMENT_SIZEBITS SZ_32
+#endif
+
void
-ldt_init(void)
+ldt_fill(struct real_descriptor *myldt, struct real_descriptor *mygdt)
{
#ifdef MACH_PV_DESCRIPTORS
#ifdef MACH_PV_PAGETABLES
- pmap_set_page_readwrite(ldt);
+ pmap_set_page_readwrite(myldt);
#endif /* MACH_PV_PAGETABLES */
#else /* MACH_PV_DESCRIPTORS */
/* Initialize the master LDT descriptor in the GDT. */
- fill_gdt_sys_descriptor(KERNEL_LDT,
- kvtolin(&ldt), sizeof(ldt)-1,
+ _fill_gdt_sys_descriptor(mygdt, KERNEL_LDT,
+ kvtolin(myldt), (LDTSZ * sizeof(struct real_descriptor))-1,
ACC_PL_K|ACC_LDT, 0);
#endif /* MACH_PV_DESCRIPTORS */
- /* Initialize the 32bit LDT descriptors. */
- fill_ldt_gate(USER_SCALL,
+ /* Initialize the syscall entry point */
+#if defined(__x86_64__) && ! defined(USER32)
+ if (!CPU_HAS_FEATURE(CPU_FEATURE_SEP))
+ panic("syscall support is missing on 64 bit");
+ /* Enable 64-bit syscalls */
+ wrmsr(MSR_REG_EFER, rdmsr(MSR_REG_EFER) | MSR_EFER_SCE);
+ wrmsr(MSR_REG_LSTAR, (vm_offset_t)syscall64);
+ wrmsr(MSR_REG_STAR, ((((long)USER_CS - 16) << 16) | (long)KERNEL_CS) << 32);
+ wrmsr(MSR_REG_FMASK, EFL_IF | EFL_IOPL_USER);
+#else /* defined(__x86_64__) && ! defined(USER32) */
+ fill_ldt_gate(myldt, USER_SCALL,
(vm_offset_t)&syscall, KERNEL_CS,
ACC_PL_U|ACC_CALL_GATE, 0);
- fill_ldt_descriptor(USER_CS,
- VM_MIN_ADDRESS,
- VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
+#endif /* defined(__x86_64__) && ! defined(USER32) */
+
+ /* Initialize the 32bit LDT descriptors. */
+ fill_ldt_descriptor(myldt, USER_CS,
+ VM_MIN_USER_ADDRESS,
+ VM_MAX_USER_ADDRESS-VM_MIN_USER_ADDRESS-4096,
/* XXX LINEAR_... */
- ACC_PL_U|ACC_CODE_R, SZ_32);
- fill_ldt_descriptor(USER_DS,
- VM_MIN_ADDRESS,
- VM_MAX_ADDRESS-VM_MIN_ADDRESS-4096,
- ACC_PL_U|ACC_DATA_W, SZ_32);
+ ACC_PL_U|ACC_CODE_R, USER_SEGMENT_SIZEBITS);
+ fill_ldt_descriptor(myldt, USER_DS,
+ VM_MIN_USER_ADDRESS,
+ VM_MAX_USER_ADDRESS-VM_MIN_USER_ADDRESS-4096,
+ ACC_PL_U|ACC_DATA_W, USER_SEGMENT_SIZEBITS);
/* Activate the LDT. */
#ifdef MACH_PV_DESCRIPTORS
- hyp_set_ldt(&ldt, LDTSZ);
+ hyp_set_ldt(myldt, LDTSZ);
#else /* MACH_PV_DESCRIPTORS */
lldt(KERNEL_LDT);
#endif /* MACH_PV_DESCRIPTORS */
}
+
+void
+ldt_init(void)
+{
+ ldt_fill(ldt, gdt);
+}
+
+#if NCPUS > 1
+void
+ap_ldt_init(int cpu)
+{
+ ldt_fill(mp_desc_table[cpu]->ldt, mp_gdt[cpu]);
+}
+#endif
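
A hedged note on the MSR_REG_STAR value written in ldt_fill() above, since the "USER_CS - 16" term is not explained in the patch itself: SYSCALL loads CS from STAR[47:32] and SS from that value + 8, while SYSRET to 64-bit mode loads CS from STAR[63:48] + 16 and SS from STAR[63:48] + 8. With the selectors from gdt.h and the new 64-bit ldt.h values (USER_CS = 0x1f, USER_DS = 0x17), the arithmetic can be sanity-checked with a sketch like this (not part of the patch):

	/* Sketch only: check the selector layout implied by the STAR value. */
	static int star_layout_ok(void)
	{
		unsigned syscall_base = KERNEL_CS;	/* stored in STAR[47:32] */
		unsigned sysret_base  = USER_CS - 16;	/* stored in STAR[63:48] */

		return syscall_base + 8 == KERNEL_DS	/* SYSCALL: SS = base + 8   */
		    && sysret_base + 16 == USER_CS	/* SYSRET:  CS = base + 16  */
		    && sysret_base + 8  == USER_DS;	/* SYSRET:  SS = base + 8   */
	}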
diff --git a/i386/i386/ldt.h b/i386/i386/ldt.h
index 1f0d7014..51867f47 100644
--- a/i386/i386/ldt.h
+++ b/i386/i386/ldt.h
@@ -43,11 +43,16 @@
* User descriptors for Mach - 32-bit flat address space
*/
#define USER_SCALL 0x07 /* system call gate */
-#ifdef __x86_64__
+#if defined(__x86_64__) && ! defined(USER32)
/* Call gate needs two entries */
-#endif
+
+/* The sysret instruction puts some constraints on the user segment indexes */
+#define USER_CS 0x1f /* user code segment */
+#define USER_DS 0x17 /* user data segment */
+#else
#define USER_CS 0x17 /* user code segment */
#define USER_DS 0x1f /* user data segment */
+#endif
#define LDTSZ 4
@@ -57,14 +62,15 @@
extern struct real_descriptor ldt[LDTSZ];
/* Fill a 32bit segment descriptor in the LDT. */
-#define fill_ldt_descriptor(selector, base, limit, access, sizebits) \
- fill_descriptor(&ldt[sel_idx(selector)], base, limit, access, sizebits)
+#define fill_ldt_descriptor(_ldt, selector, base, limit, access, sizebits) \
+ fill_descriptor(&_ldt[sel_idx(selector)], base, limit, access, sizebits)
-#define fill_ldt_gate(selector, offset, dest_selector, access, word_count) \
- fill_gate((struct real_gate*)&ldt[sel_idx(selector)], \
+#define fill_ldt_gate(_ldt, selector, offset, dest_selector, access, word_count) \
+ fill_gate((struct real_gate*)&_ldt[sel_idx(selector)], \
offset, dest_selector, access, word_count)
void ldt_init(void);
+void ap_ldt_init(int cpu);
#endif /* !__ASSEMBLER__ */
diff --git a/i386/i386/lock.h b/i386/i386/lock.h
index 8efa0ca0..b325ae0d 100644
--- a/i386/i386/lock.h
+++ b/i386/i386/lock.h
@@ -30,6 +30,7 @@
#define _I386_LOCK_H_
#if NCPUS > 1
+#include <i386/smp.h>
/*
* All of the locking routines are built from calls on
@@ -44,10 +45,10 @@
*/
#define _simple_lock_xchg_(lock, new_val) \
- ({ int _old_val_; \
- asm volatile("xchgl %0, %2" \
+({ natural_t _old_val_; \
+ asm volatile("xchg %0, %2" \
: "=r" (_old_val_) \
- : "0" (new_val), "m" (*(lock)) : "memory" \
+ : "0" ((natural_t)(new_val)), "m" (*(lock)) : "memory" \
); \
_old_val_; \
})
@@ -55,18 +56,21 @@
#define simple_lock_init(l) \
((l)->lock_data = 0)
-#define simple_lock(l) \
+#define SIMPLE_LOCK_INITIALIZER(l) \
+ {.lock_data = 0}
+
+#define _simple_lock(l) \
({ \
while(_simple_lock_xchg_(l, 1)) \
- while (*(volatile int *)&(l)->lock_data) \
- continue; \
+ while (*(volatile natural_t *)&(l)->lock_data) \
+ cpu_pause(); \
0; \
})
-#define simple_unlock(l) \
+#define _simple_unlock(l) \
(_simple_lock_xchg_(l, 0))
-#define simple_lock_try(l) \
+#define _simple_lock_try(l) \
(!_simple_lock_xchg_(l, 1))
/*
@@ -81,7 +85,7 @@
btsl %0, %1 \n\
jb 0b" \
: \
- : "r" (bit), "m" (*(volatile int *)(l)) : "memory"); \
+ : "r" ((int)(bit)), "m" (*(volatile int *)(l)) : "memory"); \
0; \
})
@@ -90,7 +94,7 @@
asm volatile(" lock \n\
btrl %0, %1" \
: \
- : "r" (bit), "m" (*(volatile int *)(l)) : "memory"); \
+ : "r" ((int)(bit)), "m" (*(volatile int *)(l)) : "memory"); \
0; \
})
@@ -104,7 +108,7 @@
asm volatile(" lock \n\
btsl %0, %1" \
: \
- : "r" (bit), "m" (*(l)) ); \
+ : "r" ((int)(bit)), "m" (*(l)) ); \
0; \
})
@@ -113,7 +117,7 @@
asm volatile(" lock \n\
btrl %0, %1" \
: \
- : "r" (bit), "m" (*(l)) ); \
+ : "r" ((int)(bit)), "m" (*(l)) ); \
0; \
})
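
Functionally, the reworked _simple_lock is a test-and-test-and-set spinlock. An equivalent C sketch is shown below; it is not the patch's code and uses the GCC __atomic builtin in place of the inline xchg, with cpu_pause() coming from the newly included <i386/smp.h>:

	static inline void simple_lock_sketch(volatile natural_t *lock_data)
	{
		/* Atomic exchange, like _simple_lock_xchg_(lock, 1). */
		while (__atomic_exchange_n(lock_data, 1, __ATOMIC_ACQUIRE)) {
			/* Spin on plain reads until the lock looks free,
			 * issuing pause to reduce bus traffic and be polite
			 * to a sibling hyperthread. */
			while (*lock_data)
				cpu_pause();
		}
	}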
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 8a1054a6..9d0513a1 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -33,11 +33,67 @@
#include <i386/proc_reg.h>
#include <i386/trap.h>
#include <i386/seg.h>
+#include <i386/gdt.h>
#include <i386/ldt.h>
#include <i386/i386asm.h>
#include <i386/cpu_number.h>
#include <i386/xen.h>
+#define PUSH_REGS_ISR \
+ pushl %ecx ;\
+ pushl %edx
+
+#define PUSH_AREGS_ISR \
+ pushl %eax ;\
+ PUSH_REGS_ISR
+
+
+#define POP_REGS_ISR \
+ popl %edx ;\
+ popl %ecx
+
+#define POP_AREGS_ISR \
+ POP_REGS_ISR ;\
+ popl %eax
+
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
+#define PUSH_SEGMENTS \
+ pushl %ds ;\
+ pushl %es ;\
+ pushl %fs ;\
+ pushl %gs
+
+#define POP_SEGMENTS \
+ popl %gs ;\
+ popl %fs ;\
+ popl %es ;\
+ popl %ds
+
+#define PUSH_SEGMENTS_ISR \
+ pushl %ds ;\
+ pushl %es ;\
+ pushl %fs ;\
+ pushl %gs
+
+#define POP_SEGMENTS_ISR \
+ popl %gs ;\
+ popl %fs ;\
+ popl %es ;\
+ popl %ds
+
+#define SET_KERNEL_SEGMENTS(reg) \
+ mov %ss,reg /* switch to kernel segments */ ;\
+ mov reg,%ds /* (same as kernel stack segment) */ ;\
+ mov reg,%es ;\
+ mov reg,%fs ;\
+ mov $(PERCPU_DS),reg ;\
+ mov reg,%gs
+
/*
* Fault recovery.
*/
@@ -455,21 +511,8 @@ ENTRY(t_page_fault)
ENTRY(alltraps)
pusha /* save the general registers */
trap_push_segs:
- pushl %ds /* and the segment registers */
- pushl %es
- pushl %fs
- pushl %gs
-
- /* Note that we have to load the segment registers
- even if this is a trap from the kernel,
- because the kernel uses user segment registers for copyin/copyout.
- (XXX Would it be smarter just to use fs or gs for that?) */
- mov %ss,%ax /* switch to kernel data segment */
- mov %ax,%ds /* (same as kernel stack segment) */
- mov %ax,%es
- mov %ax,%fs
- mov %ax,%gs
-
+ PUSH_SEGMENTS /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%ax) /* switch to kernel data segment */
trap_set_segs:
cld /* clear direction flag */
testl $(EFL_VM),R_EFLAGS(%esp) /* in V86 mode? */
@@ -541,17 +584,18 @@ _kret_iret:
trap_from_kernel:
#if MACH_KDB || MACH_TTD
movl %esp,%ebx /* save current stack */
-
movl %esp,%edx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%edx
- cmpl EXT(int_stack_base),%edx
+
+ CPU_NUMBER(%ecx)
+ and $(~(INTSTACK_SIZE-1)),%edx
+ cmpl CX(EXT(int_stack_base),%ecx),%edx
je 1f /* OK if so */
- CPU_NUMBER(%edx) /* get CPU number */
+ movl %ecx,%edx
cmpl CX(EXT(kernel_stack),%edx),%esp
/* already on kernel stack? */
ja 0f
- cmpl CX(EXT(active_stacks),%edx),%esp
+ cmpl MY(ACTIVE_STACK),%esp
ja 1f /* switch if not */
0:
movl CX(EXT(kernel_stack),%edx),%esp
@@ -567,6 +611,7 @@ trap_from_kernel:
call EXT(kernel_trap) /* to kernel trap routine */
addl $4,%esp /* pop parameter */
#endif /* MACH_KDB || MACH_TTD */
+
jmp _return_from_kernel
@@ -649,6 +694,55 @@ INTERRUPT(20)
INTERRUPT(21)
INTERRUPT(22)
INTERRUPT(23)
+/* Possibly 8 more GSIs */
+INTERRUPT(24)
+INTERRUPT(25)
+INTERRUPT(26)
+INTERRUPT(27)
+INTERRUPT(28)
+INTERRUPT(29)
+INTERRUPT(30)
+INTERRUPT(31)
+/* ... APIC IOAPIC #2 */
+INTERRUPT(32)
+INTERRUPT(33)
+INTERRUPT(34)
+INTERRUPT(35)
+INTERRUPT(36)
+INTERRUPT(37)
+INTERRUPT(38)
+INTERRUPT(39)
+INTERRUPT(40)
+INTERRUPT(41)
+INTERRUPT(42)
+INTERRUPT(43)
+INTERRUPT(44)
+INTERRUPT(45)
+INTERRUPT(46)
+INTERRUPT(47)
+INTERRUPT(48)
+INTERRUPT(49)
+INTERRUPT(50)
+INTERRUPT(51)
+INTERRUPT(52)
+INTERRUPT(53)
+INTERRUPT(54)
+INTERRUPT(55)
+/* Possibly 8 more GSIs */
+INTERRUPT(56)
+INTERRUPT(57)
+INTERRUPT(58)
+INTERRUPT(59)
+INTERRUPT(60)
+INTERRUPT(61)
+INTERRUPT(62)
+INTERRUPT(63)
+#endif
+#if NCPUS > 1
+INTERRUPT(CALL_AST_CHECK)
+INTERRUPT(CALL_PMAP_UPDATE)
+#endif
+#ifdef APIC
/* Spurious interrupt, set irq number to vect number */
INTERRUPT(255)
#endif
@@ -660,28 +754,22 @@ INTERRUPT(255)
* old %eax on stack; interrupt number in %eax.
*/
ENTRY(all_intrs)
- pushl %ecx /* save registers */
- pushl %edx
+ PUSH_REGS_ISR /* save registers */
cld /* clear direction flag */
+ CPU_NUMBER_NO_GS(%ecx)
movl %esp,%edx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%edx
- cmpl %ss:EXT(int_stack_base),%edx
+ and $(~(INTSTACK_SIZE-1)),%edx
+ cmpl %ss:CX(EXT(int_stack_base),%ecx),%edx
je int_from_intstack /* if not: */
- pushl %ds /* save segment registers */
- pushl %es
- pushl %fs
- pushl %gs
- mov %ss,%dx /* switch to kernel segments */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
+ PUSH_SEGMENTS_ISR /* save segment registers */
+ SET_KERNEL_SEGMENTS(%dx) /* switch to kernel segments */
CPU_NUMBER(%edx)
movl CX(EXT(int_stack_top),%edx),%ecx
+
xchgl %ecx,%esp /* switch to interrupt stack */
#if STAT_TIME
@@ -693,12 +781,19 @@ ENTRY(all_intrs)
TIME_INT_ENTRY /* do timing */
#endif
- call EXT(interrupt) /* call generic interrupt routine */
+#ifdef MACH_LDEBUG
+ incl CX(EXT(in_interrupt),%edx)
+#endif
- .globl EXT(return_to_iret)
-LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */
+ call EXT(interrupt) /* call generic interrupt routine */
+ .globl EXT(return_to_iret) /* ( label for kdb_kintr and hardclock */
+LEXT(return_to_iret) /* to find the return from calling interrupt) */
CPU_NUMBER(%edx)
+#ifdef MACH_LDEBUG
+ decl CX(EXT(in_interrupt),%edx)
+#endif
+
#if STAT_TIME
#else
TIME_INT_EXIT /* do timing */
@@ -716,23 +811,21 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */
cmpl $0,CX(EXT(need_ast),%edx)
jnz ast_from_interrupt /* take it if so */
1:
- pop %gs /* restore segment regs */
- pop %fs
- pop %es
- pop %ds
- pop %edx
- pop %ecx
- pop %eax
+ POP_SEGMENTS_ISR /* restore segment regs */
+ POP_AREGS_ISR /* restore registers */
+
iret /* return to caller */
int_from_intstack:
- cmpl EXT(int_stack_base),%esp /* seemingly looping? */
+ CPU_NUMBER_NO_GS(%edx)
+ cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? */
jb stack_overflowed /* if not: */
call EXT(interrupt) /* call interrupt routine */
_return_to_iret_i: /* ( label for kdb_kintr) */
- pop %edx /* must have been on kernel segs */
- pop %ecx
- pop %eax /* no ASTs */
+ /* must have been on kernel segs */
+ POP_AREGS_ISR /* restore registers */
+ /* no ASTs */
+
iret
stack_overflowed:
@@ -755,26 +848,13 @@ stack_overflowed:
* ss
*/
ast_from_interrupt:
- pop %gs /* restore all registers ... */
- pop %fs
- pop %es
- pop %ds
- popl %edx
- popl %ecx
- popl %eax
+ POP_SEGMENTS_ISR /* restore all registers ... */
+ POP_AREGS_ISR
pushl $0 /* zero code */
pushl $0 /* zero trap number */
pusha /* save general registers */
- push %ds /* save segment registers */
- push %es
- push %fs
- push %gs
- mov %ss,%dx /* switch to kernel segments */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
-
+ PUSH_SEGMENTS_ISR /* save segment registers */
+ SET_KERNEL_SEGMENTS(%dx) /* switch to kernel segments */
CPU_NUMBER(%edx)
TIME_TRAP_UENTRY
@@ -793,9 +873,12 @@ ast_from_interrupt:
* frame-> saved %ebp
* return address in interrupt handler
* #ifndef MACH_XEN
- * iunit
+ * 1st parameter iunit
+ * 2nd parameter saved SPL
+ *	3rd parameter	return address
+ * 4th parameter registers
* saved SPL
- * irq
+ * saved IRQ
* #endif
* return address == return_to_iret_i
* saved %edx
@@ -834,7 +917,7 @@ ast_from_interrupt:
#ifdef MACH_XEN
#define RET_OFFSET 8
#else /* MACH_XEN */
-#define RET_OFFSET 20
+#define RET_OFFSET 32
#endif /* MACH_XEN */
ENTRY(kdb_kintr)
@@ -848,7 +931,9 @@ ENTRY(kdb_kintr)
cmpl RET_OFFSET(%eax),%edx /* interrupt handler (2)? */
je 2f /* if not: */
movl (%eax),%eax /* try next frame */
- jmp 0b
+ testl %eax,%eax
+ jnz 0b
+ ud2 /* oops, didn't find frame, fix me :/ */
1: movl $kdb_from_iret,RET_OFFSET(%eax)
ret /* returns to kernel/user stack */
@@ -896,19 +981,13 @@ kdb_from_iret_i: /* on interrupt stack */
pushl $0 /* zero error code */
pushl $0 /* zero trap number */
pusha /* save general registers */
- push %ds /* save segment registers */
- push %es
- push %fs
- push %gs
+ PUSH_SEGMENTS /* save segment registers */
pushl %esp /* pass regs, */
pushl $0 /* code, */
pushl $-1 /* type to kdb */
call EXT(kdb_trap)
addl $12,%esp /* remove parameters */
- pop %gs /* restore segment registers */
- pop %fs
- pop %es
- pop %ds
+ POP_SEGMENTS /* restore segment registers */
popa /* restore general registers */
addl $8,%esp
iret
@@ -978,19 +1057,13 @@ ttd_from_iret_i: /* on interrupt stack */
pushl $0 /* zero error code */
pushl $0 /* zero trap number */
pusha /* save general registers */
- push %ds /* save segment registers */
- push %es
- push %fs
- push %gs
+ PUSH_SEGMENTS_ISR /* save segment registers */
pushl %esp /* pass regs, */
pushl $0 /* code, */
pushl $-1 /* type to kdb */
call _kttd_trap
addl $12,%esp /* remove parameters */
- pop %gs /* restore segment registers */
- pop %fs
- pop %es
- pop %ds
+ POP_SEGMENTS_ISR /* restore segment registers */
popa /* restore general registers */
addl $8,%esp
iret
@@ -1018,16 +1091,8 @@ syscall_entry_2:
pushl $0 /* clear trap number slot */
pusha /* save the general registers */
- pushl %ds /* and the segment registers */
- pushl %es
- pushl %fs
- pushl %gs
-
- mov %ss,%dx /* switch to kernel data segment */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
+ PUSH_SEGMENTS /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%dx) /* switch to kernel data segment */
/*
* Shuffle eflags,eip,cs into proper places
@@ -1053,7 +1118,7 @@ syscall_entry_2:
* Check for MACH or emulated system call
*/
syscall_entry_3:
- movl CX(EXT(active_threads),%edx),%edx
+ movl MY(ACTIVE_THREAD),%edx
/* point to current thread */
movl TH_TASK(%edx),%edx /* point to task */
movl TASK_EMUL(%edx),%edx /* get emulation vector */
@@ -1306,6 +1371,7 @@ copyin_fail:
* arg0: user address
* arg1: kernel address
* arg2: byte count - must be a multiple of four
+ * arg3: kernel byte count
*/
ENTRY(copyinmsg)
pushl %esi
@@ -1314,6 +1380,7 @@ ENTRY(copyinmsg)
movl 8+S_ARG0,%esi /* get user start address */
movl 8+S_ARG1,%edi /* get kernel destination address */
movl 8+S_ARG2,%ecx /* get count */
+ movl %ecx,%edx /* save count */
movl $USER_DS,%eax /* use user data segment for accesses */
mov %ax,%ds
@@ -1325,6 +1392,9 @@ ENTRY(copyinmsg)
movsl /* move longwords */
xorl %eax,%eax /* return 0 for success */
+ movl 8+S_ARG1,%edi
+ movl %edx,%es:MSGH_MSGH_SIZE(%edi) /* set msgh_size */
+
copyinmsg_ret:
mov %ss,%di /* restore DS to kernel segment */
mov %di,%ds
diff --git a/i386/i386/locore.h b/i386/i386/locore.h
index 00da07ad..374c8cf9 100644
--- a/i386/i386/locore.h
+++ b/i386/i386/locore.h
@@ -27,8 +27,8 @@
* Fault recovery in copyin/copyout routines.
*/
struct recovery {
- int fault_addr;
- int recover_addr;
+ vm_offset_t fault_addr;
+ vm_offset_t recover_addr;
};
extern struct recovery recover_table[];
@@ -48,7 +48,7 @@ extern int call_continuation (continuation_t continuation);
extern int discover_x86_cpu_type (void);
extern int copyin (const void *userbuf, void *kernelbuf, size_t cn);
-extern int copyinmsg (const void *userbuf, void *kernelbuf, size_t cn);
+extern int copyinmsg (const void *userbuf, void *kernelbuf, size_t cn, size_t kn);
extern int copyout (const void *kernelbuf, void *userbuf, size_t cn);
extern int copyoutmsg (const void *kernelbuf, void *userbuf, size_t cn);
@@ -57,6 +57,7 @@ extern int inst_fetch (int eip, int cs);
extern void cpu_shutdown (void);
extern int syscall (void);
+extern int syscall64 (void);
extern unsigned int cpu_features[2];
diff --git a/i386/i386/loose_ends.c b/i386/i386/loose_ends.c
index 64b53b71..7e7f943b 100644
--- a/i386/i386/loose_ends.c
+++ b/i386/i386/loose_ends.c
@@ -26,6 +26,8 @@
/*
*/
+#include <i386/i386/loose_ends.h>
+
#ifndef NDEBUG
#define MACH_ASSERT 1
#else
@@ -45,17 +47,3 @@ delay(int n)
{
DELAY(n);
}
-
-#if MACH_ASSERT
-
-/*
- * Machine-dependent routine to fill in an array with up to callstack_max
- * levels of return pc information.
- */
-void machine_callstack(
- const unsigned long *buf,
- int callstack_max)
-{
-}
-
-#endif /* MACH_ASSERT */
diff --git a/i386/i386/machine_task.c b/i386/i386/machine_task.c
index d592838a..8bebf368 100644
--- a/i386/i386/machine_task.c
+++ b/i386/i386/machine_task.c
@@ -23,6 +23,7 @@
#include <kern/lock.h>
#include <mach/mach_types.h>
#include <kern/slab.h>
+#include <kern/task.h>
#include <machine/task.h>
#include <machine/io_perm.h>
diff --git a/i386/i386/model_dep.h b/i386/i386/model_dep.h
index 711f07fd..5369e288 100644
--- a/i386/i386/model_dep.h
+++ b/i386/i386/model_dep.h
@@ -29,6 +29,11 @@
#include <mach/std_types.h>
/*
+ * Address to hold AP boot code, held in ASM
+ */
+extern phys_addr_t apboot_addr;
+
+/*
* Find devices. The system is alive.
*/
extern void machine_init (void);
@@ -55,4 +60,9 @@ extern void halt_all_cpus (boolean_t reboot) __attribute__ ((noreturn));
*/
extern void machine_relax (void);
+/*
+ * C boot entrypoint - called by boot_entry in boothdr.S.
+ */
+extern void c_boot_entry(vm_offset_t bi);
+
#endif /* _I386AT_MODEL_DEP_H_ */
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index 1e9ea0fc..61a7607b 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -24,25 +24,37 @@
* the rights to redistribute these changes.
*/
-#if NCPUS > 1
-
-#include <string.h>
-
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/printf.h>
+#include <kern/smp.h>
+#include <kern/startup.h>
+#include <kern/kmutex.h>
#include <mach/machine.h>
#include <mach/xen.h>
#include <vm/vm_kern.h>
#include <i386/mp_desc.h>
#include <i386/lock.h>
+#include <i386/apic.h>
+#include <i386/locore.h>
+#include <i386/fpu.h>
+#include <i386/gdt.h>
+#include <i386at/idt.h>
+#include <i386at/int_init.h>
+#include <i386/cpu.h>
+#include <i386/smp.h>
+
#include <i386at/model_dep.h>
#include <machine/ktss.h>
+#include <machine/smp.h>
#include <machine/tss.h>
#include <machine/io_perm.h>
#include <machine/vm_param.h>
+#include <i386at/acpi_parse_apic.h>
+#include <string.h>
+
/*
* The i386 needs an interrupt stack to keep the PCB stack from being
* overrun by interrupts. All interrupt stacks MUST lie at lower addresses
@@ -52,20 +64,44 @@
/*
* Addresses of bottom and top of interrupt stacks.
*/
-vm_offset_t interrupt_stack[NCPUS];
vm_offset_t int_stack_top[NCPUS];
vm_offset_t int_stack_base[NCPUS];
/*
- * Barrier address.
+ * Whether we are currently handling an interrupt.
+ * To catch code erroneously taking non-irq-safe locks.
*/
-vm_offset_t int_stack_high;
+#ifdef MACH_LDEBUG
+unsigned long in_interrupt[NCPUS];
+#endif
+
+/* Interrupt stack allocation */
+uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(NCPUS*INTSTACK_SIZE);
+void
+interrupt_stack_alloc(void)
+{
+ int i;
+
+ /*
+ * Set up pointers to the top of the interrupt stack.
+ */
+
+ for (i = 0; i < NCPUS; i++) {
+ int_stack_base[i] = (vm_offset_t) &solid_intstack[i * INTSTACK_SIZE];
+ int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) * INTSTACK_SIZE] - 4;
+ }
+}
+
+#if NCPUS > 1
/*
- * First cpu`s interrupt stack.
+ * Flag to mark SMP init by BSP complete
*/
-extern char _intstack[]; /* bottom */
-extern char _eintstack[]; /* top */
+int bspdone;
+
+phys_addr_t apboot_addr;
+extern void *apboot, *apbootend;
+extern volatile ApicLocalUnit* lapic;
/*
* Multiprocessor i386/i486 systems use a separate copy of the
@@ -77,7 +113,7 @@ extern char _eintstack[]; /* top */
*/
/*
- * Allocated descriptor tables.
+ * Descriptor tables.
*/
struct mp_desc_table *mp_desc_table[NCPUS] = { 0 };
@@ -102,12 +138,13 @@ extern struct real_descriptor ldt[LDTSZ];
* Allocate and initialize the per-processor descriptor tables.
*/
-struct mp_desc_table *
+int
mp_desc_init(int mycpu)
{
struct mp_desc_table *mpt;
+ vm_offset_t mem;
- if (mycpu == master_cpu) {
+ if (mycpu == 0) {
/*
* Master CPU uses the tables built at boot time.
* Just set the TSS and GDT pointers.
@@ -118,110 +155,28 @@ mp_desc_init(int mycpu)
}
else {
/*
- * Other CPUs allocate the table from the bottom of
- * the interrupt stack.
+ * Allocate tables for other CPUs
*/
- mpt = (struct mp_desc_table *) interrupt_stack[mycpu];
+ if (!init_alloc_aligned(sizeof(struct mp_desc_table), &mem))
+ panic("not enough memory for descriptor tables");
+ mpt = (struct mp_desc_table *)phystokv(mem);
mp_desc_table[mycpu] = mpt;
mp_ktss[mycpu] = &mpt->ktss;
mp_gdt[mycpu] = mpt->gdt;
/*
- * Copy the tables
+ * Zero the tables
*/
- memcpy(mpt->idt,
- idt,
- sizeof(idt));
- memcpy(mpt->gdt,
- gdt,
- sizeof(gdt));
- memcpy(mpt->ldt,
- ldt,
- sizeof(ldt));
- memset(&mpt->ktss, 0,
- sizeof(struct task_tss));
+ memset(mpt->idt, 0, sizeof(idt));
+ memset(mpt->gdt, 0, sizeof(gdt));
+ memset(mpt->ldt, 0, sizeof(ldt));
+ memset(&mpt->ktss, 0, sizeof(struct task_tss));
- /*
- * Fix up the entries in the GDT to point to
- * this LDT and this TSS.
- */
-#ifdef MACH_RING1
- panic("TODO %s:%d\n",__FILE__,__LINE__);
-#else /* MACH_RING1 */
- _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_LDT,
- (unsigned)&mpt->ldt,
- LDTSZ * sizeof(struct real_descriptor) - 1,
- ACC_P|ACC_PL_K|ACC_LDT, 0);
- _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_TSS,
- (unsigned)&mpt->ktss,
- sizeof(struct task_tss) - 1,
- ACC_P|ACC_PL_K|ACC_TSS, 0);
-
- mpt->ktss.tss.ss0 = KERNEL_DS;
- mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL;
- mpt->ktss.barrier = 0xFF;
-#endif /* MACH_RING1 */
-
- return mpt;
+ return mycpu;
}
}
-kern_return_t intel_startCPU(int slot_num)
-{
- printf("TODO: intel_startCPU\n");
-}
-
-/*
- * Called after all CPUs have been found, but before the VM system
- * is running. The machine array must show which CPUs exist.
- */
-void
-interrupt_stack_alloc(void)
-{
- int i;
- int cpu_count;
- vm_offset_t stack_start;
-
- /*
- * Count the number of CPUs.
- */
- cpu_count = 0;
- for (i = 0; i < NCPUS; i++)
- if (machine_slot[i].is_cpu)
- cpu_count++;
-
- /*
- * Allocate an interrupt stack for each CPU except for
- * the master CPU (which uses the bootstrap stack)
- */
- if (!init_alloc_aligned(INTSTACK_SIZE*(cpu_count-1), &stack_start))
- panic("not enough memory for interrupt stacks");
- stack_start = phystokv(stack_start);
-
- /*
- * Set up pointers to the top of the interrupt stack.
- */
- for (i = 0; i < NCPUS; i++) {
- if (i == master_cpu) {
- interrupt_stack[i] = (vm_offset_t) _intstack;
- int_stack_top[i] = (vm_offset_t) _eintstack;
- }
- else if (machine_slot[i].is_cpu) {
- interrupt_stack[i] = stack_start;
- int_stack_top[i] = stack_start + INTSTACK_SIZE;
-
- stack_start += INTSTACK_SIZE;
- }
- }
-
- /*
- * Set up the barrier address. All thread stacks MUST
- * be above this address.
- */
- int_stack_high = stack_start;
-}
-
/* XXX should be adjusted per CPU speed */
int simple_lock_pause_loop = 100;
@@ -254,25 +209,149 @@ cpu_control(int cpu, const int *info, unsigned int count)
void
interrupt_processor(int cpu)
{
- printf("interrupt cpu %d\n",cpu);
+ smp_pmap_update(apic_get_cpu_apic_id(cpu));
+}
+
+static void
+paging_enable(void)
+{
+#ifndef MACH_HYP
+ /* Turn paging on.
+ * TODO: Why does setting the WP bit here cause a crash?
+ */
+#if PAE
+ set_cr4(get_cr4() | CR4_PAE);
+#endif
+ set_cr0(get_cr0() | CR0_PG /* | CR0_WP */);
+ set_cr0(get_cr0() & ~(CR0_CD | CR0_NW));
+ if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
+ set_cr4(get_cr4() | CR4_PGE);
+#endif /* MACH_HYP */
+}
+
+void
+cpu_setup(int cpu)
+{
+ pmap_make_temporary_mapping();
+ printf("AP=(%u) tempmap done\n", cpu);
+
+ paging_enable();
+ flush_instr_queue();
+ printf("AP=(%u) paging done\n", cpu);
+
+ init_percpu(cpu);
+ mp_desc_init(cpu);
+ printf("AP=(%u) mpdesc done\n", cpu);
+
+ ap_gdt_init(cpu);
+ printf("AP=(%u) gdt done\n", cpu);
+
+ ap_idt_init(cpu);
+ printf("AP=(%u) idt done\n", cpu);
+
+ ap_int_init(cpu);
+ printf("AP=(%u) int done\n", cpu);
+
+ ap_ldt_init(cpu);
+ printf("AP=(%u) ldt done\n", cpu);
+
+ ap_ktss_init(cpu);
+ printf("AP=(%u) ktss done\n", cpu);
+
+ pmap_remove_temporary_mapping();
+ printf("AP=(%u) remove tempmap done\n", cpu);
+
+ pmap_set_page_dir();
+ flush_tlb();
+ printf("AP=(%u) reset page dir done\n", cpu);
+
+ /* Initialize machine_slot fields with the cpu data */
+ machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386;
+ machine_slot[cpu].cpu_type = machine_slot[0].cpu_type;
+
+ init_fpu();
+ lapic_setup();
+ lapic_enable();
+ cpu_launch_first_thread(THREAD_NULL);
+}
+
+void
+cpu_ap_main()
+{
+ int cpu = cpu_number();
+
+ do {
+ cpu_pause();
+ } while (bspdone != cpu);
+
+ __sync_synchronize();
+
+ cpu_setup(cpu);
}
kern_return_t
cpu_start(int cpu)
{
- if (machine_slot[cpu].running)
- return KERN_FAILURE;
+ int err;
- return intel_startCPU(cpu);
+ assert(machine_slot[cpu].running != TRUE);
+
+ uint16_t apic_id = apic_get_cpu_apic_id(cpu);
+
+ printf("Trying to enable: %d at 0x%lx\n", apic_id, apboot_addr);
+
+ err = smp_startup_cpu(apic_id, apboot_addr);
+
+ if (!err) {
+ printf("Started cpu %d (lapic id %04x)\n", cpu, apic_id);
+ return KERN_SUCCESS;
+ }
+ printf("FATAL: Cannot init AP %d\n", cpu);
+ for (;;);
}
void
start_other_cpus(void)
{
- int cpu;
- for (cpu = 0; cpu < NCPUS; cpu++)
- if (cpu != cpu_number())
- cpu_start(cpu);
-}
+ int ncpus = smp_get_numcpus();
+ //Copy cpu initialization assembly routine
+ memcpy((void*) phystokv(apboot_addr), (void*) &apboot,
+ (uint32_t)&apbootend - (uint32_t)&apboot);
+
+ unsigned cpu;
+
+ splhigh();
+
+ /* Disable IOAPIC interrupts (IPIs not affected).
+ * Clearing this flag is similar to masking all
+ * IOAPIC interrupts individually.
+ *
+ * This is done to prevent IOAPIC interrupts from
+	 * interfering with SMP startup. splhigh() may be enough for the BSP,
+ * but I'm not sure. We cannot control the lapic
+ * on APs because we don't have execution on them yet.
+ */
+ lapic_disable();
+
+ bspdone = 0;
+ for (cpu = 1; cpu < ncpus; cpu++) {
+ machine_slot[cpu].running = FALSE;
+
+ //Start cpu
+ printf("Starting AP %d\n", cpu);
+ cpu_start(cpu);
+
+ bspdone++;
+ do {
+ cpu_pause();
+ } while (machine_slot[cpu].running == FALSE);
+
+ __sync_synchronize();
+ }
+ printf("BSP: Completed SMP init\n");
+
+ /* Re-enable IOAPIC interrupts as per setup */
+ lapic_enable();
+}
#endif /* NCPUS > 1 */
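
A condensed view of the BSP/AP handshake implemented above; the running flag is presumably set on the AP side once it launches its first thread:

/* BSP (start_other_cpus)                AP (cpu_ap_main -> cpu_setup)
 *
 *   machine_slot[cpu].running = FALSE;
 *   cpu_start(cpu);   --INIT+SIPI-->      spin while (bspdone != cpu)
 *   bspdone++;        ------------->      cpu_setup(cpu); ...
 *   spin while (!running[cpu])            running flag set when the AP
 *                     <-------------      launches its first thread
 *   next AP
 */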
diff --git a/i386/i386/mp_desc.h b/i386/i386/mp_desc.h
index ebe1471d..dc3a7dc8 100644
--- a/i386/i386/mp_desc.h
+++ b/i386/i386/mp_desc.h
@@ -27,6 +27,8 @@
#ifndef _I386_MP_DESC_H_
#define _I386_MP_DESC_H_
+#include <mach/kern_return.h>
+
#if MULTIPROCESSOR
/*
@@ -70,11 +72,14 @@ extern struct task_tss *mp_ktss[NCPUS];
*/
extern struct real_descriptor *mp_gdt[NCPUS];
+extern uint8_t solid_intstack[];
+
+extern int bspdone;
/*
* Each CPU calls this routine to set up its descriptor tables.
*/
-extern struct mp_desc_table * mp_desc_init(int);
+extern int mp_desc_init(int);
extern void interrupt_processor(int cpu);
@@ -88,4 +93,6 @@ extern kern_return_t cpu_start(int cpu);
extern kern_return_t cpu_control(int cpu, const int *info, unsigned int count);
+extern void interrupt_stack_alloc(void);
+
#endif /* _I386_MP_DESC_H_ */
diff --git a/i386/i386/msr.h b/i386/i386/msr.h
new file mode 100644
index 00000000..8f09b80b
--- /dev/null
+++ b/i386/i386/msr.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _MACHINE_MSR_H_
+#define _MACHINE_MSR_H_
+
+#define MSR_REG_EFER 0xC0000080
+#define MSR_REG_STAR 0xC0000081
+#define MSR_REG_LSTAR 0xC0000082
+#define MSR_REG_CSTAR 0xC0000083
+#define MSR_REG_FMASK 0xC0000084
+#define MSR_REG_FSBASE 0xC0000100
+#define MSR_REG_GSBASE 0xC0000101
+
+#define MSR_EFER_SCE 0x00000001
+
+#ifndef __ASSEMBLER__
+
+static inline void wrmsr(uint32_t regaddr, uint64_t value)
+{
+ uint32_t low = (uint32_t) value, high = ((uint32_t) (value >> 32));
+ asm volatile("wrmsr"
+ :
+ : "c" (regaddr), "a" (low), "d" (high)
+ : "memory" /* wrmsr may cause a read from memory, so
+ * make the compiler flush any changes */
+ );
+}
+
+static inline uint64_t rdmsr(uint32_t regaddr)
+{
+ uint32_t low, high;
+ asm volatile("rdmsr"
+ : "=a" (low), "=d" (high)
+ : "c" (regaddr)
+ );
+ return ((uint64_t)high << 32) | low;
+}
+#endif /* __ASSEMBLER__ */
+
+#endif /* _MACHINE_MSR_H_ */
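
As a usage sketch (not part of this change), code such as the 64-bit syscall setup could enable SYSCALL/SYSRET with a read-modify-write of EFER through these helpers:

#include <i386/msr.h>

static inline void example_enable_syscall(void)
{
	uint64_t efer = rdmsr(MSR_REG_EFER);

	wrmsr(MSR_REG_EFER, efer | MSR_EFER_SCE);	/* set EFER.SCE */
}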
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index 23585323..e8901550 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -51,6 +51,7 @@
#include "eflags.h"
#include "gdt.h"
#include "ldt.h"
+#include "msr.h"
#include "ktss.h"
#include "pcb.h"
@@ -144,16 +145,28 @@ void switch_ktss(pcb_t pcb)
* won`t save the v86 segments, so we leave room.
*/
+#if !defined(__x86_64__) || defined(USER32)
pcb_stack_top = (pcb->iss.efl & EFL_VM)
? (long) (&pcb->iss + 1)
: (long) (&pcb->iss.v86_segs);
+#else
+ pcb_stack_top = (vm_offset_t) (&pcb->iss + 1);
+#endif
+
+#ifdef __x86_64__
+ assert((pcb_stack_top & 0xF) == 0);
+#endif
#ifdef MACH_RING1
/* No IO mask here */
if (hyp_stack_switch(KERNEL_DS, pcb_stack_top))
panic("stack_switch");
#else /* MACH_RING1 */
+#ifdef __x86_64__
+ curr_ktss(mycpu)->tss.rsp0 = pcb_stack_top;
+#else /* __x86_64__ */
curr_ktss(mycpu)->tss.esp0 = pcb_stack_top;
+#endif /* __x86_64__ */
#endif /* MACH_RING1 */
}
@@ -215,6 +228,11 @@ void switch_ktss(pcb_t pcb)
pcb->ims.user_gdt, sizeof pcb->ims.user_gdt);
#endif /* MACH_PV_DESCRIPTORS */
+#if defined(__x86_64__) && !defined(USER32)
+ wrmsr(MSR_REG_FSBASE, pcb->ims.sbs.fsbase);
+ wrmsr(MSR_REG_GSBASE, pcb->ims.sbs.gsbase);
+#endif
+
db_load_context(pcb);
/*
@@ -298,7 +316,7 @@ void stack_handoff(
stack = current_stack();
old->kernel_stack = 0;
new->kernel_stack = stack;
- active_threads[mycpu] = new;
+ percpu_assign(active_thread, new);
/*
* Switch exception link to point to new
@@ -325,7 +343,7 @@ void load_context(thread_t new)
*/
thread_t switch_context(
thread_t old,
- void (*continuation)(),
+ continuation_t continuation,
thread_t new)
{
/*
@@ -365,14 +383,13 @@ thread_t switch_context(
* Load the rest of the user state for the new thread
*/
switch_ktss(new->pcb);
-
return Switch_context(old, continuation, new);
}
void pcb_module_init(void)
{
- kmem_cache_init(&pcb_cache, "pcb", sizeof(struct pcb), 0,
- NULL, 0);
+ kmem_cache_init(&pcb_cache, "pcb", sizeof(struct pcb),
+ KERNEL_STACK_ALIGN, NULL, 0);
fpu_module_init();
}
@@ -400,10 +417,12 @@ void pcb_init(task_t parent_task, thread_t thread)
*/
pcb->iss.cs = USER_CS;
pcb->iss.ss = USER_DS;
+#if !defined(__x86_64__) || defined(USER32)
pcb->iss.ds = USER_DS;
pcb->iss.es = USER_DS;
pcb->iss.fs = USER_DS;
pcb->iss.gs = USER_DS;
+#endif
pcb->iss.efl = EFL_USER_SET;
thread->pcb = pcb;
@@ -435,8 +454,7 @@ void pcb_terminate(thread_t thread)
* Attempt to free excess pcb memory.
*/
-void pcb_collect(thread)
- const thread_t thread;
+void pcb_collect(__attribute__((unused)) const thread_t thread)
{
}
@@ -474,10 +492,12 @@ kern_return_t thread_setstatus(
*/
state->cs &= 0xffff;
state->ss &= 0xffff;
+#if !defined(__x86_64__) || defined(USER32)
state->ds &= 0xffff;
state->es &= 0xffff;
state->fs &= 0xffff;
state->gs &= 0xffff;
+#endif
if (state->cs == 0 || (state->cs & SEL_PL) != SEL_PL_U
|| state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U)
@@ -489,6 +509,27 @@ kern_return_t thread_setstatus(
/*
* General registers
*/
+#if defined(__x86_64__) && !defined(USER32)
+ saved_state->r8 = state->r8;
+ saved_state->r9 = state->r9;
+ saved_state->r10 = state->r10;
+ saved_state->r11 = state->r11;
+ saved_state->r12 = state->r12;
+ saved_state->r13 = state->r13;
+ saved_state->r14 = state->r14;
+ saved_state->r15 = state->r15;
+ saved_state->edi = state->rdi;
+ saved_state->esi = state->rsi;
+ saved_state->ebp = state->rbp;
+ saved_state->uesp = state->ursp;
+ saved_state->ebx = state->rbx;
+ saved_state->edx = state->rdx;
+ saved_state->ecx = state->rcx;
+ saved_state->eax = state->rax;
+ saved_state->eip = state->rip;
+ saved_state->efl = (state->rfl & ~EFL_USER_CLEAR)
+ | EFL_USER_SET;
+#else
saved_state->edi = state->edi;
saved_state->esi = state->esi;
saved_state->ebp = state->ebp;
@@ -500,11 +541,13 @@ kern_return_t thread_setstatus(
saved_state->eip = state->eip;
saved_state->efl = (state->efl & ~EFL_USER_CLEAR)
| EFL_USER_SET;
+#endif /* __x86_64__ && !USER32 */
+#if !defined(__x86_64__) || defined(USER32)
/*
* Segment registers. Set differently in V8086 mode.
*/
- if (state->efl & EFL_VM) {
+ if (saved_state->efl & EFL_VM) {
/*
* Set V8086 mode segment registers.
*/
@@ -528,20 +571,23 @@ kern_return_t thread_setstatus(
* Hardware assist on.
*/
thread->pcb->ims.v86s.flags =
- state->efl & (EFL_TF | EFL_IF);
+ saved_state->efl & (EFL_TF | EFL_IF);
}
- }
- else if (flavor == i386_THREAD_STATE) {
+ } else
+#endif
+ if (flavor == i386_THREAD_STATE) {
/*
* 386 mode. Set segment registers for flat
* 32-bit address space.
*/
saved_state->cs = USER_CS;
saved_state->ss = USER_DS;
+#if !defined(__x86_64__) || defined(USER32)
saved_state->ds = USER_DS;
saved_state->es = USER_DS;
saved_state->fs = USER_DS;
saved_state->gs = USER_DS;
+#endif
}
else {
/*
@@ -552,10 +598,12 @@ kern_return_t thread_setstatus(
*/
saved_state->cs = state->cs;
saved_state->ss = state->ss;
+#if !defined(__x86_64__) || defined(USER32)
saved_state->ds = state->ds;
saved_state->es = state->es;
saved_state->fs = state->fs;
saved_state->gs = state->gs;
+#endif
}
break;
}
@@ -597,7 +645,7 @@ kern_return_t thread_setstatus(
#endif
break;
}
-
+#if !defined(__x86_64__) || defined(USER32)
case i386_V86_ASSIST_STATE:
{
struct i386_v86_assist_state *state;
@@ -611,10 +659,10 @@ kern_return_t thread_setstatus(
int_table = state->int_table;
int_count = state->int_count;
- if (int_table >= VM_MAX_ADDRESS ||
+ if (int_table >= VM_MAX_USER_ADDRESS ||
int_table +
int_count * sizeof(struct v86_interrupt_table)
- > VM_MAX_ADDRESS)
+ > VM_MAX_USER_ADDRESS)
return KERN_INVALID_ARGUMENT;
thread->pcb->ims.v86s.int_table = int_table;
@@ -624,7 +672,7 @@ kern_return_t thread_setstatus(
USER_REGS(thread)->efl & (EFL_TF | EFL_IF);
break;
}
-
+#endif
case i386_DEBUG_STATE:
{
struct i386_debug_state *state;
@@ -639,7 +687,23 @@ kern_return_t thread_setstatus(
return ret;
break;
}
-
+#if defined(__x86_64__) && !defined(USER32)
+ case i386_FSGS_BASE_STATE:
+ {
+ struct i386_fsgs_base_state *state;
+ if (count < i386_FSGS_BASE_STATE_COUNT)
+ return KERN_INVALID_ARGUMENT;
+
+ state = (struct i386_fsgs_base_state *) tstate;
+ thread->pcb->ims.sbs.fsbase = state->fs_base;
+ thread->pcb->ims.sbs.gsbase = state->gs_base;
+ if (thread == current_thread()) {
+ wrmsr(MSR_REG_FSBASE, state->fs_base);
+ wrmsr(MSR_REG_GSBASE, state->gs_base);
+ }
+ break;
+ }
+#endif
default:
return(KERN_INVALID_ARGUMENT);
}
@@ -661,13 +725,20 @@ kern_return_t thread_getstatus(
{
switch (flavor) {
case THREAD_STATE_FLAVOR_LIST:
- if (*count < 4)
+#if !defined(__x86_64__) || defined(USER32)
+ unsigned int ncount = 4;
+#else
+ unsigned int ncount = 3;
+#endif
+ if (*count < ncount)
return (KERN_INVALID_ARGUMENT);
tstate[0] = i386_THREAD_STATE;
tstate[1] = i386_FLOAT_STATE;
tstate[2] = i386_ISA_PORT_MAP_STATE;
+#if !defined(__x86_64__) || defined(USER32)
tstate[3] = i386_V86_ASSIST_STATE;
- *count = 4;
+#endif
+ *count = ncount;
break;
case i386_THREAD_STATE:
@@ -685,6 +756,27 @@ kern_return_t thread_getstatus(
/*
* General registers.
*/
+#if defined(__x86_64__) && !defined(USER32)
+ state->r8 = saved_state->r8;
+ state->r9 = saved_state->r9;
+ state->r10 = saved_state->r10;
+ state->r11 = saved_state->r11;
+ state->r12 = saved_state->r12;
+ state->r13 = saved_state->r13;
+ state->r14 = saved_state->r14;
+ state->r15 = saved_state->r15;
+ state->rdi = saved_state->edi;
+ state->rsi = saved_state->esi;
+ state->rbp = saved_state->ebp;
+ state->rbx = saved_state->ebx;
+ state->rdx = saved_state->edx;
+ state->rcx = saved_state->ecx;
+ state->rax = saved_state->eax;
+ state->rip = saved_state->eip;
+ state->ursp = saved_state->uesp;
+ state->rfl = saved_state->efl;
+ state->rsp = 0; /* unused */
+#else
state->edi = saved_state->edi;
state->esi = saved_state->esi;
state->ebp = saved_state->ebp;
@@ -693,11 +785,14 @@ kern_return_t thread_getstatus(
state->ecx = saved_state->ecx;
state->eax = saved_state->eax;
state->eip = saved_state->eip;
- state->efl = saved_state->efl;
state->uesp = saved_state->uesp;
+ state->efl = saved_state->efl;
+ state->esp = 0; /* unused */
+#endif /* __x86_64__ && !USER32 */
state->cs = saved_state->cs;
state->ss = saved_state->ss;
+#if !defined(__x86_64__) || defined(USER32)
if (saved_state->efl & EFL_VM) {
/*
* V8086 mode.
@@ -714,10 +809,9 @@ kern_return_t thread_getstatus(
if ((thread->pcb->ims.v86s.flags &
(EFL_IF|V86_IF_PENDING))
== 0)
- state->efl &= ~EFL_IF;
+ saved_state->efl &= ~EFL_IF;
}
- }
- else {
+ } else {
/*
* 386 mode.
*/
@@ -726,6 +820,7 @@ kern_return_t thread_getstatus(
state->fs = saved_state->fs & 0xffff;
state->gs = saved_state->gs & 0xffff;
}
+#endif
*count = i386_THREAD_STATE_COUNT;
break;
}
@@ -763,7 +858,7 @@ kern_return_t thread_getstatus(
*count = i386_ISA_PORT_MAP_STATE_COUNT;
break;
}
-
+#if !defined(__x86_64__) || defined(USER32)
case i386_V86_ASSIST_STATE:
{
struct i386_v86_assist_state *state;
@@ -778,7 +873,7 @@ kern_return_t thread_getstatus(
*count = i386_V86_ASSIST_STATE_COUNT;
break;
}
-
+#endif
case i386_DEBUG_STATE:
{
struct i386_debug_state *state;
@@ -792,7 +887,20 @@ kern_return_t thread_getstatus(
*count = i386_DEBUG_STATE_COUNT;
break;
}
-
+#if defined(__x86_64__) && !defined(USER32)
+ case i386_FSGS_BASE_STATE:
+ {
+ struct i386_fsgs_base_state *state;
+ if (*count < i386_FSGS_BASE_STATE_COUNT)
+ return KERN_INVALID_ARGUMENT;
+
+ state = (struct i386_fsgs_base_state *) tstate;
+ state->fs_base = thread->pcb->ims.sbs.fsbase;
+ state->gs_base = thread->pcb->ims.sbs.gsbase;
+ *count = i386_FSGS_BASE_STATE_COUNT;
+ break;
+ }
+#endif
default:
return(KERN_INVALID_ARGUMENT);
}
@@ -822,27 +930,28 @@ thread_set_syscall_return(
vm_offset_t
user_stack_low(vm_size_t stack_size)
{
- return (VM_MAX_ADDRESS - stack_size);
+ return (VM_MAX_USER_ADDRESS - stack_size);
}
/*
* Allocate argument area and set registers for first user thread.
*/
vm_offset_t
-set_user_regs(stack_base, stack_size, exec_info, arg_size)
- vm_offset_t stack_base; /* low address */
- vm_offset_t stack_size;
- const struct exec_info *exec_info;
- vm_size_t arg_size;
+set_user_regs(vm_offset_t stack_base, /* low address */
+ vm_offset_t stack_size,
+ const struct exec_info *exec_info,
+ vm_size_t arg_size)
{
vm_offset_t arg_addr;
struct i386_saved_state *saved_state;
- arg_size = (arg_size + sizeof(int) - 1) & ~(sizeof(int)-1);
+ assert(P2ALIGNED(stack_size, USER_STACK_ALIGN));
+ assert(P2ALIGNED(stack_base, USER_STACK_ALIGN));
+ arg_size = P2ROUND(arg_size, USER_STACK_ALIGN);
arg_addr = stack_base + stack_size - arg_size;
saved_state = USER_REGS(current_thread());
- saved_state->uesp = (long)arg_addr;
+ saved_state->uesp = (rpc_vm_offset_t)arg_addr;
saved_state->eip = exec_info->entry;
return (arg_addr);
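
A user-space sketch of the new i386_FSGS_BASE_STATE flavor; thread_set_state() and mach_thread_self() are the standard Mach calls, and the TLS pointer is purely illustrative:

static kern_return_t example_set_fs_base(void *tls_block)
{
	struct i386_fsgs_base_state st;

	st.fs_base = (unsigned long) tls_block;	/* e.g. a per-thread TLS block */
	st.gs_base = 0;
	return thread_set_state(mach_thread_self(), i386_FSGS_BASE_STATE,
				(thread_state_t) &st,
				i386_FSGS_BASE_STATE_COUNT);
}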
diff --git a/i386/i386/pcb.h b/i386/i386/pcb.h
index 5bc78066..4d48b9f7 100644
--- a/i386/i386/pcb.h
+++ b/i386/i386/pcb.h
@@ -77,7 +77,7 @@ extern void update_ktss_iopb (unsigned char *new_iopb, io_port_t size);
extern thread_t Load_context (thread_t new);
-extern thread_t Switch_context (thread_t old, void (*continuation)(), thread_t new);
+extern thread_t Switch_context (thread_t old, continuation_t continuation, thread_t new);
extern void switch_to_shutdown_context(thread_t thread,
void (*routine)(processor_t),
diff --git a/i386/i386/percpu.c b/i386/i386/percpu.c
new file mode 100644
index 00000000..c6b728b6
--- /dev/null
+++ b/i386/i386/percpu.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <i386/smp.h>
+#include <i386/apic.h>
+#include <kern/cpu_number.h>
+#include <i386/percpu.h>
+
+struct percpu percpu_array[NCPUS] = {0};
+
+#ifndef MACH_XEN
+void init_percpu(int cpu)
+{
+ int apic_id = apic_get_current_cpu();
+
+ percpu_array[cpu].self = &percpu_array[cpu];
+ percpu_array[cpu].apic_id = apic_id;
+ percpu_array[cpu].cpu_id = cpu;
+}
+#endif
diff --git a/i386/i386/percpu.h b/i386/i386/percpu.h
new file mode 100644
index 00000000..637d2ca6
--- /dev/null
+++ b/i386/i386/percpu.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PERCPU_H_
+#define _PERCPU_H_
+
+struct percpu;
+
+#if NCPUS > 1
+
+#define percpu_assign(stm, val) \
+ asm("mov %[src], %%gs:%c[offs]" \
+ : /* No outputs */ \
+ : [src] "r" (val), [offs] "e" (__builtin_offsetof(struct percpu, stm)) \
+ : );
+
+#define percpu_get(typ, stm) \
+MACRO_BEGIN \
+ typ val_; \
+ \
+ asm("mov %%gs:%c[offs], %[dst]" \
+ : [dst] "=r" (val_) \
+ : [offs] "e" (__builtin_offsetof(struct percpu, stm)) \
+ : ); \
+ \
+ val_; \
+MACRO_END
+
+#define percpu_ptr(typ, stm) \
+MACRO_BEGIN \
+ typ *ptr_ = (typ *)__builtin_offsetof(struct percpu, stm); \
+ \
+ asm("add %%gs:0, %[pointer]" \
+ : [pointer] "+r" (ptr_) \
+ : /* No inputs */ \
+ : ); \
+ \
+ ptr_; \
+MACRO_END
+
+#else
+
+#define percpu_assign(stm, val) \
+MACRO_BEGIN \
+ percpu_array[0].stm = val; \
+MACRO_END
+#define percpu_get(typ, stm) \
+ (percpu_array[0].stm)
+#define percpu_ptr(typ, stm) \
+ (&percpu_array[0].stm)
+
+#endif
+
+#include <kern/processor.h>
+#include <mach/mach_types.h>
+
+struct percpu {
+ struct percpu *self;
+ int apic_id;
+ int cpu_id;
+ struct processor processor;
+ thread_t active_thread;
+ vm_offset_t active_stack;
+/*
+ struct machine_slot machine_slot;
+ struct mp_desc_table mp_desc_table;
+ vm_offset_t int_stack_top;
+ vm_offset_t int_stack_base;
+ ast_t need_ast;
+ ipc_kmsg_t ipc_kmsg_cache;
+ pmap_update_list cpu_update_list;
+ spl_t saved_ipl;
+ spl_t curr_ipl;
+ timer_data_t kernel_timer;
+ timer_t current_timer;
+ unsigned long in_interrupt;
+*/
+};
+
+extern struct percpu percpu_array[NCPUS];
+
+void init_percpu(int cpu);
+
+#endif /* _PERCPU_H_ */
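
A usage sketch of the accessors, assuming %gs already points at this CPU's struct percpu as the SMP boot code arranges; the helper name is hypothetical:

static inline void example_switch_bookkeeping(thread_t new, vm_offset_t stack)
{
	thread_t old = percpu_get(thread_t, active_thread);	/* %gs-relative load */

	(void) old;
	percpu_assign(active_thread, new);			/* %gs-relative store */
	percpu_assign(active_stack, stack);
}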
diff --git a/i386/i386/phys.c b/i386/i386/phys.c
index a5c3a15c..e864489f 100644
--- a/i386/i386/phys.c
+++ b/i386/i386/phys.c
@@ -128,7 +128,7 @@ copy_to_phys(
if (mapped)
{
dst_map = pmap_get_mapwindow(INTEL_PTE_W(dst_addr_p));
- dst_addr_v = dst_map->vaddr;
+ dst_addr_v = dst_map->vaddr + (dst_addr_p & (INTEL_PGBYTES-1));
}
else
dst_addr_v = phystokv(dst_addr_p);
@@ -160,7 +160,7 @@ copy_from_phys(
if (mapped)
{
src_map = pmap_get_mapwindow(INTEL_PTE_R(src_addr_p));
- src_addr_v = src_map->vaddr;
+ src_addr_v = src_map->vaddr + (src_addr_p & (INTEL_PGBYTES-1));
}
else
src_addr_v = phystokv(src_addr_p);
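
The added term keeps the sub-page offset when the physical address is reached through a mapping window; a worked example with 4 KiB pages:

/* dst_addr_p = 0x12345678, INTEL_PGBYTES = 4096:
 * the window maps the page containing 0x12345000, so the byte lives at
 * dst_map->vaddr + (0x12345678 & 0xfff) = dst_map->vaddr + 0x678,
 * not at dst_map->vaddr itself as the old code assumed. */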
diff --git a/i386/i386/pic.c b/i386/i386/pic.c
index 62ed9ed1..66fbc04a 100644
--- a/i386/i386/pic.c
+++ b/i386/i386/pic.c
@@ -74,8 +74,9 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <i386/machspl.h>
#include <i386/pio.h>
-spl_t curr_ipl;
+spl_t curr_ipl[NCPUS] = {0};
int curr_pic_mask;
+int spl_init = 0;
int iunit[NINTR] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
@@ -112,8 +113,10 @@ picinit(void)
/*
** 0. Initialise the current level to match cli()
*/
+ int i;
- curr_ipl = SPLHI;
+ for (i = 0; i < NCPUS; i++)
+ curr_ipl[i] = SPLHI;
curr_pic_mask = 0;
/*
@@ -207,7 +210,7 @@ intnull(int unit_dev)
/*
* Mask a PIC IRQ.
*/
-inline void
+void
mask_irq (unsigned int irq_nr)
{
int new_pic_mask = curr_pic_mask | 1 << irq_nr;
@@ -229,7 +232,7 @@ mask_irq (unsigned int irq_nr)
/*
* Unmask a PIC IRQ.
*/
-inline void
+void
unmask_irq (unsigned int irq_nr)
{
int mask;
diff --git a/i386/i386/pic.h b/i386/i386/pic.h
index 3ded9aba..aec0ef6b 100644
--- a/i386/i386/pic.h
+++ b/i386/i386/pic.h
@@ -184,8 +184,8 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
extern void picinit (void);
extern int curr_pic_mask;
extern void intnull(int unit);
-extern inline void mask_irq (unsigned int irq_nr);
-extern inline void unmask_irq (unsigned int irq_nr);
+extern void mask_irq (unsigned int irq_nr);
+extern void unmask_irq (unsigned int irq_nr);
#endif /* __ASSEMBLER__ */
#endif /* _I386_PIC_H_ */
diff --git a/i386/i386/pit.c b/i386/i386/pit.c
index 0ead8c9b..6c006a98 100644
--- a/i386/i386/pit.c
+++ b/i386/i386/pit.c
@@ -66,23 +66,29 @@ int pit0_mode = PIT_C0|PIT_SQUAREMODE|PIT_READMODE ;
unsigned int clknumb = CLKNUM; /* interrupt interval for timer 0 */
void
-pit_prepare_sleep(int hz)
+pit_prepare_sleep(int persec)
{
- /* Prepare to sleep for 1/hz seconds */
- int val = 0;
- int lsb, msb;
+ /* Prepare to sleep for 1/persec seconds */
+ uint32_t val = 0;
+ uint8_t lsb, msb;
val = inb(PITAUX_PORT);
val &= ~PITAUX_OUT2;
val |= PITAUX_GATE2;
outb (PITAUX_PORT, val);
- outb (PITCTL_PORT, PIT_C2 | PIT_LOADMODE | PIT_RATEMODE);
- val = CLKNUM / hz;
+ outb (PITCTL_PORT, PIT_C2 | PIT_LOADMODE | PIT_ONESHOTMODE);
+ val = CLKNUM / persec;
lsb = val & 0xff;
msb = val >> 8;
outb (PITCTR2_PORT, lsb);
val = inb(POST_PORT); /* ~1us i/o delay */
outb (PITCTR2_PORT, msb);
+}
+
+void
+pit_sleep(void)
+{
+ uint8_t val;
/* Start counting down */
val = inb(PITAUX_PORT);
@@ -90,13 +96,23 @@ pit_prepare_sleep(int hz)
outb (PITAUX_PORT, val); /* Gate low */
val |= PITAUX_GATE2;
outb (PITAUX_PORT, val); /* Gate high */
+
+ /* Wait until counter reaches zero */
+ while ((inb(PITAUX_PORT) & PITAUX_VAL) == 0);
}
void
-pit_sleep(void)
+pit_udelay(int usec)
{
- /* Wait until counter reaches zero */
- while ((inb(PITAUX_PORT) & PITAUX_VAL) == 0);
+ pit_prepare_sleep(1000000 / usec);
+ pit_sleep();
+}
+
+void
+pit_mdelay(int msec)
+{
+ pit_prepare_sleep(1000 / msec);
+ pit_sleep();
}
void
@@ -115,7 +131,7 @@ clkstart(void)
* timers you do not use
*/
outb(pitctl_port, pit0_mode);
- clknumb = CLKNUM/hz;
+ clknumb = (CLKNUM + hz / 2) / hz;
byte = clknumb;
outb(pitctr0_port, byte);
byte = clknumb>>8;
diff --git a/i386/i386/pit.h b/i386/i386/pit.h
index bac4e985..49e1051b 100644
--- a/i386/i386/pit.h
+++ b/i386/i386/pit.h
@@ -73,7 +73,8 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* followed by most significant byte */
#define PIT_SQUAREMODE 0x06 /* square-wave mode */
-#define PIT_RATEMODE 0x02 /* rate generator mode */
+#define PIT_RATEMODE 0x04 /* rate generator mode */
+#define PIT_ONESHOTMODE 0x02 /* one-shot mode */
/* Used for Timer 2. */
#define PIT_C2 0x80 /* select counter 2 */
@@ -85,11 +86,13 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* (defined in param.h)
*/
#if defined(AT386) || defined(ATX86_64)
-#define CLKNUM 1193167
+#define CLKNUM 1193182
#endif /* AT386 */
extern void clkstart(void);
extern void pit_prepare_sleep(int hz);
extern void pit_sleep(void);
+extern void pit_udelay(int usec);
+extern void pit_mdelay(int msec);
#endif /* _I386_PIT_H_ */
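
A worked example of the new delay helpers with the corrected CLKNUM of 1193182 Hz:

/* pit_udelay(200):
 *   persec = 1000000 / 200     = 5000
 *   ticks  = CLKNUM / persec   = 1193182 / 5000 = 238   (~199.5 us)
 * Counter 2 is loaded with 238 in one-shot mode and the gate is toggled,
 * then pit_sleep() polls PITAUX_VAL until the count expires. */
pit_udelay(200);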
diff --git a/i386/i386/proc_reg.h b/i386/i386/proc_reg.h
index a83ca0d8..704676cf 100644
--- a/i386/i386/proc_reg.h
+++ b/i386/i386/proc_reg.h
@@ -84,9 +84,9 @@ get_eflags(void)
{
unsigned long eflags;
#ifdef __x86_64__
- asm("pushfq; pop %0" : "=r" (eflags));
+ asm("pushfq; popq %0" : "=r" (eflags));
#else
- asm("pushfd; pop %0" : "=r" (eflags));
+ asm("pushfl; popl %0" : "=r" (eflags));
#endif
return eflags;
}
@@ -95,9 +95,9 @@ static inline void
set_eflags(unsigned long eflags)
{
#ifdef __x86_64__
- asm volatile("push %0; popfq" : : "r" (eflags));
+ asm volatile("pushq %0; popfq" : : "r" (eflags));
#else
- asm volatile("push %0; popfd" : : "r" (eflags));
+ asm volatile("pushl %0; popfl" : : "r" (eflags));
#endif
}
@@ -111,14 +111,14 @@ set_eflags(unsigned long eflags)
#define get_eflags() \
({ \
register unsigned long _temp__; \
- asm("pushfq; pop %0" : "=r" (_temp__)); \
+ asm("pushfq; popq %0" : "=r" (_temp__)); \
_temp__; \
})
#else
#define get_eflags() \
({ \
register unsigned long _temp__; \
- asm("pushfd; pop %0" : "=r" (_temp__)); \
+ asm("pushfl; popl %0" : "=r" (_temp__)); \
_temp__; \
})
#endif
diff --git a/i386/i386/seg.c b/i386/i386/seg.c
deleted file mode 100644
index d57c255e..00000000
--- a/i386/i386/seg.c
+++ /dev/null
@@ -1,5 +0,0 @@
-
-#define MACH_INLINE
-#include "seg.h"
-#include "tss.h"
-
diff --git a/i386/i386/seg.h b/i386/i386/seg.h
index d2bb3f0d..673d1d9f 100644
--- a/i386/i386/seg.h
+++ b/i386/i386/seg.h
@@ -32,6 +32,7 @@
#define _I386_SEG_H_
#include <mach/inline.h>
+#include <mach/machine/vm_types.h>
/*
* i386 segmentation.
@@ -58,6 +59,9 @@ struct real_descriptor {
granularity:4, /* granularity */
base_high:8; /* base 24..31 */
};
+typedef struct real_descriptor real_descriptor_t;
+typedef real_descriptor_t *real_descriptor_list_t;
+typedef const real_descriptor_list_t const_real_descriptor_list_t;
#ifdef __x86_64__
struct real_descriptor64 {
@@ -156,15 +160,15 @@ struct pseudo_descriptor
/* Load the processor's IDT, GDT, or LDT pointers. */
-MACH_INLINE void lgdt(struct pseudo_descriptor *pdesc)
+static inline void lgdt(struct pseudo_descriptor *pdesc)
{
__asm volatile("lgdt %0" : : "m" (*pdesc));
}
-MACH_INLINE void lidt(struct pseudo_descriptor *pdesc)
+static inline void lidt(struct pseudo_descriptor *pdesc)
{
__asm volatile("lidt %0" : : "m" (*pdesc));
}
-MACH_INLINE void lldt(unsigned short ldt_selector)
+static inline void lldt(unsigned short ldt_selector)
{
__asm volatile("lldt %w0" : : "r" (ldt_selector) : "memory");
}
@@ -177,8 +181,8 @@ MACH_INLINE void lldt(unsigned short ldt_selector)
/* Fill a segment descriptor. */
-MACH_INLINE void
-fill_descriptor(struct real_descriptor *_desc, unsigned base, unsigned limit,
+static inline void
+fill_descriptor(struct real_descriptor *_desc, vm_offset_t base, vm_offset_t limit,
unsigned char access, unsigned char sizebits)
{
/* TODO: when !MACH_PV_DESCRIPTORS, setting desc and just memcpy isn't simpler actually */
@@ -201,12 +205,12 @@ fill_descriptor(struct real_descriptor *_desc, unsigned base, unsigned limit,
desc->base_high = base >> 24;
#ifdef MACH_PV_DESCRIPTORS
if (hyp_do_update_descriptor(kv_to_ma(_desc), *(uint64_t*)desc))
- panic("couldn't update descriptor(%lu to %08lx%08lx)\n", (vm_offset_t) kv_to_ma(_desc), *(((unsigned long*)desc)+1), *(unsigned long *)desc);
+ panic("couldn't update descriptor(%zu to %08lx%08lx)\n", (vm_offset_t) kv_to_ma(_desc), *(((unsigned long*)desc)+1), *(unsigned long *)desc);
#endif /* MACH_PV_DESCRIPTORS */
}
#ifdef __x86_64__
-MACH_INLINE void
+static inline void
fill_descriptor64(struct real_descriptor64 *_desc, unsigned long base, unsigned limit,
unsigned char access, unsigned char sizebits)
{
@@ -240,7 +244,7 @@ fill_descriptor64(struct real_descriptor64 *_desc, unsigned long base, unsigned
#endif
/* Fill a gate with particular values. */
-MACH_INLINE void
+static inline void
fill_gate(struct real_gate *gate, unsigned long offset, unsigned short selector,
unsigned char access, unsigned char word_count)
{
diff --git a/i386/i386/smp.c b/i386/i386/smp.c
index f64fb7f7..05e9de67 100644
--- a/i386/i386/smp.c
+++ b/i386/i386/smp.c
@@ -18,13 +18,20 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
-#include <i386/i386/apic.h>
-#include <i386/i386/smp.h>
-#include <i386/i386at/acpi_parse_apic.h>
+#include <string.h>
+#include <i386/apic.h>
+#include <i386/smp.h>
+#include <i386/cpu.h>
+#include <i386/pio.h>
+#include <i386/vm_param.h>
+#include <i386at/idt.h>
+#include <i386at/cram.h>
+#include <i386at/acpi_parse_apic.h>
+#include <kern/printf.h>
+#include <mach/machine.h>
#include <kern/smp.h>
-
/*
* smp_data_init: initialize smp_data structure
* Must be called after smp_init(), once all APIC structures
@@ -34,6 +41,150 @@ static void smp_data_init(void)
{
uint8_t numcpus = apic_get_numcpus();
smp_set_numcpus(numcpus);
+
+ for(int i = 0; i < numcpus; i++){
+ machine_slot[i].is_cpu = TRUE;
+ }
+
+}
+
+static void smp_send_ipi(unsigned apic_id, unsigned vector)
+{
+ unsigned long flags;
+
+ cpu_intr_save(&flags);
+
+ apic_send_ipi(NO_SHORTHAND, FIXED, PHYSICAL, ASSERT, EDGE, vector, apic_id);
+
+ do {
+ cpu_pause();
+ } while(lapic->icr_low.delivery_status == SEND_PENDING);
+
+ apic_send_ipi(NO_SHORTHAND, FIXED, PHYSICAL, DE_ASSERT, EDGE, vector, apic_id);
+
+ do {
+ cpu_pause();
+ } while(lapic->icr_low.delivery_status == SEND_PENDING);
+
+ cpu_intr_restore(flags);
+}
+
+void smp_remote_ast(unsigned apic_id)
+{
+ smp_send_ipi(apic_id, CALL_AST_CHECK);
+}
+
+void smp_pmap_update(unsigned apic_id)
+{
+ smp_send_ipi(apic_id, CALL_PMAP_UPDATE);
+}
+
+static void
+wait_for_ipi(void)
+{
+ /* This could have a timeout, but if the IPI
+	 * is never delivered, it's a disaster anyway */
+ while (lapic->icr_low.delivery_status == SEND_PENDING) {
+ cpu_pause();
+ }
+}
+
+static int
+smp_send_ipi_init(int apic_id)
+{
+ int err;
+
+ lapic->error_status.r = 0;
+
+ /* Assert INIT IPI:
+ *
+ * This is EDGE triggered to match the deassert
+ */
+ apic_send_ipi(NO_SHORTHAND, INIT, PHYSICAL, ASSERT, EDGE, 0, apic_id);
+
+ /* Wait for delivery */
+ wait_for_ipi();
+ hpet_mdelay(10);
+
+ /* Deassert INIT IPI:
+ *
+ * NB: This must be an EDGE triggered deassert signal.
+ * A LEVEL triggered deassert is only supported on very old hardware
+ * that does not support STARTUP IPIs at all, and instead jump
+ * via a warm reset vector.
+ */
+ apic_send_ipi(NO_SHORTHAND, INIT, PHYSICAL, DE_ASSERT, EDGE, 0, apic_id);
+
+ /* Wait for delivery */
+ wait_for_ipi();
+
+ err = lapic->error_status.r;
+ if (err) {
+ printf("ESR error upon INIT 0x%x\n", err);
+ }
+ return 0;
+}
+
+static int
+smp_send_ipi_startup(int apic_id, int vector)
+{
+ int err;
+
+ lapic->error_status.r = 0;
+
+ /* StartUp IPI:
+ *
+	 * I have not seen any documentation on the trigger mode for this IPI,
+	 * but it seems to work with EDGE. (The AMD BKDG FAM16h document says don't care.)
+ */
+ apic_send_ipi(NO_SHORTHAND, STARTUP, PHYSICAL, ASSERT, EDGE, vector, apic_id);
+
+ /* Wait for delivery */
+ wait_for_ipi();
+
+ err = lapic->error_status.r;
+ if (err) {
+ printf("ESR error upon STARTUP 0x%x\n", err);
+ }
+ return 0;
+}
+
+/* See Intel IA32/64 Software Developer's Manual 3A Section 8.4.4.1 */
+int smp_startup_cpu(unsigned apic_id, phys_addr_t start_eip)
+{
+#if 0
+ /* This block goes with a legacy method of INIT that only works with
+ * old hardware that does not support SIPIs.
+ * Must use INIT DEASSERT LEVEL triggered IPI to use this block.
+ * (At least one AMD FCH does not support this IPI mode,
+ * See AMD BKDG FAM16h document # 48751 page 461).
+ */
+
+	/* Tell CMOS to warm reset through 40:67 */
+ outb(CMOS_ADDR, CMOS_SHUTDOWN);
+ outb(CMOS_DATA, CM_JMP_467);
+
+ /* Set warm reset vector to point to AP startup code */
+ uint16_t dword[2];
+ dword[0] = 0;
+ dword[1] = start_eip >> 4;
+ memcpy((uint8_t *)phystokv(0x467), dword, 4);
+#endif
+
+ /* Local cache flush */
+ asm("wbinvd":::"memory");
+
+ printf("Sending IPIs to APIC ID %u...\n", apic_id);
+
+ smp_send_ipi_init(apic_id);
+ hpet_mdelay(10);
+ smp_send_ipi_startup(apic_id, start_eip >> STARTUP_VECTOR_SHIFT);
+ hpet_udelay(200);
+ smp_send_ipi_startup(apic_id, start_eip >> STARTUP_VECTOR_SHIFT);
+ hpet_udelay(200);
+
+ printf("done\n");
+ return 0;
}
/*
@@ -42,12 +193,7 @@ static void smp_data_init(void)
*/
int smp_init(void)
{
- int apic_success;
-
- apic_success = acpi_apic_init();
- if (apic_success == ACPI_SUCCESS) {
- smp_data_init();
- }
+ smp_data_init();
- return apic_success;
+ return 0;
}
diff --git a/i386/i386/smp.h b/i386/i386/smp.h
index b36ead08..73d273ef 100644
--- a/i386/i386/smp.h
+++ b/i386/i386/smp.h
@@ -18,4 +18,17 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+#ifndef _SMP_H_
+#define _SMP_H_
+
+#include <mach/machine/vm_types.h>
+
int smp_init(void);
+void smp_remote_ast(unsigned apic_id);
+void smp_pmap_update(unsigned apic_id);
+int smp_startup_cpu(unsigned apic_id, phys_addr_t start_eip);
+
+#define cpu_pause() asm volatile ("pause" : : : "memory")
+#define STARTUP_VECTOR_SHIFT (20 - 8)
+
+#endif
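
The STARTUP IPI vector is the 4 KiB page number of the real-mode entry point, which must sit below 1 MiB; hence STARTUP_VECTOR_SHIFT = 20 - 8 = 12, an 8-bit vector selecting bits 19:12 of the physical address. For example, with an illustrative apboot_addr of 0x7000:

/* smp_startup_cpu(apic_id, 0x7000) sends STARTUP IPIs with vector
 * 0x7000 >> STARTUP_VECTOR_SHIFT == 0x07, so the AP begins real-mode
 * execution at physical address 0x07 * 0x1000 = 0x7000. */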
diff --git a/i386/i386/spl.S b/i386/i386/spl.S
index 215142c9..2f2c8e3a 100644
--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -21,6 +21,8 @@
#include <i386/ipl.h>
#include <i386/i386asm.h>
#include <i386/xen.h>
+#include <i386/cpu_number.h>
+#include <i386/gdt.h>
#if NCPUS > 1
#define mb lock; addl $0,(%esp)
@@ -46,7 +48,8 @@ lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \
ENTRY(spl0)
mb;
- movl EXT(curr_ipl),%eax /* save current ipl */
+ CPU_NUMBER(%edx)
+ movl CX(EXT(curr_ipl),%edx),%eax /* save current ipl */
pushl %eax
cli /* disable interrupts */
#ifdef LINUX_DEV
@@ -74,9 +77,10 @@ ENTRY(spl0)
#endif
cli /* disable interrupts */
1:
- cmpl $(SPL0),EXT(curr_ipl) /* are we at spl0? */
- je 1f /* yes, all done */
- movl $(SPL0),EXT(curr_ipl) /* set ipl */
+ CPU_NUMBER(%edx)
+ cmpl $(SPL0),CX(EXT(curr_ipl),%edx) /* are we at spl0? */
+ je 1f /* yes, all done */
+ movl $(SPL0),CX(EXT(curr_ipl),%edx) /* set ipl */
#ifdef MACH_XEN
movl EXT(int_mask)+SPL0*4,%eax
/* get xen mask */
@@ -119,16 +123,17 @@ ENTRY(spl7)
mb;
/* just clear IF */
cli
+ CPU_NUMBER(%edx)
movl $SPL7,%eax
- xchgl EXT(curr_ipl),%eax
+ xchgl CX(EXT(curr_ipl),%edx),%eax
ret
ENTRY(splx)
movl S_ARG0,%edx /* get ipl */
-
+ CPU_NUMBER(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
	/* First make sure that if we're exiting from ipl7, IF is still cleared */
- cmpl $SPL7,EXT(curr_ipl) /* from ipl7? */
+ cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? */
jne 0f
pushfl
popl %eax
@@ -140,7 +145,8 @@ ENTRY(splx)
#endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */
testl %edx,%edx /* spl0? */
jz EXT(spl0) /* yes, handle specially */
- cmpl EXT(curr_ipl),%edx /* same ipl as current? */
+ CPU_NUMBER(%eax)
+ cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */
jne spl /* no */
cmpl $SPL7,%edx /* spl7? */
je 1f /* to ipl7, don't enable interrupts */
@@ -188,9 +194,10 @@ splx_cli:
1:
xorl %edx,%edx /* edx = ipl 0 */
2:
- cmpl EXT(curr_ipl),%edx /* same ipl as current? */
- je 1f /* yes, all done */
- movl %edx,EXT(curr_ipl) /* set ipl */
+ CPU_NUMBER(%eax)
+ cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */
+ je 1f /* yes, all done */
+ movl %edx,CX(EXT(curr_ipl),%eax) /* set ipl */
#ifdef MACH_XEN
movl EXT(int_mask)(,%edx,4),%eax
/* get int mask */
@@ -206,9 +213,10 @@ splx_cli:
.align TEXT_ALIGN
.globl spl
spl:
+ CPU_NUMBER(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
	/* First make sure that if we're exiting from ipl7, IF is still cleared */
- cmpl $SPL7,EXT(curr_ipl) /* from ipl7? */
+ cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? */
jne 0f
pushfl
popl %eax
@@ -225,7 +233,8 @@ spl:
/* get int mask */
#endif
cli /* disable interrupts */
- xchgl EXT(curr_ipl),%edx /* set ipl */
+ CPU_NUMBER(%eax)
+ xchgl CX(EXT(curr_ipl),%eax),%edx /* set ipl */
#ifdef MACH_XEN
XEN_SETMASK() /* program PICs with new mask */
#endif
diff --git a/i386/i386/spl.h b/i386/i386/spl.h
index 173629fe..834204c4 100644
--- a/i386/i386/spl.h
+++ b/i386/i386/spl.h
@@ -55,10 +55,12 @@ extern spl_t (spl6)(void);
extern spl_t (spltty)(void);
extern spl_t (splimp)(void);
extern spl_t (splvm)(void);
+#define assert_splvm() assert(splvm() == SPL7)
extern spl_t (spl7)(void);
extern spl_t (splclock)(void);
extern spl_t (splsched)(void);
+#define assert_splsched() assert(splsched() == SPL7)
extern spl_t (splhigh)(void);
extern spl_t (splx)(spl_t n);
@@ -69,6 +71,7 @@ extern void splon (unsigned long n);
extern unsigned long sploff (void);
extern void setsoftclock (void);
+extern int spl_init;
/* XXX Include each other... */
#include <i386/ipl.h>
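
The new assertion macros re-enter the corresponding spl and check that the level was already SPL7, so they fit at the top of functions that must only be called at splsched()/splvm(); a minimal sketch with a hypothetical helper:

static void example_requires_splsched(void)
{
	assert_splsched();	/* expands to assert(splsched() == SPL7) */
	/* ... touch scheduler state that must not be interrupted ... */
}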
diff --git a/i386/i386/strings.c b/i386/i386/strings.c
index 84a3bc16..f1752de2 100644
--- a/i386/i386/strings.c
+++ b/i386/i386/strings.c
@@ -94,57 +94,3 @@ memcmp(const void *s1, const void *s2, size_t n)
return (int)c1 - (int)c2;
}
#endif /* ARCH_STRING_MEMCMP */
-
-#ifdef ARCH_STRING_STRLEN
-size_t
-strlen(const char *s)
-{
- size_t n;
-
- n = (size_t)-1;
- asm volatile("repne scasb"
- : "+D" (s), "+c" (n)
- : "a" (0)
- : "memory");
- return ~n - 1;
-}
-#endif /* ARCH_STRING_STRLEN */
-
-#ifdef ARCH_STRING_STRCPY
-char *
-strcpy(char *dest, const char *src)
-{
- char *orig_dest;
-
- orig_dest = dest;
- asm volatile("1:\n"
- "lodsb\n"
- "stosb\n"
- "testb %%al, %%al\n"
- "jnz 1b\n"
- : "+D" (dest), "+S" (src)
- : : "al", "memory");
- return orig_dest;
-}
-#endif /* ARCH_STRING_STRCPY */
-
-#ifdef ARCH_STRING_STRCMP
-int
-strcmp(const char *s1, const char *s2)
-{
- unsigned char c1, c2;
-
- asm volatile("1:\n"
- "lodsb\n"
- "scasb\n"
- "jne 1f\n"
- "testb %%al, %%al\n"
- "jnz 1b\n"
- "1:\n"
- : "+D" (s1), "+S" (s2)
- : : "al", "memory");
- c1 = *(((const unsigned char *)s1) - 1);
- c2 = *(((const unsigned char *)s2) - 1);
- return (int)c1 - (int)c2;
-}
-#endif /* ARCH_STRING_STRCMP */
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index 4a9c1987..9c88d09a 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -51,10 +51,12 @@
*/
struct i386_saved_state {
+#if !defined(__x86_64__) || defined(USER32)
unsigned long gs;
unsigned long fs;
unsigned long es;
unsigned long ds;
+#endif
#ifdef __x86_64__
unsigned long r15;
unsigned long r14;
@@ -81,12 +83,14 @@ struct i386_saved_state {
unsigned long efl;
unsigned long uesp;
unsigned long ss;
+#if !defined(__x86_64__) || defined(USER32)
struct v86_segs {
unsigned long v86_es; /* virtual 8086 segment registers */
unsigned long v86_ds;
unsigned long v86_fs;
unsigned long v86_gs;
} v86_segs;
+#endif
};
/*
@@ -140,6 +144,7 @@ struct i386_fpsave_state {
};
};
+#if !defined(__x86_64__) || defined(USER32)
/*
* v86_assist_state:
*
@@ -153,6 +158,14 @@ struct v86_assist_state {
unsigned short flags; /* 8086 flag bits */
};
#define V86_IF_PENDING 0x8000 /* unused bit */
+#endif
+
+#if defined(__x86_64__) && !defined(USER32)
+struct i386_segment_base_state {
+ unsigned long fsbase;
+ unsigned long gsbase;
+};
+#endif
/*
* i386_interrupt_state:
@@ -163,10 +176,12 @@ struct v86_assist_state {
*/
struct i386_interrupt_state {
+#if !defined(__x86_64__) || defined(USER32)
long gs;
long fs;
long es;
long ds;
+#endif
#ifdef __x86_64__
long r11;
long r10;
@@ -193,14 +208,28 @@ struct i386_interrupt_state {
struct i386_machine_state {
struct user_ldt * ldt;
struct i386_fpsave_state *ifps;
+#if !defined(__x86_64__) || defined(USER32)
struct v86_assist_state v86s;
+#endif
struct real_descriptor user_gdt[USER_GDT_SLOTS];
struct i386_debug_state ids;
+#if defined(__x86_64__) && !defined(USER32)
+ struct i386_segment_base_state sbs;
+#endif
};
typedef struct pcb {
+ /* START of the exception stack.
+	 * NOTE: this area is used as an exception stack when switching
+	 * CPL, and it MUST be big enough to save the thread state and
+	 * switch to a proper stack area, even considering recursive
+	 * exceptions; otherwise it could corrupt nearby memory */
struct i386_interrupt_state iis[2]; /* interrupt and NMI */
+#ifdef __x86_64__
+ unsigned long pad; /* ensure exception stack is aligned to 16 */
+#endif
struct i386_saved_state iss;
+	/* END of exception stack */
struct i386_machine_state ims;
decl_simple_lock_data(, lock)
unsigned short init_control; /* Initial FPU control to set */
@@ -222,6 +251,19 @@ typedef struct pcb {
#define STACK_IEL(stack) \
((struct i386_exception_link *)STACK_IKS(stack) - 1)
+#ifdef __x86_64__
+#define KERNEL_STACK_ALIGN 16
+#else
+#define KERNEL_STACK_ALIGN 4
+#endif
+
+#if defined(__x86_64__) && !defined(USER32)
+/* Follow System V AMD64 ABI guidelines. */
+#define USER_STACK_ALIGN 16
+#else
+#define USER_STACK_ALIGN 4
+#endif
+
#define USER_REGS(thread) (&(thread)->pcb->iss)
diff --git a/i386/i386/trap.c b/i386/i386/trap.c
index cbf45914..842a0e04 100644
--- a/i386/i386/trap.c
+++ b/i386/i386/trap.c
@@ -58,6 +58,7 @@
#include <kern/exception.h>
#if MACH_KDB
+#include <ddb/db_break.h>
#include <ddb/db_run.h>
#include <ddb/db_watch.h>
#endif
@@ -87,7 +88,7 @@ extern boolean_t kttd_enabled;
boolean_t debug_all_traps_with_kttd = TRUE;
#endif /* MACH_TTD */
-void
+static void
user_page_fault_continue(kern_return_t kr)
{
thread_t thread = current_thread();
@@ -153,9 +154,9 @@ char *trap_name(unsigned int trapnum)
*/
void kernel_trap(struct i386_saved_state *regs)
{
- int code;
- int subcode;
- int type;
+ unsigned long code;
+ unsigned long subcode;
+ unsigned long type;
vm_map_t map;
kern_return_t result;
thread_t thread;
@@ -171,7 +172,7 @@ void kernel_trap(struct i386_saved_state *regs)
((short*)0xb8700)[2] = 0x0f30+(type % 10);
#endif
#if 0
-printf("kernel trap %d error %d\n", type, code);
+printf("kernel trap %d error %d\n", (int) type, (int) code);
dump_ss(regs);
#endif
@@ -199,7 +200,8 @@ dump_ss(regs);
/* If it's in the kernel linear address region,
convert it to a kernel virtual address
and use the kernel map to process the fault. */
- if (subcode >= LINEAR_MIN_KERNEL_ADDRESS) {
+ if (lintokv(subcode) == 0 ||
+ subcode >= LINEAR_MIN_KERNEL_ADDRESS) {
#if 0
printf("%08x in kernel linear address range\n", subcode);
#endif
@@ -211,7 +213,7 @@ dump_ss(regs);
if (trunc_page(subcode) == 0
|| (subcode >= (long)_start
&& subcode < (long)etext)) {
- printf("Kernel page fault at address 0x%x, "
+ printf("Kernel page fault at address 0x%lx, "
"eip = 0x%lx\n",
subcode, regs->eip);
goto badtrap;
@@ -220,7 +222,7 @@ dump_ss(regs);
if (thread)
map = thread->task->map;
if (!thread || map == kernel_map) {
- printf("kernel page fault at %08x:\n", subcode);
+ printf("kernel page fault at %08lx:\n", subcode);
dump_ss(regs);
panic("kernel thread accessed user space!\n");
}
@@ -289,6 +291,9 @@ dump_ss(regs);
return;
}
+ /* Fall-through */
+ case T_GENERAL_PROTECTION:
+
/*
* If there is a failure recovery address
* for this fault, go there.
@@ -329,8 +334,8 @@ dump_ss(regs);
if (type < TRAP_TYPES)
printf("%s trap", trap_type[type]);
else
- printf("trap %d", type);
- printf(", eip 0x%lx\n", regs->eip);
+ printf("trap %ld", type);
+ printf(", eip 0x%lx, code %lx, cr2 %lx\n", regs->eip, code, regs->cr2);
#if MACH_TTD
if (kttd_enabled && kttd_trap(type, code, regs))
return;
@@ -340,7 +345,7 @@ dump_ss(regs);
return;
#endif /* MACH_KDB */
splhigh();
- printf("kernel trap, type %d, code = %x\n",
+ printf("kernel trap, type %ld, code = %lx\n",
type, code);
dump_ss(regs);
panic("trap");
@@ -356,11 +361,15 @@ dump_ss(regs);
int user_trap(struct i386_saved_state *regs)
{
int exc = 0; /* Suppress gcc warning */
- int code;
- int subcode;
- int type;
+ unsigned long code;
+ unsigned long subcode;
+ unsigned long type;
thread_t thread = current_thread();
+#ifdef __x86_64__
+ assert(regs == &thread->pcb->iss);
+#endif
+
type = regs->trapno;
code = 0;
subcode = 0;
@@ -412,8 +421,6 @@ int user_trap(struct i386_saved_state *regs)
#endif /* MACH_TTD */
#if MACH_KDB
{
- boolean_t db_find_breakpoint_here();
-
if (db_find_breakpoint_here(
(current_thread())? current_thread()->task: TASK_NULL,
regs->eip - 1)) {
@@ -489,6 +496,7 @@ int user_trap(struct i386_saved_state *regs)
opcode = inst_fetch(regs->eip, regs->cs);
for (i = 0; i < 4; i++)
addr[i] = inst_fetch(regs->eip+i+1, regs->cs);
+ (void) addr;
for (i = 0; i < 2; i++)
seg[i] = inst_fetch(regs->eip+i+5, regs->cs);
if (opcode == 0x9a && seg[0] == 0x7 && seg[1] == 0) {
@@ -552,7 +560,7 @@ int user_trap(struct i386_saved_state *regs)
return 0;
#endif /* MACH_KDB */
splhigh();
- printf("user trap, type %d, code = %lx\n",
+ printf("user trap, type %ld, code = %lx\n",
type, regs->err);
dump_ss(regs);
panic("trap");
@@ -560,11 +568,12 @@ int user_trap(struct i386_saved_state *regs)
}
#if MACH_TTD
- if (debug_all_traps_with_kttd && kttd_trap(type, regs->err, regs))
+ if ((debug_all_traps_with_kttd || thread->task->essential) &&
+ kttd_trap(type, regs->err, regs))
return 0;
#endif /* MACH_TTD */
#if MACH_KDB
- if (debug_all_traps_with_kdb &&
+ if ((debug_all_traps_with_kdb || thread->task->essential) &&
kdb_trap(type, regs->err, regs))
return 0;
#endif /* MACH_KDB */
@@ -623,7 +632,7 @@ void
i386_exception(
int exc,
int code,
- int subcode)
+ long subcode)
{
spl_t s;
@@ -643,8 +652,7 @@ i386_exception(
* return saved state for interrupted user thread
*/
unsigned
-interrupted_pc(t)
- const thread_t t;
+interrupted_pc(const thread_t t)
{
struct i386_saved_state *iss;
@@ -662,3 +670,9 @@ db_debug_all_traps (boolean_t enable)
}
#endif /* MACH_KDB */
+
+void handle_double_fault(struct i386_saved_state *regs)
+{
+ dump_ss(regs);
+ panic("DOUBLE FAULT! This is critical\n");
+}
diff --git a/i386/i386/trap.h b/i386/i386/trap.h
index 46612db5..db222737 100644
--- a/i386/i386/trap.h
+++ b/i386/i386/trap.h
@@ -30,6 +30,7 @@
#include <mach/machine/trap.h>
#ifndef __ASSEMBLER__
+#include <i386/thread.h>
#include <mach/mach_types.h>
char *trap_name(unsigned int trapnum);
@@ -40,11 +41,31 @@ void
i386_exception(
int exc,
int code,
- int subcode) __attribute__ ((noreturn));
+ long subcode) __attribute__ ((noreturn));
extern void
thread_kdb_return(void);
+/*
+ * Trap from kernel mode. Only page-fault errors are recoverable,
+ * and then only in special circumstances. All other errors are
+ * fatal.
+ */
+void kernel_trap(struct i386_saved_state *regs);
+
+/*
+ * Trap from user mode.
+ * Return TRUE if from emulated system call.
+ */
+int user_trap(struct i386_saved_state *regs);
+
+/*
+ * Handle AST traps for i386.
+ * Check for delayed floating-point exception from
+ * AT-bus machines.
+ */
+void i386_astintr(void);
+
#endif /* !__ASSEMBLER__ */
#endif /* _I386_TRAP_H_ */
diff --git a/i386/i386/tss.h b/i386/i386/tss.h
index ff25f217..fd7e7147 100644
--- a/i386/i386/tss.h
+++ b/i386/i386/tss.h
@@ -27,13 +27,33 @@
#ifndef _I386_TSS_H_
#define _I386_TSS_H_
+#include <sys/types.h>
#include <mach/inline.h>
#include <machine/io_perm.h>
/*
- * i386 Task State Segment
+ * x86 Task State Segment
*/
+#ifdef __x86_64__
+struct i386_tss {
+ uint32_t _reserved0;
+ uint64_t rsp0;
+ uint64_t rsp1;
+ uint64_t rsp2;
+ uint64_t _reserved1;
+ uint64_t ist1;
+ uint64_t ist2;
+ uint64_t ist3;
+ uint64_t ist4;
+ uint64_t ist5;
+ uint64_t ist6;
+ uint64_t ist7;
+ uint64_t _reserved2;
+ uint16_t _reserved3;
+ uint16_t io_bit_map_offset;
+} __attribute__((__packed__));
+#else /* ! __x86_64__ */
struct i386_tss {
int back_link; /* segment number of previous task,
if nested */
@@ -67,7 +87,7 @@ struct i386_tss {
/* offset to start of IO permission
bit map */
};
-
+#endif /* __x86_64__ */
/* The structure extends the above TSS structure by an I/O permission bitmap
and the barrier. */
@@ -80,7 +100,7 @@ struct task_tss
/* Load the current task register. */
-MACH_INLINE void
+static inline void
ltr(unsigned short segment)
{
__asm volatile("ltr %0" : : "r" (segment) : "memory");
diff --git a/i386/i386/user_ldt.c b/i386/i386/user_ldt.c
index 09500b4d..4c89bd44 100644
--- a/i386/i386/user_ldt.c
+++ b/i386/i386/user_ldt.c
@@ -39,6 +39,7 @@
#include <i386/seg.h>
#include <i386/thread.h>
#include <i386/user_ldt.h>
+#include <i386/i386/mach_i386.server.h>
#include <stddef.h>
#include "ldt.h"
#include "vm_param.h"
@@ -51,10 +52,11 @@ kern_return_t
i386_set_ldt(
thread_t thread,
int first_selector,
- struct real_descriptor *desc_list,
+ const struct descriptor *descriptor_list,
unsigned int count,
boolean_t desc_list_inline)
{
+ struct real_descriptor* desc_list = (struct real_descriptor *)descriptor_list;
user_ldt_t new_ldt, old_ldt, temp;
struct real_descriptor *dp;
unsigned i;
@@ -253,13 +255,14 @@ i386_set_ldt(
}
kern_return_t
-i386_get_ldt(thread, first_selector, selector_count, desc_list, count)
- const thread_t thread;
- int first_selector;
- int selector_count; /* number wanted */
- struct real_descriptor **desc_list; /* in/out */
- unsigned int *count; /* in/out */
+i386_get_ldt(const thread_t thread,
+ int first_selector,
+ int selector_count, /* number wanted */
+ struct descriptor **descriptor_list, /* in/out */
+ unsigned int *count /* in/out */
+ )
{
+ struct real_descriptor** desc_list = (struct real_descriptor **)descriptor_list;
struct user_ldt *user_ldt;
pcb_t pcb;
int first_desc = sel_idx(first_selector);
@@ -385,8 +388,9 @@ user_ldt_free(user_ldt_t user_ldt)
kern_return_t
-i386_set_gdt (thread_t thread, int *selector, struct real_descriptor desc)
+i386_set_gdt (thread_t thread, int *selector, struct descriptor descriptor)
{
+ const struct real_descriptor *desc = (struct real_descriptor *)&descriptor;
int idx;
if (thread == THREAD_NULL)
@@ -410,14 +414,14 @@ i386_set_gdt (thread_t thread, int *selector, struct real_descriptor desc)
else
idx = sel_idx (*selector) - sel_idx(USER_GDT);
- if ((desc.access & ACC_P) == 0)
+ if ((desc->access & ACC_P) == 0)
memset (&thread->pcb->ims.user_gdt[idx], 0,
sizeof thread->pcb->ims.user_gdt[idx]);
- else if ((desc.access & (ACC_TYPE_USER|ACC_PL)) != (ACC_TYPE_USER|ACC_PL_U) || (desc.granularity & SZ_64))
+ else if ((desc->access & (ACC_TYPE_USER|ACC_PL)) != (ACC_TYPE_USER|ACC_PL_U) || (desc->granularity & SZ_64))
return KERN_INVALID_ARGUMENT;
else
- thread->pcb->ims.user_gdt[idx] = desc;
+ memcpy (&thread->pcb->ims.user_gdt[idx], desc, sizeof (struct descriptor));
/*
* If we are modifying the GDT for the current thread,
@@ -430,8 +434,9 @@ i386_set_gdt (thread_t thread, int *selector, struct real_descriptor desc)
}
kern_return_t
-i386_get_gdt (const thread_t thread, int selector, struct real_descriptor *desc)
+i386_get_gdt (const thread_t thread, int selector, struct descriptor *descriptor)
{
+ struct real_descriptor *desc = (struct real_descriptor *)descriptor;
if (thread == THREAD_NULL)
return KERN_INVALID_ARGUMENT;
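
The validity rule i386_set_gdt applies above can be read as a small predicate: a non-present descriptor clears the slot, anything else must be a user-type descriptor at user privilege and must not request a 64-bit code segment. A sketch of that rule, assuming struct real_descriptor and the ACC_*/SZ_* flags from i386/seg.h (illustrative only, not part of the patch):

    static int user_gdt_desc_ok(const struct real_descriptor *d)
    {
        if ((d->access & ACC_P) == 0)
            return 1;   /* not present: the GDT slot is simply zeroed */
        if ((d->access & (ACC_TYPE_USER|ACC_PL)) != (ACC_TYPE_USER|ACC_PL_U))
            return 0;   /* must be user type at user privilege */
        if (d->granularity & SZ_64)
            return 0;   /* 64-bit code segments are rejected */
        return 1;
    }
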
diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h
index edd9522c..056aa52e 100644
--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -31,17 +31,21 @@
#include <xen/public/xen.h>
#endif
+/* To avoid ambiguity in kernel code, make the name explicit */
+#define VM_MIN_USER_ADDRESS VM_MIN_ADDRESS
+#define VM_MAX_USER_ADDRESS VM_MAX_ADDRESS
+
/* The kernel address space is usually 1GB, usually starting at virtual address 0. */
/* This can be changed freely to separate kernel addresses from user addresses
* for better trace support in kdb; the _START symbol has to be offset by the
* same amount. */
#ifdef __x86_64__
-#define VM_MIN_KERNEL_ADDRESS 0x40000000UL
+#define VM_MIN_KERNEL_ADDRESS KERNEL_MAP_BASE
#else
#define VM_MIN_KERNEL_ADDRESS 0xC0000000UL
#endif
-#ifdef MACH_XEN
+#if defined(MACH_XEN) || defined (__x86_64__)
/* PV kernels can be loaded directly to the target virtual address */
#define INIT_VM_MIN_KERNEL_ADDRESS VM_MIN_KERNEL_ADDRESS
#else /* MACH_XEN */
@@ -56,10 +60,11 @@
#else /* PAE */
#define HYP_VIRT_START HYPERVISOR_VIRT_START_NONPAE
#endif /* PAE */
+#define VM_MAX_KERNEL_ADDRESS (HYP_VIRT_START - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
#else
#define HYP_VIRT_START HYPERVISOR_VIRT_START
+#define VM_MAX_KERNEL_ADDRESS (LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
#endif
-#define VM_MAX_KERNEL_ADDRESS (HYP_VIRT_START - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
#else /* MACH_PV_PAGETABLES */
#define VM_MAX_KERNEL_ADDRESS (LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
#endif /* MACH_PV_PAGETABLES */
@@ -68,16 +73,26 @@
* Reserve mapping room for the kernel map, which includes
* the device I/O map and the IPC map.
*/
+#ifdef __x86_64__
+/*
+ * VM structures are quite a bit bigger on 64-bit.
+ * This should be well enough for 8G of physical memory; on the other hand,
+ * maybe not all of them need to be in directly-mapped memory, see the parts
+ * allocated with pmap_steal_memory().
+ */
+#define VM_KERNEL_MAP_SIZE (512 * 1024 * 1024)
+#else
#define VM_KERNEL_MAP_SIZE (152 * 1024 * 1024)
+#endif
/* This is the kernel address range in linear addresses. */
#ifdef __x86_64__
#define LINEAR_MIN_KERNEL_ADDRESS VM_MIN_KERNEL_ADDRESS
-#define LINEAR_MAX_KERNEL_ADDRESS (0x00007fffffffffffUL)
+#define LINEAR_MAX_KERNEL_ADDRESS (0xffffffffffffffffUL)
#else
/* On x86, the kernel virtual address space is actually located
at high linear addresses. */
-#define LINEAR_MIN_KERNEL_ADDRESS (VM_MAX_ADDRESS)
+#define LINEAR_MIN_KERNEL_ADDRESS (VM_MAX_USER_ADDRESS)
#define LINEAR_MAX_KERNEL_ADDRESS (0xffffffffUL)
#endif
@@ -128,7 +143,8 @@
#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL)
#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL)
#else
-#define VM_PAGE_MAX_SEGS 3
+#define VM_PAGE_MAX_SEGS 4
+#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL)
#define VM_PAGE_DIRECTMAP_LIMIT (VM_MAX_KERNEL_ADDRESS \
- VM_MIN_KERNEL_ADDRESS \
- VM_KERNEL_MAP_SIZE)
@@ -138,14 +154,17 @@
#ifdef __LP64__
#define VM_PAGE_MAX_SEGS 4
#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL)
-#define VM_PAGE_DIRECTMAP_LIMIT DECL_CONST(0x400000000000, UL)
+#define VM_PAGE_DIRECTMAP_LIMIT (VM_MAX_KERNEL_ADDRESS \
+ - VM_MIN_KERNEL_ADDRESS \
+ - VM_KERNEL_MAP_SIZE + 1)
#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, UL)
#else /* __LP64__ */
#define VM_PAGE_DIRECTMAP_LIMIT (VM_MAX_KERNEL_ADDRESS \
- VM_MIN_KERNEL_ADDRESS \
- VM_KERNEL_MAP_SIZE + 1)
#ifdef PAE
-#define VM_PAGE_MAX_SEGS 3
+#define VM_PAGE_MAX_SEGS 4
+#define VM_PAGE_DMA32_LIMIT DECL_CONST(0x100000000, UL)
#define VM_PAGE_HIGHMEM_LIMIT DECL_CONST(0x10000000000000, ULL)
#else /* PAE */
#define VM_PAGE_MAX_SEGS 3
@@ -159,14 +178,23 @@
*/
#define VM_PAGE_SEG_DMA 0
-#ifdef __LP64__
-#define VM_PAGE_SEG_DMA32 1
-#define VM_PAGE_SEG_DIRECTMAP 2
-#define VM_PAGE_SEG_HIGHMEM 3
-#else /* __LP64__ */
-#define VM_PAGE_SEG_DMA32 1 /* Alias for the DIRECTMAP segment */
-#define VM_PAGE_SEG_DIRECTMAP 1
-#define VM_PAGE_SEG_HIGHMEM 2
-#endif /* __LP64__ */
+#if defined(VM_PAGE_DMA32_LIMIT) && (VM_PAGE_DMA32_LIMIT != VM_PAGE_DIRECTMAP_LIMIT)
+
+#if VM_PAGE_DMA32_LIMIT < VM_PAGE_DIRECTMAP_LIMIT
+#define VM_PAGE_SEG_DMA32 (VM_PAGE_SEG_DMA+1)
+#define VM_PAGE_SEG_DIRECTMAP (VM_PAGE_SEG_DMA32+1)
+#define VM_PAGE_SEG_HIGHMEM (VM_PAGE_SEG_DIRECTMAP+1)
+#else /* VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT */
+#define VM_PAGE_SEG_DIRECTMAP (VM_PAGE_SEG_DMA+1)
+#define VM_PAGE_SEG_DMA32 (VM_PAGE_SEG_DIRECTMAP+1)
+#define VM_PAGE_SEG_HIGHMEM (VM_PAGE_SEG_DMA32+1)
+#endif
+
+#else
+
+#define VM_PAGE_SEG_DIRECTMAP (VM_PAGE_SEG_DMA+1)
+#define VM_PAGE_SEG_DMA32 VM_PAGE_SEG_DIRECTMAP /* Alias for the DIRECTMAP segment */
+#define VM_PAGE_SEG_HIGHMEM (VM_PAGE_SEG_DIRECTMAP+1)
+#endif
#endif /* _I386_KERNEL_I386_VM_PARAM_ */
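
The preprocessor block above simply numbers the segments in increasing order of their physical limits, with DMA32 aliasing DIRECTMAP when no separate DMA32 segment is defined. A runtime sketch of the same rule (hypothetical helper, not part of the patch; dma32_limit == 0 stands for "no DMA32 segment"):

    static void order_segments(unsigned long long dma32_limit,
                               unsigned long long directmap_limit,
                               int *seg_dma32, int *seg_directmap, int *seg_highmem)
    {
        int next = 1;                        /* VM_PAGE_SEG_DMA is always 0 */

        if (dma32_limit != 0 && dma32_limit != directmap_limit) {
            if (dma32_limit < directmap_limit) {
                *seg_dma32 = next++;
                *seg_directmap = next++;
            } else {
                *seg_directmap = next++;
                *seg_dma32 = next++;
            }
        } else {
            *seg_directmap = next++;
            *seg_dma32 = *seg_directmap;     /* alias for the DIRECTMAP segment */
        }
        *seg_highmem = next;
    }
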
diff --git a/i386/i386/xen.h b/i386/i386/xen.h
index 8a17748a..2cd81be8 100644
--- a/i386/i386/xen.h
+++ b/i386/i386/xen.h
@@ -36,7 +36,7 @@
#define mb() __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory")
#define rmb() mb()
#define wmb() mb()
-MACH_INLINE unsigned long xchgl(volatile unsigned long *ptr, unsigned long x)
+static inline unsigned long xchgl(volatile unsigned long *ptr, unsigned long x)
{
__asm__ __volatile__("xchg %0, %1"
: "=r" (x)
@@ -66,7 +66,7 @@ MACH_INLINE unsigned long xchgl(volatile unsigned long *ptr, unsigned long x)
/* x86-specific hypercall interface. */
#define _hypcall0(type, name) \
-MACH_INLINE type hyp_##name(void) \
+static inline type hyp_##name(void) \
{ \
unsigned long __ret; \
asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
@@ -76,7 +76,7 @@ MACH_INLINE type hyp_##name(void) \
}
#define _hypcall1(type, name, type1, arg1) \
-MACH_INLINE type hyp_##name(type1 arg1) \
+static inline type hyp_##name(type1 arg1) \
{ \
unsigned long __ret; \
register unsigned long __arg1 asm(_hypcall_arg1) = (unsigned long) arg1; \
@@ -88,7 +88,7 @@ MACH_INLINE type hyp_##name(type1 arg1) \
}
#define _hypcall2(type, name, type1, arg1, type2, arg2) \
-MACH_INLINE type hyp_##name(type1 arg1, type2 arg2) \
+static inline type hyp_##name(type1 arg1, type2 arg2) \
{ \
unsigned long __ret; \
register unsigned long __arg1 asm(_hypcall_arg1) = (unsigned long) arg1; \
@@ -102,7 +102,7 @@ MACH_INLINE type hyp_##name(type1 arg1, type2 arg2) \
}
#define _hypcall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
-MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3) \
+static inline type hyp_##name(type1 arg1, type2 arg2, type3 arg3) \
{ \
unsigned long __ret; \
register unsigned long __arg1 asm(_hypcall_arg1) = (unsigned long) arg1; \
@@ -118,7 +118,7 @@ MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3) \
}
#define _hypcall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
-MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+static inline type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
{ \
unsigned long __ret; \
register unsigned long __arg1 asm(_hypcall_arg1) = (unsigned long) arg1; \
@@ -136,7 +136,7 @@ MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
}
#define _hypcall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \
-MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
+static inline type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
{ \
unsigned long __ret; \
register unsigned long __arg1 asm(_hypcall_arg1) = (unsigned long) arg1; \
@@ -165,7 +165,7 @@ _hypcall1(long, set_trap_table, vm_offset_t /* struct trap_info * */, traps);
#ifdef MACH_PV_PAGETABLES
_hypcall4(int, mmu_update, vm_offset_t /* struct mmu_update * */, req, int, count, vm_offset_t /* int * */, success_count, domid_t, domid)
-MACH_INLINE int hyp_mmu_update_pte(pt_entry_t pte, pt_entry_t val)
+static inline int hyp_mmu_update_pte(pt_entry_t pte, pt_entry_t val)
{
struct mmu_update update =
{
@@ -221,7 +221,7 @@ _hypcall2(long, set_segment_base, int, reg, unsigned long, value);
#include <xen/public/memory.h>
_hypcall2(long, memory_op, unsigned long, cmd, vm_offset_t /* void * */, arg);
-MACH_INLINE void hyp_free_mfn(unsigned long mfn)
+static inline void hyp_free_mfn(unsigned long mfn)
{
struct xen_memory_reservation reservation;
reservation.extent_start = (void*) kvtolin(&mfn);
@@ -245,7 +245,7 @@ _hypcall3(int, update_va_mapping, unsigned long, va, unsigned long, val, unsigne
#define hyp_do_update_va_mapping(va, val, flags) hyp_update_va_mapping(va, val, flags)
#endif
-MACH_INLINE void hyp_free_page(unsigned long pfn, void *va)
+static inline void hyp_free_page(unsigned long pfn, void *va)
{
/* save mfn */
unsigned long mfn = pfn_to_mfn(pfn);
@@ -267,7 +267,7 @@ MACH_INLINE void hyp_free_page(unsigned long pfn, void *va)
#ifdef MACH_PV_PAGETABLES
_hypcall4(int, mmuext_op, vm_offset_t /* struct mmuext_op * */, op, int, count, vm_offset_t /* int * */, success_count, domid_t, domid);
-MACH_INLINE int hyp_mmuext_op_void(unsigned int cmd)
+static inline int hyp_mmuext_op_void(unsigned int cmd)
{
struct mmuext_op op = {
.cmd = cmd,
@@ -276,7 +276,7 @@ MACH_INLINE int hyp_mmuext_op_void(unsigned int cmd)
hyp_mmuext_op(kv_to_la(&op), 1, kv_to_la(&count), DOMID_SELF);
return count;
}
-MACH_INLINE int hyp_mmuext_op_mfn(unsigned int cmd, unsigned long mfn)
+static inline int hyp_mmuext_op_mfn(unsigned int cmd, unsigned long mfn)
{
struct mmuext_op op = {
.cmd = cmd,
@@ -286,7 +286,7 @@ MACH_INLINE int hyp_mmuext_op_mfn(unsigned int cmd, unsigned long mfn)
hyp_mmuext_op(kv_to_la(&op), 1, kv_to_la(&count), DOMID_SELF);
return count;
}
-MACH_INLINE void hyp_set_ldt(void *ldt, unsigned long nbentries) {
+static inline void hyp_set_ldt(void *ldt, unsigned long nbentries) {
struct mmuext_op op = {
.cmd = MMUEXT_SET_LDT,
.arg1.linear_addr = kvtolin(ldt),
@@ -303,7 +303,7 @@ MACH_INLINE void hyp_set_ldt(void *ldt, unsigned long nbentries) {
}
#define hyp_set_cr3(value) hyp_mmuext_op_mfn(MMUEXT_NEW_BASEPTR, pa_to_mfn(value))
#define hyp_set_user_cr3(value) hyp_mmuext_op_mfn(MMUEXT_NEW_USER_BASEPTR, pa_to_mfn(value))
-MACH_INLINE void hyp_invlpg(vm_offset_t lin) {
+static inline void hyp_invlpg(vm_offset_t lin) {
struct mmuext_op ops;
int n;
ops.cmd = MMUEXT_INVLPG_ALL;
@@ -328,14 +328,14 @@ _hypcall1(long, set_timer_op, unsigned long, absolute);
#include <xen/public/event_channel.h>
_hypcall1(int, event_channel_op, vm_offset_t /* evtchn_op_t * */, op);
-MACH_INLINE int hyp_event_channel_send(evtchn_port_t port) {
+static inline int hyp_event_channel_send(evtchn_port_t port) {
evtchn_op_t op = {
.cmd = EVTCHNOP_send,
.u.send.port = port,
};
return hyp_event_channel_op(kvtolin(&op));
}
-MACH_INLINE evtchn_port_t hyp_event_channel_alloc(domid_t domid) {
+static inline evtchn_port_t hyp_event_channel_alloc(domid_t domid) {
evtchn_op_t op = {
.cmd = EVTCHNOP_alloc_unbound,
.u.alloc_unbound.dom = DOMID_SELF,
@@ -345,7 +345,7 @@ MACH_INLINE evtchn_port_t hyp_event_channel_alloc(domid_t domid) {
panic("couldn't allocate event channel");
return op.u.alloc_unbound.port;
}
-MACH_INLINE evtchn_port_t hyp_event_channel_bind_virq(uint32_t virq, uint32_t vcpu) {
+static inline evtchn_port_t hyp_event_channel_bind_virq(uint32_t virq, uint32_t vcpu) {
evtchn_op_t op = { .cmd = EVTCHNOP_bind_virq, .u.bind_virq = { .virq = virq, .vcpu = vcpu }};
if (hyp_event_channel_op(kvtolin(&op)))
panic("can't bind virq %d\n",virq);
@@ -364,7 +364,7 @@ _hypcall0(long, iret);
_hypcall2(long, sched_op, int, cmd, vm_offset_t /* void* */, arg)
#define hyp_yield() hyp_sched_op(SCHEDOP_yield, 0)
#define hyp_block() hyp_sched_op(SCHEDOP_block, 0)
-MACH_INLINE void __attribute__((noreturn)) hyp_crash(void)
+static inline void __attribute__((noreturn)) hyp_crash(void)
{
unsigned int shut = SHUTDOWN_crash;
hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
@@ -373,7 +373,7 @@ MACH_INLINE void __attribute__((noreturn)) hyp_crash(void)
for(;;);
}
-MACH_INLINE void __attribute__((noreturn)) hyp_halt(void)
+static inline void __attribute__((noreturn)) hyp_halt(void)
{
unsigned int shut = SHUTDOWN_poweroff;
hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
@@ -382,7 +382,7 @@ MACH_INLINE void __attribute__((noreturn)) hyp_halt(void)
for(;;);
}
-MACH_INLINE void __attribute__((noreturn)) hyp_reboot(void)
+static inline void __attribute__((noreturn)) hyp_reboot(void)
{
unsigned int shut = SHUTDOWN_reboot;
hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
@@ -395,7 +395,7 @@ _hypcall2(int, set_debugreg, int, reg, unsigned long, value);
_hypcall1(unsigned long, get_debugreg, int, reg);
/* x86-specific */
-MACH_INLINE uint64_t hyp_cpu_clock(void) {
+static inline uint64_t hyp_cpu_clock(void) {
uint32_t hi, lo;
asm volatile("rdtsc" : "=d"(hi), "=a"(lo));
return (((uint64_t) hi) << 32) | lo;
diff --git a/i386/i386at/acpi_parse_apic.c b/i386/i386at/acpi_parse_apic.c
index 3cf6f042..1cfc1791 100644
--- a/i386/i386at/acpi_parse_apic.c
+++ b/i386/i386at/acpi_parse_apic.c
@@ -21,7 +21,7 @@
#include <string.h> /* memcmp, memcpy... */
-#include <include/stdint.h> /* uint16_t, uint32_t... */
+#include <stdint.h> /* uint16_t, uint32_t... */
#include <mach/machine.h> /* machine_slot */
@@ -33,6 +33,8 @@
#include <vm/vm_kern.h>
static struct acpi_apic *apic_madt = NULL;
+unsigned lapic_addr;
+uint32_t *hpet_addr;
/*
* acpi_print_info: shows by screen the ACPI's rsdp and rsdt virtual address
@@ -42,13 +44,12 @@ static struct acpi_apic *apic_madt = NULL;
* and the number of entries stored in RSDT.
*/
void
-acpi_print_info(struct acpi_rsdp *rsdp, struct acpi_rsdt *rsdt, int acpi_rsdt_n)
+acpi_print_info(phys_addr_t rsdp, void *rsdt, int acpi_rsdt_n)
{
printf("ACPI:\n");
- printf(" rsdp = %p; rsdp->rsdt_addr = %x\n", rsdp, rsdp->rsdt_addr);
- printf(" rsdt = %p; rsdt->length = %x (n = %x)\n", rsdt, rsdt->header.length,
- acpi_rsdt_n);
+ printf(" rsdp = 0x%lx\n", rsdp);
+ printf(" rsdt/xsdt = 0x%p (n = %d)\n", rsdt, acpi_rsdt_n);
}
/*
@@ -98,27 +99,45 @@ acpi_check_signature(const uint8_t table_signature[], const char *real_signature
*
* Preconditions: RSDP pointer must not be NULL.
*
- * Returns 0 if correct.
+ * Returns 1 if ACPI 1.0 and sets sdt_base
+ * Returns 2 if ACPI >= 2.0 and sets sdt_base
*/
static int8_t
-acpi_check_rsdp(struct acpi_rsdp *rsdp)
+acpi_check_rsdp(struct acpi_rsdp2 *rsdp, phys_addr_t *sdt_base)
{
- uint32_t checksum;
int is_rsdp;
+ uint8_t cksum;
/* Check if rsdp signature match with the ACPI RSDP signature. */
- is_rsdp = acpi_check_signature(rsdp->signature, ACPI_RSDP_SIG, 8*sizeof(uint8_t));
+ is_rsdp = acpi_check_signature(rsdp->v1.signature, ACPI_RSDP_SIG, 8*sizeof(uint8_t));
if (is_rsdp != ACPI_SUCCESS)
return ACPI_BAD_SIGNATURE;
- /* If match, calculates rdsp checksum and check It. */
- checksum = acpi_checksum(rsdp, sizeof(struct acpi_rsdp));
+ if (rsdp->v1.revision == 0) {
+ // ACPI 1.0
+ *sdt_base = rsdp->v1.rsdt_addr;
+ printf("ACPI v1.0\n");
+ cksum = acpi_checksum((void *)(&rsdp->v1), sizeof(struct acpi_rsdp));
- if (checksum != 0)
- return ACPI_BAD_CHECKSUM;
+ if (cksum != 0)
+ return ACPI_BAD_CHECKSUM;
- return ACPI_SUCCESS;
+ return 1;
+
+ } else if (rsdp->v1.revision == 2) {
+ // ACPI >= 2.0
+ *sdt_base = rsdp->xsdt_addr;
+ printf("ACPI >= v2.0\n");
+ cksum = acpi_checksum((void *)rsdp, sizeof(struct acpi_rsdp2));
+
+ if (cksum != 0)
+ return ACPI_BAD_CHECKSUM;
+
+ return 2;
+ }
+
+ return ACPI_NO_RSDP;
}
/*
@@ -146,38 +165,41 @@ acpi_check_rsdp_align(void *addr)
*
* Preconditions: The start address (addr) must be aligned.
*
- * Returns the reference to rsdp structure if success, NULL if failure.
+ * Returns the physical address of the RSDP structure if success, 0 if failure.
*/
-static struct acpi_rsdp*
-acpi_search_rsdp(void *addr, uint32_t length)
+static phys_addr_t
+acpi_search_rsdp(void *addr, uint32_t length, int *is_64bit)
{
void *end;
+ int version = 0;
+ phys_addr_t sdt_base = 0;
 /* Search RSDP in memory space between addr and addr+length. */
for (end = addr+length; addr < end; addr += ACPI_RSDP_ALIGN) {
 /* Check if the current memory block stores the RSDP. */
- if ((addr != NULL) && (acpi_check_rsdp(addr) == ACPI_SUCCESS)) {
- /* If yes, return RSDP address */
- return (struct acpi_rsdp*) addr;
+ if ((addr != NULL) && ((version = acpi_check_rsdp(addr, &sdt_base)) > 0)) {
+ /* If yes, return RSDT/XSDT address */
+ *is_64bit = (version == 2);
+ return sdt_base;
}
}
- return NULL;
+ return 0;
}
/*
* acpi_get_rsdp: tries to find the RSDP table,
* searching It in many memory ranges, as It's written in ACPI Specification.
*
- * Returns the reference to RDSP structure if success, NULL if failure.
+ * Returns the physical address of the RSDP structure if success, 0 if failure.
*/
-struct acpi_rsdp*
-acpi_get_rsdp(void)
+static phys_addr_t
+acpi_get_rsdp(int *is_64bit)
{
uint16_t *start = 0;
phys_addr_t base = 0;
- struct acpi_rsdp *rsdp = NULL;
+ phys_addr_t rsdp = 0;
/* EDBA start address. */
start = (uint16_t*) phystokv(0x040e);
@@ -185,41 +207,18 @@ acpi_get_rsdp(void)
/* check alignment. */
if (acpi_check_rsdp_align((void *)base) == ACPI_BAD_ALIGN)
- return NULL;
- rsdp = acpi_search_rsdp((void *)base, 1024);
+ return 0;
+ rsdp = acpi_search_rsdp((void *)base, 1024, is_64bit);
- if (rsdp == NULL) {
+ if (rsdp == 0) {
/* If RSDP isn't in EDBA, search in the BIOS read-only memory space between 0E0000h and 0FFFFFh */
- rsdp = acpi_search_rsdp((void *)phystokv(0xe0000), 0x100000 - 0x0e0000);
+ rsdp = acpi_search_rsdp((void *)phystokv(0xe0000), 0x100000 - 0x0e0000, is_64bit);
}
return rsdp;
}
/*
- * acpi_check_rsdt: check if the RSDT initial address is correct
- * checking its checksum.
- *
- * Receives as input a reference for the RSDT "candidate" table.
- * Returns 0 if success.
- *
- * Preconditions: rsdp must not be NULL.
- *
- */
-static int
-acpi_check_rsdt(struct acpi_rsdt *rsdt)
-{
- uint8_t checksum;
-
- checksum = acpi_checksum(rsdt, rsdt->header.length);
-
- if (checksum != 0)
- return ACPI_BAD_CHECKSUM;
-
- return ACPI_SUCCESS;
-}
-
-/*
* acpi_get_rsdt: Get RSDT table reference from RSDP entries.
*
* Receives as input a reference for RSDP table
@@ -228,16 +227,12 @@ acpi_check_rsdt(struct acpi_rsdt *rsdt)
* Returns the reference to RSDT table if success, NULL if error.
*/
static struct acpi_rsdt*
-acpi_get_rsdt(struct acpi_rsdp *rsdp, int* acpi_rsdt_n)
+acpi_get_rsdt(phys_addr_t rsdp_phys, int* acpi_rsdt_n)
{
- phys_addr_t rsdt_phys;
struct acpi_rsdt *rsdt = NULL;
- int acpi_check;
int signature_check;
- /* Get rsdt address from rsdp table. */
- rsdt_phys = rsdp->rsdt_addr;
- rsdt = (struct acpi_rsdt*) kmem_map_aligned_table(rsdt_phys, sizeof(struct acpi_rsdt), VM_PROT_READ);
+ rsdt = (struct acpi_rsdt*) kmem_map_aligned_table(rsdp_phys, sizeof(struct acpi_rsdt), VM_PROT_READ);
/* Check if the RSDT mapping is fine. */
if (rsdt == NULL)
@@ -250,12 +245,6 @@ acpi_get_rsdt(struct acpi_rsdp *rsdp, int* acpi_rsdt_n)
if (signature_check != ACPI_SUCCESS)
return NULL;
- /* Check if rsdt is correct. */
- acpi_check = acpi_check_rsdt(rsdt);
-
- if (acpi_check != ACPI_SUCCESS)
- return NULL;
-
/* Calculated number of elements stored in rsdt. */
*acpi_rsdt_n = (rsdt->header.length - sizeof(rsdt->header))
/ sizeof(rsdt->entry[0]);
@@ -264,34 +253,117 @@ acpi_get_rsdt(struct acpi_rsdp *rsdp, int* acpi_rsdt_n)
}
/*
+ * acpi_get_xsdt: Get XSDT table reference from RSDPv2 entries.
+ *
+ * Receives as input a reference for RSDPv2 table
+ * and a reference to store the number of entries of XSDT.
+ *
+ * Returns the reference to XSDT table if success, NULL if error.
+ */
+static struct acpi_xsdt*
+acpi_get_xsdt(phys_addr_t rsdp_phys, int* acpi_xsdt_n)
+{
+ struct acpi_xsdt *xsdt = NULL;
+ int signature_check;
+
+ xsdt = (struct acpi_xsdt*) kmem_map_aligned_table(rsdp_phys, sizeof(struct acpi_xsdt), VM_PROT_READ);
+
+ /* Check if the XSDT mapping is fine. */
+ if (xsdt == NULL)
+ return NULL;
+
+ /* Check if the xsdt signature equals the ACPI XSDT signature. */
+ signature_check = acpi_check_signature(xsdt->header.signature, ACPI_XSDT_SIG,
+ 4*sizeof(uint8_t));
+
+ if (signature_check != ACPI_SUCCESS)
+ return NULL;
+
+ /* Calculate the number of elements stored in the xsdt. */
+ *acpi_xsdt_n = (xsdt->header.length - sizeof(xsdt->header))
+ / sizeof(xsdt->entry[0]);
+
+ return xsdt;
+}
+
+/*
* acpi_get_apic: get MADT/APIC table from RSDT entries.
*
* Receives as input the RSDT initial address,
* and the number of entries of RSDT table.
*
* Returns a reference to APIC/MADT table if success, NULL if failure.
+ * Also sets hpet_addr to base address of HPET.
*/
static struct acpi_apic*
acpi_get_apic(struct acpi_rsdt *rsdt, int acpi_rsdt_n)
{
struct acpi_dhdr *descr_header;
+ struct acpi_apic *madt = NULL;
int check_signature;
+ uint64_t map_addr;
/* Search APIC entries in rsdt table. */
for (int i = 0; i < acpi_rsdt_n; i++) {
descr_header = (struct acpi_dhdr*) kmem_map_aligned_table(rsdt->entry[i], sizeof(struct acpi_dhdr),
VM_PROT_READ);
- /* Check if the entry contains an APIC. */
+ /* Check if the entry is a MADT */
+ check_signature = acpi_check_signature(descr_header->signature, ACPI_APIC_SIG, 4*sizeof(uint8_t));
+ if (check_signature == ACPI_SUCCESS)
+ madt = (struct acpi_apic*) descr_header;
+
+ /* Check if the entry is a HPET */
+ check_signature = acpi_check_signature(descr_header->signature, ACPI_HPET_SIG, 4*sizeof(uint8_t));
+ if (check_signature == ACPI_SUCCESS) {
+ map_addr = ((struct acpi_hpet *)descr_header)->address.addr64;
+ assert (map_addr != 0);
+ hpet_addr = (uint32_t *)kmem_map_aligned_table(map_addr, 1024, VM_PROT_READ | VM_PROT_WRITE);
+ printf("HPET at physical address 0x%llx\n", map_addr);
+ }
+ }
+
+ return madt;
+}
+
+/*
+ * acpi_get_apic2: get MADT/APIC table from XSDT entries.
+ *
+ * Receives as input the XSDT initial address,
+ * and the number of entries of XSDT table.
+ *
+ * Returns a reference to APIC/MADT table if success, NULL if failure.
+ * Also sets hpet_addr to base address of HPET.
+ */
+static struct acpi_apic*
+acpi_get_apic2(struct acpi_xsdt *xsdt, int acpi_xsdt_n)
+{
+ struct acpi_dhdr *descr_header;
+ struct acpi_apic *madt = NULL;
+ int check_signature;
+ uint64_t map_addr;
+
+ /* Search APIC entries in the xsdt table. */
+ for (int i = 0; i < acpi_xsdt_n; i++) {
+ descr_header = (struct acpi_dhdr*) kmem_map_aligned_table(xsdt->entry[i], sizeof(struct acpi_dhdr),
+ VM_PROT_READ);
+
+ /* Check if the entry is an APIC. */
check_signature = acpi_check_signature(descr_header->signature, ACPI_APIC_SIG, 4*sizeof(uint8_t));
+ if (check_signature == ACPI_SUCCESS)
+ madt = (struct acpi_apic *)descr_header;
+ /* Check if the entry is a HPET. */
+ check_signature = acpi_check_signature(descr_header->signature, ACPI_HPET_SIG, 4*sizeof(uint8_t));
if (check_signature == ACPI_SUCCESS) {
- /* If yes, return the APIC. */
- return (struct acpi_apic*) descr_header;
+ map_addr = ((struct acpi_hpet *)descr_header)->address.addr64;
+ assert (map_addr != 0);
+ hpet_addr = (uint32_t *)kmem_map_aligned_table(map_addr, 1024, VM_PROT_READ | VM_PROT_WRITE);
+ printf("HPET at physical address 0x%llx\n", map_addr);
}
}
- return NULL;
+ return madt;
}
/*
@@ -330,6 +402,9 @@ acpi_apic_add_ioapic(struct acpi_apic_ioapic *ioapic_entry)
io_apic.ioapic = (ApicIoUnit *)kmem_map_aligned_table(ioapic_entry->addr,
sizeof(ApicIoUnit),
VM_PROT_READ | VM_PROT_WRITE);
+ io_apic.ioapic->select.r = APIC_IO_VERSION;
+ io_apic.ngsis = ((io_apic.ioapic->window.r >> APIC_IO_ENTRIES_SHIFT) & 0xff) + 1;
+
/* Insert IOAPIC in the list. */
apic_add_ioapic(io_apic);
}
@@ -369,24 +444,27 @@ static int
acpi_apic_parse_table(struct acpi_apic *apic)
{
struct acpi_apic_dhdr *apic_entry = NULL;
- uint32_t end = 0;
+ vm_offset_t end = 0;
uint8_t numcpus = 1;
/* Get the address of first APIC entry */
apic_entry = (struct acpi_apic_dhdr*) apic->entry;
/* Get the end address of APIC table */
- end = (uint32_t) apic + apic->header.length;
+ end = (vm_offset_t) apic + apic->header.length;
+
+ printf("APIC entry=0x%p end=0x%x\n", apic_entry, end);
/* Initialize number of cpus */
numcpus = apic_get_numcpus();
/* Search in APIC entry. */
- while ((uint32_t)apic_entry < end) {
+ while ((vm_offset_t)apic_entry < end) {
struct acpi_apic_lapic *lapic_entry;
struct acpi_apic_ioapic *ioapic_entry;
struct acpi_apic_irq_override *irq_override_entry;
+ printf("APIC entry=0x%p end=0x%x\n", apic_entry, end);
/* Check entry type. */
switch(apic_entry->type) {
@@ -417,10 +495,13 @@ acpi_apic_parse_table(struct acpi_apic *apic)
break;
/* FIXME: There is another unhandled case */
+ default:
+ printf("Unhandled APIC entry type 0x%x\n", apic_entry->type);
+ break;
}
/* Get next APIC entry. */
- apic_entry = (struct acpi_apic_dhdr*)((uint32_t) apic_entry
+ apic_entry = (struct acpi_apic_dhdr*)((vm_offset_t) apic_entry
+ apic_entry->length);
/* Update number of cpus. */
@@ -448,17 +529,11 @@ acpi_apic_parse_table(struct acpi_apic *apic)
static int
acpi_apic_setup(struct acpi_apic *apic)
{
- int apic_checksum;
ApicLocalUnit* lapic_unit;
uint8_t ncpus, nioapics;
- /* Check the checksum of the APIC */
- apic_checksum = acpi_checksum(apic, apic->header.length);
-
- if(apic_checksum != 0)
- return ACPI_BAD_CHECKSUM;
-
/* map common lapic address */
+ lapic_addr = apic->lapic_addr;
lapic_unit = kmem_map_aligned_table(apic->lapic_addr, sizeof(ApicLocalUnit),
VM_PROT_READ | VM_PROT_WRITE);
@@ -481,6 +556,8 @@ acpi_apic_setup(struct acpi_apic *apic)
return ACPI_FIT_FAILURE;
}
+ apic_generate_cpu_id_lut();
+
return ACPI_SUCCESS;
}
@@ -495,29 +572,64 @@ acpi_apic_setup(struct acpi_apic *apic)
int
acpi_apic_init(void)
{
- struct acpi_rsdp *rsdp = 0;
+ phys_addr_t rsdp = 0;
struct acpi_rsdt *rsdt = 0;
+ struct acpi_xsdt *xsdt = 0;
int acpi_rsdt_n;
int ret_acpi_setup;
int apic_init_success = 0;
+ int is_64bit = 0;
+ uint8_t checksum;
- /* Try to get the RSDP pointer. */
- rsdp = acpi_get_rsdp();
- if (rsdp == NULL)
+ /* Try to get the RSDP physical address. */
+ rsdp = acpi_get_rsdp(&is_64bit);
+ if (rsdp == 0)
return ACPI_NO_RSDP;
- /* Try to get the RSDT pointer. */
- rsdt = acpi_get_rsdt(rsdp, &acpi_rsdt_n);
- if (rsdt == NULL)
- return ACPI_NO_RSDT;
-
- /* Try to get the APIC table pointer. */
- apic_madt = acpi_get_apic(rsdt, acpi_rsdt_n);
- if (apic_madt == NULL)
- return ACPI_NO_APIC;
-
- /* Print the ACPI tables addresses. */
- acpi_print_info(rsdp, rsdt, acpi_rsdt_n);
+ if (!is_64bit) {
+ /* Try to get the RSDT pointer. */
+ rsdt = acpi_get_rsdt(rsdp, &acpi_rsdt_n);
+ if (rsdt == NULL)
+ return ACPI_NO_RSDT;
+
+ checksum = acpi_checksum((void *)rsdt, rsdt->header.length);
+ if (checksum != 0)
+ return ACPI_BAD_CHECKSUM;
+
+ /* Try to get the APIC table pointer. */
+ apic_madt = acpi_get_apic(rsdt, acpi_rsdt_n);
+ if (apic_madt == NULL)
+ return ACPI_NO_APIC;
+
+ checksum = acpi_checksum((void *)apic_madt, apic_madt->header.length);
+ if (checksum != 0)
+ return ACPI_BAD_CHECKSUM;
+
+ /* Print the ACPI tables addresses. */
+ acpi_print_info(rsdp, rsdt, acpi_rsdt_n);
+
+ } else {
+ /* Try to get the XSDT pointer. */
+ xsdt = acpi_get_xsdt(rsdp, &acpi_rsdt_n);
+ if (xsdt == NULL)
+ return ACPI_NO_RSDT;
+
+ checksum = acpi_checksum((void *)xsdt, xsdt->header.length);
+ if (checksum != 0)
+ return ACPI_BAD_CHECKSUM;
+
+ /* Try to get the APIC table pointer. */
+ apic_madt = acpi_get_apic2(xsdt, acpi_rsdt_n);
+ if (apic_madt == NULL)
+ return ACPI_NO_APIC;
+
+ checksum = acpi_checksum((void *)apic_madt, apic_madt->header.length);
+ if (checksum != 0)
+ return ACPI_BAD_CHECKSUM;
+
+ /* Print the ACPI tables addresses. */
+ acpi_print_info(rsdp, xsdt, acpi_rsdt_n);
+ }
apic_init_success = apic_data_init();
if (apic_init_success != ACPI_SUCCESS)
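
acpi_checksum() itself is not visible in this hunk; the checks added above rely on the usual ACPI rule that every byte of a table sums to zero modulo 256. A minimal sketch of what such a helper computes (an assumption about the actual implementation, shown only for reference):

    #include <stdint.h>

    static uint8_t acpi_checksum_sketch(const void *table, unsigned int length)
    {
        const uint8_t *p = table;
        uint8_t sum = 0;

        while (length--)
            sum += *p++;
        return sum;   /* 0 means the table is consistent */
    }
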
diff --git a/i386/i386at/acpi_parse_apic.h b/i386/i386at/acpi_parse_apic.h
index 97a59a2e..85e01170 100644
--- a/i386/i386at/acpi_parse_apic.h
+++ b/i386/i386at/acpi_parse_apic.h
@@ -22,7 +22,7 @@
#ifndef __ACPI_H__
#define __ACPI_H__
-#include <include/stdint.h>
+#include <stdint.h>
enum ACPI_RETURN {
ACPI_BAD_CHECKSUM = -1,
@@ -44,10 +44,17 @@ struct acpi_rsdp {
uint8_t signature[8];
uint8_t checksum;
uint8_t oem_id[6];
- uint8_t revision[1];
+ uint8_t revision;
uint32_t rsdt_addr;
} __attribute__((__packed__));
+struct acpi_rsdp2 {
+ struct acpi_rsdp v1;
+ uint32_t length;
+ uint64_t xsdt_addr;
+ uint8_t checksum;
+ uint8_t reserved[3];
+} __attribute__((__packed__));
/*
* RSDT Entry Header
@@ -77,6 +84,21 @@ struct acpi_rsdt {
uint32_t entry[0];
} __attribute__((__packed__));
+#define ACPI_XSDT_SIG "XSDT"
+
+struct acpi_xsdt {
+ struct acpi_dhdr header;
+ uint64_t entry[0];
+} __attribute__((__packed__));
+
+struct acpi_address {
+ uint8_t is_io;
+ uint8_t reg_width;
+ uint8_t reg_offset;
+ uint8_t reserved;
+ uint64_t addr64;
+} __attribute__((__packed__));
+
/* APIC table signature. */
#define ACPI_APIC_SIG "APIC"
@@ -156,8 +178,24 @@ struct acpi_apic_irq_override {
uint16_t flags;
} __attribute__((__packed__));
+
+#define ACPI_HPET_SIG "HPET"
+
+/*
+ * HPET High Precision Event Timer structure
+ */
+struct acpi_hpet {
+ struct acpi_dhdr header;
+ uint32_t id;
+ struct acpi_address address;
+ uint8_t sequence;
+ uint16_t minimum_tick;
+ uint8_t flags;
+} __attribute__((__packed__));
+
int acpi_apic_init(void);
-void acpi_print_info(struct acpi_rsdp *rsdp, struct acpi_rsdt *rsdt, int acpi_rsdt_n);
+void acpi_print_info(phys_addr_t rsdp, void *rsdt, int acpi_rsdt_n);
+extern unsigned lapic_addr;
#endif /* __ACPI_H__ */
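
Per the ACPI specification the v1 RSDP is 20 bytes and the extended v2 structure is 36 bytes, which is what the packed layouts above should yield. A compile-time sketch, not part of the patch, assuming C11 _Static_assert and the structures above in scope:

    _Static_assert(sizeof(struct acpi_rsdp) == 20, "ACPI 1.0 RSDP is 20 bytes");
    _Static_assert(sizeof(struct acpi_rsdp2) == 36, "ACPI >= 2.0 RSDP is 36 bytes");
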
diff --git a/i386/i386at/autoconf.c b/i386/i386at/autoconf.c
index 0b1251f5..5c69988f 100644
--- a/i386/i386at/autoconf.c
+++ b/i386/i386at/autoconf.c
@@ -26,8 +26,14 @@
#include <kern/printf.h>
#include <mach/std_types.h>
+#include <i386at/autoconf.h>
#include <i386/irq.h>
#include <i386/ipl.h>
+#ifdef APIC
+# include <i386/apic.h>
+#else
+# include <i386/pic.h>
+#endif
#include <chips/busses.h>
/* initialization typecasts */
@@ -133,25 +139,11 @@ void take_dev_irq(
printf("The device below will clobber IRQ %d (%p).\n", pic, ivect[pic]);
printf("You have two devices at the same IRQ.\n");
printf("This won't work. Reconfigure your hardware and try again.\n");
- printf("%s%d: port = %lx, spl = %ld, pic = %d.\n",
+ printf("%s%d: port = %zx, spl = %zd, pic = %d.\n",
dev->name, dev->unit, dev->address,
dev->sysdep, dev->sysdep1);
while (1);
}
-}
-
-void take_ctlr_irq(
- const struct bus_ctlr *ctlr)
-{
- int pic = ctlr->sysdep1;
- if (ivect[pic] == intnull) {
- iunit[pic] = ctlr->unit;
- ivect[pic] = ctlr->intr;
- } else {
- printf("The device below will clobber IRQ %d (%p).\n", pic, ivect[pic]);
- printf("You have two devices at the same IRQ. This won't work.\n");
- printf("Reconfigure your hardware and try again.\n");
- while (1);
- }
+ unmask_irq(pic);
}
diff --git a/i386/i386at/autoconf.h b/i386/i386at/autoconf.h
index a16a88f9..81fc5da7 100644
--- a/i386/i386at/autoconf.h
+++ b/i386/i386at/autoconf.h
@@ -37,7 +37,7 @@
*/
void probeio(void);
-extern void take_dev_irq (
- struct bus_device *dev);
+void take_dev_irq(
+ const struct bus_device *dev);
#endif /* _AUTOCONF_H_ */
diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c
index 78e7bb21..937c0e3d 100644
--- a/i386/i386at/biosmem.c
+++ b/i386/i386at/biosmem.c
@@ -637,10 +637,8 @@ biosmem_setup_allocator(const struct multiboot_raw_info *mbi)
*/
end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
-#ifndef __LP64__
if (end > VM_PAGE_DIRECTMAP_LIMIT)
end = VM_PAGE_DIRECTMAP_LIMIT;
-#endif /* __LP64__ */
max_heap_start = 0;
max_heap_end = 0;
@@ -691,10 +689,21 @@ biosmem_bootstrap_common(void)
if (error)
boot_panic(biosmem_panic_noseg_msg);
+#if !defined(MACH_HYP) && NCPUS > 1
+ /*
+ * Grab an early page for AP boot code which needs to be below 1MB.
+ */
+ assert (phys_start < 0x100000);
+ apboot_addr = phys_start;
+ phys_start += PAGE_SIZE;
+#endif
+
biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end);
phys_start = VM_PAGE_DMA_LIMIT;
+
#ifdef VM_PAGE_DMA32_LIMIT
+#if VM_PAGE_DMA32_LIMIT < VM_PAGE_DIRECTMAP_LIMIT
phys_end = VM_PAGE_DMA32_LIMIT;
error = biosmem_map_find_avail(&phys_start, &phys_end);
@@ -704,7 +713,9 @@ biosmem_bootstrap_common(void)
biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end);
phys_start = VM_PAGE_DMA32_LIMIT;
+#endif
#endif /* VM_PAGE_DMA32_LIMIT */
+
phys_end = VM_PAGE_DIRECTMAP_LIMIT;
error = biosmem_map_find_avail(&phys_start, &phys_end);
@@ -714,6 +725,21 @@ biosmem_bootstrap_common(void)
biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end);
phys_start = VM_PAGE_DIRECTMAP_LIMIT;
+
+#ifdef VM_PAGE_DMA32_LIMIT
+#if VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+ phys_end = VM_PAGE_DMA32_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (error)
+ return;
+
+ biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end);
+
+ phys_start = VM_PAGE_DMA32_LIMIT;
+#endif
+#endif /* VM_PAGE_DMA32_LIMIT */
+
phys_end = VM_PAGE_HIGHMEM_LIMIT;
error = biosmem_map_find_avail(&phys_start, &phys_end);
@@ -823,10 +849,11 @@ biosmem_directmap_end(void)
{
if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0)
return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP);
- else if (biosmem_segment_size(VM_PAGE_SEG_DMA32) != 0)
+#if defined(VM_PAGE_DMA32_LIMIT) && (VM_PAGE_DMA32_LIMIT < VM_PAGE_DIRECTMAP_LIMIT)
+ if (biosmem_segment_size(VM_PAGE_SEG_DMA32) != 0)
return biosmem_segment_end(VM_PAGE_SEG_DMA32);
- else
- return biosmem_segment_end(VM_PAGE_SEG_DMA);
+#endif
+ return biosmem_segment_end(VM_PAGE_SEG_DMA);
}
static const char * __init
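
The early page grabbed above has to satisfy two constraints: it must be page aligned and it must sit below 1 MiB so that an AP can start executing it in real mode. A small check of that invariant, as a sketch only (helper name and parameters are illustrative, not from the patch):

    #include <assert.h>

    static void check_apboot_page(unsigned long apboot_addr, unsigned long page_size)
    {
        assert((apboot_addr & (page_size - 1)) == 0);   /* page aligned */
        assert(apboot_addr < 0x100000);                 /* below 1 MiB  */
    }
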
diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
index 82d4b34a..daaf57db 100644
--- a/i386/i386at/boothdr.S
+++ b/i386/i386at/boothdr.S
@@ -1,6 +1,7 @@
#include <mach/machine/asm.h>
-
+#include <i386/apic.h>
+#include <i386/seg.h>
#include <i386/i386asm.h>
/*
@@ -39,22 +40,43 @@ boot_hdr:
#endif /* __ELF__ */
boot_entry:
+ movl $percpu_array - KERNELBASE, %eax
+ movw %ax, boot_percpu_low - KERNELBASE
+ shr $16, %eax
+ movb %al, boot_percpu_med - KERNELBASE
+ shr $8, %ax
+ movb %al, boot_percpu_high - KERNELBASE
+
/* use segmentation to offset ourself. */
lgdt boot_gdt_descr - KERNELBASE
- ljmp $8,$0f
+ ljmp $0x8,$0f
0:
- movw $0,%ax
+ movw $0x0,%ax
movw %ax,%ds
movw %ax,%es
movw %ax,%fs
movw %ax,%gs
- movw $16,%ax
+ movw $0x10,%ax
movw %ax,%ds
movw %ax,%es
movw %ax,%ss
+ movw $0x68,%ax
+ movw %ax,%gs
/* Switch to our own interrupt stack. */
- movl $_intstack+INTSTACK_SIZE,%esp
+ movl $solid_intstack+INTSTACK_SIZE-4, %esp
+ andl $0xfffffff0,%esp
+
+ /* Enable local apic in xAPIC mode */
+ xorl %eax, %eax
+ xorl %edx, %edx
+ movl $APIC_MSR, %ecx
+ rdmsr
+ orl $APIC_MSR_ENABLE, %eax
+ orl $APIC_MSR_BSP, %eax
+ andl $(~APIC_MSR_X2APIC), %eax
+ movl $APIC_MSR, %ecx
+ wrmsr
/* Reset EFLAGS to a known state. */
pushl $0
@@ -91,30 +113,67 @@ iplt_done:
/* Jump into C code. */
call EXT(c_boot_entry)
- .comm _intstack,INTSTACK_SIZE
- .comm _eintstack,0
-
.align 16
.word 0
boot_gdt_descr:
- .word 3*8+7
+ .word 14*8-1
.long boot_gdt - KERNELBASE
.align 16
boot_gdt:
/* 0 */
.quad 0
- /* boot CS = 8 */
+
+ /* boot CS = 0x08 */
.word 0xffff
.word (-KERNELBASE) & 0xffff
.byte ((-KERNELBASE) >> 16) & 0xff
- .byte 0x9a
- .byte 0xcf
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
.byte ((-KERNELBASE) >> 24) & 0xff
- /* boot DS = 8 */
+
+ /* boot DS = 0x10 */
.word 0xffff
.word (-KERNELBASE) & 0xffff
.byte ((-KERNELBASE) >> 16) & 0xff
- .byte 0x92
- .byte 0xcf
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
.byte ((-KERNELBASE) >> 24) & 0xff
+ /* LDT = 0x18 */
+ .quad 0
+
+ /* TSS = 0x20 */
+ .quad 0
+
+ /* USER_LDT = 0x28 */
+ .quad 0
+
+ /* USER_TSS = 0x30 */
+ .quad 0
+
+ /* LINEAR = 0x38 */
+ .quad 0
+
+ /* FPREGS = 0x40 */
+ .quad 0
+
+ /* USER_GDT = 0x48 and 0x50 */
+ .quad 0
+ .quad 0
+
+ /* USER_TSS64 = 0x58 */
+ .quad 0
+
+ /* USER_TSS64 (upper half) = 0x60 */
+ .quad 0
+
+ /* boot GS = 0x68 */
+ .word 0xffff
+boot_percpu_low:
+ .word 0
+boot_percpu_med:
+ .byte 0
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
+boot_percpu_high:
+ .byte 0
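
The .word/.byte sequences above scatter the (-KERNELBASE) segment base across the standard x86 descriptor fields; the same layout written as a C structure, purely as a sketch (field names are illustrative):

    #include <stdint.h>

    struct boot_gdt_entry_sketch {
        uint16_t limit_low;   /* limit bits 0..15 (0xffff here)             */
        uint16_t base_low;    /* base bits 0..15: (-KERNELBASE) & 0xffff    */
        uint8_t  base_med;    /* base bits 16..23                           */
        uint8_t  access;      /* e.g. ACC_PL_K | ACC_CODE_R | ACC_P         */
        uint8_t  gran_limit;  /* ((SZ_32 | SZ_G) << 4) | limit bits 16..19  */
        uint8_t  base_high;   /* base bits 24..31                           */
    } __attribute__((__packed__));

The boot GS entry (0x68) uses the same layout, which is why boot_percpu_low/med/high are patched at run time with the address of percpu_array.
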
diff --git a/i386/i386at/com.c b/i386/i386at/com.c
index d565a053..bfe353ce 100644
--- a/i386/i386at/com.c
+++ b/i386/i386at/com.c
@@ -33,7 +33,6 @@
#include <sys/types.h>
#include <kern/printf.h>
#include <kern/mach_clock.h>
-#include <sys/time.h>
#include <device/conf.h>
#include <device/device_types.h>
#include <device/tty.h>
@@ -49,7 +48,7 @@
#include <device/cons.h>
-static void comparam();
+static void comparam(int);
static vm_offset_t com_std[NCOM] = { 0 };
struct bus_device *cominfo[NCOM];
@@ -63,20 +62,15 @@ boolean_t comfifo[NCOM];
boolean_t comtimer_active;
int comtimer_state[NCOM];
-#define RCBAUD B9600
+#define RCBAUD B115200
static int rcline = -1;
static struct bus_device *comcndev;
/* XX */
extern char *kernel_cmdline;
-#ifndef PORTSELECTOR
-#define ISPEED B9600
-#define IFLAGS (EVENP|ODDP|ECHO|CRMOD)
-#else
-#define ISPEED B4800
-#define IFLAGS (EVENP|ODDP)
-#endif
+#define ISPEED B115200
+#define IFLAGS (EVENP|ODDP|ECHO|CRMOD|XTABS|LITOUT)
u_short divisorreg[] = {
0, 2304, 1536, 1047, /* 0, 50, 75, 110*/
@@ -92,7 +86,7 @@ u_short divisorreg[] = {
* the relevant device is present today.
*
*/
-int
+static int
comprobe_general(struct bus_device *dev, int noisy)
{
u_short addr = dev->address;
@@ -189,6 +183,11 @@ comcnprobe(struct consdev *cp)
if (console)
mach_atoi(console + strlen(CONSOLE_PARAMETER), &rcline);
+ if (strncmp(kernel_cmdline, CONSOLE_PARAMETER + 1,
+ strlen(CONSOLE_PARAMETER) - 1) == 0)
+ mach_atoi((u_char*)kernel_cmdline + strlen(CONSOLE_PARAMETER) - 1,
+ &rcline);
+
maj = 0;
unit = -1;
pri = CN_DEAD;
@@ -230,7 +229,7 @@ comattach(struct bus_device *dev)
}
take_dev_irq(dev);
- printf(", port = %lx, spl = %ld, pic = %d. (DOS COM%d)",
+ printf(", port = %zx, spl = %zu, pic = %d. (DOS COM%d)",
dev->address, dev->sysdep, dev->sysdep1, unit+1);
/* comcarrier[unit] = addr->flags;*/
@@ -272,7 +271,7 @@ comcninit(struct consdev *cp)
{
char msg[128];
- volatile unsigned char *p = (volatile unsigned char *)0xb8000;
+ volatile unsigned char *p = (volatile unsigned char *)phystokv(0xb8000);
int i;
sprintf(msg, " **** using COM port %d for console ****",
@@ -294,7 +293,7 @@ comcninit(struct consdev *cp)
* Used to handle PCMCIA modems, which may appear
* at any time.
*/
-boolean_t com_reprobe(
+static boolean_t com_reprobe(
int unit)
{
struct bus_device *device;
@@ -357,18 +356,12 @@ io_return_t comopen(
tp->t_mctl = commctl;
tp->t_getstat = comgetstat;
tp->t_setstat = comsetstat;
-#ifndef PORTSELECTOR
if (tp->t_ispeed == 0) {
-#else
- tp->t_state |= TS_HUPCLS;
-#endif /* PORTSELECTOR */
tp->t_ispeed = ISPEED;
tp->t_ospeed = ISPEED;
tp->t_flags = IFLAGS;
tp->t_state &= ~TS_BUSY;
-#ifndef PORTSELECTOR
}
-#endif /* PORTSELECTOR */
}
/*rvb tp->t_state |= TS_WOPEN; */
if ((tp->t_state & TS_ISOPEN) == 0)
@@ -613,26 +606,6 @@ comparam(int unit)
splx(s);
}
-void
-comparm(int unit, int baud, int intr, int mode, int modem)
-{
- u_short addr = (u_short)(cominfo[unit]->address);
- spl_t s = spltty();
-
- if (unit != 0 && unit != 1) {
- printf("comparm(unit, baud, mode, intr, modem)\n");
- splx(s);
- return;
- }
- outb(LINE_CTL(addr), iDLAB);
- outb(BAUD_LSB(addr), divisorreg[baud] & 0xff);
- outb(BAUD_MSB(addr), divisorreg[baud] >> 8);
- outb(LINE_CTL(addr), mode);
- outb(INTR_ENAB(addr), intr);
- outb(MODEM_CTL(addr), modem);
- splx(s);
-}
-
int comst_1, comst_2, comst_3, comst_4, comst_5 = 14;
void
@@ -856,11 +829,11 @@ void compr_addr(vm_offset_t addr)
/* The two line_stat prints may show different values, since
* touching some of the registers constitutes changing them.
*/
- printf("LINE_STAT(%lu) %x\n",
+ printf("LINE_STAT(%zu) %x\n",
LINE_STAT(addr), inb(LINE_STAT(addr)));
- printf("TXRX(%lu) %x, INTR_ENAB(%lu) %x, INTR_ID(%lu) %x, LINE_CTL(%lu) %x,\n\
-MODEM_CTL(%lu) %x, LINE_STAT(%lu) %x, MODEM_STAT(%lu) %x\n",
+ printf("TXRX(%zu) %x, INTR_ENAB(%zu) %x, INTR_ID(%zu) %x, LINE_CTL(%zu) %x,\n\
+MODEM_CTL(%zu) %x, LINE_STAT(%zu) %x, MODEM_STAT(%zu) %x\n",
TXRX(addr), inb(TXRX(addr)),
INTR_ENAB(addr), inb(INTR_ENAB(addr)),
INTR_ID(addr), inb(INTR_ID(addr)),
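
The serial speed constants above follow the usual 16550 relation divisor = 115200 / baud (a 1.8432 MHz clock divided by 16), which is consistent with the divisorreg table: 2304 for 50 baud, 1536 for 75, roughly 1047 for 110, down to 1 for the new B115200 default. A tiny sketch of that arithmetic (illustrative helper, not in the driver):

    static unsigned short com_divisor(unsigned int baud)
    {
        /* 115200 / 50 = 2304, 115200 / 75 = 1536, 115200 / 110 ~= 1047 */
        return baud ? 115200 / baud : 0;
    }
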
diff --git a/i386/i386at/com.h b/i386/i386at/com.h
index a415488c..3be29305 100644
--- a/i386/i386at/com.h
+++ b/i386/i386at/com.h
@@ -73,6 +73,8 @@ comsetstat(
#if MACH_KDB
extern void kdb_kintr(void);
+extern void compr_addr(vm_offset_t addr);
+extern int compr(int unit);
#endif /* MACH_KDB */
extern io_return_t comopen(dev_t dev, int flag, io_req_t ior);
diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c
index ca5d0dfb..ecbf1e45 100644
--- a/i386/i386at/conf.c
+++ b/i386/i386at/conf.c
@@ -86,76 +86,76 @@ struct dev_ops dev_name_list[] =
indirect list */
{ "cn", nulldev_open, nulldev_close, nulldev_read,
nulldev_write, nulldev_getstat, nulldev_setstat, nomap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info},
#ifndef MACH_HYP
#if ENABLE_IMMEDIATE_CONSOLE
{ "immc", nulldev_open, nulldev_close, nulldev_read,
nulldev_write, nulldev_getstat, nulldev_setstat,
- nomap, nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nomap, nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
#endif /* ENABLE_IMMEDIATE_CONSOLE */
{ kdname, kdopen, kdclose, kdread,
kdwrite, kdgetstat, kdsetstat, kdmmap,
- nodev, nulldev, kdportdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, kdportdeath, 0,
+ nodev_info },
#endif /* MACH_HYP */
{ timename, timeopen, timeclose, nulldev_read,
nulldev_write, nulldev_getstat, nulldev_setstat, timemmap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
#ifndef MACH_HYP
#if NCOM > 0
{ comname, comopen, comclose, comread,
comwrite, comgetstat, comsetstat, nomap,
- nodev, nulldev, comportdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, comportdeath, 0,
+ nodev_info },
#endif
#ifdef MACH_LPR
{ lprname, lpropen, lprclose, lprread,
lprwrite, lprgetstat, lprsetstat, nomap,
- nodev, nulldev, lprportdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, lprportdeath, 0,
+ nodev_info },
#endif
{ mousename, mouseopen, mouseclose, mouseread,
nulldev_write, mousegetstat, nulldev_setstat, nomap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
{ kbdname, kbdopen, kbdclose, kbdread,
nulldev_write, kbdgetstat, kbdsetstat, nomap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
{ memname, nulldev_open, nulldev_close, nulldev_read,
nulldev_write, nulldev_getstat, nulldev_setstat, memmmap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
#endif /* MACH_HYP */
#ifdef MACH_KMSG
{ kmsgname, kmsgopen, kmsgclose, kmsgread,
nulldev_write, kmsggetstat, nulldev_setstat, nomap,
- nodev, nulldev, nulldev_portdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath, 0,
+ nodev_info },
#endif
#ifdef MACH_HYP
{ hypcnname, hypcnopen, hypcnclose, hypcnread,
hypcnwrite, hypcngetstat, hypcnsetstat, nomap,
- nodev, nulldev, hypcnportdeath, 0,
- nodev },
+ nodev_async_in, nulldev_reset, hypcnportdeath, 0,
+ nodev_info },
#endif /* MACH_HYP */
{ irqname, nulldev_open, nulldev_close, nulldev_read,
nulldev_write,nulldev_getstat,nulldev_setstat, nomap,
- nodev, nulldev, nulldev_portdeath,0,
- nodev },
+ nodev_async_in, nulldev_reset, nulldev_portdeath,0,
+ nodev_info },
};
int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]);
diff --git a/i386/i386at/cram.h b/i386/i386at/cram.h
index 8a3a6ec9..ac40cf13 100644
--- a/i386/i386at/cram.h
+++ b/i386/i386at/cram.h
@@ -71,6 +71,11 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* Addresses, related masks, and potential results */
+#define CMOS_SHUTDOWN 0xf
+#define CM_NORM_RST 0x0
+#define CM_LOAD_SYS 0x4
+#define CM_JMP_467 0xa
+
#define CMOS_EB 0x14 /* read Equipment Byte */
#define CM_SCRMSK 0x30 /* mask for EB query to get screen */
#define CM_EGA_VGA 0x00 /* "not CGA or MONO" */
diff --git a/i386/i386at/idt.h b/i386/i386at/idt.h
index ac065aef..19e0abe8 100644
--- a/i386/i386at/idt.h
+++ b/i386/i386at/idt.h
@@ -37,10 +37,17 @@
/* IOAPIC spurious interrupt vector set to 0xff */
#define IOAPIC_SPURIOUS_BASE 0xff
+/* Remote -> local AST requests */
+#define CALL_AST_CHECK 0xfa
+
+/* Currently for TLB shootdowns */
+#define CALL_PMAP_UPDATE 0xfb
+
#include <i386/idt-gen.h>
#ifndef __ASSEMBLER__
extern void idt_init (void);
+extern void ap_idt_init (int cpu);
#endif /* __ASSEMBLER__ */
#endif /* _I386AT_IDT_ */
diff --git a/i386/i386at/immc.c b/i386/i386at/immc.c
index bd61522d..00fc973d 100644
--- a/i386/i386at/immc.c
+++ b/i386/i386at/immc.c
@@ -99,6 +99,14 @@ immc_cnputc(dev_t dev, int c)
memset((void *) phystokv((0xb8000+80*2*24)), 0, 80*2);
ofs = 0;
}
+ else if (c == '\r')
+ {
+ ofs = 0;
+ }
+ else if (c == '\t')
+ {
+ ofs = (ofs & ~7) + 8;
+ }
else
{
volatile unsigned char *p;
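
The new '\t' case above advances the cursor to the next 8-column tab stop, so an offset of 0..7 becomes 8, 8..15 becomes 16, and so on. The same arithmetic as a one-line sketch (not part of the patch):

    static int next_tab_stop(int ofs)
    {
        return (ofs & ~7) + 8;   /* round down to a multiple of 8, then add 8 */
    }
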
diff --git a/i386/i386at/int_init.c b/i386/i386at/int_init.c
index 6da627dd..5c8fce6d 100644
--- a/i386/i386at/int_init.c
+++ b/i386/i386at/int_init.c
@@ -22,29 +22,61 @@
*/
#include <i386at/idt.h>
+#include <i386at/int_init.h>
#include <i386/gdt.h>
+#include <i386/mp_desc.h>
+#include <kern/printf.h>
+#ifdef APIC
+#include <i386/apic.h>
+#endif
/* defined in locore.S */
extern vm_offset_t int_entry_table[];
-void int_init(void)
+static void
+int_fill(struct real_gate *myidt)
{
int i;
#ifndef APIC
- for (i = 0; i < 16; i++) {
- fill_idt_gate(PIC_INT_BASE + i,
+ int base = PIC_INT_BASE;
+ int nirq = 16;
+#else
+ int base = IOAPIC_INT_BASE;
+ int nirq = NINTR;
+#endif
+
+ for (i = 0; i < nirq; i++) {
+ fill_idt_gate(myidt, base + i,
int_entry_table[i], KERNEL_CS,
ACC_PL_K|ACC_INTR_GATE, 0);
}
-#else
- for (i = 0; i < 24; i++) {
- fill_idt_gate(IOAPIC_INT_BASE + i,
+#if NCPUS > 1
+ fill_idt_gate(myidt, CALL_AST_CHECK,
int_entry_table[i], KERNEL_CS,
ACC_PL_K|ACC_INTR_GATE, 0);
- }
- fill_idt_gate(IOAPIC_SPURIOUS_BASE,
- int_entry_table[24], KERNEL_CS,
+ i++;
+ fill_idt_gate(myidt, CALL_PMAP_UPDATE,
+ int_entry_table[i], KERNEL_CS,
+ ACC_PL_K|ACC_INTR_GATE, 0);
+ i++;
+#endif
+#ifdef APIC
+ fill_idt_gate(myidt, IOAPIC_SPURIOUS_BASE,
+ int_entry_table[i], KERNEL_CS,
ACC_PL_K|ACC_INTR_GATE, 0);
+ i++;
#endif
}
+void
+int_init(void)
+{
+ int_fill(idt);
+}
+
+#if NCPUS > 1
+void ap_int_init(int cpu)
+{
+ int_fill(mp_desc_table[cpu]->idt);
+}
+#endif
diff --git a/i386/i386at/int_init.h b/i386/i386at/int_init.h
index f9b03b74..3c11ebce 100644
--- a/i386/i386at/int_init.h
+++ b/i386/i386at/int_init.h
@@ -29,6 +29,7 @@
#ifndef __ASSEMBLER__
extern void int_init (void);
+extern void ap_int_init (int cpu);
#endif /* __ASSEMBLER__ */
#endif /* _INT_INIT_H_ */
diff --git a/i386/i386at/interrupt.S b/i386/i386at/interrupt.S
index e6a6af00..77424b43 100644
--- a/i386/i386at/interrupt.S
+++ b/i386/i386at/interrupt.S
@@ -29,25 +29,38 @@
* Generic interrupt handler.
*
* On entry, %eax contains the irq number.
+ *
+ * Note: kdb_kintr needs to know our stack usage
*/
+
+#define S_REGS 28(%esp)
+#define S_RET 24(%esp)
+#define S_IRQ 20(%esp)
+#define S_IPL 16(%esp)
+
ENTRY(interrupt)
#ifdef APIC
cmpl $255,%eax /* was this a spurious intr? */
- je _no_eoi /* if so, just return */
+ jne 1f
+ ret /* if so, just return */
+1:
#endif
- pushl %eax /* save irq number */
- movl %eax,%ecx /* copy irq number */
- shll $2,%ecx /* irq * 4 */
+ subl $24,%esp /* Two local variables + 4 parameters */
+ movl %eax,S_IRQ /* save irq number */
+
call spl7 /* set ipl */
- movl EXT(iunit)(%ecx),%edx /* get device unit number */
- pushl %eax /* push previous ipl */
- pushl %edx /* push unit number */
- call *EXT(ivect)(%ecx) /* call interrupt handler */
- addl $4,%esp /* pop unit number */
- call splx_cli /* restore previous ipl */
- addl $4,%esp /* pop previous ipl */
- cli /* XXX no more nested interrupts */
- popl %ecx /* restore irq number */
+ movl %eax,S_IPL /* save previous ipl */
+
+ movl S_IRQ,%ecx /* restore irq number */
+
+#if NCPUS > 1
+ cmpl $CALL_PMAP_UPDATE,%ecx /* was this a SMP pmap_update request? */
+ je _call_single
+
+ cmpl $CALL_AST_CHECK,%ecx /* was this a SMP remote -> local ast request? */
+ je _call_local_ast
+#endif
+
#ifndef APIC
movl $1,%eax
shll %cl,%eax /* get corresponding IRQ mask */
@@ -84,15 +97,46 @@ ENTRY(interrupt)
movl EXT(curr_pic_mask),%eax /* restore original mask */
outb %al,$(PIC_MASTER_OCW) /* unmask master */
2:
- ret
#else
- cmpl $16,%ecx /* was this a low ISA intr? */
- jge _no_eoi /* no, must be PCI (let irq_ack handle EOI) */
-_isa_eoi:
- pushl %ecx /* push irq number */
+ movl %ecx,(%esp) /* load irq number as 1st arg */
call EXT(ioapic_irq_eoi) /* ioapic irq specific EOI */
- addl $4,%esp /* pop irq number */
-_no_eoi:
+#endif
+
+ movl S_IPL,%eax
+ movl %eax,4(%esp) /* previous ipl as 2nd arg */
+
+ movl S_RET,%eax
+ movl %eax,8(%esp) /* return address as 3rd arg */
+
+ movl S_REGS,%eax
+ movl %eax,12(%esp) /* address of interrupted registers as 4th arg */
+
+ movl S_IRQ,%eax /* copy irq number */
+
+ shll $2,%eax /* irq * 4 */
+ movl EXT(iunit)(%eax),%edx /* get device unit number */
+ movl %edx,(%esp) /* unit number as 1st arg */
+
+ call *EXT(ivect)(%eax) /* call interrupt handler */
+
+_completed:
+ movl S_IPL,%eax /* restore previous ipl */
+ movl %eax,(%esp)
+ call splx_cli /* restore previous ipl */
+
+ addl $24,%esp /* pop local variables */
ret
+
+#if NCPUS > 1
+_call_single:
+ call EXT(lapic_eoi) /* lapic EOI before the handler to allow extra update */
+ call EXT(pmap_update_interrupt)
+ jmp _completed
+
+_call_local_ast:
+ call EXT(lapic_eoi) /* lapic EOI */
+ call EXT(ast_check) /* AST check on this cpu */
+ jmp _completed
+
#endif
END(interrupt)
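
The S_* offsets at the top of the handler are all relative to %esp after the 24-byte subl, so on a 32-bit build the frame looks roughly like the structure below (an approximate picture only, not code from the patch):

    struct interrupt_frame_sketch {
        unsigned long args[4];   /*  0..12(%esp): outgoing call arguments          */
        unsigned long s_ipl;     /* 16(%esp): previous ipl returned by spl7        */
        unsigned long s_irq;     /* 20(%esp): irq number saved from %eax           */
        unsigned long s_ret;     /* 24(%esp): return address pushed by the caller  */
        /* 28(%esp) and up: the interrupted state, passed as S_REGS */
    };
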
diff --git a/i386/i386at/ioapic.c b/i386/i386at/ioapic.c
index 18a9bec6..2553a2c9 100644
--- a/i386/i386at/ioapic.c
+++ b/i386/i386at/ioapic.c
@@ -28,23 +28,34 @@
#include <i386/pio.h>
#include <i386/pit.h>
#include <i386/pic.h> /* only for macros */
+#include <i386/smp.h>
#include <mach/machine.h>
#include <kern/printf.h>
+#include <kern/timer.h>
+#include <kern/lock.h>
-static int has_irq_specific_eoi = 1; /* FIXME: Assume all machines have this */
-static int timer_gsi;
+static int has_irq_specific_eoi = 0;
int timer_pin;
uint32_t lapic_timer_val = 0;
uint32_t calibrated_ticks = 0;
-spl_t curr_ipl;
+spl_t curr_ipl[NCPUS] = {0};
+int spl_init = 0;
-int iunit[NINTR] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23};
+def_simple_lock_irq_data(static, ioapic_lock) /* Lock for non-atomic window accesses to ioapic */
-void (*ivect[NINTR])() = {
- /* 00 */ intnull, /* install timer later */
+int iunit[NINTR] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ /* 2nd IOAPIC */
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63 };
+
+interrupt_handler_fn ivect[NINTR] = {
+ /* 00 */ (interrupt_handler_fn)hardclock,
/* 01 */ kdintr, /* kdintr, ... */
/* 02 */ intnull,
/* 03 */ intnull, /* lnpoll, comintr, ... */
@@ -72,24 +83,65 @@ void (*ivect[NINTR])() = {
/* 21 */ intnull, /* PIRQF */
/* 22 */ intnull, /* PIRQG */
/* 23 */ intnull, /* PIRQH */
+
+ /* 24 */ intnull,
+ /* 25 */ intnull,
+ /* 26 */ intnull,
+ /* 27 */ intnull,
+ /* 28 */ intnull,
+ /* 29 */ intnull,
+ /* 30 */ intnull,
+ /* 31 */ intnull,
+
+ /* 32 */ intnull,
+ /* 33 */ intnull,
+ /* 34 */ intnull,
+ /* 35 */ intnull,
+ /* 36 */ intnull,
+ /* 37 */ intnull,
+ /* 38 */ intnull,
+ /* 39 */ intnull,
+ /* 40 */ intnull,
+ /* 41 */ intnull,
+ /* 42 */ intnull,
+ /* 43 */ intnull,
+ /* 44 */ intnull,
+ /* 45 */ intnull,
+ /* 46 */ intnull,
+ /* 47 */ intnull,
+ /* 48 */ intnull,
+ /* 49 */ intnull,
+ /* 50 */ intnull,
+ /* 51 */ intnull,
+ /* 52 */ intnull,
+ /* 53 */ intnull,
+ /* 54 */ intnull,
+ /* 55 */ intnull,
+
+ /* 56 */ intnull,
+ /* 57 */ intnull,
+ /* 58 */ intnull,
+ /* 59 */ intnull,
+ /* 60 */ intnull,
+ /* 61 */ intnull,
+ /* 62 */ intnull,
+ /* 63 */ intnull,
};
void
picdisable(void)
{
+ int i;
+
asm("cli");
+ for (i = 0; i < NCPUS; i++)
+ curr_ipl[i] = SPLHI;
/*
** Disable PIC
*/
outb ( PIC_SLAVE_OCW, PICS_MASK );
outb ( PIC_MASTER_OCW, PICM_MASK );
-
- /*
- ** Route interrupts through IOAPIC
- */
- outb ( IMCR_SELECT, MODE_IMCR );
- outb ( IMCR_DATA, IMCR_USE_APIC );
}
void
@@ -142,88 +194,93 @@ ioapic_toggle_entry(int apic, int pin, int mask)
{
union ioapic_route_entry_union entry;
+ spl_t s = simple_lock_irq(&ioapic_lock);
ioapic_read_entry(apic, pin, &entry.both);
entry.both.mask = mask & 0x1;
ioapic_write(apic, APIC_IO_REDIR_LOW(pin), entry.lo);
+ simple_unlock_irq(s, &ioapic_lock);
}
-static void
-cpu_rdmsr(uint32_t msr, uint32_t *lo, uint32_t *hi)
+static int
+ioapic_version(int apic)
{
- __asm__ __volatile__("rdmsr" : "=a"(*lo), "=d"(*hi) : "c"(msr));
+ return (ioapic_read(apic, APIC_IO_VERSION) >> APIC_IO_VERSION_SHIFT) & 0xff;
}
-static void
-cpu_wrmsr(uint32_t msr, uint32_t lo, uint32_t hi)
+static int
+ioapic_gsis(int apic)
{
- __asm__ __volatile__("wrmsr" : : "a"(lo), "d"(hi), "c"(msr));
+ return ((ioapic_read(apic, APIC_IO_VERSION) >> APIC_IO_ENTRIES_SHIFT) & 0xff) + 1;
}
-static void
-global_enable_apic(void)
+static void timer_expiry_callback(void *arg)
{
- uint32_t lo = 0;
- uint32_t hi = 0;
- uint32_t msr = 0x1b;
-
- cpu_rdmsr(msr, &lo, &hi);
-
- if (!(lo & (1 << 11))) {
- lo |= (1 << 11);
- cpu_wrmsr(msr, lo, hi);
- }
+ volatile int *done = arg;
+ *done = 1;
}
static uint32_t
-pit_measure_apic_hz(void)
+timer_measure_10x_apic_hz(void)
{
+ volatile int done = 0;
uint32_t start = 0xffffffff;
+ timer_elt_data_t tmp_timer;
+ tmp_timer.fcn = timer_expiry_callback;
+ tmp_timer.param = (void *)&done;
- /* Prepare accurate delay for 1/100 seconds */
- pit_prepare_sleep(100);
+ printf("timer calibration...");
/* Set APIC timer */
lapic->init_count.r = start;
- /* zZz */
- pit_sleep();
+ /* Delay for 10 ticks (10 * 1/hz seconds) */
+ set_timeout(&tmp_timer, 10);
+ do {
+ cpu_pause();
+ } while (!done);
/* Stop APIC timer */
- lapic->lvt_timer.r = LAPIC_DISABLE;
+ lapic->lvt_timer.r |= LAPIC_DISABLE;
+
+ printf(" done\n");
return start - lapic->cur_count.r;
}
-void lapic_update_timer(void)
+void
+calibrate_lapic_timer(void)
{
- /* Timer decrements until zero and then calls this on every interrupt */
- lapic_timer_val += calibrated_ticks;
+ spl_t s;
+
+ /* Set one-shot timer */
+ lapic->divider_config.r = LAPIC_TIMER_DIVIDE_2;
+ lapic->lvt_timer.r = IOAPIC_INT_BASE;
+
+ /* Measure the number of APIC timer ticks in 10 mach ticks,
+ * then divide by 10 to get the count for one tick */
+ if (!calibrated_ticks) {
+ s = splhigh();
+ spl0();
+ calibrated_ticks = timer_measure_10x_apic_hz() / 10;
+ splx(s);
+ }
}
void
lapic_enable_timer(void)
{
- spl_t s;
-
- s = sploff();
- asm("cli");
-
/* Set up counter */
lapic->init_count.r = calibrated_ticks;
- lapic->divider_config.r = LAPIC_TIMER_DIVIDE_16;
+ lapic->divider_config.r = LAPIC_TIMER_DIVIDE_2;
/* Set the timer to interrupt periodically on remapped timer GSI */
- lapic->lvt_timer.r = (IOAPIC_INT_BASE + timer_gsi) | LAPIC_TIMER_PERIODIC;
+ lapic->lvt_timer.r = IOAPIC_INT_BASE | LAPIC_TIMER_PERIODIC;
/* Some buggy hardware requires this set again */
- lapic->divider_config.r = LAPIC_TIMER_DIVIDE_16;
+ lapic->divider_config.r = LAPIC_TIMER_DIVIDE_2;
- /* Unmask the remapped timer pin and pin 0 always */
- ioapic_toggle(0, IOAPIC_MASK_ENABLED);
- ioapic_toggle(timer_pin, IOAPIC_MASK_ENABLED);
-
- splon(s);
- printf("LAPIC timer configured\n");
+ /* Enable interrupts for the first time */
+ printf("LAPIC timer configured on cpu%d\n", cpu_number());
}
void
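
As a worked example of the calibration above (numbers invented for illustration): with hz = 100 the 10-tick window spans 100 ms, so if the LAPIC counter dropped by 5,000,000 over that window, calibrated_ticks ends up as 500,000 counts per mach tick. The arithmetic is simply:

    #include <stdint.h>

    /* counts observed over 10 mach ticks -> counts per tick */
    static uint32_t ticks_per_mach_tick(uint32_t counted_over_10_ticks)
    {
        return counted_over_10_ticks / 10;
    }
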
@@ -234,17 +291,16 @@ ioapic_toggle(int pin, int mask)
}
void
-lapic_eoi(void)
-{
- lapic->eoi.r = 0;
-}
-
-void
ioapic_irq_eoi(int pin)
{
int apic = 0;
union ioapic_route_entry_union oldentry, entry;
+ if (pin == 0)
+ goto skip_specific_eoi;
+
+ spl_t s = simple_lock_irq(&ioapic_lock);
+
if (!has_irq_specific_eoi) {
/* Workaround for old IOAPICs with no specific EOI */
@@ -264,19 +320,10 @@ ioapic_irq_eoi(int pin)
ioapic->eoi.r = entry.both.vector;
}
- lapic_eoi ();
-}
+ simple_unlock_irq(s, &ioapic_lock);
-void
-unmask_irq(unsigned int irq)
-{
- ioapic_toggle(irq, IOAPIC_MASK_ENABLED);
-}
-
-void
-mask_irq(unsigned int irq)
-{
- ioapic_toggle(irq, IOAPIC_MASK_DISABLED);
+skip_specific_eoi:
+ lapic_eoi ();
}
static unsigned int
@@ -321,12 +368,21 @@ ioapic_configure(void)
/* Assume first IO APIC maps to GSI base 0 */
int gsi, apic = 0, bsp = 0, pin;
IrqOverrideData *irq_over;
+ int timer_gsi;
+ int version = ioapic_version(apic);
+ int ngsis = ioapic_gsis(apic);
+ int ngsis2 = 0;
+
+ if (version >= 0x20) {
+ has_irq_specific_eoi = 1;
+ }
+
+ printf("IOAPIC version 0x%x\n", version);
/* Disable IOAPIC interrupts and set spurious interrupt */
lapic->spurious_vector.r = IOAPIC_SPURIOUS_BASE;
union ioapic_route_entry_union entry = {{0, 0}};
- union ioapic_route_entry_union timer_entry = {{0, 0}};
entry.both.delvmode = IOAPIC_FIXED;
entry.both.destmode = IOAPIC_PHYSICAL;
@@ -350,21 +406,20 @@ ioapic_configure(void)
if (pin == 0) {
/* Save timer info */
timer_gsi = gsi;
- timer_entry = entry;
} else {
- /* Get the actual timer pin by assuming that the pin
- * with duplicated gsi from pin 0 maps to the timer pin */
+ /* Remap timer irq */
if (gsi == timer_gsi) {
timer_pin = pin;
- /* Remap pin 0 interrupt vector to GSI base
- * so we don't duplicate vectors */
- timer_entry.both.vector = IOAPIC_INT_BASE;
- ioapic_write_entry(apic, 0, timer_entry.both);
+ /* Remap GSI base to timer pin so ivect[0] is the timer */
+ entry.both.vector = IOAPIC_INT_BASE;
+ ioapic_write_entry(apic, timer_pin, entry.both);
+ /* Mask the duplicate pin 0 as we will be using timer_pin */
+ mask_irq(0);
}
}
}
- for (pin = 16; pin < 24; pin++) {
+ for (pin = 16; pin < ngsis; pin++) {
gsi = pin;
/* PCI IRQs PIRQ A-H */
@@ -378,44 +433,31 @@ ioapic_configure(void)
ioapic_write_entry(apic, pin, entry.both);
}
- /* Start the IO APIC receiving interrupts */
- lapic->apic_id.r = apic_get_cpu_apic_id(bsp);
- lapic->dest_format.r = 0xffffffff; /* flat model */
- lapic->logical_dest.r = 0x01000000; /* target bsp */
- lapic->lvt_timer.r = LAPIC_DISABLE;
- lapic->lvt_performance_monitor.r = LAPIC_NMI;
- lapic->lvt_lint0.r = LAPIC_DISABLE;
- lapic->lvt_lint1.r = LAPIC_DISABLE;
- lapic->task_pri.r = 0;
-
- global_enable_apic();
-
- /* Enable IOAPIC processor focus */
- lapic->spurious_vector.r |= LAPIC_FOCUS;
-
- /* Enable directed EOI if applicable */
- if (has_irq_specific_eoi || lapic->version.r & LAPIC_HAS_DIRECTED_EOI) {
- has_irq_specific_eoi = 1;
- lapic->spurious_vector.r |= LAPIC_ENABLE_DIRECTED_EOI;
- }
+ printf("IOAPIC 0 configured with GSI 0-%d\n", ngsis - 1);
- /* Enable IOAPIC interrupts */
- lapic->spurious_vector.r |= LAPIC_ENABLE;
+ /* Second IOAPIC */
+ if (apic_get_num_ioapics() > 1) {
+ apic = 1;
+ ngsis2 = ioapic_gsis(apic);
- /* Set one-shot timer */
- lapic->divider_config.r = LAPIC_TIMER_DIVIDE_16;
- lapic->lvt_timer.r = IOAPIC_INT_BASE + timer_gsi;
+ for (pin = 0; pin < ngsis2; pin++) {
+ gsi = pin + ngsis;
- /* Measure number of APIC timer ticks in 10ms */
- calibrated_ticks = pit_measure_apic_hz();
+ /* Defaults */
+ entry.both.trigger = IOAPIC_LEVEL_TRIGGERED;
+ entry.both.polarity = IOAPIC_ACTIVE_LOW;
- /* Set up counter later */
- lapic->lvt_timer.r = LAPIC_DISABLE;
+ if ((irq_over = acpi_get_irq_override(pin + ngsis))) {
+ gsi = override_irq(irq_over, &entry);
+ }
+ entry.both.vector = IOAPIC_INT_BASE + gsi;
+ ioapic_write_entry(apic, pin, entry.both);
+ }
- /* Install clock interrupt handler on both remapped timer pin and pin 0
- * since nobody knows how all x86 timers are wired up */
- ivect[0] = hardclock;
- ivect[timer_pin] = hardclock;
+ printf("IOAPIC 1 configured with GSI %d-%d\n", ngsis, ngsis + ngsis2 - 1);
+ }
- printf("IOAPIC 0 configured\n");
+ /* Start the IO APIC receiving interrupts */
+ lapic_setup();
+ lapic_enable();
}
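
The calibration path above lets the LAPIC timer free-run from 0xffffffff while the existing system clock delivers 10 ticks, then divides the observed decrement by 10 to get LAPIC ticks per clock tick; lapic_enable_timer() later loads that quotient into init_count with LAPIC_TIMER_PERIODIC so the LAPIC fires once per tick. A minimal sketch of the arithmetic, with hypothetical helpers (wait_clock_ticks, read_lapic_current) standing in for the set_timeout() wait and the register read:

/* Sketch only, not the kernel code: assumes the LAPIC timer is already
 * counting down from 0xffffffff in one-shot mode. */
static uint32_t lapic_ticks_per_clock_tick(void)
{
	const uint32_t start = 0xffffffff;
	uint32_t remaining;

	wait_clock_ticks(10);                 /* hypothetical: block for 10 system clock ticks */
	remaining = read_lapic_current();     /* hypothetical: lapic->cur_count.r */

	/* (start - remaining) LAPIC ticks elapsed during 10 clock ticks,
	 * so one clock tick corresponds to a tenth of that. */
	return (start - remaining) / 10;
}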
diff --git a/i386/i386at/kd.c b/i386/i386at/kd.c
index b5501873..2bea3c8c 100644
--- a/i386/i386at/kd.c
+++ b/i386/i386at/kd.c
@@ -85,6 +85,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <device/io_req.h>
#include <device/buf.h>
#include <vm/vm_kern.h>
+#include <i386/db_interface.h>
#include <i386/locore.h>
#include <i386/loose_ends.h>
#include <i386/vm_param.h>
@@ -107,20 +108,24 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
struct tty kd_tty;
extern boolean_t rebootflag;
-static void charput(), charmvup(), charmvdown(), charclear(), charsetcursor();
+static void charput(csrpos_t pos, char ch, char chattr);
+static void charmvup(csrpos_t from, csrpos_t to, int count);
+static void charmvdown(csrpos_t from, csrpos_t to, int count);
+static void charclear(csrpos_t to, int count, char chattr);
+static void charsetcursor(csrpos_t newpos);
static void kd_noopreset(void);
/*
* These routines define the interface to the device-specific layer.
* See kdsoft.h for a more complete description of what each routine does.
*/
-void (*kd_dput)() = charput; /* put attributed char */
-void (*kd_dmvup)() = charmvup; /* block move up */
-void (*kd_dmvdown)() = charmvdown; /* block move down */
-void (*kd_dclear)() = charclear; /* block clear */
-void (*kd_dsetcursor)() = charsetcursor;
+void (*kd_dput)(csrpos_t, char, char) = charput; /* put attributed char */
+void (*kd_dmvup)(csrpos_t, csrpos_t, int) = charmvup; /* block move up */
+void (*kd_dmvdown)(csrpos_t, csrpos_t, int) = charmvdown; /* block move down */
+void (*kd_dclear)(csrpos_t, int, char) = charclear; /* block clear */
+void (*kd_dsetcursor)(csrpos_t) = charsetcursor;
/* set cursor position on displayed page */
-void (*kd_dreset)() = kd_noopreset; /* prepare for reboot */
+void (*kd_dreset)(void) = kd_noopreset; /* prepare for reboot */
/*
* Globals used for both character-based controllers and bitmap-based
@@ -342,6 +347,15 @@ short font_byte_width = 0; /* num bytes in 1 scan line of font */
int kd_pollc = 0;
#ifdef DEBUG
+static void
+pause(void)
+{
+ int i;
+
+ for (i = 0; i < 50000; ++i)
+ ;
+}
+
/*
* feep:
*
@@ -351,23 +365,11 @@ int kd_pollc = 0;
void
feep(void)
{
- int i;
-
kd_bellon();
- for (i = 0; i < 50000; ++i)
- ;
+ pause();
kd_belloff(NULL);
}
-void
-pause(void)
-{
- int i;
-
- for (i = 0; i < 50000; ++i)
- ;
-}
-
/*
* Put a debugging character on the screen.
* LOC=0 means put it in the bottom right corner, LOC=1 means put it
@@ -439,26 +441,24 @@ kdopen(
spl_t o_pri;
tp = &kd_tty;
- o_pri = spltty();
- simple_lock(&tp->t_lock);
+ o_pri = simple_lock_irq(&tp->t_lock);
if (!(tp->t_state & (TS_ISOPEN|TS_WOPEN))) {
/* XXX ttychars allocates memory */
- simple_unlock(&tp->t_lock);
+ simple_unlock_nocheck(&tp->t_lock.slock);
ttychars(tp);
- simple_lock(&tp->t_lock);
+ simple_lock_nocheck(&tp->t_lock.slock);
/*
* Special support for boot-time rc scripts, which don't
* stty the console.
*/
tp->t_oproc = kdstart;
tp->t_stop = kdstop;
- tp->t_ospeed = tp->t_ispeed = B9600;
- tp->t_flags = ODDP|EVENP|ECHO|CRMOD|XTABS;
+ tp->t_ospeed = tp->t_ispeed = B115200;
+ tp->t_flags = ODDP|EVENP|ECHO|CRMOD|XTABS|LITOUT;
kdinit();
}
tp->t_state |= TS_CARR_ON;
- simple_unlock(&tp->t_lock);
- splx(o_pri);
+ simple_unlock_irq(o_pri, &tp->t_lock);
return (char_open(dev, tp, flag, ior));
}
@@ -476,19 +476,16 @@ kdopen(
*/
/*ARGSUSED*/
void
-kdclose(dev, flag)
-dev_t dev;
-int flag;
+kdclose(dev_t dev, int flag)
{
struct tty *tp;
tp = &kd_tty;
{
- spl_t s = spltty();
- simple_lock(&tp->t_lock);
+ spl_t s;
+ s = simple_lock_irq(&tp->t_lock);
ttyclose(tp);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
}
return;
@@ -508,9 +505,7 @@ int flag;
*/
/*ARGSUSED*/
int
-kdread(dev, uio)
-dev_t dev;
-io_req_t uio;
+kdread(dev_t dev, io_req_t uio)
{
struct tty *tp;
@@ -533,9 +528,7 @@ io_req_t uio;
*/
/*ARGSUSED*/
int
-kdwrite(dev, uio)
-dev_t dev;
-io_req_t uio;
+kdwrite(dev_t dev, io_req_t uio)
{
return((*linesw[kd_tty.t_line].l_write)(&kd_tty, uio));
}
@@ -546,10 +539,7 @@ io_req_t uio;
/*ARGSUSED*/
vm_offset_t
-kdmmap(dev, off, prot)
- dev_t dev;
- vm_offset_t off;
- vm_prot_t prot;
+kdmmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
{
if (off >= (128*1024))
return(-1);
@@ -1015,9 +1005,8 @@ kdcheckmagic(Scancode scancode)
* corresponds to the given state.
*/
unsigned int
-kdstate2idx(state, extended)
-unsigned int state; /* bit vector, not a state index */
-boolean_t extended;
+kdstate2idx(unsigned int state, /* bit vector, not a state index */
+ boolean_t extended)
{
int state_idx = NORM_STATE;
@@ -1138,6 +1127,7 @@ kdinit(void)
k_comm |= K_CB_ENBLIRQ; /* enable interrupt */
kd_sendcmd(KC_CMD_WRITE); /* write new ctlr command byte */
kd_senddata(k_comm);
+ unmask_irq(KBD_IRQ);
kd_initialized = TRUE;
#if ENABLE_IMMEDIATE_CONSOLE
@@ -1444,7 +1434,7 @@ kd_parseesc(void)
*/
#define reverse_video_char(a) (((a) & 0x88) | ((((a) >> 4) | ((a) << 4)) & 0x77))
-void
+static void
kd_update_kd_attr(void)
{
kd_attr = kd_color;
@@ -2261,20 +2251,6 @@ kd_getdata(void)
return(inb(K_RDWR));
}
-unsigned char
-kd_cmdreg_read(void)
-{
-int ch=KC_CMD_READ;
-
- while (inb(K_STATUS) & K_IBUF_FUL)
- ;
- outb(K_CMD, ch);
-
- while ((inb(K_STATUS) & K_OBUF_FUL) == 0)
- ;
- return(inb(K_RDWR));
-}
-
void
kd_cmdreg_write(int val)
{
@@ -2479,6 +2455,33 @@ int new_button = 0;
*/
#define SLAMBPW 2 /* bytes per word for "slam" fcns */
+/*
+ * xga_getpos:
+ *
+ * This function returns the current hardware cursor position on the
+ * screen, scaled for compatibility with kd_curpos.
+ *
+ * input : None
+ * output : returns the value of cursor position on screen
+ *
+ */
+static csrpos_t
+xga_getpos(void)
+
+{
+ unsigned char low;
+ unsigned char high;
+ short pos;
+
+ outb(kd_index_reg, C_HIGH);
+ high = inb(kd_io_reg);
+ outb(kd_index_reg, C_LOW);
+ low = inb(kd_io_reg);
+ pos = (0xff&low) + ((unsigned short)high<<8);
+
+ return(ONE_SPACE * (csrpos_t)pos);
+}
+
/*
* kd_xga_init:
@@ -2488,7 +2491,6 @@ int new_button = 0;
void
kd_xga_init(void)
{
- csrpos_t xga_getpos();
unsigned char start, stop;
#if 0
@@ -2577,43 +2579,12 @@ kd_xga_init(void)
/*
- * xga_getpos:
- *
- * This function returns the current hardware cursor position on the
- * screen, scaled for compatibility with kd_curpos.
- *
- * input : None
- * output : returns the value of cursor position on screen
- *
- */
-csrpos_t
-xga_getpos(void)
-
-{
- unsigned char low;
- unsigned char high;
- short pos;
-
- outb(kd_index_reg, C_HIGH);
- high = inb(kd_io_reg);
- outb(kd_index_reg, C_LOW);
- low = inb(kd_io_reg);
- pos = (0xff&low) + ((unsigned short)high<<8);
-
- return(ONE_SPACE * (csrpos_t)pos);
-}
-
-
-/*
* charput:
*
* Put attributed character for EGA/CGA/etc.
*/
static void
-charput(pos, ch, chattr)
-csrpos_t pos; /* where to put it */
-char ch; /* the character */
-char chattr; /* its attribute */
+charput(csrpos_t pos, char ch, char chattr)
{
*(vid_start + pos) = ch;
*(vid_start + pos + 1) = chattr;
@@ -2626,8 +2597,7 @@ char chattr; /* its attribute */
* Set hardware cursor position for EGA/CGA/etc.
*/
static void
-charsetcursor(newpos)
-csrpos_t newpos;
+charsetcursor(csrpos_t newpos)
{
short curpos; /* position, not scaled for attribute byte */
@@ -2647,9 +2617,7 @@ csrpos_t newpos;
* Block move up for EGA/CGA/etc.
*/
static void
-charmvup(from, to, count)
-csrpos_t from, to;
-int count;
+charmvup(csrpos_t from, csrpos_t to, int count)
{
kd_slmscu(vid_start+from, vid_start+to, count);
}
@@ -2661,9 +2629,7 @@ int count;
* Block move down for EGA/CGA/etc.
*/
static void
-charmvdown(from, to, count)
-csrpos_t from, to;
-int count;
+charmvdown(csrpos_t from, csrpos_t to, int count)
{
kd_slmscd(vid_start+from, vid_start+to, count);
}
@@ -2675,10 +2641,7 @@ int count;
* Fast clear for CGA/EGA/etc.
*/
static void
-charclear(to, count, chattr)
-csrpos_t to;
-int count;
-char chattr;
+charclear(csrpos_t to, int count, char chattr)
{
kd_slmwd(vid_start+to, count, ((unsigned short)chattr<<8)+K_SPACE);
}
@@ -2728,7 +2691,7 @@ bmpput(
* bmpcp1char: copy 1 char from one place in the frame buffer to
* another.
*/
-void
+static void
bmpcp1char(
csrpos_t from,
csrpos_t to)
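
The kdopen()/kdclose() hunks above fold the old spltty()-then-simple_lock() pairs into the combined simple_lock_irq()/simple_unlock_irq() helpers. A minimal sketch of the calling convention as used here, assuming the helpers raise the interrupt level and take the lock, then release it and restore the saved level:

/* Sketch of the locking pattern, not the kernel's definition. */
static void tty_state_update(struct tty *tp)
{
	spl_t s;

	s = simple_lock_irq(&tp->t_lock);    /* raise spl and acquire the lock */
	tp->t_state |= TS_CARR_ON;           /* ...critical section... */
	simple_unlock_irq(s, &tp->t_lock);   /* release and restore the old spl */
}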
diff --git a/i386/i386at/kd.h b/i386/i386at/kd.h
index 6f425ae9..5bfabce2 100644
--- a/i386/i386at/kd.h
+++ b/i386/i386at/kd.h
@@ -71,13 +71,13 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifndef _KD_H_
#define _KD_H_
-#include <sys/ioctl.h>
+#include <device/input.h>
#include <mach/boolean.h>
#include <sys/types.h>
-#include <sys/time.h>
#include <device/cons.h>
#include <device/io_req.h>
#include <device/buf.h>
+#include <device/input.h>
#include <device/tty.h>
#include <i386at/kdsoft.h>
@@ -273,11 +273,6 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define KS_CTLED 0x20
-/*
- * Scancode values, not to be confused with Ascii values.
- */
-typedef u_char Scancode;
-
/* special codes */
#define K_UP 0x80 /* OR'd in if key below is released */
#define K_EXTEND 0xe0 /* marker for "extended" sequence */
@@ -551,6 +546,7 @@ typedef u_char Scancode;
#define K_PDN 0x1b,0x5b,0x55
#define K_INS 0x1b,0x5b,0x40
+#define KBD_IRQ 1
/*
* This array maps scancodes to Ascii characters (or character
@@ -618,34 +614,10 @@ struct kbentry {
* Ioctl's on /dev/kbd.
*/
-/*
- * KDSKBDMODE - When the console is in "ascii" mode, keyboard events are
- * converted to Ascii characters that are readable from /dev/console.
- * When the console is in "event" mode, keyboard events are
- * timestamped and queued up on /dev/kbd as kd_events. When the last
- * close is done on /dev/kbd, the console automatically reverts to ascii
- * mode.
- * When /dev/mouse is opened, mouse events are timestamped and queued
- * on /dev/mouse, again as kd_events.
- *
- * KDGKBDTYPE - Returns the type of keyboard installed. Currently
- * there is only one type, KB_VANILLAKB, which is your standard PC-AT
- * keyboard.
- */
-
#ifdef KERNEL
extern int kb_mode;
#endif
-#define KDSKBDMODE _IOW('K', 1, int) /* set keyboard mode */
-#define KB_EVENT 1
-#define KB_ASCII 2
-
-#define KDGKBDTYPE _IOR('K', 2, int) /* get keyboard type */
-#define KB_VANILLAKB 0
-
-#define KDSETLEDS _IOW('K', 5, int) /* set the keyboard ledstate */
-
struct X_kdb {
u_int *ptr;
u_int size;
@@ -662,35 +634,6 @@ struct X_kdb {
#define K_X_TYPE 0x03070000
#define K_X_PORT 0x0000ffff
-typedef u_short kev_type; /* kd event type */
-
-/* (used for event records) */
-struct mouse_motion {
- short mm_deltaX; /* units? */
- short mm_deltaY;
-};
-
-typedef struct {
- kev_type type; /* see below */
- struct timeval time; /* timestamp */
- union { /* value associated with event */
- boolean_t up; /* MOUSE_LEFT .. MOUSE_RIGHT */
- Scancode sc; /* KEYBD_EVENT */
- struct mouse_motion mmotion; /* MOUSE_MOTION */
- } value;
-} kd_event;
-#define m_deltaX mmotion.mm_deltaX
-#define m_deltaY mmotion.mm_deltaY
-
-/*
- * kd_event ID's.
- */
-#define MOUSE_LEFT 1 /* mouse left button up/down */
-#define MOUSE_MIDDLE 2
-#define MOUSE_RIGHT 3
-#define MOUSE_MOTION 4 /* mouse motion */
-#define KEYBD_EVENT 5 /* key up/down */
-
extern boolean_t kd_isupper (u_char);
extern boolean_t kd_islower (u_char);
extern void kd_senddata (unsigned char);
@@ -750,7 +693,7 @@ extern void kd_slmscd (void *from, void *to, int count);
extern void kdintr(int vec);
#if MACH_KDB
-extern void kdb_kintr(void);
+#include <ddb/db_input.h>
#endif /* MACH_KDB */
extern int kdopen(dev_t dev, int flag, io_req_t ior);
diff --git a/i386/i386at/kd_event.c b/i386/i386at/kd_event.c
index bed9240f..247d95b1 100644
--- a/i386/i386at/kd_event.c
+++ b/i386/i386at/kd_event.c
@@ -66,6 +66,11 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <i386/pio.h>
#include <i386at/kd.h>
#include <i386at/kd_queue.h>
+#ifdef APIC
+# include <i386/apic.h>
+#else
+# include <i386/pic.h>
+#endif
#include "kd_event.h"
@@ -90,7 +95,7 @@ static boolean_t initialized = FALSE;
* kbdinit - set up event queue.
*/
-void
+static void
kbdinit(void)
{
spl_t s = SPLKD();
@@ -110,10 +115,7 @@ kbdinit(void)
/*ARGSUSED*/
int
-kbdopen(dev, flags, ior)
- dev_t dev;
- int flags;
- io_req_t ior;
+kbdopen(dev_t dev, int flags, io_req_t ior)
{
spl_t o_pri = spltty();
kdinit();
@@ -278,7 +280,9 @@ kd_enqsc(Scancode sc)
kd_event ev;
ev.type = KEYBD_EVENT;
- ev.time = time;
+ /* Not used but we set it to avoid garbage */
+ ev.unused_time.seconds = 0;
+ ev.unused_time.microseconds = 0;
ev.value.sc = sc;
kbd_enqueue(&ev);
}
@@ -307,9 +311,8 @@ kbd_enqueue(kd_event *ev)
u_int X_kdb_enter_str[512], X_kdb_exit_str[512];
int X_kdb_enter_len = 0, X_kdb_exit_len = 0;
-void
-kdb_in_out(p)
-const u_int *p;
+static void
+kdb_in_out(const u_int *p)
{
int t = p[0];
diff --git a/i386/i386at/kd_mouse.c b/i386/i386at/kd_mouse.c
index 4b883ba8..9bd001cb 100644
--- a/i386/i386at/kd_mouse.c
+++ b/i386/i386at/kd_mouse.c
@@ -82,7 +82,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "kd_mouse.h"
-static void (*oldvect)(); /* old interrupt vector */
+static interrupt_handler_fn oldvect; /* old interrupt vector */
static int oldunit;
extern struct bus_device *cominfo[];
@@ -106,11 +106,12 @@ boolean_t mouse_char_cmd = FALSE; /* mouse response is to cmd */
boolean_t mouse_char_wanted = FALSE; /* want mouse response */
int mouse_char_index; /* mouse response */
+#define IBM_MOUSE_IRQ 12
/*
* init_mouse_hw - initialize the serial port.
*/
-void
+static void
init_mouse_hw(dev_t unit, int mode)
{
unsigned short base_addr = cominfo[unit]->address;
@@ -146,10 +147,7 @@ int track_man[10];
/*ARGSUSED*/
int
-mouseopen(dev, flags, ior)
- dev_t dev;
- int flags;
- io_req_t ior;
+mouseopen(dev_t dev, int flags, io_req_t ior)
{
if (mouse_in_use)
return (D_ALREADY_OPEN);
@@ -186,7 +184,7 @@ mouseopen(dev, flags, ior)
break;
case IBM_MOUSE:
mousebufsize = 3;
- kd_mouse_open(dev, 12);
+ kd_mouse_open(dev, IBM_MOUSE_IRQ);
ibm_ps2_mouse_open(dev);
break;
case NO_MOUSE:
@@ -225,6 +223,7 @@ kd_mouse_open(
oldvect = ivect[mouse_pic];
ivect[mouse_pic] = kdintr;
+ unmask_irq(mouse_pic);
splx(s);
}
@@ -246,7 +245,7 @@ mouseclose(
break;
case IBM_MOUSE:
ibm_ps2_mouse_close(dev);
- kd_mouse_close(dev, 12);
+ kd_mouse_close(dev, IBM_MOUSE_IRQ);
{int i = 20000; for (;i--;); }
kd_mouse_drain();
break;
@@ -285,6 +284,7 @@ kd_mouse_close(
{
spl_t s = splhi();
+ mask_irq(mouse_pic);
ivect[mouse_pic] = oldvect;
splx(s);
}
@@ -430,7 +430,6 @@ int lastgitech = 0x40; /* figure whether the first 3 bytes imply */
int fourthgitech = 0; /* look for the 4th byte; we must process it */
int middlegitech = 0; /* what should the middle button be */
-#define MOUSEBUFSIZE 5 /* num bytes def'd by protocol */
static u_char mousebuf[MOUSEBUFSIZE]; /* 5-byte packet from mouse */
void
@@ -598,7 +597,7 @@ mouse_packet_microsoft_mouse(u_char mousebuf[MOUSEBUFSIZE])
/*
* Write character to mouse. Called at spltty.
*/
-void kd_mouse_write(
+static void kd_mouse_write(
unsigned char ch)
{
while (inb(K_STATUS) & K_IBUF_FUL)
@@ -614,7 +613,7 @@ void kd_mouse_write(
* Read next character from mouse, waiting for interrupt
* to deliver it. Called at spltty.
*/
-int kd_mouse_read(void)
+static int kd_mouse_read(void)
{
int ch;
@@ -637,7 +636,7 @@ int kd_mouse_read(void)
/*
* Prepare buffer for receiving next packet from mouse.
*/
-void kd_mouse_read_reset(void)
+static void kd_mouse_read_reset(void)
{
mousebufindex = 0;
mouse_char_index = 0;
@@ -755,7 +754,9 @@ mouse_moved(struct mouse_motion where)
kd_event ev;
ev.type = MOUSE_MOTION;
- ev.time = time;
+ /* Not used but we set it to avoid garbage */
+ ev.unused_time.seconds = 0;
+ ev.unused_time.microseconds = 0;
ev.value.mmotion = where;
mouse_enqueue(&ev);
}
@@ -771,8 +772,10 @@ mouse_button(
kd_event ev;
ev.type = which;
- ev.time = time;
ev.value.up = (direction == MOUSE_UP) ? TRUE : FALSE;
+ /* Not used but we set it to avoid garbage */
+ ev.unused_time.seconds = 0;
+ ev.unused_time.microseconds = 0;
mouse_enqueue(&ev);
}
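
kd_mouse_open()/kd_mouse_close() above hook the mouse by saving the previous ivect entry, installing kdintr, and unmasking the IRQ, then undo those steps in reverse on close. A compact sketch of that pairing; the handler and primitives are taken from the surrounding code, while the function names here are illustrative only:

/* Illustrative sketch of the install/restore pattern used above. */
static interrupt_handler_fn saved_vector;

static void mouse_vector_install(int irq)
{
	spl_t s = splhi();          /* keep interrupts off while swapping vectors */
	saved_vector = ivect[irq];
	ivect[irq] = kdintr;        /* shared keyboard/mouse interrupt handler */
	unmask_irq(irq);
	splx(s);
}

static void mouse_vector_remove(int irq)
{
	spl_t s = splhi();
	mask_irq(irq);              /* stop deliveries before unhooking */
	ivect[irq] = saved_vector;
	splx(s);
}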
diff --git a/i386/i386at/kd_mouse.h b/i386/i386at/kd_mouse.h
index 2d813c4a..a9fb1284 100644
--- a/i386/i386at/kd_mouse.h
+++ b/i386/i386at/kd_mouse.h
@@ -28,6 +28,8 @@
#include <sys/types.h>
+#define MOUSEBUFSIZE 5 /* num bytes def'd by protocol */
+
extern void mouse_button (kev_type which, u_char direction);
extern void mouse_enqueue (kd_event *ev);
@@ -48,11 +50,11 @@ extern void ibm_ps2_mouse_open (dev_t dev);
extern void ibm_ps2_mouse_close (dev_t dev);
-extern void mouse_packet_microsoft_mouse (u_char *mousebuf);
+extern void mouse_packet_microsoft_mouse (u_char mousebuf[MOUSEBUFSIZE]);
-extern void mouse_packet_mouse_system_mouse (u_char *mousebuf);
+extern void mouse_packet_mouse_system_mouse (u_char mousebuf[MOUSEBUFSIZE]);
-extern void mouse_packet_ibm_ps2_mouse (u_char *mousebuf);
+extern void mouse_packet_ibm_ps2_mouse (u_char mousebuf[MOUSEBUFSIZE]);
extern int mouseopen(dev_t dev, int flags, io_req_t ior);
extern void mouseclose(dev_t dev, int flags);
diff --git a/i386/i386at/kd_queue.c b/i386/i386at/kd_queue.c
index 57d6fbf7..ab399cd8 100644
--- a/i386/i386at/kd_queue.c
+++ b/i386/i386at/kd_queue.c
@@ -71,28 +71,24 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define q_next(index) (((index)+1) % KDQSIZE)
boolean_t
-kdq_empty(q)
- const kd_event_queue *q;
+kdq_empty(const kd_event_queue *q)
{
return(q->firstfree == q->firstout);
}
boolean_t
-kdq_full(q)
- const kd_event_queue *q;
+kdq_full(const kd_event_queue *q)
{
return(q_next(q->firstfree) == q->firstout);
}
void
-kdq_put(q, ev)
- kd_event_queue *q;
- kd_event *ev;
+kdq_put(kd_event_queue *q, kd_event *ev)
{
kd_event *qp = q->events + q->firstfree;
qp->type = ev->type;
- qp->time = ev->time;
+ qp->unused_time = ev->unused_time;
qp->value = ev->value;
q->firstfree = q_next(q->firstfree);
}
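
The event queue above uses the usual one-slot-reserved ring: q_next() advances an index modulo KDQSIZE, the queue is empty when both indices coincide, and full when advancing firstfree would land on firstout. A self-contained sketch of that invariant with a hypothetical capacity:

/* Sketch of the ring-buffer convention used by kd_queue.c; SIZE is
 * hypothetical, the driver uses KDQSIZE. */
#define SIZE	8
#define NEXT(i)	(((i) + 1) % SIZE)

struct ring {
	int first_free;		/* next slot to write */
	int first_out;		/* next slot to read */
};

static int ring_empty(const struct ring *r) { return r->first_free == r->first_out; }
static int ring_full(const struct ring *r)  { return NEXT(r->first_free) == r->first_out; }
/* At most SIZE - 1 events are ever stored; reserving one slot is what
 * lets empty and full be told apart without a separate counter. */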
diff --git a/i386/i386at/kdsoft.h b/i386/i386at/kdsoft.h
index 1dfd2b2c..79bfdb06 100644
--- a/i386/i386at/kdsoft.h
+++ b/i386/i386at/kdsoft.h
@@ -145,13 +145,12 @@ extern void bmpmvdown(csrpos_t, csrpos_t, int);
extern void bmpclear(csrpos_t, int, char);
extern void bmpsetcursor(csrpos_t);
-extern void (*kd_dput)(); /* put attributed char */
-extern void (*kd_dmvup)(); /* block move up */
-extern void (*kd_dmvdown)(); /* block move down */
-extern void (*kd_dclear)(); /* block clear */
-extern void (*kd_dsetcursor)();
- /* set cursor position on displayed page */
-extern void (*kd_dreset)(); /* prepare for reboot */
+extern void (*kd_dput)(csrpos_t, char, char); /* put attributed char */
+extern void (*kd_dmvup)(csrpos_t, csrpos_t, int); /* block move up */
+extern void (*kd_dmvdown)(csrpos_t, csrpos_t, int); /* block move down */
+extern void (*kd_dclear)(csrpos_t, int, char); /* block clear */
+extern void (*kd_dsetcursor)(csrpos_t); /* set cursor position on displayed page */
+extern void (*kd_dreset)(void); /* prepare for reboot */
/*
diff --git a/i386/i386at/lpr.c b/i386/i386at/lpr.c
index 9b55a5f2..f8d42f3c 100644
--- a/i386/i386at/lpr.c
+++ b/i386/i386at/lpr.c
@@ -34,7 +34,6 @@
#include <sys/types.h>
#include <kern/printf.h>
#include <kern/mach_clock.h>
-#include <sys/time.h>
#include <device/conf.h>
#include <device/device_types.h>
#include <device/tty.h>
@@ -98,7 +97,7 @@ void lprattach(struct bus_device *dev)
}
take_dev_irq(dev);
- printf(", port = %lx, spl = %ld, pic = %d.",
+ printf(", port = %zx, spl = %zd, pic = %d.",
dev->address, dev->sysdep, dev->sysdep1);
lprinfo[unit] = dev;
@@ -274,12 +273,6 @@ lprstop(
if ((tp->t_state & TS_BUSY) && (tp->t_state & TS_TTSTOP) == 0)
tp->t_state |= TS_FLUSH;
}
-int
-lprpr(int unit)
-{
- lprpr_addr(lprinfo[unit]->address);
- return 0;
-}
void
lprpr_addr(unsigned short addr)
diff --git a/i386/i386at/mem.c b/i386/i386at/mem.c
index 07acc169..f46fc038 100644
--- a/i386/i386at/mem.c
+++ b/i386/i386at/mem.c
@@ -26,6 +26,8 @@
#include <device/io_req.h>
#include <i386/model_dep.h>
+#include <i386at/biosmem.h>
+#include <i386at/mem.h>
/* This provides access to any memory that is not main RAM */
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index 21a36bf2..edb5b48b 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -32,6 +32,7 @@
* Basic initialization for I386 - ISA bus machines.
*/
+#include <inttypes.h>
#include <string.h>
#include <device/cons.h>
@@ -50,7 +51,6 @@
#include <kern/printf.h>
#include <kern/startup.h>
#include <kern/smp.h>
-#include <sys/time.h>
#include <sys/types.h>
#include <vm/vm_page.h>
#include <i386/fpu.h>
@@ -66,6 +66,8 @@
#include <i386/locore.h>
#include <i386/model_dep.h>
#include <i386/smp.h>
+#include <i386/seg.h>
+#include <i386at/acpi_parse_apic.h>
#include <i386at/autoconf.h>
#include <i386at/biosmem.h>
#include <i386at/elf.h>
@@ -93,18 +95,12 @@
#include <ddb/db_sym.h>
#include <i386/db_interface.h>
-/* a.out symbol table */
-static vm_offset_t kern_sym_start, kern_sym_end;
-
/* ELF section header */
static unsigned elf_shdr_num;
static vm_size_t elf_shdr_size;
static vm_offset_t elf_shdr_addr;
static unsigned elf_shdr_shndx;
-#else /* MACH_KDB */
-#define kern_sym_start 0
-#define kern_sym_end 0
#endif /* MACH_KDB */
#define RESERVED_BIOS 0x10000
@@ -122,7 +118,7 @@ unsigned long *pfn_list = (void*) PFN_LIST;
unsigned long la_shift = VM_MIN_KERNEL_ADDRESS;
#endif
#else /* MACH_XEN */
-struct multiboot_info boot_info;
+struct multiboot_raw_info boot_info;
#endif /* MACH_XEN */
/* Command line supplied to kernel. */
@@ -130,15 +126,15 @@ char *kernel_cmdline = "";
extern char version[];
+/* Realmode temporary GDT */
+extern struct pseudo_descriptor gdt_descr_tmp;
+
+/* Realmode relocated jmp */
+extern uint32_t apboot_jmp_offset;
+
/* If set, reboot the system on ctrl-alt-delete. */
boolean_t rebootflag = FALSE; /* exported to kdintr */
-/* Interrupt stack. */
-static char int_stack[KERNEL_STACK_SIZE] __aligned(KERNEL_STACK_SIZE);
-#if NCPUS <= 1
-vm_offset_t int_stack_top[1], int_stack_base[1];
-#endif
-
#ifdef LINUX_DEV
extern void linux_init(void);
#endif
@@ -149,11 +145,6 @@ extern void linux_init(void);
void machine_init(void)
{
/*
- * Initialize the console.
- */
- cninit();
-
- /*
* Make more free memory.
*
* This is particularly important for the Linux drivers which
@@ -169,20 +160,27 @@ void machine_init(void)
#ifdef MACH_HYP
hyp_init();
#else /* MACH_HYP */
+#if defined(APIC)
+ int err;
-#if (NCPUS > 1) && defined(APIC)
+ err = acpi_apic_init();
+ if (err) {
+ printf("acpi_apic_init failed with %d\n", err);
+ for (;;);
+ }
+#endif
+#if (NCPUS > 1)
smp_init();
+#endif
+#if defined(APIC)
ioapic_configure();
- lapic_enable_timer();
+#endif
+ clkstart();
-#warning FIXME: Rather unmask them from their respective drivers
- /* kd */
- unmask_irq(1);
- /* com0 */
- unmask_irq(4);
- /* com1 */
- unmask_irq(3);
-#endif /* NCPUS > 1 */
+ /*
+ * Initialize the console.
+ */
+ cninit();
#ifdef LINUX_DEV
/*
@@ -216,6 +214,20 @@ void machine_init(void)
*/
pmap_unmap_page_zero();
#endif
+
+#if NCPUS > 1
+ /*
+ * Patch the realmode gdt with the correct offset and the first jmp to
+ * protected mode with the correct target.
+ */
+ gdt_descr_tmp.linear_base += apboot_addr;
+ apboot_jmp_offset += apboot_addr;
+
+ /*
+ * Initialize the HPET
+ */
+ hpet_init();
+#endif
}
/* Conserve power on processor CPU. */
@@ -272,11 +284,6 @@ void halt_all_cpus(boolean_t reboot)
machine_idle (cpu_number ());
}
-void exit(int rc)
-{
- halt_all_cpus(0);
-}
-
void db_halt_cpu(void)
{
halt_all_cpus(0);
@@ -355,13 +362,9 @@ register_boot_data(const struct multiboot_raw_info *mbi)
* Basic PC VM initialization.
* Turns on paging and changes the kernel segments to use high linear addresses.
*/
-void
+static void
i386at_init(void)
{
- /* XXX move to intel/pmap.h */
- extern pt_entry_t *kernel_page_dir;
- int i;
-
/*
* Initialize the PIC prior to any possible call to an spl.
*/
@@ -374,6 +377,7 @@ i386at_init(void)
#else /* MACH_HYP */
hyp_intrinit();
#endif /* MACH_HYP */
+ spl_init = 1;
/*
* Read memory map and load it into the physical page allocator.
@@ -403,7 +407,7 @@ i386at_init(void)
}
if (boot_info.flags & MULTIBOOT_MODS && boot_info.mods_count) {
- struct multiboot_module *m;
+ struct multiboot_raw_module *m;
int i;
if (! init_alloc_aligned(
@@ -447,47 +451,8 @@ i386at_init(void)
*/
biosmem_setup();
- /*
- * We'll have to temporarily install a direct mapping
- * between physical memory and low linear memory,
- * until we start using our new kernel segment descriptors.
- */
-#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
- vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
- if ((vm_offset_t)(-delta) < delta)
- delta = (vm_offset_t)(-delta);
- int nb_direct = delta >> PDESHIFT;
- for (i = 0; i < nb_direct; i++)
- kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] =
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
-#endif
- /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
-#if VM_MIN_KERNEL_ADDRESS != 0
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
-#endif
+ pmap_make_temporary_mapping();
-#ifdef MACH_PV_PAGETABLES
- for (i = 0; i < PDPNUM; i++)
- pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES);
-#if PAE
- pmap_set_page_readonly_init(kernel_pmap->pdpbase);
-#endif /* PAE */
-#endif /* MACH_PV_PAGETABLES */
-#if PAE
-#ifdef __x86_64__
- set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base));
-#else
- set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase));
-#endif
-#ifndef MACH_HYP
- if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
- panic("CPU doesn't have support for PAE.");
- set_cr4(get_cr4() | CR4_PAE);
-#endif /* MACH_HYP */
-#else
- set_cr3((unsigned long)_kvtophys(kernel_page_dir));
-#endif /* PAE */
#ifndef MACH_HYP
/* Turn paging on.
* Also set the WP bit so that on 486 or better processors
@@ -514,45 +479,21 @@ i386at_init(void)
ldt_init();
ktss_init();
+#ifndef MACH_XEN
+ init_percpu(0);
+#endif
#if NCPUS > 1
/* Initialize SMP structures in the master processor */
mp_desc_init(0);
#endif // NCPUS
-#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
- /* Get rid of the temporary direct mapping and flush it out of the TLB. */
- for (i = 0 ; i < nb_direct; i++) {
-#ifdef MACH_XEN
-#ifdef MACH_PSEUDO_PHYS
- if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0))
-#else /* MACH_PSEUDO_PHYS */
- if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
-#endif /* MACH_PSEUDO_PHYS */
- printf("couldn't unmap frame %d\n", i);
-#else /* MACH_XEN */
- kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0;
-#endif /* MACH_XEN */
- }
-#endif
- /* Keep BIOS memory mapped */
-#if VM_MIN_KERNEL_ADDRESS != 0
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
- kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
-#endif
-
- /* Not used after boot, better give it back. */
-#ifdef MACH_XEN
- hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
-#endif /* MACH_XEN */
-
- flush_tlb();
+ pmap_remove_temporary_mapping();
#ifdef MACH_XEN
hyp_p2m_init();
#endif /* MACH_XEN */
- int_stack_base[0] = (vm_offset_t)&int_stack;
- int_stack_top[0] = int_stack_base[0] + KERNEL_STACK_SIZE - 4;
+ interrupt_stack_alloc();
}
/*
@@ -590,30 +531,15 @@ void c_boot_entry(vm_offset_t bi)
* We need to do this before i386at_init()
* so that the symbol table's memory won't be stomped on.
*/
- if ((boot_info.flags & MULTIBOOT_AOUT_SYMS)
- && boot_info.syms.a.addr)
- {
- vm_size_t symtab_size, strtab_size;
-
- kern_sym_start = (vm_offset_t)phystokv(boot_info.syms.a.addr);
- symtab_size = (vm_offset_t)phystokv(boot_info.syms.a.tabsize);
- strtab_size = (vm_offset_t)phystokv(boot_info.syms.a.strsize);
- kern_sym_end = kern_sym_start + 4 + symtab_size + strtab_size;
-
- printf("kernel symbol table at %08lx-%08lx (%ld,%ld)\n",
- kern_sym_start, kern_sym_end,
- (unsigned long) symtab_size, (unsigned long) strtab_size);
- }
-
if ((boot_info.flags & MULTIBOOT_ELF_SHDR)
- && boot_info.syms.e.num)
+ && boot_info.shdr_num)
{
- elf_shdr_num = boot_info.syms.e.num;
- elf_shdr_size = boot_info.syms.e.size;
- elf_shdr_addr = (vm_offset_t)phystokv(boot_info.syms.e.addr);
- elf_shdr_shndx = boot_info.syms.e.shndx;
+ elf_shdr_num = boot_info.shdr_num;
+ elf_shdr_size = boot_info.shdr_size;
+ elf_shdr_addr = (vm_offset_t)phystokv(boot_info.shdr_addr);
+ elf_shdr_shndx = boot_info.shdr_strndx;
- printf("ELF section header table at %08lx\n", elf_shdr_addr);
+ printf("ELF section header table at %08" PRIxPTR "\n", elf_shdr_addr);
}
#endif /* MACH_KDB */
#endif /* MACH_XEN */
@@ -629,11 +555,6 @@ void c_boot_entry(vm_offset_t bi)
/*
* Initialize the kernel debugger's kernel symbol table.
*/
- if (kern_sym_start)
- {
- aout_db_sym_init((char *)kern_sym_start, (char *)kern_sym_end, "mach", (char *)0);
- }
-
if (elf_shdr_num)
{
elf_db_sym_init(elf_shdr_num,elf_shdr_size,
@@ -643,9 +564,11 @@ void c_boot_entry(vm_offset_t bi)
#endif /* MACH_KDB */
machine_slot[0].is_cpu = TRUE;
- machine_slot[0].running = TRUE;
machine_slot[0].cpu_subtype = CPU_SUBTYPE_AT386;
+#if defined(__x86_64__) && !defined(USER32)
+ machine_slot[0].cpu_type = CPU_TYPE_X86_64;
+#else
switch (cpu_type)
{
default:
@@ -664,6 +587,7 @@ void c_boot_entry(vm_offset_t bi)
machine_slot[0].cpu_type = CPU_TYPE_PENTIUMPRO;
break;
}
+#endif
/*
* Start the system.
@@ -677,10 +601,7 @@ void c_boot_entry(vm_offset_t bi)
#include <mach/time_value.h>
vm_offset_t
-timemmap(dev, off, prot)
- dev_t dev;
- vm_offset_t off;
- vm_prot_t prot;
+timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot)
{
extern time_value_t *mtime;
@@ -692,20 +613,29 @@ timemmap(dev, off, prot)
void
startrtclock(void)
{
-#ifndef APIC
+#ifdef APIC
+ unmask_irq(timer_pin);
+ calibrate_lapic_timer();
+ if (cpu_number() != 0) {
+ lapic_enable_timer();
+ }
+#else
clkstart();
+#ifndef MACH_HYP
+ unmask_irq(0);
+#endif
#endif
}
void
inittodr(void)
{
- time_value_t new_time;
+ time_value64_t new_time;
uint64_t newsecs;
(void) readtodc(&newsecs);
new_time.seconds = newsecs;
- new_time.microseconds = 0;
+ new_time.nanoseconds = 0;
{
spl_t s = splhigh();
diff --git a/i386/i386at/model_dep.h b/i386/i386at/model_dep.h
index a972695f..3d5b6645 100644
--- a/i386/i386at/model_dep.h
+++ b/i386/i386at/model_dep.h
@@ -27,8 +27,8 @@
*/
extern vm_offset_t int_stack_top[NCPUS], int_stack_base[NCPUS];
-/* Check whether P points to the interrupt stack. */
-#define ON_INT_STACK(P) (((P) & ~(KERNEL_STACK_SIZE-1)) == int_stack_base[0])
+/* Check whether P points to the per-cpu interrupt stack. */
+#define ON_INT_STACK(P, CPU) (((P) & ~(INTSTACK_SIZE-1)) == int_stack_base[CPU])
extern vm_offset_t timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot);
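
The reworked ON_INT_STACK() relies on each per-cpu interrupt stack being INTSTACK_SIZE-aligned: masking off the low bits of any pointer into the stack yields its base, which can then be compared against that cpu's int_stack_base entry. A sketch of the check under that alignment assumption (INTSTACK_SIZE must be a power of two):

/* Sketch: assumes int_stack_base[cpu] is INTSTACK_SIZE-aligned, as the
 * interrupt stack allocator is expected to guarantee. */
static int on_int_stack(vm_offset_t p, int cpu)
{
	return (p & ~(INTSTACK_SIZE - 1)) == int_stack_base[cpu];
}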
diff --git a/i386/i386at/pic_isa.c b/i386/i386at/pic_isa.c
index b0415c24..1e5ac103 100644
--- a/i386/i386at/pic_isa.c
+++ b/i386/i386at/pic_isa.c
@@ -33,8 +33,8 @@
/* These interrupts are always present */
-void (*ivect[NINTR])() = {
- /* 00 */ hardclock, /* always */
+interrupt_handler_fn ivect[NINTR] = {
+ /* 00 */ (interrupt_handler_fn)hardclock, /* always */
/* 01 */ kdintr, /* kdintr, ... */
/* 02 */ intnull,
/* 03 */ intnull, /* lnpoll, comintr, ... */
diff --git a/i386/i386at/rtc.c b/i386/i386at/rtc.c
index d771df8e..1930beb0 100644
--- a/i386/i386at/rtc.c
+++ b/i386/i386at/rtc.c
@@ -47,7 +47,6 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
-#include <sys/time.h>
#include <kern/mach_clock.h>
#include <kern/printf.h>
#include <i386/machspl.h>
@@ -60,7 +59,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
static boolean_t first_rtcopen_ever = TRUE;
-void
+static void
rtcinit(void)
{
outb(RTC_ADDR, RTC_A);
@@ -70,7 +69,7 @@ rtcinit(void)
}
-int
+static int
rtcget(struct rtc_st *st)
{
unsigned char *regs = (unsigned char *)st;
@@ -87,7 +86,7 @@ rtcget(struct rtc_st *st)
return(0);
}
-void
+static void
rtcput(struct rtc_st *st)
{
unsigned char *regs = (unsigned char *)st;
@@ -107,11 +106,9 @@ rtcput(struct rtc_st *st)
}
-extern struct timeval time;
-
static int month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
-int
+static int
yeartoday(int year)
{
if (year%4)
@@ -134,13 +131,13 @@ yeartoday(int year)
return 366;
}
-int
+static int
hexdectodec(char n)
{
return(((n>>4)&0x0F)*10 + (n&0x0F));
}
-char
+static char
dectohexdec(int n)
{
return((char)(((n/10)<<4)&0xF0) | ((n%10)&0x0F));
@@ -213,13 +210,13 @@ writetodc(void)
splx(ospl);
diff = 0;
- n = (time.tv_sec - diff) % (3600 * 24); /* hrs+mins+secs */
+ n = (time.seconds - diff) % (3600 * 24); /* hrs+mins+secs */
rtclk.rtc_sec = dectohexdec(n%60);
n /= 60;
rtclk.rtc_min = dectohexdec(n%60);
rtclk.rtc_hr = dectohexdec(n/60);
- n = (time.tv_sec - diff) / (3600 * 24); /* days */
+ n = (time.seconds - diff) / (3600 * 24); /* days */
rtclk.rtc_dow = (n + 4) % 7; /* 1/1/70 is Thursday */
/* Epoch shall be 1970 January 1st */
diff --git a/i386/include/mach/i386/cthreads.h b/i386/include/mach/i386/cthreads.h
deleted file mode 100644
index d2aa16f5..00000000
--- a/i386/include/mach/i386/cthreads.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1993,1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-#ifndef _MACHINE_CTHREADS_H_
-#define _MACHINE_CTHREADS_H_
-
-typedef volatile int spin_lock_t;
-#define SPIN_LOCK_INITIALIZER 0
-#define spin_lock_init(s) (*(s) = 0)
-#define spin_lock_locked(s) (*(s) != 0)
-
-#ifdef __GNUC__
-
-#define spin_unlock(p) \
- ({ register int _u__ ; \
- __asm__ volatile("xorl %0, %0; \n\
- xchgl %0, %1" \
- : "=&r" (_u__), "=m" (*(p)) : "memory" ); \
- 0; })
-
-#define spin_try_lock(p)\
- (!({ boolean_t _r__; \
- __asm__ volatile("movl $1, %0; \n\
- xchgl %0, %1" \
- : "=&r" (_r__), "=m" (*(p)) : "memory" ); \
- _r__; }))
-
-#define cthread_sp() \
- ({ register unsigned long _sp__ __asm__("esp"); \
- _sp__; })
-
-#endif /* __GNUC__ */
-
-#endif /* _MACHINE_CTHREADS_H_ */
diff --git a/i386/include/mach/i386/exec/elf.h b/i386/include/mach/i386/exec/elf.h
index cfa988d2..60b16575 100644
--- a/i386/include/mach/i386/exec/elf.h
+++ b/i386/include/mach/i386/exec/elf.h
@@ -29,8 +29,25 @@ typedef unsigned int Elf32_Off;
typedef signed int Elf32_Sword;
typedef unsigned int Elf32_Word;
-/* Architecture identification parameters for i386. */
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+typedef int32_t Elf64_Shalf;
+typedef int32_t Elf64_Sword;
+typedef uint32_t Elf64_Word;
+typedef int64_t Elf64_Sxword;
+typedef uint64_t Elf64_Xword;
+typedef uint16_t Elf64_Half;
+
+
+/* Architecture identification parameters for x86. */
+#if defined(__x86_64__) && ! defined(USER32)
+#define MY_ELF_CLASS ELFCLASS64
+#define MY_EI_DATA ELFDATA2LSB
+#define MY_E_MACHINE EM_X86_64
+#else
+#define MY_ELF_CLASS ELFCLASS32
#define MY_EI_DATA ELFDATA2LSB
#define MY_E_MACHINE EM_386
+#endif
#endif /* _MACH_I386_EXEC_ELF_H_ */
diff --git a/i386/include/mach/i386/mach_i386.defs b/i386/include/mach/i386/mach_i386.defs
index e110c899..965d5c3b 100644
--- a/i386/include/mach/i386/mach_i386.defs
+++ b/i386/include/mach/i386/mach_i386.defs
@@ -40,7 +40,7 @@ subsystem
MACH_I386_IMPORTS
#endif
-type descriptor_t = struct[2] of int;
+type descriptor_t = struct[2] of uint32_t;
type descriptor_list_t = array[*] of descriptor_t;
import <mach/machine/mach_i386_types.h>;
diff --git a/i386/include/mach/i386/mach_i386_types.h b/i386/include/mach/i386/mach_i386_types.h
index f003636d..f5177fb5 100644
--- a/i386/include/mach/i386/mach_i386_types.h
+++ b/i386/include/mach/i386/mach_i386_types.h
@@ -30,6 +30,7 @@
#ifndef _MACH_MACH_I386_TYPES_H_
#define _MACH_MACH_I386_TYPES_H_
+#ifndef __ASSEMBLER__
/*
* i386 segment descriptor.
*/
@@ -42,15 +43,15 @@ typedef struct descriptor descriptor_t;
typedef struct descriptor *descriptor_list_t;
typedef const struct descriptor *const_descriptor_list_t;
+#endif /* !__ASSEMBLER__ */
+
/*
* i386 I/O port
*/
-#ifdef MACH_KERNEL
-#include <i386/io_perm.h>
-#else /* MACH_KERNEL */
+#ifndef MACH_KERNEL
typedef unsigned short io_port_t;
typedef mach_port_t io_perm_t;
-#endif /* MACH_KERNEL */
+#endif /* !MACH_KERNEL */
#endif /* _MACH_MACH_I386_TYPES_H_ */
diff --git a/i386/include/mach/i386/machine_types.defs b/i386/include/mach/i386/machine_types.defs
index dfbc521e..76c7dcf9 100755
--- a/i386/include/mach/i386/machine_types.defs
+++ b/i386/include/mach/i386/machine_types.defs
@@ -38,14 +38,10 @@
/*
* A natural_t is the type for the native
- * integer type, e.g. 32 or 64 or.. whatever
- * register size the machine has. Unsigned, it is
- * used for entities that might be either
- * unsigned integers or pointers, and for
- * type-casting between the two.
- * For instance, the IPC system represents
- * a port in user space as an integer and
- * in kernel space as a pointer.
+ * unsigned integer type, usually 32 bits. It is suitable for
+ * most counters with a small chance of overflow.
+ * While historically natural_t was meant to be the same
+ * as a pointer, that is not the case here.
*/
type natural_t = uint32_t;
@@ -59,8 +55,53 @@ type natural_t = uint32_t;
type integer_t = int32_t;
/*
+ * long_natural_t and long_integer_t for kernel <-> userland interfaces as the
+ * size depends on the architecture of both kernel and userland.
+ */
+#if defined(KERNEL_SERVER) && defined(USER32)
+type rpc_long_natural_t = uint32_t;
+type rpc_long_integer_t = int32_t;
+#else /* KERNEL and USER32 */
+#if defined(__x86_64__)
+type rpc_long_natural_t = uint64_t;
+type rpc_long_integer_t = int64_t;
+#else
+type rpc_long_natural_t = uint32_t;
+type rpc_long_integer_t = int32_t;
+#endif /* __x86_64__ */
+#endif /* KERNEL_SERVER and USER32 */
+
+/*
+ * A long_natural_t is a possibly larger unsigned integer type than natural_t.
+ * Should be used instead of natural_t when we want the data to be less subject
+ * to overflows.
+ */
+type long_natural_t = rpc_long_natural_t
+#if defined(KERNEL_SERVER)
+ intran: long_natural_t convert_long_natural_from_user(rpc_long_natural_t)
+ outtran: rpc_long_natural_t convert_long_natural_to_user(long_natural_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_long_natural_t
+#endif
+ ;
+
+/*
+ * Larger version of integer_t. Only used when we want to hold possibly larger
+ * values than what is possible with integer_t.
+ */
+type long_integer_t = rpc_long_integer_t
+#if defined(KERNEL_SERVER)
+ intran: long_integer_t convert_long_integer_from_user(rpc_long_integer_t)
+ outtran: rpc_long_integer_t convert_long_integer_to_user(long_integer_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_long_integer_t
+#endif
+ ;
+
+/*
* Physical address size
*/
type rpc_phys_addr_t = uint64_t;
+type rpc_phys_addr_array_t = array[] of rpc_phys_addr_t;
#endif /* _MACHINE_MACHINE_TYPES_DEFS_ */
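
The intran/outtran clauses above name the C helpers that MIG-generated kernel stubs call when a long_natural_t or long_integer_t crosses the user/kernel boundary: incoming wire values are widened before the server routine runs, and out values are converted back before the reply is built. A hedged sketch of the shape of such a generated server stub, with a hypothetical routine name:

/* Sketch of how a kernel-side stub is expected to use the converters;
 * real MIG output carries much more marshalling around this core. */
kern_return_t example_server_stub(rpc_long_natural_t wire_in,
				  rpc_long_natural_t *wire_out)
{
	long_natural_t in  = convert_long_natural_from_user(wire_in);
	long_natural_t out = 0;
	kern_return_t  kr;

	kr = example_routine(in, &out);	/* hypothetical implementation routine */
	*wire_out = convert_long_natural_to_user(out);
	return kr;
}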
diff --git a/i386/include/mach/i386/multiboot.h b/i386/include/mach/i386/multiboot.h
index 5a532576..c3538c1f 100644
--- a/i386/include/mach/i386/multiboot.h
+++ b/i386/include/mach/i386/multiboot.h
@@ -25,31 +25,6 @@
#include <mach/machine/vm_types.h>
-/* For a.out kernel boot images, the following header must appear
- somewhere in the first 8192 bytes of the kernel image file. */
-struct multiboot_header
-{
- /* Must be MULTIBOOT_MAGIC */
- unsigned magic;
-
- /* Feature flags - see below. */
- unsigned flags;
-
- /*
- * Checksum
- *
- * The above fields plus this one must equal 0 mod 2^32.
- */
- unsigned checksum;
-
- /* These are only valid if MULTIBOOT_AOUT_KLUDGE is set. */
- vm_offset_t header_addr;
- vm_offset_t load_addr;
- vm_offset_t load_end_addr;
- vm_offset_t bss_end_addr;
- vm_offset_t entry;
-};
-
/* The entire multiboot_header must be contained
within the first MULTIBOOT_SEARCH bytes of the kernel image. */
#define MULTIBOOT_SEARCH 8192
@@ -65,7 +40,7 @@ struct multiboot_header
/* Align all boot modules on page (4KB) boundaries. */
#define MULTIBOOT_PAGE_ALIGN 0x00000001
-/* Must be provided memory information in multiboot_info structure */
+/* Must be provided memory information in multiboot_raw_info structure */
#define MULTIBOOT_MEMORY_INFO 0x00000002
/* Use the load address fields above instead of the ones in the a.out header
@@ -78,61 +53,7 @@ struct multiboot_header
that the multiboot method is being used */
#define MULTIBOOT_VALID 0x2badb002
-/* The boot loader passes this data structure to the kernel in
- register EBX on entry. */
-struct multiboot_info
-{
- /* These flags indicate which parts of the multiboot_info are valid;
- see below for the actual flag bit definitions. */
- unsigned flags;
-
- /* Lower/Upper memory installed in the machine.
- Valid only if MULTIBOOT_MEMORY is set in flags word above. */
- vm_size_t mem_lower;
- vm_size_t mem_upper;
-
- /* BIOS disk device the kernel was loaded from.
- Valid only if MULTIBOOT_BOOT_DEVICE is set in flags word above. */
- unsigned char boot_device[4];
-
- /* Command-line for the OS kernel: a null-terminated ASCII string.
- Valid only if MULTIBOOT_CMDLINE is set in flags word above. */
- vm_offset_t cmdline;
-
- /* List of boot modules loaded with the kernel.
- Valid only if MULTIBOOT_MODS is set in flags word above. */
- unsigned mods_count;
- vm_offset_t mods_addr;
-
- /* Symbol information for a.out or ELF executables. */
- union
- {
- struct
- {
- /* a.out symbol information valid only if MULTIBOOT_AOUT_SYMS
- is set in flags word above. */
- vm_size_t tabsize;
- vm_size_t strsize;
- vm_offset_t addr;
- unsigned reserved;
- } a;
-
- struct
- {
- /* ELF section header information valid only if
- MULTIBOOT_ELF_SHDR is set in flags word above. */
- unsigned num;
- vm_size_t size;
- vm_offset_t addr;
- unsigned shndx;
- } e;
- } syms;
-
- /* Memory map buffer.
- Valid only if MULTIBOOT_MEM_MAP is set in flags word above. */
- vm_size_t mmap_count;
- vm_offset_t mmap_addr;
-};
+
#define MULTIBOOT_MEMORY 0x00000001
#define MULTIBOOT_BOOT_DEVICE 0x00000002
@@ -175,33 +96,6 @@ struct multiboot32_module
};
#endif
-
-/* The mmap_addr field above contains the physical address of the first
- of the AddrRangeDesc structure. "size" represents the size of the
- rest of the structure and optional padding. The offset to the beginning
- of the next structure is therefore "size + 4". */
-struct AddrRangeDesc
-{
- unsigned long size;
- unsigned long BaseAddrLow;
- unsigned long BaseAddrHigh;
- unsigned long LengthLow;
- unsigned long LengthHigh;
- unsigned long Type;
-
- /* unspecified optional padding... */
-};
-
-struct multiboot_mmap
-{
- unsigned long size;
- unsigned long long BaseAddr;
- unsigned long long Length;
- unsigned long Type;
-
- /* unspecified optional padding... */
-};
-
/* usable memory "Type", all others are reserved. */
#define MB_ARD_MEMORY 1
diff --git a/i386/include/mach/i386/syscall_sw.h b/i386/include/mach/i386/syscall_sw.h
index 86f6ff2f..9eeb2939 100644
--- a/i386/include/mach/i386/syscall_sw.h
+++ b/i386/include/mach/i386/syscall_sw.h
@@ -29,21 +29,11 @@
#include <mach/machine/asm.h>
-#if BSD_TRAP
-#define kernel_trap(trap_name,trap_number,number_args) \
+#define kernel_trap(trap_name,trap_number,number_args) \
ENTRY(trap_name) \
movl $ trap_number,%eax; \
SVC; \
- jb LCL(cerror); \
ret; \
END(trap_name)
-#else
-#define kernel_trap(trap_name,trap_number,number_args) \
-ENTRY(trap_name) \
- movl $ trap_number,%eax; \
- SVC; \
- ret; \
-END(trap_name)
-#endif
#endif /* _MACH_I386_SYSCALL_SW_H_ */
diff --git a/i386/include/mach/i386/thread_status.h b/i386/include/mach/i386/thread_status.h
index ba1e3dea..94596a74 100644
--- a/i386/include/mach/i386/thread_status.h
+++ b/i386/include/mach/i386/thread_status.h
@@ -57,16 +57,37 @@
#define i386_V86_ASSIST_STATE 4
#define i386_REGS_SEGS_STATE 5
#define i386_DEBUG_STATE 6
+#define i386_FSGS_BASE_STATE 7
/*
* This structure is used for both
* i386_THREAD_STATE and i386_REGS_SEGS_STATE.
*/
struct i386_thread_state {
+#if defined(__x86_64__) && !defined(USER32)
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t rdi;
+ uint64_t rsi;
+ uint64_t rbp;
+ uint64_t rsp;
+ uint64_t rbx;
+ uint64_t rdx;
+ uint64_t rcx;
+ uint64_t rax;
+ uint64_t rip;
+#else
unsigned int gs;
unsigned int fs;
unsigned int es;
unsigned int ds;
+
unsigned int edi;
unsigned int esi;
unsigned int ebp;
@@ -76,9 +97,17 @@ struct i386_thread_state {
unsigned int ecx;
unsigned int eax;
unsigned int eip;
+#endif /* __x86_64__ && !USER32 */
+
unsigned int cs;
+#if defined(__x86_64__) && !defined(USER32)
+ uint64_t rfl;
+ uint64_t ursp;
+#else
unsigned int efl;
unsigned int uesp;
+#endif /* __x86_64__ and !USER32 */
+
unsigned int ss;
};
#define i386_THREAD_STATE_COUNT (sizeof (struct i386_thread_state)/sizeof(unsigned int))
@@ -151,4 +180,11 @@ struct i386_debug_state {
#define i386_DEBUG_STATE_COUNT \
(sizeof(struct i386_debug_state)/sizeof(unsigned int))
+struct i386_fsgs_base_state {
+ unsigned long fs_base;
+ unsigned long gs_base;
+};
+#define i386_FSGS_BASE_STATE_COUNT \
+ (sizeof(struct i386_fsgs_base_state)/sizeof(unsigned int))
+
#endif /* _MACH_I386_THREAD_STATUS_H_ */
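
Like the other i386_*_STATE_COUNT constants, the new i386_FSGS_BASE_STATE_COUNT measures the state in 32-bit words, the unit thread_get_state()/thread_set_state() use for their count arguments. On an LP64 build the structure holds two 8-byte fields, so the count works out to 4:

/* Worked example of the count convention, assuming unsigned long is 8 bytes. */
struct fsgs_example {
	unsigned long fs_base;	/* 8 bytes */
	unsigned long gs_base;	/* 8 bytes */
};
/* sizeof(struct fsgs_example) == 16 and sizeof(unsigned int) == 4,
 * so the state is described as 16 / 4 == 4 natural-sized words. */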
diff --git a/i386/include/mach/i386/vm_param.h b/i386/include/mach/i386/vm_param.h
index a684ed97..3e5c18c9 100644
--- a/i386/include/mach/i386/vm_param.h
+++ b/i386/include/mach/i386/vm_param.h
@@ -52,8 +52,8 @@
* No rounding is used.
*/
-#define i386_btop(x) (((unsigned long)(x)) >> I386_PGSHIFT)
-#define i386_ptob(x) (((unsigned long)(x)) << I386_PGSHIFT)
+#define i386_btop(x) (((phys_addr_t)(x)) >> I386_PGSHIFT)
+#define i386_ptob(x) (((phys_addr_t)(x)) << I386_PGSHIFT)
/*
* Round off or truncate to the nearest page. These will work
@@ -61,22 +61,30 @@
* bytes.)
*/
-#define i386_round_page(x) ((((unsigned long)(x)) + I386_PGBYTES - 1) & \
+#define i386_round_page(x) ((((phys_addr_t)(x)) + I386_PGBYTES - 1) & \
~(I386_PGBYTES-1))
-#define i386_trunc_page(x) (((unsigned long)(x)) & ~(I386_PGBYTES-1))
+#define i386_trunc_page(x) (((phys_addr_t)(x)) & ~(I386_PGBYTES-1))
-/* User address spaces are 3GB each,
- starting at virtual and linear address 0.
+/* User address spaces are 3GB each on a 32-bit kernel, starting at
+ virtual and linear address 0.
+   On a 64-bit kernel we split the address space in half, with the
+ lower 128TB for the user address space and the upper 128TB for the
+ kernel address space.
- VM_MAX_ADDRESS can be reduced to leave more space for the kernel, but must
- not be increased to more than 3GB as glibc and hurd servers would not cope
- with that.
+ On a 32-bit kernel VM_MAX_ADDRESS can be reduced to leave more
+ space for the kernel, but must not be increased to more than 3GB as
+ glibc and hurd servers would not cope with that.
*/
-#define VM_MIN_ADDRESS (0)
+#define VM_MIN_ADDRESS (0ULL)
+
#ifdef __x86_64__
-#define VM_MAX_ADDRESS (0x40000000UL)
-#else
+#if defined(KERNEL) && defined(USER32)
+#define VM_MAX_ADDRESS (0xc0000000UL)
+#else /* defined(KERNEL) && defined(USER32) */
+#define VM_MAX_ADDRESS (0x800000000000ULL)
+#endif /* defined(KERNEL) && defined(USER32) */
+#else /* __x86_64__ */
#define VM_MAX_ADDRESS (0xc0000000UL)
-#endif
+#endif /* __x86_64__ */
#endif /* _MACH_I386_VM_PARAM_H_ */
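
The widened i386_round_page()/i386_trunc_page() still work by masking rather than dividing, which is why I386_PGBYTES must stay a power of two; the cast through phys_addr_t keeps the arithmetic wide enough for physical addresses above 4 GiB (e.g. under PAE). A small standalone check of the arithmetic, assuming the usual 4 KiB page:

/* Standalone sketch of the rounding arithmetic (4096-byte pages assumed). */
#include <assert.h>
#include <stdint.h>

#define PGBYTES		4096ULL				/* stand-in for I386_PGBYTES */
#define TRUNC(x)	(((uint64_t)(x)) & ~(PGBYTES - 1))
#define ROUND(x)	((((uint64_t)(x)) + PGBYTES - 1) & ~(PGBYTES - 1))

int main(void)
{
	assert(TRUNC(0x1234) == 0x1000);	/* drop the low 12 bits */
	assert(ROUND(0x1234) == 0x2000);	/* bump to the next page boundary */
	assert(ROUND(0x2000) == 0x2000);	/* already aligned values are unchanged */
	return 0;
}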
diff --git a/i386/include/mach/i386/vm_types.h b/i386/include/mach/i386/vm_types.h
index f49a95a1..8f528ae1 100644
--- a/i386/include/mach/i386/vm_types.h
+++ b/i386/include/mach/i386/vm_types.h
@@ -37,20 +37,19 @@
#ifdef __ASSEMBLER__
#else /* __ASSEMBLER__ */
+#include <stdint.h>
+
+#ifdef MACH_KERNEL
+#include <kern/assert.h>
+#endif
+
/*
* A natural_t is the type for the native
- * integer type, e.g. 32 or 64 or.. whatever
- * register size the machine has. Unsigned, it is
- * used for entities that might be either
- * unsigned integers or pointers, and for
- * type-casting between the two.
- * For instance, the IPC system represents
- * a port in user space as an integer and
- * in kernel space as a pointer.
+ * unsigned integer type, usually 32 bits. It is suitable for
+ * most counters with a small chance of overflow.
+ * While historically natural_t was meant to be the same
+ * as a pointer, that is not the case here.
*/
-#ifdef __x86_64__
-// unsigned long ?
-#endif
typedef unsigned int natural_t;
/*
@@ -63,10 +62,23 @@ typedef unsigned int natural_t;
typedef int integer_t;
/*
+ * A long_natural_t is a possibly larger unsigned integer type than natural_t.
+ * Should be used instead of natural_t when we want the data to be less subject
+ * to overflows.
+ */
+typedef unsigned long long_natural_t;
+
+/*
+ * Larger version of integer_t. Only used when we want to hold possibly larger
+ * values than what is possible with integer_t.
+ */
+typedef long long_integer_t;
+
+/*
* A vm_offset_t is a type-neutral pointer,
* e.g. an offset into a virtual memory space.
*/
-typedef unsigned long vm_offset_t;
+typedef uintptr_t vm_offset_t;
typedef vm_offset_t * vm_offset_array_t;
/*
@@ -82,19 +94,74 @@ typedef unsigned long phys_addr_t;
typedef unsigned long long phys_addr_t;
#endif
typedef unsigned long long rpc_phys_addr_t;
+typedef rpc_phys_addr_t *rpc_phys_addr_array_t;
/*
* A vm_size_t is the proper type for e.g.
* expressing the difference between two
* vm_offset_t entities.
*/
-#ifdef __x86_64__
-typedef unsigned long vm_size_t;
-#else
-typedef natural_t vm_size_t;
-#endif
+typedef uintptr_t vm_size_t;
typedef vm_size_t * vm_size_array_t;
+/*
+ * rpc_types are for user/kernel interfaces. On kernel side they may differ from
+ * the native types, while in user space they shall be the same.
+ * These three types are always of the same size, so we can reuse the conversion
+ * functions.
+ */
+#if defined(MACH_KERNEL) && defined(USER32)
+typedef uint32_t rpc_uintptr_t;
+typedef uint32_t rpc_vm_address_t;
+typedef uint32_t rpc_vm_offset_t;
+typedef uint32_t rpc_vm_size_t;
+
+static inline uint64_t convert_vm_from_user(uint32_t uaddr)
+{
+ return (uint64_t)uaddr;
+}
+static inline uint32_t convert_vm_to_user(uint64_t kaddr)
+{
+ assert(kaddr <= 0xFFFFFFFF);
+ return (uint32_t)kaddr;
+}
+
+typedef uint32_t rpc_long_natural_t;
+typedef int32_t rpc_long_integer_t;
+
+static inline int64_t convert_long_integer_from_user(int32_t i)
+{
+ return (int64_t)i;
+}
+static inline int32_t convert_long_integer_to_user(int64_t i)
+{
+ assert(i <= 0x7FFFFFFF);
+ return (int32_t)i;
+}
+typedef uint32_t rpc_long_natural_t;
+typedef int32_t rpc_long_integer_t;
+#else /* MACH_KERNEL */
+typedef uintptr_t rpc_uintptr_t;
+typedef vm_offset_t rpc_vm_address_t;
+typedef vm_offset_t rpc_vm_offset_t;
+typedef vm_size_t rpc_vm_size_t;
+
+#define convert_vm_to_user null_conversion
+#define convert_vm_from_user null_conversion
+
+typedef long_natural_t rpc_long_natural_t;
+typedef long_integer_t rpc_long_integer_t;
+
+#define convert_long_integer_to_user null_conversion
+#define convert_long_integer_from_user null_conversion
+#endif /* MACH_KERNEL */
+
+#define convert_long_natural_to_user convert_vm_to_user
+#define convert_long_natural_from_user convert_vm_from_user
+
+typedef rpc_vm_size_t * rpc_vm_size_array_t;
+typedef rpc_vm_offset_t * rpc_vm_offset_array_t;
+
#endif /* __ASSEMBLER__ */
/*
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index d57040cf..94c580e7 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -91,6 +91,9 @@
#include <i386/mp_desc.h>
#endif
+#include <ddb/db_output.h>
+#include <machine/db_machdep.h>
+
#ifdef MACH_PSEUDO_PHYS
#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0;
#else /* MACH_PSEUDO_PHYS */
@@ -123,7 +126,7 @@ pv_entry_t pv_head_table; /* array of entries, one per page */
* The list is refilled from the pv_list_cache if it becomes empty.
*/
pv_entry_t pv_free_list; /* free list at SPLVM */
-decl_simple_lock_data(, pv_free_list_lock)
+def_simple_lock_data(static, pv_free_list_lock)
#define PV_ALLOC(pv_e) { \
simple_lock(&pv_free_list_lock); \
@@ -300,7 +303,7 @@ lock_data_t pmap_system_lock;
/* using the pmap */ \
signal_cpus(users, (pmap), (s), (e)); \
while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \
- continue; \
+ cpu_pause(); \
} \
\
/* invalidate our own TLB if pmap is in use */ \
@@ -397,13 +400,15 @@ boolean_t cpu_update_needed[NCPUS];
struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
-struct kmem_cache pmap_cache; /* cache of pmap structures */
-struct kmem_cache pd_cache; /* cache of page directories */
+struct kmem_cache pmap_cache; /* cache of pmap structures */
+struct kmem_cache pt_cache; /* cache of page tables */
+struct kmem_cache pd_cache; /* cache of page directories */
#if PAE
-struct kmem_cache pdpt_cache; /* cache of page
- directory pointer
- tables */
-#endif
+struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */
+#ifdef __x86_64__
+struct kmem_cache l4_cache; /* cache of L4 tables */
+#endif /* __x86_64__ */
+#endif /* PAE */
boolean_t pmap_debug = FALSE; /* flag for debugging prints */
@@ -426,7 +431,37 @@ pt_entry_t *kernel_page_dir;
* Two slots for temporary physical page mapping, to allow for
* physical-to-physical transfers.
*/
-static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS];
+static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS];
+#define MAPWINDOW_SIZE (PMAP_NMAPWINDOWS * NCPUS * PAGE_SIZE)
+
+#ifdef __x86_64__
+static inline pt_entry_t *
+pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr)
+{
+ return &pmap->l4base[lin2l4num(lin_addr)];
+}
+#endif
+
+#ifdef PAE
+static inline pt_entry_t *
+pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
+{
+ pt_entry_t *pdp_table;
+#ifdef __x86_64__
+ pt_entry_t *l4_table;
+ l4_table = pmap_l4base(pmap, lin_addr);
+ if (l4_table == PT_ENTRY_NULL)
+ return(PT_ENTRY_NULL);
+ pt_entry_t pdp = *l4_table;
+ if ((pdp & INTEL_PTE_VALID) == 0)
+ return PT_ENTRY_NULL;
+ pdp_table = (pt_entry_t *) ptetokv(pdp);
+#else /* __x86_64__ */
+ pdp_table = pmap->pdpbase;
+#endif /* __x86_64__ */
+ return &pdp_table[lin2pdpnum(lin_addr)];
+}
+#endif
static inline pt_entry_t *
pmap_pde(const pmap_t pmap, vm_offset_t addr)
@@ -435,10 +470,17 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr)
if (pmap == kernel_pmap)
addr = kvtolin(addr);
#if PAE
- page_dir = (pt_entry_t *) ptetokv(pmap->pdpbase[lin2pdpnum(addr)]);
-#else
+ pt_entry_t *pdp_table;
+ pdp_table = pmap_ptp(pmap, addr);
+ if (pdp_table == PT_ENTRY_NULL)
+ return(PT_ENTRY_NULL);
+ pt_entry_t pde = *pdp_table;
+ if ((pde & INTEL_PTE_VALID) == 0)
+ return PT_ENTRY_NULL;
+ page_dir = (pt_entry_t *) ptetokv(pde);
+#else /* PAE */
page_dir = pmap->dirbase;
-#endif
+#endif /* PAE */
return &page_dir[lin2pdenum(addr)];
}
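
The helpers above (pmap_l4base, pmap_ptp, pmap_pde) and pmap_pte further down all repeat the same step: read one entry for the current level, bail out with PT_ENTRY_NULL if it is not valid, otherwise descend into the table it points to. A self-contained toy model of that walk, with invented names (walk_one, pool) and simplified 32-bit entries instead of the kernel's types:

#include <stdint.h>
#include <stdio.h>

/* Toy model: each valid entry stores the index of the next table in a small
 * pool, standing in for ptetokv() turning an entry into a table address. */
#define PTE_VALID 0x1u
#define NPTES     4u

static uint32_t pool[8][NPTES];        /* pool[0] plays the role of the root table */

static uint32_t *walk_one(uint32_t *table, unsigned idx)
{
    if (table == NULL || !(table[idx] & PTE_VALID))
        return NULL;                   /* same role as the PT_ENTRY_NULL early returns */
    return pool[table[idx] >> 1];      /* descend into the next level */
}

int main(void)
{
    pool[0][2] = (1u << 1) | PTE_VALID;    /* root[2] -> table 1 */
    pool[1][3] = (2u << 1) | PTE_VALID;    /* table 1[3] -> table 2 (the leaf) */

    uint32_t *leaf = walk_one(walk_one(pool[0], 2), 3);
    uint32_t *missing = walk_one(walk_one(pool[0], 0), 3);

    printf("valid path:   %sfound\n", leaf ? "" : "not ");
    printf("invalid path: %p\n", (void *)missing);
    return 0;
}
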
@@ -455,14 +497,20 @@ pmap_pte(const pmap_t pmap, vm_offset_t addr)
pt_entry_t *ptp;
pt_entry_t pte;
-#if PAE
+#ifdef __x86_64__
+ if (pmap->l4base == 0)
+ return(PT_ENTRY_NULL);
+#elif PAE
if (pmap->pdpbase == 0)
return(PT_ENTRY_NULL);
#else
if (pmap->dirbase == 0)
return(PT_ENTRY_NULL);
#endif
- pte = *pmap_pde(pmap, addr);
+ ptp = pmap_pde(pmap, addr);
+ if (ptp == 0)
+ return(PT_ENTRY_NULL);
+ pte = *ptp;
if ((pte & INTEL_PTE_VALID) == 0)
return(PT_ENTRY_NULL);
ptp = (pt_entry_t *)ptetokv(pte);
@@ -502,34 +550,9 @@ void ptep_check(ptep_t ptep)
#endif /* DEBUG_PTE_PAGE */
/*
- * Map memory at initialization. The physical addresses being
- * mapped are not managed and are never unmapped.
- *
- * For now, VM is already on, we only need to map the
- * specified memory.
- */
-vm_offset_t pmap_map(
- vm_offset_t virt,
- phys_addr_t start,
- phys_addr_t end,
- int prot)
-{
- int ps;
-
- ps = PAGE_SIZE;
- while (start < end) {
- pmap_enter(kernel_pmap, virt, start, prot, FALSE);
- virt += ps;
- start += ps;
- }
- return(virt);
-}
-
-/*
* Back-door routine for mapping kernel VM at initialization.
* Useful for mapping memory outside the range of direct mapped
* physical memory (i.e., devices).
- * Otherwise like pmap_map.
*/
vm_offset_t pmap_map_bd(
vm_offset_t virt,
@@ -586,6 +609,111 @@ vm_offset_t pmap_map_bd(
return(virt);
}
+#ifdef PAE
+static void pmap_bootstrap_pae(void)
+{
+ vm_offset_t addr;
+ pt_entry_t *pdp_kernel;
+
+#ifdef __x86_64__
+#ifdef MACH_HYP
+ kernel_pmap->user_l4base = NULL;
+ kernel_pmap->user_pdpbase = NULL;
+#endif
+ kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
+#else
+ const int PDPNUM_KERNEL = PDPNUM;
+#endif /* x86_64 */
+
+ init_alloc_aligned(PDPNUM_KERNEL * INTEL_PGBYTES, &addr);
+ kernel_page_dir = (pt_entry_t*)phystokv(addr);
+ memset(kernel_page_dir, 0, PDPNUM_KERNEL * INTEL_PGBYTES);
+
+ pdp_kernel = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(pdp_kernel, 0, INTEL_PGBYTES);
+ for (int i = 0; i < PDPNUM_KERNEL; i++) {
+ int pdp_index = i;
+#ifdef __x86_64__
+ pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS);
+#endif
+ WRITE_PTE(&pdp_kernel[pdp_index],
+ pa_to_pte(_kvtophys((void *) kernel_page_dir
+ + i * INTEL_PGBYTES))
+ | INTEL_PTE_VALID
+#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
+ | INTEL_PTE_WRITE
+#endif
+ );
+ }
+
+#ifdef __x86_64__
+ /* only fill the kernel pdpte during bootstrap */
+ WRITE_PTE(&kernel_pmap->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+ pa_to_pte(_kvtophys(pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+#ifdef MACH_PV_PAGETABLES
+ pmap_set_page_readonly_init(kernel_pmap->l4base);
+#endif /* MACH_PV_PAGETABLES */
+#else /* x86_64 */
+ kernel_pmap->pdpbase = pdp_kernel;
+#endif /* x86_64 */
+}
+#endif /* PAE */
+
+#ifdef MACH_PV_PAGETABLES
+#ifdef PAE
+#define NSUP_L1 4
+#else
+#define NSUP_L1 1
+#endif
+static void pmap_bootstrap_xen(pt_entry_t *l1_map[NSUP_L1])
+{
+ /* We don't actually deal with the CR3 register content at all */
+ hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
+ /*
+ * Xen may only provide as few as 512KB extra bootstrap linear memory,
+ * which is far from enough to map all available memory, so we need to
+ * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE)
+ * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is
+ * enough for a pagetable mapping 4GiB.
+ */
+ vm_offset_t la;
+ int n_l1map;
+ for (n_l1map = 0, la = VM_MIN_KERNEL_ADDRESS; la >= VM_MIN_KERNEL_ADDRESS; la += NPTES * PAGE_SIZE) {
+ pt_entry_t *base = (pt_entry_t*) boot_info.pt_base;
+#ifdef PAE
+#ifdef __x86_64__
+ base = (pt_entry_t*) ptetokv(base[0]);
+#endif /* x86_64 */
+ pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[lin2pdpnum(la)]);
+#else /* PAE */
+ pt_entry_t *l2_map = base;
+#endif /* PAE */
+ /* Like lin2pdenum, but works with non-contiguous boot L3 */
+ l2_map += (la >> PDESHIFT) & PDEMASK;
+ if (!(*l2_map & INTEL_PTE_VALID)) {
+ struct mmu_update update;
+ unsigned j, n;
+
+ l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page());
+ for (j = 0; j < NPTES; j++)
+ l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn(lin2pdenum(la - VM_MIN_KERNEL_ADDRESS) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ pmap_set_page_readonly_init(l1_map[n_l1map]);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map])))
+ panic("couldn't pin page %p(%lx)", l1_map[n_l1map], (vm_offset_t) kv_to_ma (l1_map[n_l1map]));
+ update.ptr = kv_to_ma(l2_map);
+ update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF);
+ if (n != 1)
+ panic("couldn't complete bootstrap map");
+ /* added the last L1 table, can stop */
+ if (++n_l1map >= NSUP_L1)
+ break;
+ }
+ }
+}
+#endif /* MACH_PV_PAGETABLES */
+
/*
* Bootstrap the system enough to run with virtual memory.
* Allocate the kernel page directory and page tables,
@@ -632,8 +760,8 @@ void pmap_bootstrap(void)
kernel_virtual_end = kernel_virtual_start + VM_KERNEL_MAP_SIZE;
if (kernel_virtual_end < kernel_virtual_start
- || kernel_virtual_end > VM_MAX_KERNEL_ADDRESS)
- kernel_virtual_end = VM_MAX_KERNEL_ADDRESS;
+ || kernel_virtual_end > VM_MAX_KERNEL_ADDRESS - PAGE_SIZE)
+ kernel_virtual_end = VM_MAX_KERNEL_ADDRESS - PAGE_SIZE;
/*
* Allocate and clear a kernel page directory.
@@ -641,98 +769,19 @@ void pmap_bootstrap(void)
/* Note: initial Xen mapping holds at least 512kB free mapped page.
* We use that for directly building our linear mapping. */
#if PAE
- {
- vm_offset_t addr;
- init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr);
- kernel_page_dir = (pt_entry_t*)phystokv(addr);
- }
- kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
- memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES);
- {
- int i;
- for (i = 0; i < PDPNUM; i++)
- WRITE_PTE(&kernel_pmap->pdpbase[i],
- pa_to_pte(_kvtophys((void *) kernel_page_dir
- + i * INTEL_PGBYTES))
- | INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
- | INTEL_PTE_WRITE
-#endif
- );
- }
-#ifdef __x86_64__
-#ifdef MACH_HYP
- kernel_pmap->user_l4base = NULL;
- kernel_pmap->user_pdpbase = NULL;
-#endif
- kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page());
- memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
- WRITE_PTE(&kernel_pmap->l4base[0], pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
-#ifdef MACH_PV_PAGETABLES
- pmap_set_page_readonly_init(kernel_pmap->l4base);
-#endif
-#endif /* x86_64 */
+ pmap_bootstrap_pae();
#else /* PAE */
kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page());
-#endif /* PAE */
{
unsigned i;
for (i = 0; i < NPDES; i++)
kernel_page_dir[i] = 0;
}
+#endif /* PAE */
#ifdef MACH_PV_PAGETABLES
- /* We don't actually deal with the CR3 register content at all */
- hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
- /*
- * Xen may only provide as few as 512KB extra bootstrap linear memory,
- * which is far from enough to map all available memory, so we need to
- * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE)
- * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is
- * enough for a pagetable mapping 4GiB.
- */
-#ifdef PAE
-#define NSUP_L1 4
-#else
-#define NSUP_L1 1
-#endif
pt_entry_t *l1_map[NSUP_L1];
- {
- vm_offset_t la;
- int n_l1map;
- for (n_l1map = 0, la = VM_MIN_KERNEL_ADDRESS; la >= VM_MIN_KERNEL_ADDRESS; la += NPTES * PAGE_SIZE) {
- pt_entry_t *base = (pt_entry_t*) boot_info.pt_base;
-#ifdef PAE
-#ifdef __x86_64__
- base = (pt_entry_t*) ptetokv(base[0]);
-#endif /* x86_64 */
- pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[lin2pdpnum(la)]);
-#else /* PAE */
- pt_entry_t *l2_map = base;
-#endif /* PAE */
- /* Like lin2pdenum, but works with non-contiguous boot L3 */
- l2_map += (la >> PDESHIFT) & PDEMASK;
- if (!(*l2_map & INTEL_PTE_VALID)) {
- struct mmu_update update;
- unsigned j, n;
-
- l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page());
- for (j = 0; j < NPTES; j++)
- l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn(lin2pdenum(la - VM_MIN_KERNEL_ADDRESS) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
- pmap_set_page_readonly_init(l1_map[n_l1map]);
- if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map])))
- panic("couldn't pin page %p(%lx)", l1_map[n_l1map], (vm_offset_t) kv_to_ma (l1_map[n_l1map]));
- update.ptr = kv_to_ma(l2_map);
- update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
- hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF);
- if (n != 1)
- panic("couldn't complete bootstrap map");
- /* added the last L1 table, can stop */
- if (++n_l1map >= NSUP_L1)
- break;
- }
- }
- }
+ pmap_bootstrap_xen(l1_map);
#endif /* MACH_PV_PAGETABLES */
/*
@@ -815,9 +864,9 @@ void pmap_bootstrap(void)
}
for (; pte < ptable+NPTES; pte++)
{
- if (va >= kernel_virtual_end - PMAP_NMAPWINDOWS * PAGE_SIZE && va < kernel_virtual_end)
+ if (va >= kernel_virtual_end - MAPWINDOW_SIZE && va < kernel_virtual_end)
{
- pmap_mapwindow_t *win = &mapwindows[atop(va - (kernel_virtual_end - PMAP_NMAPWINDOWS * PAGE_SIZE))];
+ pmap_mapwindow_t *win = &mapwindows[atop(va - (kernel_virtual_end - MAPWINDOW_SIZE))];
win->entry = pte;
win->vaddr = va;
}
@@ -973,14 +1022,15 @@ void pmap_clear_bootstrap_pagetable(pt_entry_t *base) {
pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
{
pmap_mapwindow_t *map;
+ int cpu = cpu_number();
assert(entry != 0);
/* Find an empty one. */
- for (map = &mapwindows[0]; map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]; map++)
+ for (map = &mapwindows[cpu * PMAP_NMAPWINDOWS]; map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]; map++)
if (!(*map->entry))
break;
- assert(map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]);
+ assert(map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]);
#ifdef MACH_PV_PAGETABLES
if (!hyp_mmu_update_pte(kv_to_ma(map->entry), pa_to_ma(entry)))
@@ -988,6 +1038,7 @@ pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
#else /* MACH_PV_PAGETABLES */
WRITE_PTE(map->entry, entry);
#endif /* MACH_PV_PAGETABLES */
+ INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
return map;
}
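
With the change above every CPU owns a private slice of the window array, so pmap_get_mapwindow only scans the entries from mapwindows[cpu * PMAP_NMAPWINDOWS] up to, but not including, mapwindows[(cpu + 1) * PMAP_NMAPWINDOWS]. A toy user-space model of that indexing (get_window and the sizes are made up for illustration):

#include <stdio.h>

#define PMAP_NMAPWINDOWS 2             /* per CPU, as in the patch */
#define NCPUS            4             /* arbitrary for the example */

struct win { int in_use; };
static struct win mapwindows[PMAP_NMAPWINDOWS * NCPUS];

/* Scan only the current CPU's slice, like pmap_get_mapwindow does. */
static struct win *get_window(int cpu)
{
    for (struct win *w = &mapwindows[cpu * PMAP_NMAPWINDOWS];
         w < &mapwindows[(cpu + 1) * PMAP_NMAPWINDOWS]; w++)
        if (!w->in_use) {
            w->in_use = 1;
            return w;
        }
    return NULL;                       /* the kernel asserts instead */
}

int main(void)
{
    struct win *a = get_window(1);
    struct win *b = get_window(1);
    struct win *c = get_window(1);     /* CPU 1 only has two windows */

    printf("cpu 1 got windows %td and %td, then %p\n",
           a - mapwindows, b - mapwindows, (void *)c);
    return 0;
}
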
@@ -1002,7 +1053,7 @@ void pmap_put_mapwindow(pmap_mapwindow_t *map)
#else /* MACH_PV_PAGETABLES */
WRITE_PTE(map->entry, 0);
#endif /* MACH_PV_PAGETABLES */
- PMAP_UPDATE_TLBS(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
+ INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
}
void pmap_virtual_space(
@@ -1010,7 +1061,7 @@ void pmap_virtual_space(
vm_offset_t *endp)
{
*startp = kernel_virtual_start;
- *endp = kernel_virtual_end - PMAP_NMAPWINDOWS * PAGE_SIZE;
+ *endp = kernel_virtual_end - MAPWINDOW_SIZE;
}
/*
@@ -1059,14 +1110,22 @@ void pmap_init(void)
*/
s = (vm_size_t) sizeof(struct pmap);
kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0);
- kmem_cache_init(&pd_cache, "pd",
+ kmem_cache_init(&pt_cache, "pmap_L1",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+ kmem_cache_init(&pd_cache, "pmap_L2",
INTEL_PGBYTES, INTEL_PGBYTES, NULL,
KMEM_CACHE_PHYSMEM);
#if PAE
- kmem_cache_init(&pdpt_cache, "pdpt",
+ kmem_cache_init(&pdpt_cache, "pmap_L3",
INTEL_PGBYTES, INTEL_PGBYTES, NULL,
KMEM_CACHE_PHYSMEM);
-#endif
+#ifdef __x86_64__
+ kmem_cache_init(&l4_cache, "pmap_L4",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+#endif /* __x86_64__ */
+#endif /* PAE */
s = (vm_size_t) sizeof(struct pv_entry);
kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, 0);
@@ -1108,7 +1167,7 @@ valid_page(phys_addr_t addr)
* Must be called with the pmap system and the pmap unlocked,
* since these must be unlocked to use vm_page_grab.
*/
-vm_offset_t
+static vm_offset_t
pmap_page_table_page_alloc(void)
{
vm_page_t m;
@@ -1172,12 +1231,16 @@ void pmap_map_mfn(void *_addr, unsigned long mfn) {
#ifdef MACH_PV_PAGETABLES
if (!hyp_mmu_update_pte(kv_to_ma(pdp),
pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID
+#ifndef __x86_64__
| INTEL_PTE_USER
+#endif
| INTEL_PTE_WRITE))
panic("%s:%d could not set pde %llx(%lx) to %lx(%lx)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),(vm_offset_t) kv_to_ma(pdp), ptp, (vm_offset_t) pa_to_ma(ptp));
#else /* MACH_PV_PAGETABLES */
*pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID
+#ifndef __x86_64__
| INTEL_PTE_USER
+#endif
| INTEL_PTE_WRITE;
#endif /* MACH_PV_PAGETABLES */
pte = pmap_pte(kernel_pmap, addr);
@@ -1198,7 +1261,7 @@ void pmap_map_mfn(void *_addr, unsigned long mfn) {
* The page-table page must have all mappings removed,
* and be removed from its page directory.
*/
-void
+static void
pmap_page_table_page_dealloc(vm_offset_t pa)
{
vm_page_t m;
@@ -1206,6 +1269,11 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
vm_object_lock(pmap_object);
m = vm_page_lookup(pmap_object, pa);
vm_page_lock_queues();
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
+ panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
+ pmap_set_page_readwrite((void*) phystokv(pa));
+#endif /* MACH_PV_PAGETABLES */
vm_page_free(m);
inuse_ptepages_count--;
vm_page_unlock_queues();
@@ -1226,6 +1294,10 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
*/
pmap_t pmap_create(vm_size_t size)
{
+#ifdef __x86_64__
+ // needs to be reworked if we want to dynamically allocate PDPs for kernel
+ const int PDPNUM = PDPNUM_KERNEL;
+#endif
pt_entry_t *page_dir[PDPNUM];
int i;
pmap_t p;
@@ -1265,6 +1337,7 @@ pmap_t pmap_create(vm_size_t size)
INTEL_PGBYTES);
}
+#ifdef LINUX_DEV
#if VM_MIN_KERNEL_ADDRESS != 0
/* Do not map BIOS in user tasks */
page_dir
@@ -1276,6 +1349,8 @@ pmap_t pmap_create(vm_size_t size)
[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)]
= 0;
#endif
+#endif /* LINUX_DEV */
+
#ifdef MACH_PV_PAGETABLES
{
for (i = 0; i < PDPNUM; i++)
@@ -1284,33 +1359,37 @@ pmap_t pmap_create(vm_size_t size)
#endif /* MACH_PV_PAGETABLES */
#if PAE
- p->pdpbase = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
- if (p->pdpbase == NULL) {
+ pt_entry_t *pdp_kernel = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
+ if (pdp_kernel == NULL) {
for (i = 0; i < PDPNUM; i++)
kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]);
kmem_cache_free(&pmap_cache, (vm_address_t) p);
return PMAP_NULL;
}
- memset(p->pdpbase, 0, INTEL_PGBYTES);
+ memset(pdp_kernel, 0, INTEL_PGBYTES);
{
- for (i = 0; i < PDPNUM; i++)
- WRITE_PTE(&p->pdpbase[i],
+ for (i = 0; i < PDPNUM; i++) {
+ int pdp_index = i;
+#ifdef __x86_64__
+ pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS);
+#endif
+ WRITE_PTE(&pdp_kernel[pdp_index],
pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
| INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
+#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
| INTEL_PTE_WRITE
#endif
);
+ }
}
#ifdef __x86_64__
- // FIXME: use kmem_cache_alloc instead
- if (kmem_alloc_wired(kernel_map,
- (vm_offset_t *)&p->l4base, INTEL_PGBYTES)
- != KERN_SUCCESS)
+ p->l4base = (pt_entry_t *) kmem_cache_alloc(&l4_cache);
+ if (p->l4base == NULL)
panic("pmap_create");
memset(p->l4base, 0, INTEL_PGBYTES);
- WRITE_PTE(&p->l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+ pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
#ifdef MACH_PV_PAGETABLES
// FIXME: use kmem_cache_alloc instead
if (kmem_alloc_wired(kernel_map,
@@ -1320,7 +1399,7 @@ pmap_t pmap_create(vm_size_t size)
memset(p->user_pdpbase, 0, INTEL_PGBYTES);
{
int i;
- for (i = 0; i < lin2pdpnum(VM_MAX_ADDRESS); i++)
+ for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++)
WRITE_PTE(&p->user_pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
}
// FIXME: use kmem_cache_alloc instead
@@ -1331,14 +1410,17 @@ pmap_t pmap_create(vm_size_t size)
memset(p->user_l4base, 0, INTEL_PGBYTES);
WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
#endif /* MACH_PV_PAGETABLES */
+#else /* _x86_64 */
+ p->pdpbase = pdp_kernel;
#endif /* _x86_64 */
#ifdef MACH_PV_PAGETABLES
#ifdef __x86_64__
pmap_set_page_readonly(p->l4base);
pmap_set_page_readonly(p->user_l4base);
pmap_set_page_readonly(p->user_pdpbase);
-#endif
+#else
pmap_set_page_readonly(p->pdpbase);
+#endif
#endif /* MACH_PV_PAGETABLES */
#else /* PAE */
p->dirbase = page_dir[0];
@@ -1368,15 +1450,7 @@ pmap_t pmap_create(vm_size_t size)
void pmap_destroy(pmap_t p)
{
-#if PAE
- int i;
-#endif
- boolean_t free_all;
- pt_entry_t *page_dir;
- pt_entry_t *pdep;
- phys_addr_t pa;
int c, s;
- vm_page_t m;
if (p == PMAP_NULL)
return;
@@ -1391,70 +1465,54 @@ void pmap_destroy(pmap_t p)
return; /* still in use */
}
+ /*
+ * Free the page table tree.
+ */
#if PAE
- for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
- free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
- page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
-#else
- free_all = FALSE;
- page_dir = p->dirbase;
-#endif
-
#ifdef __x86_64__
-#warning FIXME 64bit need to free l3
-#endif
- /*
- * Free the memory maps, then the
- * pmap structure.
- */
- for (pdep = page_dir;
- (free_all
- || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
- && pdep < &page_dir[NPTES];
- pdep += ptes_per_vm_page) {
- if (*pdep & INTEL_PTE_VALID) {
- pa = pte_to_pa(*pdep);
- assert(pa == (vm_offset_t) pa);
- vm_object_lock(pmap_object);
- m = vm_page_lookup(pmap_object, pa);
- if (m == VM_PAGE_NULL)
- panic("pmap_destroy: pte page not in object");
- vm_page_lock_queues();
-#ifdef MACH_PV_PAGETABLES
- if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
- panic("pmap_destroy: couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
- pmap_set_page_readwrite((void*) phystokv(pa));
-#endif /* MACH_PV_PAGETABLES */
- vm_page_free(m);
- inuse_ptepages_count--;
- vm_page_unlock_queues();
- vm_object_unlock(pmap_object);
- }
- }
-#ifdef MACH_PV_PAGETABLES
- pmap_set_page_readwrite((void*) page_dir);
-#endif /* MACH_PV_PAGETABLES */
- kmem_cache_free(&pd_cache, (vm_offset_t) page_dir);
+ for (int l4i = 0; l4i < NPTES; l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+#else /* __x86_64__ */
+ pt_entry_t *pdpbase = p->pdpbase;
+#endif /* __x86_64__ */
+ for (int l3i = 0; l3i < NPTES; l3i++) {
+ pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ if (
+#ifdef __x86_64__
+ l4i < lin2l4num(VM_MAX_USER_ADDRESS) ||
+ (l4i == lin2l4num(VM_MAX_USER_ADDRESS) && l3i < lin2pdpnum(VM_MAX_USER_ADDRESS))
+#else /* __x86_64__ */
+ l3i < lin2pdpnum(VM_MAX_USER_ADDRESS)
+#endif /* __x86_64__ */
+ )
+ for (int l2i = 0; l2i < NPTES; l2i++)
+#else /* PAE */
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++)
+#endif /* PAE */
+ {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
+ }
+ kmem_cache_free(&pd_cache, (vm_offset_t)pdebase);
#if PAE
+ }
+ kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase);
+#ifdef __x86_64__
}
+ kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
+#endif /* __x86_64__ */
+#endif /* PAE */
-#ifdef MACH_PV_PAGETABLES
-#ifdef __x86_64__
- pmap_set_page_readwrite(p->l4base);
- pmap_set_page_readwrite(p->user_l4base);
- pmap_set_page_readwrite(p->user_pdpbase);
-#endif
- pmap_set_page_readwrite(p->pdpbase);
-#endif /* MACH_PV_PAGETABLES */
-#ifdef __x86_64__
- kmem_free(kernel_map, (vm_offset_t)p->l4base, INTEL_PGBYTES);
-#ifdef MACH_PV_PAGETABLES
- kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
- kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
-#endif
-#endif
- kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
-#endif /* PAE */
+ /* Finally, free the pmap itself */
kmem_cache_free(&pmap_cache, (vm_offset_t) p);
}
@@ -1502,6 +1560,9 @@ void pmap_remove_range(
struct mmu_update update[HYP_BATCH_MMU_UPDATES];
#endif /* MACH_PV_PAGETABLES */
+ if (pmap == kernel_pmap && (va < kernel_virtual_start || va + (epte-spte)*PAGE_SIZE > kernel_virtual_end))
+ panic("pmap_remove_range(%lx-%lx) falls in physical memory area!\n", (unsigned long) va, (unsigned long) va + (epte-spte)*PAGE_SIZE);
+
#if DEBUG_PTE_PAGE
if (pmap != kernel_pmap)
ptep_check(get_pte_page(spte));
@@ -1514,6 +1575,9 @@ void pmap_remove_range(
if (*cpte == 0)
continue;
+
+ assert(*cpte & INTEL_PTE_VALID);
+
pa = pte_to_pa(*cpte);
num_removed++;
@@ -1588,7 +1652,7 @@ void pmap_remove_range(
pv_h = pai_to_pvh(pai);
if (pv_h->pmap == PMAP_NULL) {
- panic("pmap_remove: null pv_list!");
+ panic("pmap_remove: null pv_list for pai %lx at va %lx!", pai, (unsigned long) va);
}
if (pv_h->va == va && pv_h->pmap == pmap) {
/*
@@ -1662,9 +1726,9 @@ void pmap_remove(
pt_entry_t *pde = pmap_pde(map, s);
l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
- if (l > e)
+ if (l > e || l < s)
l = e;
- if (*pde & INTEL_PTE_VALID) {
+ if (pde && (*pde & INTEL_PTE_VALID)) {
spte = (pt_entry_t *)ptetokv(*pde);
spte = &spte[ptenum(s)];
epte = &spte[intel_btop(l-s)];
@@ -1855,7 +1919,6 @@ void pmap_protect(
vm_offset_t e,
vm_prot_t prot)
{
- pt_entry_t *pde;
pt_entry_t *spte, *epte;
vm_offset_t l;
int spl;
@@ -1894,12 +1957,13 @@ void pmap_protect(
SPLVM(spl);
simple_lock(&map->lock);
- pde = pmap_pde(map, s);
while (s < e) {
+ pt_entry_t *pde = pmap_pde(map, s);
+
l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
- if (l > e)
+ if (l > e || l < s)
l = e;
- if (*pde & INTEL_PTE_VALID) {
+ if (pde && (*pde & INTEL_PTE_VALID)) {
spte = (pt_entry_t *)ptetokv(*pde);
spte = &spte[ptenum(s)];
epte = &spte[intel_btop(l-s)];
@@ -1936,7 +2000,6 @@ void pmap_protect(
#endif /* MACH_PV_PAGETABLES */
}
s = l;
- pde++;
}
PMAP_UPDATE_TLBS(map, _s, e);
@@ -1944,86 +2007,24 @@ void pmap_protect(
SPLX(spl);
}
+typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr);
/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-void pmap_enter(
- pmap_t pmap,
- vm_offset_t v,
- phys_addr_t pa,
- vm_prot_t prot,
- boolean_t wired)
+ * Expand a single level of the page table tree.
+ */
+static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl,
+ pmap_level_getter_t pmap_level,
+ pmap_level_getter_t pmap_level_upper,
+ int n_per_vm_page,
+ struct kmem_cache *cache)
{
- boolean_t is_physmem;
pt_entry_t *pte;
- pv_entry_t pv_h;
- unsigned long i, pai;
- pv_entry_t pv_e;
- pt_entry_t template;
- int spl;
- phys_addr_t old_pa;
-
- assert(pa != vm_page_fictitious_addr);
- if (pmap_debug) printf("pmap(%lx, %llx)\n", v, (unsigned long long) pa);
- if (pmap == PMAP_NULL)
- return;
-
-#if !MACH_KDB
- if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
- panic("pmap_enter(%lx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
-#endif
-#if !(__i486__ || __i586__ || __i686__)
- if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
- && !wired /* hack for io_wire */ ) {
- /*
- * Because the 386 ignores write protection in kernel mode,
- * we cannot enter a read-only kernel mapping, and must
- * remove an existing mapping if changing it.
- */
- PMAP_READ_LOCK(pmap, spl);
-
- pte = pmap_pte(pmap, v);
- if (pte != PT_ENTRY_NULL && *pte != 0) {
- /*
- * Invalidate the translation buffer,
- * then remove the mapping.
- */
- pmap_remove_range(pmap, v, pte,
- pte + ptes_per_vm_page);
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
- }
- PMAP_READ_UNLOCK(pmap, spl);
- return;
- }
-#endif
-
- /*
- * Must allocate a new pvlist entry while we're unlocked;
- * Allocating may cause pageout (which will lock the pmap system).
- * If we determine we need a pvlist entry, we will unlock
- * and allocate one. Then we will retry, throughing away
- * the allocated entry later (if we no longer need it).
- */
- pv_e = PV_ENTRY_NULL;
-Retry:
- PMAP_READ_LOCK(pmap, spl);
/*
* Expand pmap to include this pte. Assume that
* pmap is always expanded to include enough hardware
* pages to map one VM page.
*/
-
- while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
+ while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) {
/*
* Need to allocate a new page-table page.
*/
@@ -2036,7 +2037,7 @@ Retry:
* Would have to enter the new page-table page in
* EVERY pmap.
*/
- panic("pmap_expand kernel pmap to %#lx", v);
+ panic("pmap_expand kernel pmap to %#zx", v);
}
/*
@@ -2044,7 +2045,9 @@ Retry:
*/
PMAP_READ_UNLOCK(pmap, spl);
- ptp = phystokv(pmap_page_table_page_alloc());
+ while (!(ptp = kmem_cache_alloc(cache)))
+ VM_PAGE_WAIT((void (*)()) 0);
+ memset((void *)ptp, 0, PAGE_SIZE);
/*
* Re-lock the pmap and check that another thread has
@@ -2054,12 +2057,12 @@ Retry:
*/
PMAP_READ_LOCK(pmap, spl);
- if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
+ if (pmap_level(pmap, v) != PT_ENTRY_NULL) {
/*
* Oops...
*/
PMAP_READ_UNLOCK(pmap, spl);
- pmap_page_table_page_dealloc(kvtophys(ptp));
+ kmem_cache_free(cache, ptp);
PMAP_READ_LOCK(pmap, spl);
continue;
}
@@ -2067,8 +2070,8 @@ Retry:
/*
* Enter the new page table page in the page directory.
*/
- i = ptes_per_vm_page;
- pdp = pmap_pde(pmap, v);
+ i = n_per_vm_page;
+ pdp = pmap_level_upper(pmap, v);
do {
#ifdef MACH_PV_PAGETABLES
pmap_set_page_readonly((void *) ptp);
@@ -2076,12 +2079,12 @@ Retry:
panic("couldn't pin page %lx(%lx)\n",ptp,(vm_offset_t) kv_to_ma(ptp));
if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)),
pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID
- | INTEL_PTE_USER
+ | (pmap != kernel_pmap ? INTEL_PTE_USER : 0)
| INTEL_PTE_WRITE))
panic("%s:%d could not set pde %p(%llx,%lx) to %lx(%llx,%lx) %lx\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), (vm_offset_t) pa_to_ma(kvtophys(ptp)), (vm_offset_t) pa_to_pte(kv_to_ma(ptp)));
#else /* MACH_PV_PAGETABLES */
*pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID
- | INTEL_PTE_USER
+ | (pmap != kernel_pmap ? INTEL_PTE_USER : 0)
| INTEL_PTE_WRITE;
#endif /* MACH_PV_PAGETABLES */
pdp++; /* Note: This is safe b/c we stay in one page. */
@@ -2093,6 +2096,98 @@ Retry:
*/
continue;
}
+ return pte;
+}
+
+/*
+ * Expand, if required, the PMAP to include the virtual address V.
+ * PMAP needs to be locked, and it will be still locked on return. It
+ * can temporarily unlock the PMAP, during allocation or deallocation
+ * of physical pages.
+ */
+static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl)
+{
+#ifdef PAE
+#ifdef __x86_64__
+ pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, 1, &pdpt_cache);
+#endif /* __x86_64__ */
+ pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, 1, &pd_cache);
+#endif /* PAE */
+ return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache);
+}
+
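
pmap_expand_level drops the pmap lock around the allocation, which may sleep, and after re-taking the lock it checks whether another thread installed the missing table first, freeing its own copy if so. A rough user-space analogue of that allocate-unlocked/re-check pattern, using a pthread mutex and calloc purely for illustration (ensure_table is an invented name, error handling omitted):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *shared_table;             /* stands in for the missing page-table page */

/* Caller holds `lock`; returns with it still held, as pmap_expand_level does. */
static void *ensure_table(void)
{
    while (shared_table == NULL) {
        pthread_mutex_unlock(&lock);
        void *fresh = calloc(1, 4096); /* may block, so it is done unlocked */
        pthread_mutex_lock(&lock);
        if (shared_table != NULL) {    /* somebody else installed it meanwhile */
            free(fresh);
            continue;
        }
        shared_table = fresh;          /* install our copy and re-check the loop */
    }
    return shared_table;
}

int main(void)
{
    pthread_mutex_lock(&lock);
    void *t = ensure_table();
    pthread_mutex_unlock(&lock);
    return t == NULL;
}
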
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void pmap_enter(
+ pmap_t pmap,
+ vm_offset_t v,
+ phys_addr_t pa,
+ vm_prot_t prot,
+ boolean_t wired)
+{
+ boolean_t is_physmem;
+ pt_entry_t *pte;
+ pv_entry_t pv_h;
+ unsigned long i, pai;
+ pv_entry_t pv_e;
+ pt_entry_t template;
+ int spl;
+ phys_addr_t old_pa;
+
+ assert(pa != vm_page_fictitious_addr);
+ if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
+ if (pmap == PMAP_NULL)
+ return;
+
+ if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
+ panic("pmap_enter(%lx, %llx) falls in physical memory area!\n", (unsigned long) v, (unsigned long long) pa);
+#if !(__i486__ || __i586__ || __i686__)
+ if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
+ && !wired /* hack for io_wire */ ) {
+ /*
+ * Because the 386 ignores write protection in kernel mode,
+ * we cannot enter a read-only kernel mapping, and must
+ * remove an existing mapping if changing it.
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_pte(pmap, v);
+ if (pte != PT_ENTRY_NULL && *pte != 0) {
+ /*
+ * Invalidate the translation buffer,
+ * then remove the mapping.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+ PMAP_READ_UNLOCK(pmap, spl);
+ return;
+ }
+#endif
+
+ /*
+ * Must allocate a new pvlist entry while we're unlocked;
+ * Allocating may cause pageout (which will lock the pmap system).
+ * If we determine we need a pvlist entry, we will unlock
+ * and allocate one. Then we will retry, throwing away
+ * the allocated entry later (if we no longer need it).
+ */
+ pv_e = PV_ENTRY_NULL;
+Retry:
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_expand(pmap, v, spl);
if (vm_page_ready())
is_physmem = (vm_page_lookup_pa(pa) != NULL);
@@ -2347,12 +2442,12 @@ phys_addr_t pmap_extract(
* This routine is only advisory and need not do anything.
*/
#if 0
-void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
- pmap_t dst_pmap;
- pmap_t src_pmap;
- vm_offset_t dst_addr;
- vm_size_t len;
- vm_offset_t src_addr;
+void pmap_copy(
+ pmap_t dst_pmap,
+ pmap_t src_pmap,
+ vm_offset_t dst_addr,
+ vm_size_t len,
+ vm_offset_t src_addr)
{
}
#endif /* 0 */
@@ -2370,10 +2465,7 @@ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
*/
void pmap_collect(pmap_t p)
{
- int i;
- boolean_t free_all;
- pt_entry_t *page_dir;
- pt_entry_t *pdp, *ptp;
+ pt_entry_t *ptp;
pt_entry_t *eptp;
phys_addr_t pa;
int spl, wired;
@@ -2384,115 +2476,211 @@ void pmap_collect(pmap_t p)
if (p == kernel_pmap)
return;
+ /*
+ * Free the page table tree.
+ */
+ PMAP_READ_LOCK(p, spl);
#if PAE
- for (i = 0; i <= lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); i++) {
- free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
- page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
-#else
- i = 0;
- free_all = FALSE;
- page_dir = p->dirbase;
-#endif
-
- /*
- * Garbage collect map.
- */
- PMAP_READ_LOCK(p, spl);
- for (pdp = page_dir;
- (free_all
- || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
- && pdp < &page_dir[NPTES];
- pdp += ptes_per_vm_page) {
- if (*pdp & INTEL_PTE_VALID) {
-
- pa = pte_to_pa(*pdp);
- ptp = (pt_entry_t *)phystokv(pa);
- eptp = ptp + NPTES*ptes_per_vm_page;
-
- /*
- * If the pte page has any wired mappings, we cannot
- * free it.
- */
- wired = 0;
- {
- pt_entry_t *ptep;
- for (ptep = ptp; ptep < eptp; ptep++) {
- if (*ptep & INTEL_PTE_WIRED) {
- wired = 1;
- break;
- }
- }
- }
- if (!wired) {
- /*
- * Remove the virtual addresses mapped by this pte page.
- */
- { /*XXX big hack*/
- vm_offset_t va = pdenum2lin(pdp - page_dir
- + i * NPTES);
- if (p == kernel_pmap)
- va = lintokv(va);
- pmap_remove_range(p,
- va,
- ptp,
- eptp);
- }
-
- /*
- * Invalidate the page directory pointer.
- */
+#ifdef __x86_64__
+ for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+ for (int l3i = 0; l3i < NPTES; l3i++)
+#else /* __x86_64__ */
+ pt_entry_t *pdpbase = p->pdpbase;
+ for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++)
+#endif /* __x86_64__ */
+ {
+ pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ for (int l2i = 0; l2i < NPTES; l2i++)
+#else /* PAE */
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++)
+#endif /* PAE */
{
- int i = ptes_per_vm_page;
- pt_entry_t *pdep = pdp;
- do {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+
+ pa = pte_to_pa(pte);
+ ptp = (pt_entry_t *)phystokv(pa);
+ eptp = ptp + NPTES*ptes_per_vm_page;
+
+ /*
+ * If the pte page has any wired mappings, we cannot
+ * free it.
+ */
+ wired = 0;
+ {
+ pt_entry_t *ptep;
+ for (ptep = ptp; ptep < eptp; ptep++) {
+ if (*ptep & INTEL_PTE_WIRED) {
+ wired = 1;
+ break;
+ }
+ }
+ }
+ if (!wired) {
+ /*
+ * Remove the virtual addresses mapped by this pte page.
+ */
+ { /*XXX big hack*/
+ vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0);
+ if (p == kernel_pmap)
+ va = lintokv(va);
+ pmap_remove_range(p, va, ptp, eptp);
+ }
+
+ /*
+ * Invalidate the page directory pointer.
+ */
+ {
+ int i = ptes_per_vm_page;
+ pt_entry_t *pdep = &pdebase[l2i];
+ do {
#ifdef MACH_PV_PAGETABLES
- unsigned long pte = *pdep;
- void *ptable = (void*) ptetokv(pte);
- if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
- panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
- if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
- panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
- pmap_set_page_readwrite(ptable);
+ unsigned long pte = *pdep;
+ void *ptable = (void*) ptetokv(pte);
+ if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
+ panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
+ panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
+ pmap_set_page_readwrite(ptable);
#else /* MACH_PV_PAGETABLES */
- *pdep++ = 0;
+ *pdep++ = 0;
#endif /* MACH_PV_PAGETABLES */
- } while (--i > 0);
- }
+ } while (--i > 0);
+ }
- PMAP_READ_UNLOCK(p, spl);
+ PMAP_READ_UNLOCK(p, spl);
- /*
- * And free the pte page itself.
- */
- {
- vm_page_t m;
-
- vm_object_lock(pmap_object);
- assert(pa == (vm_offset_t) pa);
- m = vm_page_lookup(pmap_object, pa);
- if (m == VM_PAGE_NULL)
- panic("pmap_collect: pte page not in object");
- vm_page_lock_queues();
- vm_page_free(m);
- inuse_ptepages_count--;
- vm_page_unlock_queues();
- vm_object_unlock(pmap_object);
- }
+ /*
+ * And free the pte page itself.
+ */
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
- PMAP_READ_LOCK(p, spl);
- }
- }
- }
+ PMAP_READ_LOCK(p, spl);
+
+ }
+ }
#if PAE
+ // TODO check l2
+ }
+#ifdef __x86_64__
+ // TODO check l3
}
-#endif
- PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
+#endif /* __x86_64__ */
+#endif /* PAE */
+
+ PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
PMAP_READ_UNLOCK(p, spl);
return;
}
+#if MACH_KDB
+/*
+ * Routine: pmap_whatis
+ * Function:
+ * Check whether this address is within a pmap
+ * Usage:
+ * Called from debugger
+ */
+int pmap_whatis(pmap_t p, vm_offset_t a)
+{
+ pt_entry_t *ptp;
+ phys_addr_t pa;
+ int spl;
+ int ret = 0;
+
+ if (p == PMAP_NULL)
+ return 0;
+
+ PMAP_READ_LOCK(p, spl);
+#if PAE
+#ifdef __x86_64__
+ if (a >= (vm_offset_t) p->l4base && a < (vm_offset_t) (&p->l4base[NPTES])) {
+ db_printf("L4 for pmap %p\n", p);
+ ret = 1;
+ }
+ for (int l4i = 0; l4i < NPTES; l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+#else /* __x86_64__ */
+ int l4i = 0;
+ pt_entry_t *pdpbase = p->pdpbase;
+#endif /* __x86_64__ */
+ if (a >= (vm_offset_t) pdpbase && a < (vm_offset_t) (&pdpbase[NPTES])) {
+ db_printf("PDP %d for pmap %p\n", l4i, p);
+ ret = 1;
+ }
+ for (int l3i = 0; l3i < NPTES; l3i++)
+ {
+ pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+#else /* PAE */
+ int l4i = 0, l3i = 0;
+ pt_entry_t *pdebase = p->dirbase;
+#endif /* PAE */
+ if (a >= (vm_offset_t) pdebase && a < (vm_offset_t) (&pdebase[NPTES])) {
+ db_printf("PDE %d %d for pmap %p\n", l4i, l3i, p);
+ ret = 1;
+ }
+ for (int l2i = 0; l2i < NPTES; l2i++)
+ {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+
+ pa = pte_to_pa(pte);
+ ptp = (pt_entry_t *)phystokv(pa);
+
+ if (a >= (vm_offset_t) ptp && a < (vm_offset_t) (&ptp[NPTES*ptes_per_vm_page])) {
+ db_printf("PTP %d %d %d for pmap %p\n", l4i, l3i, l2i, p);
+ ret = 1;
+ }
+ }
+#if PAE
+ }
+#ifdef __x86_64__
+ }
+#endif /* __x86_64__ */
+#endif /* PAE */
+ PMAP_READ_UNLOCK(p, spl);
+
+ if (p == kernel_pmap) {
+ phys_addr_t pa;
+ if (DB_VALID_KERN_ADDR(a))
+ pa = kvtophys(a);
+ else
+ pa = pmap_extract(current_task()->map->pmap, a);
+
+ if (valid_page(pa)) {
+ unsigned long pai;
+ pv_entry_t pv_h;
+
+ pai = pa_index(pa);
+ for (pv_h = pai_to_pvh(pai);
+ pv_h && pv_h->pmap;
+ pv_h = pv_h->next)
+ db_printf("pmap %p at %llx\n", pv_h->pmap, pv_h->va);
+ }
+ }
+
+ return ret;
+}
+#endif /* MACH_KDB */
+
/*
* Routine: pmap_activate
* Function:
@@ -2500,10 +2688,7 @@ void pmap_collect(pmap_t p)
* processor, and returns a hardware map description.
*/
#if 0
-void pmap_activate(my_pmap, th, my_cpu)
- pmap_t my_pmap;
- thread_t th;
- int my_cpu;
+void pmap_activate(pmap_t my_pmap, thread_t th, int my_cpu)
{
PMAP_ACTIVATE(my_pmap, th, my_cpu);
}
@@ -2517,10 +2702,7 @@ void pmap_activate(my_pmap, th, my_cpu)
* in pmap.h)
*/
#if 0
-void pmap_deactivate(pmap, th, which_cpu)
- pmap_t pmap;
- thread_t th;
- int which_cpu;
+void pmap_deactivate(pmap_t pmap, thread_t th, int which_cpu)
{
PMAP_DEACTIVATE(pmap, th, which_cpu);
}
@@ -2543,8 +2725,7 @@ pmap_t pmap_kernel()
* See machine/phys.c or machine/phys.s for implementation.
*/
#if 0
-pmap_zero_page(phys)
- vm_offset_t phys;
+pmap_zero_page(vm_offset_t phys)
{
int i;
@@ -2562,8 +2743,7 @@ pmap_zero_page(phys)
* See machine/phys.c or machine/phys.s for implementation.
*/
#if 0
-pmap_copy_page(src, dst)
- vm_offset_t src, dst;
+pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{
int i;
@@ -2605,7 +2785,7 @@ pmap_pageable(
/*
* Clear specified attribute bits.
*/
-void
+static void
phys_attribute_clear(
phys_addr_t phys,
int bits)
@@ -2689,7 +2869,7 @@ phys_attribute_clear(
/*
* Check specified attribute bits.
*/
-boolean_t
+static boolean_t
phys_attribute_test(
phys_addr_t phys,
int bits)
@@ -2892,7 +3072,7 @@ void signal_cpus(
int which_cpu, j;
pmap_update_list_t update_list_p;
- while ((which_cpu = ffs(use_list)) != 0) {
+ while ((which_cpu = __builtin_ffs(use_list)) != 0) {
which_cpu -= 1; /* convert to 0 origin */
update_list_p = &cpu_update_list[which_cpu];
@@ -2905,7 +3085,7 @@ void signal_cpus(
* indicate overflow.
*/
update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
- update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
+ update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_USER_ADDRESS;
update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
}
else {
@@ -2917,12 +3097,16 @@ void signal_cpus(
cpu_update_needed[which_cpu] = TRUE;
simple_unlock(&update_list_p->lock);
- if ((cpus_idle & (1 << which_cpu)) == 0)
+ __sync_synchronize();
+ if ((cpus_idle & (1 << which_cpu)) == 0)
interrupt_processor(which_cpu);
use_list &= ~(1 << which_cpu);
}
}
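
The loop above peels one set bit off the CPU mask per iteration with __builtin_ffs, lowest CPU first, and clears it once that processor has been signalled. A standalone demonstration of the same idiom:

#include <stdio.h>

int main(void)
{
    unsigned use_list = 0x16;          /* CPUs 1, 2 and 4 need a TLB update */
    int which_cpu;

    while ((which_cpu = __builtin_ffs(use_list)) != 0) {
        which_cpu -= 1;                /* ffs() is 1-origin */
        printf("signalling cpu %d\n", which_cpu);
        use_list &= ~(1u << which_cpu);
    }
    return 0;
}
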
+/*
+ * This is called at splvm
+ */
void process_pmap_updates(pmap_t my_pmap)
{
int my_cpu = cpu_number();
@@ -2931,7 +3115,8 @@ void process_pmap_updates(pmap_t my_pmap)
pmap_t pmap;
update_list_p = &cpu_update_list[my_cpu];
- simple_lock(&update_list_p->lock);
+ assert_splvm();
+ simple_lock_nocheck(&update_list_p->lock);
for (j = 0; j < update_list_p->count; j++) {
pmap = update_list_p->item[j].pmap;
@@ -2945,7 +3130,7 @@ void process_pmap_updates(pmap_t my_pmap)
}
update_list_p->count = 0;
cpu_update_needed[my_cpu] = FALSE;
- simple_unlock(&update_list_p->lock);
+ simple_unlock_nocheck(&update_list_p->lock);
}
/*
@@ -2995,9 +3180,9 @@ void pmap_update_interrupt(void)
* Wait for any pmap updates in progress, on either user
* or kernel pmap.
*/
- while (*(volatile int *)&my_pmap->lock.lock_data ||
- *(volatile int *)&kernel_pmap->lock.lock_data)
- continue;
+ while (my_pmap->lock.lock_data ||
+ kernel_pmap->lock.lock_data)
+ cpu_pause();
process_pmap_updates(my_pmap);
@@ -3038,3 +3223,105 @@ pmap_unmap_page_zero (void)
#endif /* MACH_PV_PAGETABLES */
}
#endif /* __i386__ */
+
+void
+pmap_make_temporary_mapping(void)
+{
+ int i;
+ /*
+ * We'll have to temporarily install a direct mapping
+ * between physical memory and low linear memory,
+ * until we start using our new kernel segment descriptors.
+ */
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ for (i = 0; i < nb_direct; i++)
+ kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
+#endif
+
+#ifdef LINUX_DEV
+ /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+#endif /* LINUX_DEV */
+
+#ifdef MACH_PV_PAGETABLES
+#ifndef __x86_64__
+ const int PDPNUM_KERNEL = PDPNUM;
+#endif
+ for (i = 0; i < PDPNUM_KERNEL; i++)
+ pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES);
+#if PAE
+#ifndef __x86_64__
+ pmap_set_page_readonly_init(kernel_pmap->pdpbase);
+#endif
+#endif /* PAE */
+#endif /* MACH_PV_PAGETABLES */
+
+ pmap_set_page_dir();
+}
+
+void
+pmap_set_page_dir(void)
+{
+#if PAE
+#ifdef __x86_64__
+ set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base));
+#else
+ set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase));
+#endif
+#ifndef MACH_HYP
+ if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
+ panic("CPU doesn't have support for PAE.");
+ set_cr4(get_cr4() | CR4_PAE);
+#endif /* MACH_HYP */
+#else
+ set_cr3((unsigned long)_kvtophys(kernel_page_dir));
+#endif /* PAE */
+}
+
+void
+pmap_remove_temporary_mapping(void)
+{
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ int i;
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ /* Get rid of the temporary direct mapping and flush it out of the TLB. */
+ for (i = 0 ; i < nb_direct; i++) {
+#ifdef MACH_XEN
+#ifdef MACH_PSEUDO_PHYS
+ if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame %d\n", i);
+#else /* MACH_XEN */
+ kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0;
+#endif /* MACH_XEN */
+ }
+#endif
+
+#ifdef LINUX_DEV
+ /* Keep BIOS memory mapped */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+#endif /* LINUX_DEV */
+
+ /* Not used after boot, better give it back. */
+#ifdef MACH_XEN
+ hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
+#endif /* MACH_XEN */
+
+ flush_tlb();
+}
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 63683bc5..8b0eba0d 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -74,16 +74,13 @@ typedef phys_addr_t pt_entry_t;
#ifdef __x86_64__
#define L4SHIFT 39 /* L4 shift */
#define L4MASK 0x1ff /* mask for L4 index */
-#endif
-#define PDPSHIFT 30 /* page directory pointer */
-#ifdef __x86_64__
-/* Enough for 8GiB addressing space. */
-#define PDPNUM 8 /* number of page directory pointers */
+#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
#define PDPMASK 0x1ff /* mask for page directory pointer index */
-#else
+#else /* __x86_64__ */
#define PDPNUM 4 /* number of page directory pointers */
#define PDPMASK 3 /* mask for page directory pointer index */
-#endif
+#endif /* __x86_64__ */
+#define PDPSHIFT 30 /* page directory pointer */
#define PDESHIFT 21 /* page descriptor shift */
#define PDEMASK 0x1ff /* mask for page descriptor index */
#define PTESHIFT 12 /* page table shift */
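
To make the PDPNUM_KERNEL arithmetic above concrete: each page directory pointer entry covers 1 << PDPSHIFT bytes (1 GiB), so the macro yields the number of 1 GiB slots needed to span the kernel window. The sketch below uses made-up boundary addresses, since the real VM_MIN/MAX_KERNEL_ADDRESS values are configuration dependent:

#include <stdint.h>
#include <stdio.h>

#define PDPSHIFT 30                    /* as defined above */

int main(void)
{
    /* Boundary values below are assumed purely for illustration. */
    uint64_t vm_min_kernel_address = 0xffffffff80000000ull;
    uint64_t vm_max_kernel_address = 0xffffffffffffffffull;

    uint64_t pdpnum_kernel =
        ((vm_max_kernel_address - vm_min_kernel_address) >> PDPSHIFT) + 1;

    printf("PDPNUM_KERNEL would be %llu (1 GiB per entry)\n",
           (unsigned long long)pdpnum_kernel);
    return 0;
}
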
@@ -111,7 +108,11 @@ typedef phys_addr_t pt_entry_t;
#if PAE
/* Special version assuming contiguous page directories. Making it
include the page directory pointer table index too. */
+#ifdef __x86_64__
+#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x3ff)
+#else
#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff)
+#endif
#else
#define lin2pdenum_cont(a) lin2pdenum(a)
#endif
@@ -128,6 +129,26 @@ typedef phys_addr_t pt_entry_t;
*/
#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
+#if PAE
+#ifdef __x86_64__
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l4num) << L4SHIFT) + \
+ ((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#else /* __x86_64__ */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+#else /* PAE */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+
+
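
A quick standalone check of the x86_64/PAE form of pagenum2lin above, with the shift constants copied in and arbitrary index values: rebuilding a linear address from per-level indices and shifting it back recovers the same indices.

#include <stdint.h>
#include <stdio.h>

#define L4SHIFT  39
#define PDPSHIFT 30
#define PDESHIFT 21
#define PTESHIFT 12
#define pagenum2lin(l4, l3, l2, l1) \
        (((uint64_t)(l4) << L4SHIFT) + ((uint64_t)(l3) << PDPSHIFT) + \
         ((uint64_t)(l2) << PDESHIFT) + ((uint64_t)(l1) << PTESHIFT))

int main(void)
{
    uint64_t la = pagenum2lin(1, 2, 3, 4);

    printf("linear address %#llx -> l4=%llu l3=%llu l2=%llu l1=%llu\n",
           (unsigned long long)la,
           (unsigned long long)((la >> L4SHIFT) & 0x1ff),
           (unsigned long long)((la >> PDPSHIFT) & 0x1ff),
           (unsigned long long)((la >> PDESHIFT) & 0x1ff),
           (unsigned long long)((la >> PTESHIFT) & 0x1ff));
    return 0;
}
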
/*
* Convert linear offset to page table index
*/
@@ -148,6 +169,7 @@ typedef phys_addr_t pt_entry_t;
#define INTEL_PTE_NCACHE 0x00000010
#define INTEL_PTE_REF 0x00000020
#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_PS 0x00000080
#ifdef MACH_PV_PAGETABLES
/* Not supported */
#define INTEL_PTE_GLOBAL 0x00000000
@@ -156,7 +178,11 @@ typedef phys_addr_t pt_entry_t;
#endif /* MACH_PV_PAGETABLES */
#define INTEL_PTE_WIRED 0x00000200
#ifdef PAE
+#ifdef __x86_64__
+#define INTEL_PTE_PFN 0xfffffffffffff000ULL
+#else /* __x86_64__ */
#define INTEL_PTE_PFN 0x00007ffffffff000ULL
+#endif/* __x86_64__ */
#else
#define INTEL_PTE_PFN 0xfffff000
#endif
@@ -181,16 +207,17 @@ typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
struct pmap {
#if ! PAE
pt_entry_t *dirbase; /* page directory table */
-#else
- pt_entry_t *pdpbase; /* page directory pointer table */
-#endif /* ! PAE */
+#else /* PAE */
#ifdef __x86_64__
pt_entry_t *l4base; /* l4 table */
#ifdef MACH_HYP
pt_entry_t *user_l4base; /* Userland l4 table */
pt_entry_t *user_pdpbase; /* Userland l4 table */
#endif /* MACH_HYP */
+#else /* x86_64 */
+ pt_entry_t *pdpbase; /* page directory pointer table */
#endif /* x86_64 */
+#endif /* PAE */
int ref_count; /* reference count */
decl_simple_lock_data(,lock)
/* lock on map */
@@ -239,7 +266,7 @@ typedef struct {
extern pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry);
extern void pmap_put_mapwindow(pmap_mapwindow_t *map);
-#define PMAP_NMAPWINDOWS 2
+#define PMAP_NMAPWINDOWS 2 /* Per CPU */
#if NCPUS > 1
/*
@@ -267,11 +294,12 @@ boolean_t cpu_update_needed[NCPUS];
*/
void process_pmap_updates(pmap_t);
-void pmap_update_interrupt(void);
extern pmap_t kernel_pmap;
#endif /* NCPUS > 1 */
+void pmap_update_interrupt(void);
+
/*
* Machine dependent routines that are used only for i386/i486.
*/
@@ -415,6 +443,7 @@ pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
* interrupt if this happens. \
*/ \
i_bit_clear((my_cpu), &cpus_idle); \
+ __sync_synchronize(); \
\
if (cpu_update_needed[(my_cpu)]) \
pmap_update_interrupt(); \
@@ -467,12 +496,17 @@ pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
#define pmap_kernel() (kernel_pmap)
#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
-#define pmap_phys_address(frame) ((phys_addr_t) (intel_ptob(frame)))
+#define pmap_phys_address(frame) ((intel_ptob((phys_addr_t) frame)))
#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys)))
#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
#define pmap_attribute(pmap,addr,size,attr,value) \
(KERN_INVALID_ADDRESS)
+extern pt_entry_t *kernel_page_dir;
+
+extern vm_offset_t kernel_virtual_start;
+extern vm_offset_t kernel_virtual_end;
+
/*
* Bootstrap the system enough to run with virtual memory.
* Allocate the kernel page directory and page tables,
@@ -481,6 +515,10 @@ pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
*/
extern void pmap_bootstrap(void);
+extern void pmap_set_page_dir(void);
+extern void pmap_make_temporary_mapping(void);
+extern void pmap_remove_temporary_mapping(void);
+
extern void pmap_unmap_page_zero (void);
/*
diff --git a/i386/intel/read_fault.c b/i386/intel/read_fault.c
index 0b79e3d8..356145e1 100644
--- a/i386/intel/read_fault.c
+++ b/i386/intel/read_fault.c
@@ -61,7 +61,7 @@ intel_read_fault(
* Find the backing store object and offset into it
* to begin search.
*/
- result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, FALSE, &version,
&object, &offset, &prot, &wired);
if (result != KERN_SUCCESS)
return (result);
@@ -133,7 +133,7 @@ intel_read_fault(
vm_offset_t retry_offset;
vm_prot_t retry_prot;
- result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, FALSE, &version,
&retry_object, &retry_offset, &retry_prot,
&wired);
if (result != KERN_SUCCESS) {
diff --git a/i386/xen/xen.c b/i386/xen/xen.c
index f2dedfb9..5309675f 100644
--- a/i386/xen/xen.c
+++ b/i386/xen/xen.c
@@ -23,20 +23,11 @@
#include <mach/machine/eflags.h>
#include <machine/thread.h>
#include <machine/ipl.h>
-
#include <machine/model_dep.h>
-unsigned long cr3;
+#include <xen/xen.h>
-struct failsafe_callback_regs {
- unsigned int ds;
- unsigned int es;
- unsigned int fs;
- unsigned int gs;
- unsigned int ip;
- unsigned int cs_and_mask;
- unsigned int flags;
-};
+unsigned long cr3;
void hyp_failsafe_c_callback(struct failsafe_callback_regs *regs) {
printf("Fail-Safe callback!\n");
diff --git a/include/device/device.defs b/include/device/device.defs
index d1df799d..7f316129 100644
--- a/include/device/device.defs
+++ b/include/device/device.defs
@@ -53,6 +53,7 @@ type reply_port_t = MACH_MSG_TYPE_MAKE_SEND_ONCE | polymorphic
#endif /* KERNEL_SERVER */
;
+/* Deprecated in favor of device_open_new. */
routine device_open(
master_port : mach_port_t;
sreplyport reply_port : reply_port_t;
@@ -110,7 +111,27 @@ routine device_read_inband(
out data : io_buf_ptr_inband_t
);
+#if defined(KERNEL_SERVER) || defined(DEVICE_ENABLE_DEVICE_OPEN_NEW)
+routine device_open_new(
+ master_port : mach_port_t;
+ sreplyport reply_port : reply_port_t;
+ mode : dev_mode_t;
+ name : new_dev_name_t;
+ out device : device_t =
+ MACH_MSG_TYPE_PORT_SEND
+ ctype: mach_port_t
+#if KERNEL_SERVER
+ outtran: mach_port_t convert_device_to_port(device_t)
+#else
+#ifdef DEVICE_OUTTRAN
+ outtran: DEVICE_OUTTRAN
+#endif
+#endif /* KERNEL_SERVER */
+ );
+#else
skip; /* old xxx_device_set_status */
+#endif
+
skip; /* old xxx_device_get_status */
skip; /* old xxx_device_set_filter*/
diff --git a/include/device/device_request.defs b/include/device/device_request.defs
index 7ea8637c..a8af3a89 100644
--- a/include/device/device_request.defs
+++ b/include/device/device_request.defs
@@ -45,6 +45,7 @@ type reply_port_t = MACH_MSG_TYPE_MAKE_SEND_ONCE
#endif /* KERNEL_SERVER */
;
+/* Deprecated in favor of device_open_new_request. */
simpleroutine device_open_request(
device_server_port : mach_port_t;
ureplyport reply_port : reply_port_t;
@@ -85,3 +86,10 @@ simpleroutine device_read_request_inband(
in recnum : recnum_t;
in bytes_wanted : int
);
+
+simpleroutine device_open_new_request(
+ device_server_port : mach_port_t;
+ ureplyport reply_port : reply_port_t;
+ in mode : dev_mode_t;
+ in name : new_dev_name_t
+ );
diff --git a/include/device/device_types.defs b/include/device/device_types.defs
index e97d89ca..c74bff51 100644
--- a/include/device/device_types.defs
+++ b/include/device/device_types.defs
@@ -43,10 +43,21 @@
DEVICE_IMPORTS
#endif
-type recnum_t = uint32_t;
+type rpc_recnum_t = rpc_long_natural_t;
+type recnum_t = rpc_recnum_t
+#if defined(KERNEL_SERVER)
+ intran: recnum_t convert_long_natural_from_user(rpc_recnum_t)
+ outtran: rpc_recnum_t convert_long_natural_to_user(recnum_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_recnum_t
+#endif
+ ;
+
type dev_mode_t = uint32_t;
type dev_flavor_t = uint32_t;
type dev_name_t = (MACH_MSG_TYPE_STRING_C, 8*128);
+type new_dev_name_t = c_string[128]
+ ctype: dev_name_t;
type dev_status_t = array[*:1024] of int;
type io_buf_ptr_t = ^array[] of MACH_MSG_TYPE_INTEGER_8;
type io_buf_ptr_inband_t= array[*:128] of char;
diff --git a/include/device/device_types.h b/include/device/device_types.h
index f13122f0..583d9e03 100644
--- a/include/device/device_types.h
+++ b/include/device/device_types.h
@@ -85,11 +85,16 @@ typedef struct {
vm_offset_t data;
vm_size_t count;
} io_buf_vec_t;
+typedef struct {
+ rpc_vm_offset_t data;
+ rpc_vm_size_t count;
+} rpc_io_buf_vec_t;
/*
* Record number for random-access devices
*/
-typedef unsigned int recnum_t;
+typedef long_natural_t recnum_t;
+typedef rpc_long_natural_t rpc_recnum_t;
/*
* Flavors of set/get statuses
diff --git a/include/device/input.h b/include/device/input.h
new file mode 100644
index 00000000..9de73a30
--- /dev/null
+++ b/include/device/input.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Mach.
+ *
+ * GNU Mach is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _DEVICE_INPUT_H
+#define _DEVICE_INPUT_H
+
+#include <mach/boolean.h>
+#include <mach/time_value.h>
+
+/*
+ * Ioctl's have the command encoded in the lower word, and the size of
+ * any in or out parameters in the upper word. The high 3 bits of the
+ * upper word are used to encode the in/out status of the parameter.
+ */
+#define IOCPARM_MASK 0x1fff /* parameter length, at most 13 bits */
+#define IOC_VOID 0x20000000 /* no parameters */
+#define IOC_OUT 0x40000000 /* copy out parameters */
+#define IOC_IN 0x80000000U /* copy in parameters */
+#define IOC_INOUT (IOC_IN|IOC_OUT)
+
+#define _IOC(inout,group,num,len) \
+ (inout | ((len & IOCPARM_MASK) << 16) | ((group) << 8) | (num))
+#define _IO(g,n) _IOC(IOC_VOID, (g), (n), 0)
+#define _IOR(g,n,t) _IOC(IOC_OUT, (g), (n), sizeof(t))
+#define _IOW(g,n,t) _IOC(IOC_IN, (g), (n), sizeof(t))
+#define _IOWR(g,n,t) _IOC(IOC_INOUT, (g), (n), sizeof(t))
+
+typedef uint8_t Scancode;
+typedef uint16_t kev_type; /* kd event type */
+
+/* (used for event records) */
+struct mouse_motion {
+ short mm_deltaX; /* units? */
+ short mm_deltaY;
+};
+
+typedef struct {
+ kev_type type; /* see below */
+ /*
+ * This is not used anymore but is kept for backwards compatibility.
+ * Note the use of rpc_time_value to ensure compatibility between a 64-bit kernel
+ * and a 32-bit userland.
+ */
+ struct rpc_time_value unused_time; /* timestamp*/
+ union { /* value associated with event */
+ boolean_t up; /* MOUSE_LEFT .. MOUSE_RIGHT */
+ Scancode sc; /* KEYBD_EVENT */
+ struct mouse_motion mmotion; /* MOUSE_MOTION */
+ } value;
+} kd_event;
+#define m_deltaX mmotion.mm_deltaX
+#define m_deltaY mmotion.mm_deltaY
+
+/*
+ * kd_event ID's.
+ */
+#define MOUSE_LEFT 1 /* mouse left button up/down */
+#define MOUSE_MIDDLE 2
+#define MOUSE_RIGHT 3
+#define MOUSE_MOTION 4 /* mouse motion */
+#define KEYBD_EVENT 5 /* key up/down */
+
+/* Keyboard ioctls */
+
+/*
+ * KDSKBDMODE - When the console is in "ascii" mode, keyboard events are
+ * converted to ASCII characters that are readable from /dev/console.
+ * When the console is in "event" mode, keyboard events are
+ * timestamped and queued up on /dev/kbd as kd_events. When the last
+ * close is done on /dev/kbd, the console automatically reverts to ascii
+ * mode.
+ * When /dev/mouse is opened, mouse events are timestamped and queued
+ * on /dev/mouse, again as kd_events.
+ *
+ * KDGKBDTYPE - Returns the type of keyboard installed. Currently
+ * there is only one type, KB_VANILLAKB, which is your standard PC-AT
+ * keyboard.
+ */
+
+#define KDSKBDMODE _IOW('K', 1, int) /* set keyboard mode */
+#define KB_EVENT 1
+#define KB_ASCII 2
+
+#define KDGKBDTYPE _IOR('K', 2, int) /* get keyboard type */
+#define KB_VANILLAKB 0
+
+#define KDSETLEDS _IOW('K', 5, int) /* set the keyboard ledstate */
+
+#endif /* _DEVICE_INPUT_H */
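
As a rough illustration (standalone copies of the macros above, not part of the patch), the ioctl encoding packs direction, payload size, group and command number into one 32-bit value; for KDSKBDMODE with a 4-byte int this yields 0x80044b01 on typical ABIs:

#include <stdio.h>

/* Same encoding as above: direction | (len << 16) | (group << 8) | num */
#define IOCPARM_MASK 0x1fff
#define IOC_IN       0x80000000U
#define _IOC(inout, group, num, len) \
    ((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num))
#define _IOW(g, n, t) _IOC(IOC_IN, (g), (n), sizeof(t))

int main(void)
{
    /* KDSKBDMODE == _IOW('K', 1, int): copy-in, 4-byte payload,
     * group 'K' (0x4b), command number 1. */
    printf("KDSKBDMODE = 0x%08x\n", (unsigned) _IOW('K', 1, int));
    return 0;
}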
diff --git a/include/inttypes.h b/include/inttypes.h
index ebafb67a..353984a0 100644
--- a/include/inttypes.h
+++ b/include/inttypes.h
@@ -25,38 +25,40 @@
#ifdef __x86_64__
#define __64PREFIX "l"
+#define __PTRPREFIX "l"
#else
#define __64PREFIX "ll"
+#define __PTRPREFIX
#endif
#define PRId8 "d"
#define PRId16 "d"
#define PRId32 "d"
#define PRId64 __64PREFIX"d"
-#define PRIdPTR __64PREFIX"d"
+#define PRIdPTR __PTRPREFIX"d"
#define PRIi8 "i"
#define PRIi16 "i"
#define PRIi32 "i"
#define PRIi64 __64PREFIX"i"
-#define PRIiPTR __64PREFIX"i"
+#define PRIiPTR __PTRPREFIX"i"
#define PRIu8 "u"
#define PRIu16 "u"
#define PRIu32 "u"
#define PRIu64 __64PREFIX"u"
-#define PRIuPTR __64PREFIX"u"
+#define PRIuPTR __PTRPREFIX"u"
#define PRIx8 "x"
#define PRIx16 "x"
#define PRIx32 "x"
#define PRIx64 __64PREFIX"x"
-#define PRIxPTR __64PREFIX"x"
+#define PRIxPTR __PTRPREFIX"x"
-#define PRIx8 "x"
-#define PRIx16 "x"
-#define PRIx32 "x"
-#define PRIx64 __64PREFIX"x"
-#define PRIxPTR __64PREFIX"x"
+#define PRIo8 "o"
+#define PRIo16 "o"
+#define PRIo32 "o"
+#define PRIo64 __64PREFIX"o"
+#define PRIoPTR __PTRPREFIX"o"
#endif /* _INTTYPES_H_ */
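
For illustration only (assuming the usual uintptr_t definitions), the PRI*PTR macros above now follow the pointer width rather than always using the 64-bit prefix, so the same format string works on both i386 and x86_64 builds:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uintptr_t addr = (uintptr_t) &addr;

    /* PRIxPTR expands to "lx" on x86_64 and "x" on i386,
     * matching the width of uintptr_t. */
    printf("stack address: 0x%" PRIxPTR "\n", addr);
    printf("as decimal:    %" PRIuPTR "\n", addr);
    return 0;
}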
diff --git a/include/mach/bootstrap.defs b/include/mach/bootstrap.defs
deleted file mode 100644
index 0b233e4a..00000000
--- a/include/mach/bootstrap.defs
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1992 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-subsystem bootstrap 999999;
-
-#include <mach/std_types.defs>
-
-ServerPrefix do_;
-
-/*
- * The old form of the bootstrap_privileged_ports RPC
- * is not expressible in Mig syntax, because the reply
- * message doesn't have a return code.
- */
-
-skip; /* old bootstrap_privileged_ports */
-
-/*
- * The startup task can make this call on its bootstrap port
- * to get the privileged ports.
- */
-
-routine bootstrap_privileged_ports(
- bootstrap : mach_port_t;
- out priv_host : mach_port_t;
- out priv_device : mach_port_t);
diff --git a/include/mach/default_pager_helper.defs b/include/mach/default_pager_helper.defs
deleted file mode 100644
index a8a9f78d..00000000
--- a/include/mach/default_pager_helper.defs
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1992 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-subsystem dp_helper 888888;
-
-#include <mach/std_types.defs>
-#include <mach/mach_types.defs>
-
-ServerPrefix do_;
-
-/*
- * Message that the default pager sends to
- * a fileserver who has registered itself
- * with the default pager as a "paging helper"
- * to notify that more paging spage is or
- * is not needed. Same message to let the
- * fileserver know it can (ask to) reclaim space.
- *
- * This message is only sent to a port that
- * has been previously registered via
- * default_pager_register_fileserver.
- * The (optional) reply from the fileserver
- * is a call to default_pager_paging_file.
- */
-
-simpleroutine dp_helper_paging_space(
- dp_helper : mach_port_t;
- space_shortage : boolean_t;
- approx_amount : vm_size_t);
-
diff --git a/include/mach/default_pager_types.defs b/include/mach/default_pager_types.defs
index bee7c259..398c62cd 100644
--- a/include/mach/default_pager_types.defs
+++ b/include/mach/default_pager_types.defs
@@ -29,12 +29,21 @@
#include <mach/std_types.defs>
-type default_pager_info_t = struct[3] of natural_t;
-
-type default_pager_object_t = struct[2] of natural_t;
+type default_pager_info_t = struct {
+ vm_size_t dpi_total_space;
+ vm_size_t dpi_free_space;
+ vm_size_t dpi_page_size;
+};
+
+type default_pager_object_t = struct {
+ vm_offset_t dpo_object;
+ vm_size_t dpo_size;
+};
type default_pager_object_array_t = array[] of default_pager_object_t;
-type default_pager_page_t = struct[1] of natural_t;
+type default_pager_page_t = struct {
+ vm_offset_t dpp_offset;
+};
type default_pager_page_array_t = array[] of default_pager_page_t;
type default_pager_filename_t = (MACH_MSG_TYPE_STRING_C, 8*256);
diff --git a/include/mach/error.h b/include/mach/error.h
index 72a2d79c..035dcf83 100644
--- a/include/mach/error.h
+++ b/include/mach/error.h
@@ -44,7 +44,6 @@
#define err_none (mach_error_t)0
#define ERR_SUCCESS (mach_error_t)0
-#define ERR_ROUTINE_NIL (mach_error_fn_t)0
#define err_system(x) (((x)&0x3f)<<26)
@@ -89,7 +88,6 @@
#ifndef __ASSEMBLER__
typedef kern_return_t mach_error_t;
-typedef mach_error_t (* mach_error_fn_t)();
#endif /* __ASSEMBLER__ */
#endif /* _MACH_ERROR_H_ */
diff --git a/include/mach/exc.defs b/include/mach/exc.defs
index 94af828c..28638e2f 100644
--- a/include/mach/exc.defs
+++ b/include/mach/exc.defs
@@ -44,4 +44,4 @@ routine exception_raise(
task : mach_port_t;
exception : integer_t;
code : integer_t;
- subcode : integer_t);
+ subcode : rpc_long_integer_t);
diff --git a/include/mach/exec/elf.h b/include/mach/exec/elf.h
index 81989309..409947c4 100644
--- a/include/mach/exec/elf.h
+++ b/include/mach/exec/elf.h
@@ -48,6 +48,22 @@ typedef struct {
Elf32_Half e_shstrndx;
} Elf32_Ehdr;
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* Id bytes */
+ Elf64_Half e_type; /* file type */
+ Elf64_Half e_machine; /* machine type */
+ Elf64_Word e_version; /* version number */
+ Elf64_Addr e_entry; /* entry point */
+ Elf64_Off e_phoff; /* Program hdr offset */
+ Elf64_Off e_shoff; /* Section hdr offset */
+ Elf64_Word e_flags; /* Processor flags */
+ Elf64_Half e_ehsize; /* sizeof ehdr */
+ Elf64_Half e_phentsize; /* Program header entry size */
+ Elf64_Half e_phnum; /* Number of program headers */
+ Elf64_Half e_shentsize; /* Section header entry size */
+ Elf64_Half e_shnum; /* Number of section headers */
+ Elf64_Half e_shstrndx; /* String table index */
+} Elf64_Ehdr;
/* e_ident[] identification indexes - figure 4-4, page 4-7 */
@@ -104,6 +120,7 @@ typedef struct {
#define EM_SPARC64 11
#define EM_PARISC 15
#define EM_PPC 20
+#define EM_X86_64 62
/* version - page 4-6 */
@@ -135,6 +152,19 @@ typedef struct {
Elf32_Word sh_entsize;
} Elf32_Shdr;
+typedef struct elf64_shdr {
+ Elf64_Word sh_name;
+ Elf64_Word sh_type;
+ Elf64_Xword sh_flags;
+ Elf64_Addr sh_addr;
+ Elf64_Off sh_offset;
+ Elf64_Xword sh_size;
+ Elf64_Word sh_link;
+ Elf64_Word sh_info;
+ Elf64_Xword sh_addralign;
+ Elf64_Xword sh_entsize;
+} Elf64_Shdr;
+
/* section types - page 4-15, figure 4-9 */
#define SHT_NULL 0
@@ -173,11 +203,28 @@ typedef struct
Elf32_Half st_shndx;
} Elf32_Sym;
+typedef struct elf64_sym {
+ Elf64_Word st_name;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf64_Half st_shndx;
+ Elf64_Addr st_value;
+ Elf64_Xword st_size;
+} Elf64_Sym;
+
+#ifdef __x86_64__
+#define Elf_Sym Elf64_Sym
+#define Elf_Shdr Elf64_Shdr
+#else
+#define Elf_Sym Elf32_Sym
+#define Elf_Shdr Elf32_Shdr
+#endif
+
/* symbol type and binding attributes - page 4-26 */
-#define ELF32_ST_BIND(i) ((i) >> 4)
-#define ELF32_ST_TYPE(i) ((i) & 0xf)
-#define ELF32_ST_INFO(b,t) (((b)<<4)+((t)&0xf))
+#define ELF_ST_BIND(i) ((i) >> 4)
+#define ELF_ST_TYPE(i) ((i) & 0xf)
+#define ELF_ST_INFO(b,t) (((b)<<4)+((t)&0xf))
/* symbol binding - page 4-26, figure 4-16 */
@@ -233,6 +280,17 @@ typedef struct {
Elf32_Word p_align;
} Elf32_Phdr;
+typedef struct {
+ Elf64_Word p_type; /* entry type */
+ Elf64_Word p_flags; /* flags */
+ Elf64_Off p_offset; /* offset */
+ Elf64_Addr p_vaddr; /* virtual address */
+ Elf64_Addr p_paddr; /* physical address */
+ Elf64_Xword p_filesz; /* file size */
+ Elf64_Xword p_memsz; /* memory size */
+ Elf64_Xword p_align; /* memory & file alignment */
+} Elf64_Phdr;
+
/* segment types - page 5-3, figure 5-2 */
#define PT_NULL 0
@@ -291,6 +349,14 @@ typedef struct {
#define DT_TEXTREL 22
#define DT_JMPREL 23
+#if defined(__x86_64__) && ! defined(USER32)
+typedef Elf64_Ehdr Elf_Ehdr;
+typedef Elf64_Phdr Elf_Phdr;
+#else
+typedef Elf32_Ehdr Elf_Ehdr;
+typedef Elf32_Phdr Elf_Phdr;
+#endif
+
/*
* Bootstrap doesn't need machine dependent extensions.
*/
diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
index 531b5d4d..7ecf74d3 100644
--- a/include/mach/gnumach.defs
+++ b/include/mach/gnumach.defs
@@ -189,3 +189,29 @@ routine vm_allocate_contiguous(
pmin : rpc_phys_addr_t;
pmax : rpc_phys_addr_t;
palign : rpc_phys_addr_t);
+
+/*
+ * Set whether TASK is an essential task, i.e. the whole system will crash
+ * if this task crashes.
+ */
+simpleroutine task_set_essential(
+ task : task_t;
+ essential : boolean_t);
+
+/*
+ * Returns physical addresses of a region of memory
+ */
+routine vm_pages_phys(
+ host_priv : host_priv_t;
+ target_task : vm_task_t;
+ vaddr : vm_address_t;
+ size : vm_size_t;
+ out pages : rpc_phys_addr_array_t);
+
+/*
+ * Set the name of thread THREAD to NAME. This is a debugging aid.
+ * NAME will be used in error messages printed by the kernel.
+ */
+simpleroutine thread_set_name(
+ thread : thread_t;
+ name : kernel_debug_name_t);
diff --git a/include/mach/host_info.h b/include/mach/host_info.h
index 60a6aefd..b84376b8 100644
--- a/include/mach/host_info.h
+++ b/include/mach/host_info.h
@@ -46,9 +46,6 @@ typedef integer_t host_info_data_t[HOST_INFO_MAX];
#define KERNEL_VERSION_MAX (512)
typedef char kernel_version_t[KERNEL_VERSION_MAX];
-#define KERNEL_BOOT_INFO_MAX (4096)
-typedef char kernel_boot_info_t[KERNEL_BOOT_INFO_MAX];
-
/*
* Currently defined information.
*/
@@ -60,7 +57,7 @@ typedef char kernel_boot_info_t[KERNEL_BOOT_INFO_MAX];
struct host_basic_info {
integer_t max_cpus; /* max number of cpus possible */
integer_t avail_cpus; /* number of cpus now available */
- vm_size_t memory_size; /* size of memory in bytes */
+ rpc_vm_size_t memory_size; /* size of memory in bytes */
cpu_type_t cpu_type; /* cpu type */
cpu_subtype_t cpu_subtype; /* cpu subtype */
};
diff --git a/include/mach/kern_return.h b/include/mach/kern_return.h
index a9d16e94..15b836f6 100644
--- a/include/mach/kern_return.h
+++ b/include/mach/kern_return.h
@@ -157,10 +157,10 @@
/* Object has been terminated and is no longer available.
*/
-#define KERN_TIMEDOUT 27
- /* Kernel operation timed out. */
+#define KERN_TIMEDOUT 27
+ /* Kernel operation timed out. */
-#define KERN_INTERRUPTED 28
- /* Kernel operation was interrupted. */
+#define KERN_INTERRUPTED 28
+ /* Kernel operation was interrupted. */
#endif /* _MACH_KERN_RETURN_H_ */
diff --git a/include/mach/mach4.defs b/include/mach/mach4.defs
index 61423a44..d63d6f77 100644
--- a/include/mach/mach4.defs
+++ b/include/mach/mach4.defs
@@ -41,10 +41,16 @@ subsystem
#ifdef MACH_PCSAMPLE
-type sampled_pc_t = struct[3] of natural_t;
+type sampled_pc_flavor_t = unsigned;
+
+type sampled_pc_t = struct {
+ rpc_vm_offset_t id;
+ rpc_vm_offset_t pc;
+ sampled_pc_flavor_t sampletype;
+};
+
type sampled_pc_array_t = array[*:512] of sampled_pc_t;
type sampled_pc_seqno_t = unsigned;
-type sampled_pc_flavor_t = natural_t;
routine task_enable_pc_sampling(
host : task_t;
@@ -102,14 +108,16 @@ skip /* pc_sampling reserved 4*/;
protection MAX_PROTECTION and return it in *PORT. */
type vm_offset_array_t = array[*:1024] of vm_offset_t;
type vm_size_array_t = array[*:1024] of vm_size_t;
+type rpc_vm_size_array_t = array[*:1024] of rpc_vm_size_t;
+type rpc_vm_offset_array_t = array[*:1024] of rpc_vm_offset_t;
routine memory_object_create_proxy(
task : ipc_space_t;
max_protection : vm_prot_t;
object : memory_object_array_t =
array[*:1024] of mach_port_send_t;
- offset : vm_offset_array_t;
- start : vm_offset_array_t;
- len : vm_size_array_t;
+ offset : rpc_vm_offset_array_t;
+ start : rpc_vm_offset_array_t;
+ len : rpc_vm_size_array_t;
out proxy : mach_port_t);
/* Gets a proxy to the region that ADDRESS belongs to, starting at the region
diff --git a/include/mach/mach_host.defs b/include/mach/mach_host.defs
index 28439a01..a8c40af6 100644
--- a/include/mach/mach_host.defs
+++ b/include/mach/mach_host.defs
@@ -161,12 +161,17 @@ routine task_get_assignment(
task : task_t;
out assigned_set : processor_set_name_t);
+#if defined(__x86_64__) && !defined(USER32)
+skip;
+#else
/*
* Get string describing current kernel version.
+ * Deprecated, use host_get_kernel_version.
*/
routine host_kernel_version(
host : host_t;
out kernel_version : kernel_version_t);
+#endif
/*
* Set priority for thread.
@@ -346,9 +351,38 @@ routine processor_control(
processor : processor_t;
processor_cmd : processor_info_t);
+/* host_get_boot_info */
+skip;
+
+/*
+ * Get the time on this host.
+ * Available to all.
+ */
+routine host_get_time64(
+ host : host_t;
+ out current_time : time_value64_t);
+
+/*
+ * Set the time on this host.
+ * Only available to privileged users.
+ */
+routine host_set_time64(
+ host : host_t;
+ new_time : time_value64_t);
+
/*
- * Get boot configuration information from kernel.
+ * Arrange for the time on this host to be gradually changed
+ * by an adjustment value, and return the old value.
+ * Only available to privileged users.
*/
-routine host_get_boot_info(
+routine host_adjust_time64(
host_priv : host_priv_t;
- out boot_info : kernel_boot_info_t);
+ in new_adjustment : time_value64_t;
+ out old_adjustment : time_value64_t);
+
+/*
+ * Get string describing current kernel version.
+ */
+routine host_get_kernel_version(
+ host : host_t;
+ out kernel_version : new_kernel_version_t);
diff --git a/include/mach/mach_port.defs b/include/mach/mach_port.defs
index c21c34bc..3823bb14 100644
--- a/include/mach/mach_port.defs
+++ b/include/mach/mach_port.defs
@@ -53,8 +53,7 @@ subsystem
routine mach_port_names(
task : ipc_space_t;
out names : mach_port_name_array_t =
- ^array[] of mach_port_name_t
- ctype: mach_port_array_t;
+ ^array[] of mach_port_name_t;
out types : mach_port_type_array_t =
^array[] of mach_port_type_t);
@@ -209,8 +208,7 @@ routine mach_port_get_set_status(
task : ipc_space_t;
name : mach_port_name_t;
out members : mach_port_name_array_t =
- ^array[] of mach_port_name_t
- ctype: mach_port_array_t);
+ ^array[] of mach_port_name_t);
/*
* Puts the member port (the task must have receive rights)
@@ -350,7 +348,7 @@ skip; /* mach_port_create_act */
routine mach_port_set_protected_payload(
task : ipc_space_t;
name : mach_port_name_t;
- payload : natural_t);
+ payload : rpc_uintptr_t);
/*
* Only valid for receive rights.
diff --git a/include/mach/mach_traps.h b/include/mach/mach_traps.h
index 0433707a..2a87f62a 100644
--- a/include/mach/mach_traps.h
+++ b/include/mach/mach_traps.h
@@ -35,19 +35,9 @@
#include <mach/port.h>
-mach_port_t mach_reply_port
- (void);
-
-mach_port_t mach_thread_self
- (void);
-
-#ifdef __386BSD__
-#undef mach_task_self
-#endif
-mach_port_t mach_task_self
- (void);
-
-mach_port_t mach_host_self
- (void);
+mach_port_name_t mach_reply_port (void);
+mach_port_name_t mach_thread_self (void);
+mach_port_name_t mach_task_self (void);
+mach_port_name_t mach_host_self (void);
#endif /* _MACH_MACH_TRAPS_H_ */
diff --git a/include/mach/mach_types.defs b/include/mach/mach_types.defs
index a0e9241c..74196018 100644
--- a/include/mach/mach_types.defs
+++ b/include/mach/mach_types.defs
@@ -58,7 +58,17 @@ userprefix USERPREFIX;
serverprefix SERVERPREFIX;
#endif
-type mach_port_status_t = struct[9] of integer_t;
+type mach_port_status_t = struct {
+ mach_port_name_t mps_pset; /* containing port set */
+ mach_port_seqno_t mps_seqno; /* sequence number */
+ mach_port_mscount_t mps_mscount; /* make-send count */
+ mach_port_msgcount_t mps_qlimit; /* queue limit */
+ mach_port_msgcount_t mps_msgcount; /* number in the queue */
+ mach_port_rights_t mps_sorights; /* how many send-once rights */
+ boolean_t mps_srights; /* do send rights exist? */
+ boolean_t mps_pdrequest; /* port-deleted requested? */
+ boolean_t mps_nsrequest; /* no-senders requested? */
+};
type task_t = mach_port_t
ctype: mach_port_t
@@ -110,9 +120,40 @@ type ipc_space_t = mach_port_t
#endif /* KERNEL_SERVER */
;
-type vm_address_t = natural_t;
-type vm_offset_t = natural_t;
-type vm_size_t = natural_t;
+#if defined(KERNEL) && defined(USER32)
+type rpc_uintptr_t = uint32_t;
+type rpc_vm_size_t = uint32_t;
+#else /* KERNEL and USER32 */
+type rpc_uintptr_t = uintptr_t;
+type rpc_vm_size_t = uintptr_t;
+#endif /* KERNEL_SERVER and USER32 */
+
+type rpc_vm_offset_t = rpc_vm_size_t;
+
+type vm_address_t = rpc_vm_size_t
+#if defined(KERNEL_SERVER)
+ intran: vm_address_t convert_vm_from_user(rpc_vm_address_t)
+ outtran: rpc_vm_address_t convert_vm_to_user(vm_address_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_vm_address_t
+#endif
+ ;
+type vm_offset_t = rpc_vm_offset_t
+#if defined(KERNEL_SERVER)
+ intran: vm_offset_t convert_vm_from_user(rpc_vm_offset_t)
+ outtran: rpc_vm_offset_t convert_vm_to_user(vm_offset_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_vm_offset_t
+#endif
+ ;
+type vm_size_t = rpc_vm_size_t
+#if defined(KERNEL_SERVER)
+ intran: vm_size_t convert_vm_from_user(rpc_vm_size_t)
+ outtran: rpc_vm_size_t convert_vm_to_user(vm_size_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_vm_size_t
+#endif
+;
type vm_prot_t = int;
type vm_inherit_t = int;
type vm_statistics_data_t = struct[13] of integer_t;
@@ -121,14 +162,8 @@ type vm_machine_attribute_val_t = int;
type vm_sync_t = int;
type thread_info_t = array[*:1024] of integer_t;
-type thread_basic_info_data_t = struct[11] of integer_t;
-type thread_sched_info_data_t = struct[7] of integer_t;
type task_info_t = array[*:1024] of integer_t;
-type task_basic_info_data_t = struct[8] of integer_t;
-type task_events_info = struct[7] of natural_t;
-type task_thread_times_info_data_t = struct[4] of integer_t;
-
type memory_object_t = mach_port_t
ctype: mach_port_t
@@ -168,9 +203,6 @@ type memory_object_name_t = mach_port_t
type memory_object_copy_strategy_t = int;
type memory_object_return_t = int;
-type machine_info_data_t = struct[5] of integer_t;
-type machine_slot_data_t = struct[8] of integer_t;
-
type host_t = mach_port_t
ctype: mach_port_t
#if KERNEL_SERVER
@@ -187,10 +219,6 @@ type host_priv_t = mach_port_t
;
type host_info_t = array[*:1024] of integer_t;
-type host_basic_info_data_t = struct[5] of integer_t;
-type host_sched_info_data_t = struct[2] of integer_t;
-type host_load_info_data_t = struct[6] of integer_t;
-
type processor_t = mach_port_t
ctype: mach_port_t
@@ -202,8 +230,6 @@ type processor_t = mach_port_t
type processor_array_t = ^array[] of processor_t;
type processor_info_t = array[*:1024] of integer_t;
-type processor_basic_info_data_t = struct[5] of integer_t;
-
type processor_set_t = mach_port_t
ctype: mach_port_t
@@ -228,15 +254,28 @@ type processor_set_name_t = mach_port_t
type processor_set_name_array_t = ^array[] of processor_set_name_t;
type processor_set_info_t = array[*:1024] of integer_t;
-type processor_set_basic_info_data_t = struct[5] of integer_t;
-type processor_set_sched_info_data_t = struct[2] of integer_t;
-
type kernel_version_t = (MACH_MSG_TYPE_STRING, 512*8);
+type new_kernel_version_t = c_string[512]
+ ctype: kernel_version_t;
+
+type rpc_time_value_t = struct {
+ rpc_long_integer_t seconds;
+ integer_t microseconds;
+};
+type time_value_t = rpc_time_value_t
+#if defined(KERNEL_SERVER)
+ intran: time_value_t convert_time_value_from_user(rpc_time_value_t)
+ outtran: rpc_time_value_t convert_time_value_to_user(time_value_t)
+#elif defined(KERNEL_USER)
+ ctype: rpc_time_value_t
+#endif
+ ;
-type kernel_boot_info_t = (MACH_MSG_TYPE_STRING, 4096*8);
-
-type time_value_t = struct[2] of integer_t;
+type time_value64_t = struct {
+ int64_t seconds;
+ int64_t nanoseconds;
+};
type emulation_vector_t = ^array[] of vm_offset_t;
diff --git a/include/mach/mach_types.h b/include/mach/mach_types.h
index 57f8f22d..5ecd686a 100644
--- a/include/mach/mach_types.h
+++ b/include/mach/mach_types.h
@@ -57,13 +57,12 @@
#include <mach/vm_sync.h>
#ifdef MACH_KERNEL
-#include <kern/task.h> /* for task_array_t */
-#include <kern/thread.h> /* for thread_array_t */
-#include <kern/processor.h> /* for processor_array_t,
- processor_set_array_t,
- processor_set_name_array_t */
-#include <kern/syscall_emulation.h>
- /* for emulation_vector_t */
+
+typedef struct task *task_t;
+typedef struct thread *thread_t;
+typedef struct processor *processor_t;
+typedef struct processor_set *processor_set_t;
+
#else /* MACH_KERNEL */
typedef mach_port_t task_t;
typedef task_t *task_array_t;
diff --git a/include/mach/machine.h b/include/mach/machine.h
index b696d4b5..9a176e8c 100644
--- a/include/mach/machine.h
+++ b/include/mach/machine.h
@@ -109,6 +109,7 @@ extern struct machine_slot machine_slot[NCPUS];
#define CPU_TYPE_PENTIUM ((cpu_type_t) 18)
#define CPU_TYPE_PENTIUMPRO ((cpu_type_t) 19)
#define CPU_TYPE_POWERPC ((cpu_type_t) 20)
+#define CPU_TYPE_X86_64 ((cpu_type_t) 21)
/*
* Machine subtypes (these are defined here, instead of in a machine
diff --git a/include/mach/message.h b/include/mach/message.h
index 0a7297e1..9790ef98 100644
--- a/include/mach/message.h
+++ b/include/mach/message.h
@@ -132,18 +132,47 @@ typedef unsigned int mach_msg_size_t;
typedef natural_t mach_msg_seqno_t;
typedef integer_t mach_msg_id_t;
-typedef struct {
+/* full header structure, may have different size in user/kernel spaces */
+typedef struct mach_msg_header {
mach_msg_bits_t msgh_bits;
mach_msg_size_t msgh_size;
- mach_port_t msgh_remote_port;
+ union {
+ mach_port_t msgh_remote_port;
+ /*
+ * Ensure msgh_remote_port is wide enough to hold a kernel pointer
+ * to avoid message resizing for the 64 bits case. This field should
+ * not be used since it is here just for padding purposes.
+ */
+ rpc_uintptr_t msgh_remote_port_do_not_use;
+ };
union {
mach_port_t msgh_local_port;
- unsigned long msgh_protected_payload;
+ rpc_uintptr_t msgh_protected_payload;
};
mach_port_seqno_t msgh_seqno;
mach_msg_id_t msgh_id;
} mach_msg_header_t;
+#ifdef KERNEL
+/* user-side header format, needed in the kernel */
+typedef struct {
+ mach_msg_bits_t msgh_bits;
+ mach_msg_size_t msgh_size;
+ union {
+ mach_port_name_t msgh_remote_port;
+ rpc_uintptr_t msgh_remote_port_do_not_use;
+ };
+ union {
+ mach_port_name_t msgh_local_port;
+ rpc_uintptr_t msgh_protected_payload;
+ };
+ mach_port_seqno_t msgh_seqno;
+ mach_msg_id_t msgh_id;
+} mach_msg_user_header_t;
+#else
+typedef mach_msg_header_t mach_msg_user_header_t;
+#endif
+
/*
* There is no fixed upper bound to the size of Mach messages.
*/
@@ -192,7 +221,49 @@ typedef unsigned int mach_msg_type_name_t;
typedef unsigned int mach_msg_type_size_t;
typedef natural_t mach_msg_type_number_t;
+/**
+ * Structure used for inlined port rights in messages.
+ *
+ * We use this to avoid having to perform message resizing in the kernel
+ * since userspace port rights might be smaller than kernel ports in 64 bit
+ * architectures.
+ */
+typedef struct {
+ union {
+ mach_port_name_t name;
+#ifdef KERNEL
+ mach_port_t kernel_port;
+#else
+ uintptr_t kernel_port_do_not_use;
+#endif /* KERNEL */
+ };
+} mach_port_name_inlined_t;
+
typedef struct {
+#ifdef __x86_64__
+ /*
+ * For 64 bits, this struct is 8 bytes long so we
+ * can pack the same amount of information as mach_msg_type_long_t.
+ * Note that for 64 bit userland, msgt_size only needs to be 8 bits long
+ * but for kernel compatibility with 32 bit userland we allow it to be
+ * 16 bits long.
+ *
+ * Effectively, we don't need mach_msg_type_long_t but we are keeping it
+ * for a while to make the code similar between 32 and 64 bits.
+ *
+ * We also keep the msgt_longform bit around simply because it makes it
+ * very easy to convert messages from a 32 bit userland into a 64 bit
+ * kernel. Otherwise, we would have to replicate some of the MiG logic
+ * internally in the kernel.
+ */
+ unsigned int msgt_name : 8,
+ msgt_size : 16,
+ msgt_unused : 5,
+ msgt_inline : 1,
+ msgt_longform : 1,
+ msgt_deallocate : 1;
+ mach_msg_type_number_t msgt_number;
+#else
unsigned int msgt_name : 8,
msgt_size : 8,
msgt_number : 12,
@@ -200,15 +271,44 @@ typedef struct {
msgt_longform : 1,
msgt_deallocate : 1,
msgt_unused : 1;
-} mach_msg_type_t;
+#endif
+} __attribute__ ((aligned (__alignof__ (uintptr_t)))) mach_msg_type_t;
-typedef struct {
+typedef struct {
+#ifdef __x86_64__
+ union {
+ /* On x86_64 this is equivalent to mach_msg_type_t so use
+ * union to overlay with the old field names. */
+ mach_msg_type_t msgtl_header;
+ struct {
+ unsigned int msgtl_name : 8,
+ msgtl_size : 16,
+ msgtl_unused : 5,
+ msgtl_inline : 1,
+ msgtl_longform : 1,
+ msgtl_deallocate : 1;
+ mach_msg_type_number_t msgtl_number;
+ };
+ };
+#else
mach_msg_type_t msgtl_header;
unsigned short msgtl_name;
unsigned short msgtl_size;
natural_t msgtl_number;
-} mach_msg_type_long_t;
+#endif
+} __attribute__ ((aligned (__alignof__ (uintptr_t)))) mach_msg_type_long_t;
+#ifdef __x86_64__
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+static_assert (sizeof (mach_msg_type_t) == sizeof (mach_msg_type_long_t),
+ "mach_msg_type_t and mach_msg_type_long_t need to have the same size.");
+#endif
+#else
+_Static_assert (sizeof (mach_msg_type_t) == sizeof (mach_msg_type_long_t),
+ "mach_msg_type_t and mach_msg_type_long_t need to have the same size.");
+#endif
+#endif
/*
* Known values for the msgt_name field.
@@ -301,6 +401,34 @@ typedef integer_t mach_msg_option_t;
#define MACH_SEND_ALWAYS 0x00010000 /* internal use only */
+#ifdef __x86_64__
+#if defined(KERNEL) && defined(USER32)
+#define MACH_MSG_USER_ALIGNMENT 4
+#else
+#define MACH_MSG_USER_ALIGNMENT 8
+#endif
+#else
+#define MACH_MSG_USER_ALIGNMENT 4
+#endif
+
+#ifdef KERNEL
+/* This is the alignment of msg descriptors and the actual data
+ * for both in-kernel messages and userland messages.
+ *
+ * We have two types of alignment because for specific configurations
+ * (in particular a 64 bit kernel with 32 bit userland) we transform
+ * 4-byte aligned user messages into 8-byte aligned messages (and vice-versa)
+ * so that kernel messages are correctly aligned.
+ */
+#define MACH_MSG_KERNEL_ALIGNMENT sizeof(uintptr_t)
+
+#define mach_msg_align(x, alignment) \
+ ( ( ((vm_offset_t)(x)) + ((alignment)-1) ) & ~((alignment)-1) )
+#define mach_msg_user_align(x) mach_msg_align(x, MACH_MSG_USER_ALIGNMENT)
+#define mach_msg_kernel_align(x) mach_msg_align(x, MACH_MSG_KERNEL_ALIGNMENT)
+#define mach_msg_user_is_misaligned(x) ((x) & ((MACH_MSG_USER_ALIGNMENT)-1))
+#define mach_msg_kernel_is_misaligned(x) ((x) & ((MACH_MSG_KERNEL_ALIGNMENT)-1))
+#endif /* KERNEL */
/*
* Much code assumes that mach_msg_return_t == kern_return_t.
@@ -386,16 +514,15 @@ typedef kern_return_t mach_msg_return_t;
#define MACH_RCV_BODY_ERROR 0x1000400c
/* Error receiving message body. See special bits. */
-
extern mach_msg_return_t
mach_msg_trap
- (mach_msg_header_t *msg,
+ (mach_msg_user_header_t *msg,
mach_msg_option_t option,
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
- mach_port_t rcv_name,
+ mach_port_name_t rcv_name,
mach_msg_timeout_t timeout,
- mach_port_t notify);
+ mach_port_name_t notify);
extern mach_msg_return_t
mach_msg
@@ -403,9 +530,9 @@ mach_msg
mach_msg_option_t option,
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
- mach_port_t rcv_name,
+ mach_port_name_t rcv_name,
mach_msg_timeout_t timeout,
- mach_port_t notify);
+ mach_port_name_t notify);
extern __typeof (mach_msg) __mach_msg;
extern __typeof (mach_msg_trap) __mach_msg_trap;
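
A minimal sketch of the rounding performed by the alignment macros added above (standalone copies of the same arithmetic, not the kernel definitions):

#include <assert.h>
#include <stdint.h>

/* Round x up to the next multiple of a power-of-two alignment,
 * the same arithmetic as mach_msg_align() above. */
static uintptr_t msg_align(uintptr_t x, uintptr_t alignment)
{
    return (x + (alignment - 1)) & ~(alignment - 1);
}

int main(void)
{
    assert(msg_align(0, 8) == 0);
    assert(msg_align(1, 8) == 8);   /* a 1-byte body is padded to 8 */
    assert(msg_align(12, 4) == 12); /* already 4-byte aligned */
    assert(msg_align(13, 4) == 16);
    return 0;
}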
diff --git a/include/mach/mig_errors.h b/include/mach/mig_errors.h
index 5758ccf3..389ce778 100644
--- a/include/mach/mig_errors.h
+++ b/include/mach/mig_errors.h
@@ -68,7 +68,7 @@ typedef struct mig_symtab {
#else
int
#endif
- (*ms_routine)();
+ (*ms_routine)(void);
} mig_symtab_t;
/*
diff --git a/include/mach/mig_support.h b/include/mach/mig_support.h
index 865bdc5f..ed871c0f 100644
--- a/include/mach/mig_support.h
+++ b/include/mach/mig_support.h
@@ -47,13 +47,11 @@ extern void mig_dealloc_reply_port(mach_port_t);
extern void mig_put_reply_port(mach_port_t);
-extern mach_port_t mig_get_reply_port(void);
+extern mach_port_name_t mig_get_reply_port(void);
extern void mig_reply_setup(const mach_msg_header_t *_request,
mach_msg_header_t *reply);
-#ifndef MACH_KERNEL
-extern vm_size_t mig_strncpy(char *_dest, const char *_src, vm_size_t _len);
-#endif
+extern vm_size_t mig_strncpy(char *_dest, const char *_src, vm_size_t _len);
#endif /* not defined(_MACH_MIG_SUPPORT_H_) */
diff --git a/include/mach/msg_type.h b/include/mach/msg_type.h
deleted file mode 100644
index 3298fd54..00000000
--- a/include/mach/msg_type.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * This file defines user msg types that may be ored into
- * the msg_type field in a msg header. Values 0-5 are reserved
- * for use by the kernel and are defined in message.h.
- *
- */
-
-#ifndef _MACH_MSG_TYPE_H_
-#define _MACH_MSG_TYPE_H_
-
-#define MSG_TYPE_CAMELOT (1 << 6)
-#define MSG_TYPE_ENCRYPTED (1 << 7)
-#define MSG_TYPE_RPC (1 << 8) /* Reply expected */
-
-#include <mach/message.h>
-
-#endif /* _MACH_MSG_TYPE_H_ */
diff --git a/include/mach/multiboot.h b/include/mach/multiboot.h
deleted file mode 100644
index b23df4a4..00000000
--- a/include/mach/multiboot.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 1995-1994 The University of Utah and
- * the Computer Systems Laboratory at the University of Utah (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the
- * Computer Systems Laboratory at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Author: Bryan Ford, University of Utah CSL
- */
-#ifndef _MACH_MULTIBOOT_H_
-#define _MACH_MULTIBOOT_H_
-
-#include <mach/machine/vm_types.h>
-#include <mach/machine/multiboot.h>
-
-struct multiboot_region
-{
- vm_offset_t start;
- vm_offset_t end;
-};
-
-struct multiboot_rlist
-{
- int count;
- vm_offset_t regions;
-};
-
-struct multiboot_module
-{
- /* Location and size of the module. */
- struct multiboot_region region;
-
- /* Command-line associated with this boot module:
- a null-terminated ASCII string.
- Both start and end are 0 if there is no command line.
- The end pointer points at least one byte past the terminating null. */
- struct multiboot_region cmdline;
-
- /* Reserved; boot loader must initialize to zero. */
- natural_t pad[4];
-};
-
-struct multiboot_info
-{
- /* List of available physical memory regions.
- Can (and probably does) include the memory containing
- the kernel, boot modules, this structure, etc. */
- struct multiboot_rlist avail;
-
- /* Physical memory region occupied by things the boot loader set up
- and the OS shouldn't clobber at least until it's all done initializing itself.
- This includes the kernel image, boot modules, these structures,
- initial processor tables, etc. */
- struct multiboot_rlist occupied;
-
- /* Command-line for the OS kernel: a null-terminated ASCII string.
- Both start and end are 0 if there is no command line.
- The end pointer points at least one byte past the terminating null. */
- struct multiboot_region cmdline;
-
- /* Secondary boot modules loaded with this kernel image. */
- int nmods;
- vm_offset_t mods;
-
- /* Reserved; boot loader must initialize to zero. */
- natural_t pad[4];
-};
-
-#endif /* _MACH_MULTIBOOT_H_ */
diff --git a/include/mach/notify.h b/include/mach/notify.h
index 6d783dde..14bcd6f6 100644
--- a/include/mach/notify.h
+++ b/include/mach/notify.h
@@ -58,13 +58,13 @@
typedef struct {
mach_msg_header_t not_header;
mach_msg_type_t not_type; /* MACH_MSG_TYPE_PORT_NAME */
- mach_port_t not_port;
+ mach_port_name_t not_port;
} mach_port_deleted_notification_t;
typedef struct {
mach_msg_header_t not_header;
mach_msg_type_t not_type; /* MACH_MSG_TYPE_PORT_NAME */
- mach_port_t not_port;
+ mach_port_name_t not_port;
} mach_msg_accepted_notification_t;
typedef struct {
@@ -86,7 +86,7 @@ typedef struct {
typedef struct {
mach_msg_header_t not_header;
mach_msg_type_t not_type; /* MACH_MSG_TYPE_PORT_NAME */
- mach_port_t not_port;
+ mach_port_name_t not_port;
} mach_dead_name_notification_t;
#endif /* _MACH_NOTIFY_H_ */
diff --git a/include/mach/pc_sample.h b/include/mach/pc_sample.h
index 662addb9..2d56b348 100644
--- a/include/mach/pc_sample.h
+++ b/include/mach/pc_sample.h
@@ -29,7 +29,7 @@
#include <mach/machine/vm_types.h>
-typedef natural_t sampled_pc_flavor_t;
+typedef unsigned int sampled_pc_flavor_t;
#define SAMPLED_PC_PERIODIC 0x1 /* default */
@@ -54,8 +54,8 @@ typedef natural_t sampled_pc_flavor_t;
*/
typedef struct sampled_pc {
- natural_t id;
- vm_offset_t pc;
+ rpc_vm_offset_t id; /* task_t address */
+ rpc_vm_offset_t pc; /* program counter */
sampled_pc_flavor_t sampletype;
} sampled_pc_t;
diff --git a/include/mach/port.h b/include/mach/port.h
index 7ab3ab7c..c9bbcf17 100644
--- a/include/mach/port.h
+++ b/include/mach/port.h
@@ -38,8 +38,25 @@
#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
+/*
+ * Port names are the type used by userspace; they are always 32 bits wide.
+ */
+typedef unsigned int mach_port_name_t;
+typedef mach_port_name_t *mach_port_name_array_t;
+typedef const mach_port_name_t *const_mach_port_name_array_t;
+/*
+ * A port is represented
+ * - by a port name in userspace
+ * - by a pointer in kernel space
+ * While in userspace mach_port_name_t and mach_port_t are interchangeable,
+ * in kernel space they are distinct and must be converted appropriately.
+ */
+#ifdef KERNEL
typedef vm_offset_t mach_port_t;
+#else /* KERNEL */
+typedef mach_port_name_t mach_port_t;
+#endif
typedef mach_port_t *mach_port_array_t;
typedef const mach_port_t *const_mach_port_array_t;
typedef int *rpc_signature_info_t;
@@ -53,11 +70,15 @@ typedef int *rpc_signature_info_t;
* that a port right was present, but it died.
*/
-#define MACH_PORT_NULL ((mach_port_t) 0)
+#define MACH_PORT_NULL 0 /* works with both user and kernel ports */
#define MACH_PORT_DEAD ((mach_port_t) ~0)
+#define MACH_PORT_NAME_NULL ((mach_port_name_t) 0)
+#define MACH_PORT_NAME_DEAD ((mach_port_name_t) ~0)
-#define MACH_PORT_VALID(name) \
- (((name) != MACH_PORT_NULL) && ((name) != MACH_PORT_DEAD))
+#define MACH_PORT_VALID(port) \
+ (((port) != MACH_PORT_NULL) && ((port) != MACH_PORT_DEAD))
+#define MACH_PORT_NAME_VALID(name) \
+ (((name) != MACH_PORT_NAME_NULL) && ((name) != MACH_PORT_NAME_DEAD))
/*
* These are the different rights a task may have.
@@ -121,15 +142,15 @@ typedef unsigned int mach_port_msgcount_t; /* number of msgs */
typedef unsigned int mach_port_rights_t; /* number of rights */
typedef struct mach_port_status {
- mach_port_t mps_pset; /* containing port set */
+ mach_port_name_t mps_pset; /* containing port set */
mach_port_seqno_t mps_seqno; /* sequence number */
-/*mach_port_mscount_t*/natural_t mps_mscount; /* make-send count */
-/*mach_port_msgcount_t*/natural_t mps_qlimit; /* queue limit */
-/*mach_port_msgcount_t*/natural_t mps_msgcount; /* number in the queue */
-/*mach_port_rights_t*/natural_t mps_sorights; /* how many send-once rights */
-/*boolean_t*/natural_t mps_srights; /* do send rights exist? */
-/*boolean_t*/natural_t mps_pdrequest; /* port-deleted requested? */
-/*boolean_t*/natural_t mps_nsrequest; /* no-senders requested? */
+ mach_port_mscount_t mps_mscount; /* make-send count */
+ mach_port_msgcount_t mps_qlimit; /* queue limit */
+ mach_port_msgcount_t mps_msgcount; /* number in the queue */
+ mach_port_rights_t mps_sorights; /* how many send-once rights */
+ boolean_t mps_srights; /* do send rights exist? */
+ boolean_t mps_pdrequest; /* port-deleted requested? */
+ boolean_t mps_nsrequest; /* no-senders requested? */
} mach_port_status_t;
#define MACH_PORT_QLIMIT_DEFAULT ((mach_port_msgcount_t) 5)
diff --git a/include/mach/rpc.h b/include/mach/rpc.h
deleted file mode 100644
index 36eb5921..00000000
--- a/include/mach/rpc.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 1993,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL). All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- */
-
-#ifndef MACH_RPC_H
-#define MACH_RPC_H
-
-#include <mach/kern_return.h>
-#include <mach/message.h>
-
-/*
- * Description of a port passed up by the leaky-register RPC path
- * when it needs to perform translation.
- */
-struct rpc_port_desc {
- mach_port_t name;
- mach_msg_type_name_t msgt_name;
-};
-
-#endif /* MACH_RPC_H */
diff --git a/include/mach/std_types.defs b/include/mach/std_types.defs
index 5d95ab42..b461f062 100644
--- a/include/mach/std_types.defs
+++ b/include/mach/std_types.defs
@@ -30,9 +30,6 @@
#ifndef _MACH_STD_TYPES_DEFS_
#define _MACH_STD_TYPES_DEFS_
-type char = MACH_MSG_TYPE_CHAR;
-type short = MACH_MSG_TYPE_INTEGER_16;
-type int = MACH_MSG_TYPE_INTEGER_32;
type int32_t = MACH_MSG_TYPE_INTEGER_32;
type int64_t = MACH_MSG_TYPE_INTEGER_64;
type boolean_t = MACH_MSG_TYPE_BOOLEAN;
@@ -58,10 +55,8 @@ type mach_port_t = MACH_MSG_TYPE_COPY_SEND
;
type mach_port_array_t = array[] of mach_port_t;
-type mach_port_name_t = MACH_MSG_TYPE_PORT_NAME
- ctype: mach_port_t;
-type mach_port_name_array_t = array[] of mach_port_name_t
- ctype: mach_port_array_t;
+type mach_port_name_t = MACH_MSG_TYPE_PORT_NAME;
+type mach_port_name_array_t = array[] of mach_port_name_t;
type mach_port_right_t = natural_t;
diff --git a/include/mach/std_types.h b/include/mach/std_types.h
index f78e236a..0d5db0ae 100644
--- a/include/mach/std_types.h
+++ b/include/mach/std_types.h
@@ -41,8 +41,4 @@
typedef vm_offset_t pointer_t;
typedef vm_offset_t vm_address_t;
-#ifdef MACH_KERNEL
-#include <ipc/ipc_port.h>
-#endif /* MACH_KERNEL */
-
#endif /* _MACH_STD_TYPES_H_ */
diff --git a/include/mach/task_info.h b/include/mach/task_info.h
index 5607178c..0e048c5e 100644
--- a/include/mach/task_info.h
+++ b/include/mach/task_info.h
@@ -54,51 +54,65 @@ typedef integer_t task_info_data_t[TASK_INFO_MAX];
struct task_basic_info {
integer_t suspend_count; /* suspend count for task */
integer_t base_priority; /* base scheduling priority */
- vm_size_t virtual_size; /* number of virtual pages */
- vm_size_t resident_size; /* number of resident pages */
- time_value_t user_time; /* total user run time for
+ rpc_vm_size_t virtual_size; /* number of virtual pages */
+ rpc_vm_size_t resident_size; /* number of resident pages */
+ /* Deprecated, please use user_time64 */
+ rpc_time_value_t user_time; /* total user run time for
terminated threads */
- time_value_t system_time; /* total system run time for
+ /* Deprecated, please use system_time64 */
+ rpc_time_value_t system_time; /* total system run time for
terminated threads */
- time_value_t creation_time; /* creation time stamp */
+ /* Deprecated, please use creation_time64 */
+ rpc_time_value_t creation_time; /* creation time stamp */
+ time_value64_t user_time64; /* total user run time for
+ terminated threads */
+ time_value64_t system_time64; /* total system run time for
+ terminated threads */
+ time_value64_t creation_time64; /* creation time stamp */
};
typedef struct task_basic_info task_basic_info_data_t;
typedef struct task_basic_info *task_basic_info_t;
#define TASK_BASIC_INFO_COUNT \
- (sizeof(task_basic_info_data_t) / sizeof(natural_t))
+ (sizeof(task_basic_info_data_t) / sizeof(integer_t))
#define TASK_EVENTS_INFO 2 /* various event counts */
struct task_events_info {
- natural_t faults; /* number of page faults */
- natural_t zero_fills; /* number of zero fill pages */
- natural_t reactivations; /* number of reactivated pages */
- natural_t pageins; /* number of actual pageins */
- natural_t cow_faults; /* number of copy-on-write faults */
- natural_t messages_sent; /* number of messages sent */
- natural_t messages_received; /* number of messages received */
+ rpc_long_natural_t faults; /* number of page faults */
+ rpc_long_natural_t zero_fills; /* number of zero fill pages */
+ rpc_long_natural_t reactivations; /* number of reactivated pages */
+ rpc_long_natural_t pageins; /* number of actual pageins */
+ rpc_long_natural_t cow_faults; /* number of copy-on-write faults */
+ rpc_long_natural_t messages_sent; /* number of messages sent */
+ rpc_long_natural_t messages_received; /* number of messages received */
};
typedef struct task_events_info task_events_info_data_t;
typedef struct task_events_info *task_events_info_t;
#define TASK_EVENTS_INFO_COUNT \
- (sizeof(task_events_info_data_t) / sizeof(natural_t))
+ (sizeof(task_events_info_data_t) / sizeof(integer_t))
#define TASK_THREAD_TIMES_INFO 3 /* total times for live threads -
only accurate if suspended */
struct task_thread_times_info {
- time_value_t user_time; /* total user run time for
+ /* Deprecated, please use user_time64 */
+ rpc_time_value_t user_time; /* total user run time for
live threads */
- time_value_t system_time; /* total system run time for
+ /* Deprecated, please use system_time64 */
+ rpc_time_value_t system_time; /* total system run time for
live threads */
+ time_value64_t user_time64; /* total user run time for
+ live threads */
+ time_value64_t system_time64; /* total system run time for
+ live threads */
};
typedef struct task_thread_times_info task_thread_times_info_data_t;
typedef struct task_thread_times_info *task_thread_times_info_t;
#define TASK_THREAD_TIMES_INFO_COUNT \
- (sizeof(task_thread_times_info_data_t) / sizeof(natural_t))
+ (sizeof(task_thread_times_info_data_t) / sizeof(integer_t))
/*
* Flavor definitions for task_ras_control
diff --git a/include/mach/thread_info.h b/include/mach/thread_info.h
index 569c8c84..4f322e0a 100644
--- a/include/mach/thread_info.h
+++ b/include/mach/thread_info.h
@@ -55,8 +55,10 @@ typedef integer_t thread_info_data_t[THREAD_INFO_MAX];
#define THREAD_BASIC_INFO 1 /* basic information */
struct thread_basic_info {
- time_value_t user_time; /* user run time */
- time_value_t system_time; /* system run time */
+ /* Deprecated, please use user_time64 */
+ rpc_time_value_t user_time; /* user run time */
+ /* Deprecated, please use system_time64 */
+ rpc_time_value_t system_time; /* system run time */
integer_t cpu_usage; /* scaled cpu usage percentage */
integer_t base_priority; /* base scheduling priority */
integer_t cur_priority; /* current scheduling priority */
@@ -65,7 +67,11 @@ struct thread_basic_info {
integer_t suspend_count; /* suspend count for thread */
integer_t sleep_time; /* number of seconds that thread
has been sleeping */
- time_value_t creation_time; /* time stamp of creation */
+ /* Deprecated, please use creation_time64 */
+ rpc_time_value_t creation_time; /* time stamp of creation */
+ time_value64_t user_time64; /* user run time */
+ time_value64_t system_time64; /* system run time */
+ time_value64_t creation_time64; /* time stamp of creation */
};
typedef struct thread_basic_info thread_basic_info_data_t;
diff --git a/include/mach/time_value.h b/include/mach/time_value.h
index 3a9c384c..e08707bc 100644
--- a/include/mach/time_value.h
+++ b/include/mach/time_value.h
@@ -33,21 +33,66 @@
* Time value returned by kernel.
*/
+struct rpc_time_value {
+ /* TODO: this should be 64 bits regardless of the arch to be Y2038 proof. */
+ rpc_long_integer_t seconds;
+ integer_t microseconds;
+};
+
+/*
+ * Time value used by kernel interfaces. Ideally they should be migrated
+ * to use time_value64 below.
+ */
struct time_value {
- integer_t seconds;
+ long_integer_t seconds;
integer_t microseconds;
};
typedef struct time_value time_value_t;
+#ifdef KERNEL
+typedef struct rpc_time_value rpc_time_value_t;
+#else
+typedef struct time_value rpc_time_value_t;
+#endif
+
+/*
+ * Time value used internally by the kernel that uses 64 bits to track seconds
+ * and nanoseconds. Note that the current resolution is only microseconds.
+ */
+struct time_value64 {
+ int64_t seconds;
+ int64_t nanoseconds;
+};
+typedef struct time_value64 time_value64_t;
+
+/**
+ * Functions used by MiG to perform user-to-kernel conversion and vice versa.
+ * We only do this because we may run a 64-bit kernel with a 32-bit user space.
+ */
+static __inline__ rpc_time_value_t convert_time_value_to_user(time_value_t tv)
+{
+ rpc_time_value_t user = {.seconds = tv.seconds, .microseconds = tv.microseconds};
+ return user;
+}
+static __inline__ time_value_t convert_time_value_from_user(rpc_time_value_t tv)
+{
+ time_value_t kernel = {.seconds = tv.seconds, .microseconds = tv.microseconds};
+ return kernel;
+}
+
/*
* Macros to manipulate time values. Assume that time values
* are normalized (microseconds <= 999999).
*/
#define TIME_MICROS_MAX (1000000)
+#define TIME_NANOS_MAX (1000000000)
#define time_value_assert(val) \
assert(0 <= (val)->microseconds && (val)->microseconds < TIME_MICROS_MAX);
+#define time_value64_assert(val) \
+ assert(0 <= (val)->nanoseconds && (val)->nanoseconds < TIME_NANOS_MAX);
+
#define time_value_add_usec(val, micros) { \
time_value_assert(val); \
if (((val)->microseconds += (micros)) \
@@ -58,13 +103,23 @@ typedef struct time_value time_value_t;
time_value_assert(val); \
}
-#define time_value_sub_usec(val, micros) { \
- time_value_assert(val); \
- if (((val)->microseconds -= (micros)) < 0) { \
- (val)->microseconds += TIME_MICROS_MAX; \
+#define time_value64_add_nanos(val, nanos) { \
+ time_value64_assert(val); \
+ if (((val)->nanoseconds += (nanos)) \
+ >= TIME_NANOS_MAX) { \
+ (val)->nanoseconds -= TIME_NANOS_MAX; \
+ (val)->seconds++; \
+ } \
+ time_value64_assert(val); \
+}
+
+#define time_value64_sub_nanos(val, nanos) { \
+ time_value64_assert(val); \
+ if (((val)->nanoseconds -= (nanos)) < 0) { \
+ (val)->nanoseconds += TIME_NANOS_MAX; \
(val)->seconds--; \
} \
- time_value_assert(val); \
+ time_value64_assert(val); \
}
#define time_value_add(result, addend) { \
@@ -73,12 +128,33 @@ typedef struct time_value time_value_t;
time_value_add_usec(result, (addend)->microseconds); \
}
-#define time_value_sub(result, subtrahend) { \
- time_value_assert(subtrahend); \
+#define time_value64_add(result, addend) { \
+ time_value64_assert(addend); \
+ (result)->seconds += (addend)->seconds; \
+ time_value64_add_nanos(result, (addend)->nanoseconds); \
+ }
+
+#define time_value64_sub(result, subtrahend) { \
+ time_value64_assert(subtrahend); \
(result)->seconds -= (subtrahend)->seconds; \
- time_value_sub_usec(result, (subtrahend)->microseconds); \
+ time_value64_sub_nanos(result, (subtrahend)->nanoseconds); \
}
+#define time_value64_init(tv) { \
+ (tv)->seconds = 0; \
+ (tv)->nanoseconds = 0; \
+ }
+
+#define TIME_VALUE64_TO_TIME_VALUE(tv64, tv) do { \
+ (tv)->seconds = (tv64)->seconds; \
+ (tv)->microseconds = (tv64)->nanoseconds / 1000; \
+} while(0)
+
+#define TIME_VALUE_TO_TIME_VALUE64(tv, tv64) do { \
+ (tv64)->seconds = (tv)->seconds; \
+ (tv64)->nanoseconds = (tv)->microseconds * 1000; \
+} while(0)
+
/*
* Time value available through the mapped-time interface.
* Read this mapped value with
@@ -94,6 +170,8 @@ typedef struct mapped_time_value {
integer_t seconds;
integer_t microseconds;
integer_t check_seconds;
+ struct time_value64 time_value;
+ int64_t check_seconds64;
} mapped_time_value_t;
/* Macros for converting between struct timespec and time_value_t. */
@@ -108,4 +186,16 @@ typedef struct mapped_time_value {
(tv)->microseconds = (ts)->tv_nsec / 1000; \
} while(0)
+/* Macros for converting between struct timespec and time_value64_t. */
+
+#define TIME_VALUE64_TO_TIMESPEC(tv, ts) do { \
+ (ts)->tv_sec = (tv)->seconds; \
+ (ts)->tv_nsec = (tv)->nanoseconds; \
+} while(0)
+
+#define TIMESPEC_TO_TIME_VALUE64(tv, ts) do { \
+ (tv)->seconds = (ts)->tv_sec; \
+ (tv)->nanoseconds = (ts)->tv_nsec; \
+} while(0)
+
#endif /* _MACH_TIME_VALUE_H_ */
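
A minimal sketch (standalone copies of the structure and carry logic above, assuming increments below one second as the original macro does) of how the 64-bit time arithmetic behaves; the real definitions live in mach/time_value.h:

#include <assert.h>
#include <stdint.h>

struct time_value64 { int64_t seconds; int64_t nanoseconds; };

#define TIME_NANOS_MAX 1000000000

/* Same carry logic as time_value64_add_nanos() above. */
static void tv64_add_nanos(struct time_value64 *val, int64_t nanos)
{
    val->nanoseconds += nanos;
    if (val->nanoseconds >= TIME_NANOS_MAX) {
        val->nanoseconds -= TIME_NANOS_MAX;
        val->seconds++;
    }
}

int main(void)
{
    struct time_value64 t = { .seconds = 1, .nanoseconds = 999999000 };

    tv64_add_nanos(&t, 2000);   /* crosses the second boundary */
    assert(t.seconds == 2 && t.nanoseconds == 1000);
    return 0;
}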
diff --git a/include/mach_debug/hash_info.h b/include/mach_debug/hash_info.h
index 6944277d..8e6f19cf 100644
--- a/include/mach_debug/hash_info.h
+++ b/include/mach_debug/hash_info.h
@@ -33,7 +33,7 @@
*/
typedef struct hash_info_bucket {
- natural_t hib_count; /* number of records in bucket */
+ unsigned int hib_count; /* number of records in bucket */
} hash_info_bucket_t;
typedef hash_info_bucket_t *hash_info_bucket_array_t;
diff --git a/include/mach_debug/ipc_info.h b/include/mach_debug/ipc_info.h
deleted file mode 100644
index a47ae7b4..00000000
--- a/include/mach_debug/ipc_info.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * File: mach_debug/ipc_info.h
- * Author: Rich Draves
- * Date: March, 1990
- *
- * Definitions for the IPC debugging interface.
- */
-
-#ifndef _MACH_DEBUG_IPC_INFO_H_
-#define _MACH_DEBUG_IPC_INFO_H_
-
-#include <mach/boolean.h>
-#include <mach/port.h>
-#include <mach/machine/vm_types.h>
-
-/*
- * Remember to update the mig type definitions
- * in mach_debug_types.defs when adding/removing fields.
- */
-
-typedef struct ipc_info_name {
- mach_port_t iin_name; /* port name, including gen number */
-/*boolean_t*/integer_t iin_marequest; /* extant msg-accepted request? */
- mach_port_type_t iin_type; /* straight port type */
- mach_port_urefs_t iin_urefs; /* user-references */
- vm_offset_t iin_object; /* object pointer */
- natural_t iin_next; /* marequest/next in free list */
-} ipc_info_name_t;
-
-typedef ipc_info_name_t *ipc_info_name_array_t;
-
-/*
- * Type definitions for mach_port_kernel_object.
- * By remarkable coincidence, these closely resemble
- * the IKOT_* definitions in ipc/ipc_kobject.h.
- */
-
-#define IPC_INFO_TYPE_NONE 0
-#define IPC_INFO_TYPE_THREAD 1
-#define IPC_INFO_TYPE_TASK 2
-#define IPC_INFO_TYPE_HOST 3
-#define IPC_INFO_TYPE_HOST_PRIV 4
-#define IPC_INFO_TYPE_PROCESSOR 5
-#define IPC_INFO_TYPE_PSET 6
-#define IPC_INFO_TYPE_PSET_NAME 7
-#define IPC_INFO_TYPE_PAGER 8
-#define IPC_INFO_TYPE_PAGING_REQUEST 9
-#define IPC_INFO_TYPE_DEVICE 10
-#define IPC_INFO_TYPE_XMM_PAGER 11
-#define IPC_INFO_TYPE_PAGING_NAME 12
-
-#endif /* _MACH_DEBUG_IPC_INFO_H_ */
diff --git a/include/mach_debug/mach_debug.defs b/include/mach_debug/mach_debug.defs
index c8e8b1b4..2de7df52 100644
--- a/include/mach_debug/mach_debug.defs
+++ b/include/mach_debug/mach_debug.defs
@@ -135,22 +135,8 @@ routine host_virtual_physical_table_info(
skip; /* host_virtual_physical_table_info */
#endif /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */
-#if !defined(MACH_KDB) || MACH_KDB
-/*
- * Loads a symbol table for an external file into the kernel debugger.
- * The symbol table data is an array of characters. It is assumed that
- * the caller and the kernel debugger agree on its format.
- */
-
-routine host_load_symbol_table(
- host : host_priv_t;
- task : task_t;
- name : symtab_name_t;
- symtab : pointer_t);
-
-#else /* !defined(MACH_KDB) || MACH_KDB */
-skip; /* host_load_symbol_table */
-#endif /* !defined(MACH_KDB) || MACH_KDB */
+/* The old host_load_symbol_table with a different ABI for symtab_name_t */
+skip;
#if !defined(MACH_IPC_DEBUG) || MACH_IPC_DEBUG
@@ -214,3 +200,29 @@ routine host_slab_info(
host : host_t;
out info : cache_info_array_t,
CountInOut, Dealloc);
+
+#if !defined(MACH_KDB) || MACH_KDB
+/*
+ * Loads a symbol table for an external file into the kernel debugger.
+ * The symbol table data is an array of characters. It is assumed that
+ * the caller and the kernel debugger agree on its format.
+ */
+
+routine host_load_symbol_table(
+ host : host_priv_t;
+ task : task_t;
+ name : symtab_name_t;
+ symtab : pointer_t);
+
+#else /* !defined(MACH_KDB) || MACH_KDB */
+skip; /* host_load_symbol_table */
+#endif /* !defined(MACH_KDB) || MACH_KDB */
+
+#if !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG
+routine mach_vm_object_pages_phys(
+ object : memory_object_name_t;
+ out pages : vm_page_phys_info_array_t,
+ CountInOut, Dealloc);
+#else /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */
+skip; /* mach_vm_object_pages_phys */
+#endif /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */
diff --git a/include/mach_debug/mach_debug_types.defs b/include/mach_debug/mach_debug_types.defs
index 8df2f344..d897380f 100644
--- a/include/mach_debug/mach_debug_types.defs
+++ b/include/mach_debug/mach_debug_types.defs
@@ -32,25 +32,87 @@
#include <mach/std_types.defs>
-type cache_info_t = struct[19] of integer_t;
+#define CACHE_NAME_MAX_LEN 32
+type cache_name_t = struct[CACHE_NAME_MAX_LEN] of char;
+#undef CACHE_NAME_MAX_LEN
+type cache_info_t = struct {
+ integer_t flags;
+ rpc_vm_size_t cpu_pool_size;
+ rpc_vm_size_t obj_size;
+ rpc_vm_size_t align;
+ rpc_vm_size_t buf_size;
+ rpc_vm_size_t slab_size;
+ rpc_long_natural_t bufs_per_slab;
+ rpc_long_natural_t nr_objs;
+ rpc_long_natural_t nr_bufs;
+ rpc_long_natural_t nr_slabs;
+ rpc_long_natural_t nr_free_slabs;
+ cache_name_t name;
+};
type cache_info_array_t = array[] of cache_info_t;
-type hash_info_bucket_t = struct[1] of natural_t;
+type hash_info_bucket_t = struct {
+ unsigned hib_count;
+};
type hash_info_bucket_array_t = array[] of hash_info_bucket_t;
-type ipc_info_name_t = struct[6] of natural_t;
-type ipc_info_name_array_t = array[] of ipc_info_name_t;
-
-type vm_region_info_t = struct[11] of natural_t;
+type vm_region_info_t = struct {
+ rpc_vm_offset_t vri_start;
+ rpc_vm_offset_t vri_end;
+ vm_prot_t vri_protection;
+ vm_prot_t vri_max_protection;
+ vm_inherit_t vri_inheritance;
+ unsigned vri_wired_count;
+ unsigned vri_user_wired_count;
+ rpc_vm_offset_t vri_object;
+ rpc_vm_offset_t vri_offset;
+ integer_t vri_needs_copy;
+ unsigned vri_sharing;
+};
type vm_region_info_array_t = array[] of vm_region_info_t;
-type vm_object_info_t = struct[14] of natural_t;
+type vm_object_info_state_t = uint32_t;
+type vm_object_info_t = struct {
+ rpc_vm_offset_t voi_object;
+ rpc_vm_size_t voi_pagesize;
+ rpc_vm_size_t voi_size;
+ unsigned voi_ref_count;
+ unsigned voi_resident_page_count;
+ unsigned voi_absent_count;
+ rpc_vm_offset_t voi_copy;
+ rpc_vm_offset_t voi_shadow;
+ rpc_vm_offset_t voi_shadow_offset;
+ rpc_vm_offset_t voi_paging_offset;
+ memory_object_copy_strategy_t voi_copy_strategy;
+ rpc_vm_offset_t voi_last_alloc;
+ unsigned voi_paging_in_progress;
+ vm_object_info_state_t voi_state;
+};
type vm_object_info_array_t = array[] of vm_object_info_t;
-type vm_page_info_t = struct[6] of natural_t;
+type vm_page_info_state_t = uint32_t;
+
+type vm_page_info_t = struct {
+ rpc_vm_offset_t vpi_offset;
+ rpc_vm_offset_t vpi_phys_addr;
+ unsigned vpi_wire_count;
+ vm_prot_t vpi_page_lock;
+ vm_prot_t vpi_unlock_request;
+ vm_page_info_state_t vpi_state;
+};
type vm_page_info_array_t = array[] of vm_page_info_t;
-type symtab_name_t = (MACH_MSG_TYPE_STRING_C, 8*32);
+type vm_page_phys_info_t = struct {
+ rpc_vm_offset_t vpi_offset;
+ rpc_phys_addr_t vpi_phys_addr;
+ unsigned vpi_wire_count;
+ vm_prot_t vpi_page_lock;
+ vm_prot_t vpi_unlock_request;
+ vm_page_info_state_t vpi_state;
+};
+type vm_page_phys_info_array_t = array[] of vm_page_phys_info_t;
+
+type symtab_name_t = c_string[32];
type kernel_debug_name_t = c_string[*: 64];
diff --git a/include/mach_debug/mach_debug_types.h b/include/mach_debug/mach_debug_types.h
index 1c81ca34..98124adb 100644
--- a/include/mach_debug/mach_debug_types.h
+++ b/include/mach_debug/mach_debug_types.h
@@ -30,7 +30,6 @@
#ifndef _MACH_DEBUG_MACH_DEBUG_TYPES_H_
#define _MACH_DEBUG_MACH_DEBUG_TYPES_H_
-#include <mach_debug/ipc_info.h>
#include <mach_debug/vm_info.h>
#include <mach_debug/slab_info.h>
#include <mach_debug/hash_info.h>
diff --git a/include/mach_debug/slab_info.h b/include/mach_debug/slab_info.h
index 7d12cc18..0f6b5a2c 100644
--- a/include/mach_debug/slab_info.h
+++ b/include/mach_debug/slab_info.h
@@ -38,16 +38,16 @@
typedef struct cache_info {
int flags;
- size_t cpu_pool_size;
- size_t obj_size;
- size_t align;
- size_t buf_size;
- size_t slab_size;
- unsigned long bufs_per_slab;
- unsigned long nr_objs;
- unsigned long nr_bufs;
- unsigned long nr_slabs;
- unsigned long nr_free_slabs;
+ rpc_vm_size_t cpu_pool_size;
+ rpc_vm_size_t obj_size;
+ rpc_vm_size_t align;
+ rpc_vm_size_t buf_size;
+ rpc_vm_size_t slab_size;
+ rpc_long_natural_t bufs_per_slab;
+ rpc_long_natural_t nr_objs;
+ rpc_long_natural_t nr_bufs;
+ rpc_long_natural_t nr_slabs;
+ rpc_long_natural_t nr_free_slabs;
char name[CACHE_NAME_MAX_LEN];
} cache_info_t;
diff --git a/include/mach_debug/vm_info.h b/include/mach_debug/vm_info.h
index b50fb92d..cf45a2cc 100644
--- a/include/mach_debug/vm_info.h
+++ b/include/mach_debug/vm_info.h
@@ -39,6 +39,7 @@
#include <mach/vm_inherit.h>
#include <mach/vm_prot.h>
#include <mach/memory_object.h>
+#include <stdint.h>
/*
* Remember to update the mig type definitions
@@ -46,25 +47,25 @@
*/
typedef struct vm_region_info {
- vm_offset_t vri_start; /* start of region */
- vm_offset_t vri_end; /* end of region */
+ rpc_vm_offset_t vri_start; /* start of region */
+ rpc_vm_offset_t vri_end; /* end of region */
-/*vm_prot_t*/natural_t vri_protection; /* protection code */
-/*vm_prot_t*/natural_t vri_max_protection; /* maximum protection */
-/*vm_inherit_t*/natural_t vri_inheritance; /* inheritance */
- natural_t vri_wired_count; /* number of times wired */
- natural_t vri_user_wired_count; /* number of times user has wired */
+ vm_prot_t vri_protection; /* protection code */
+ vm_prot_t vri_max_protection; /* maximum protection */
+ vm_inherit_t vri_inheritance; /* inheritance */
+ unsigned int vri_wired_count; /* number of times wired */
+ unsigned int vri_user_wired_count; /* number of times user has wired */
- vm_offset_t vri_object; /* the mapped object */
- vm_offset_t vri_offset; /* offset into object */
+ rpc_vm_offset_t vri_object; /* the mapped object */
+ rpc_vm_offset_t vri_offset; /* offset into object */
/*boolean_t*/integer_t vri_needs_copy; /* does object need to be copied? */
- natural_t vri_sharing; /* share map references */
+ unsigned int vri_sharing; /* share map references */
} vm_region_info_t;
typedef vm_region_info_t *vm_region_info_array_t;
-typedef natural_t vm_object_info_state_t;
+typedef uint32_t vm_object_info_state_t;
#define VOI_STATE_PAGER_CREATED 0x00000001
#define VOI_STATE_PAGER_INITIALIZED 0x00000002
@@ -77,27 +78,26 @@ typedef natural_t vm_object_info_state_t;
#define VOI_STATE_LOCK_RESTART 0x00000100
typedef struct vm_object_info {
- vm_offset_t voi_object; /* this object */
- vm_size_t voi_pagesize; /* object's page size */
- vm_size_t voi_size; /* object size (valid if internal) */
- natural_t voi_ref_count; /* number of references */
- natural_t voi_resident_page_count; /* number of resident pages */
- natural_t voi_absent_count; /* number requested but not filled */
- vm_offset_t voi_copy; /* copy object */
- vm_offset_t voi_shadow; /* shadow object */
- vm_offset_t voi_shadow_offset; /* offset into shadow object */
- vm_offset_t voi_paging_offset; /* offset into memory object */
-/*memory_object_copy_strategy_t*/integer_t voi_copy_strategy;
+ rpc_vm_offset_t voi_object; /* this object */
+ rpc_vm_size_t voi_pagesize; /* object's page size */
+ rpc_vm_size_t voi_size; /* object size (valid if internal) */
+ unsigned int voi_ref_count; /* number of references */
+ unsigned int voi_resident_page_count; /* number of resident pages */
+ unsigned int voi_absent_count; /* number requested but not filled */
+ rpc_vm_offset_t voi_copy; /* copy object */
+ rpc_vm_offset_t voi_shadow; /* shadow object */
+ rpc_vm_offset_t voi_shadow_offset; /* offset into shadow object */
+ rpc_vm_offset_t voi_paging_offset; /* offset into memory object */
+ memory_object_copy_strategy_t voi_copy_strategy;
/* how to handle data copy */
- vm_offset_t voi_last_alloc; /* offset of last allocation */
- natural_t voi_paging_in_progress; /* paging references */
+ rpc_vm_offset_t voi_last_alloc; /* offset of last allocation */
+ unsigned int voi_paging_in_progress; /* paging references */
vm_object_info_state_t voi_state; /* random state bits */
} vm_object_info_t;
typedef vm_object_info_t *vm_object_info_array_t;
-
-typedef natural_t vm_page_info_state_t;
+typedef uint32_t vm_page_info_state_t;
#define VPI_STATE_BUSY 0x00000001
#define VPI_STATE_WANTED 0x00000002
@@ -117,15 +117,27 @@ typedef natural_t vm_page_info_state_t;
#define VPI_STATE_PAGER 0x80000000 /* pager has the page */
+/* XXX: This structure holds a 32bit vpi_phys_addr. */
typedef struct vm_page_info {
- vm_offset_t vpi_offset; /* offset in object */
- vm_offset_t vpi_phys_addr; /* physical address */
- natural_t vpi_wire_count; /* number of times wired */
-/*vm_prot_t*/natural_t vpi_page_lock; /* XP access restrictions */
-/*vm_prot_t*/natural_t vpi_unlock_request; /* outstanding unlock requests */
+ rpc_vm_offset_t vpi_offset; /* offset in object */
+ rpc_vm_offset_t vpi_phys_addr; /* physical address */
+ unsigned int vpi_wire_count; /* number of times wired */
+ vm_prot_t vpi_page_lock; /* XP access restrictions */
+ vm_prot_t vpi_unlock_request; /* outstanding unlock requests */
vm_page_info_state_t vpi_state; /* random state bits */
} vm_page_info_t;
typedef vm_page_info_t *vm_page_info_array_t;
+typedef struct vm_page_phys_info {
+ rpc_vm_offset_t vpi_offset; /* offset in object */
+ rpc_phys_addr_t vpi_phys_addr; /* physical address */
+ unsigned int vpi_wire_count; /* number of times wired */
+ vm_prot_t vpi_page_lock; /* XP access restrictions */
+ vm_prot_t vpi_unlock_request; /* outstanding unlock requests */
+ vm_page_info_state_t vpi_state; /* random state bits */
+} vm_page_phys_info_t;
+
+typedef vm_page_phys_info_t *vm_page_phys_info_array_t;
+
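vm_page_phys_info_t is the 64-bit-clean counterpart of vm_page_info_t: vpi_phys_addr becomes rpc_phys_addr_t so physical addresses above 4 GiB are not truncated, and it is returned by the new mach_vm_object_pages_phys routine added to mach_debug.defs above. A hedged user-side sketch, assuming the usual MIG-generated signature for an out array with CountInOut (object is a memory_object_name_t obtained elsewhere):

	vm_page_phys_info_array_t pages = NULL;
	mach_msg_type_number_t count = 0;
	kern_return_t kr = mach_vm_object_pages_phys(object, &pages, &count);
	if (kr == KERN_SUCCESS) {
		for (mach_msg_type_number_t i = 0; i < count; i++)
			printf("offset 0x%llx -> phys 0x%llx\n",
			       (unsigned long long) pages[i].vpi_offset,
			       (unsigned long long) pages[i].vpi_phys_addr);
		/* the Dealloc'd array is mapped into the caller; unmap it when done */
		vm_deallocate(mach_task_self(), (vm_offset_t) pages,
			      count * sizeof(*pages));
	}
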
#endif /* _MACH_DEBUG_VM_INFO_H_ */
diff --git a/include/stdint.h b/include/stdint.h
deleted file mode 100644
index bea277ec..00000000
--- a/include/stdint.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2016 Free Software Foundation, Inc.
- *
- * This file is part of GNU Mach.
- *
- * GNU Mach is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any later
- * version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef _STDINT_H_
-#define _STDINT_H_
-
-/*
- * These types are _exactly_ as wide as indicated in their names.
- */
-
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#if __x86_64__
-typedef long int int64_t;
-#else
-typedef long long int int64_t;
-#endif /* __x86_64__ */
-
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#if __x86_64__
-typedef unsigned long int uint64_t;
-#else
-typedef unsigned long long int uint64_t;
-#endif /* __x86_64__ */
-
-/* Types for `void *' pointers. */
-#if __x86_64__
-typedef long int intptr_t;
-typedef unsigned long int uintptr_t;
-#else
-typedef int intptr_t;
-typedef unsigned int uintptr_t;
-#endif /* __x86_64__ */
-
-#endif /* _STDINT_H_ */
diff --git a/include/string.h b/include/string.h
index cddcbeb9..91c5fe46 100644
--- a/include/string.h
+++ b/include/string.h
@@ -42,8 +42,6 @@ extern char *strcpy (char *dest, const char *src);
extern char *strncpy (char *dest, const char *src, size_t n);
-extern char *strrchr (const char *s, int c);
-
extern char *strsep (char **strp, const char *delim);
extern int strcmp (const char *s1, const char *s2) __attribute__ ((pure));
@@ -54,6 +52,4 @@ extern size_t strlen (const char *s) __attribute__ ((pure));
extern char *strstr(const char *haystack, const char *needle);
-extern int ffs(int i);
-
#endif /* _MACH_SA_SYS_STRING_H_ */
diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h
deleted file mode 100644
index 1df6b732..00000000
--- a/include/sys/ioctl.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon rights
- * to redistribute these changes.
- */
-/*
- * Format definitions for 'ioctl' commands in device definitions.
- *
- * From BSD4.4.
- */
-
-#ifndef _MACH_SYS_IOCTL_H_
-#define _MACH_SYS_IOCTL_H_
-/*
- * Ioctl's have the command encoded in the lower word, and the size of
- * any in or out parameters in the upper word. The high 3 bits of the
- * upper word are used to encode the in/out status of the parameter.
- */
-#define IOCPARM_MASK 0x1fff /* parameter length, at most 13 bits */
-#define IOC_VOID 0x20000000 /* no parameters */
-#define IOC_OUT 0x40000000 /* copy out parameters */
-#define IOC_IN 0x80000000U /* copy in parameters */
-#define IOC_INOUT (IOC_IN|IOC_OUT)
-
-#define _IOC(inout,group,num,len) \
- (inout | ((len & IOCPARM_MASK) << 16) | ((group) << 8) | (num))
-#define _IO(g,n) _IOC(IOC_VOID, (g), (n), 0)
-#define _IOR(g,n,t) _IOC(IOC_OUT, (g), (n), sizeof(t))
-#define _IOW(g,n,t) _IOC(IOC_IN, (g), (n), sizeof(t))
-#define _IOWR(g,n,t) _IOC(IOC_INOUT, (g), (n), sizeof(t))
-
-#endif /* _MACH_SYS_IOCTL_H_ */
diff --git a/include/sys/time.h b/include/sys/time.h
deleted file mode 100644
index de97d325..00000000
--- a/include/sys/time.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon rights
- * to redistribute these changes.
- */
-/*
- * Time-keeper for kernel IO devices.
- *
- * May or may not have any relation to wall-clock time.
- */
-
-#ifndef _MACH_SA_SYS_TIME_H_
-#define _MACH_SA_SYS_TIME_H_
-
-#include <mach/time_value.h>
-
-extern time_value_t time;
-
-/*
- * Definitions to keep old code happy.
- */
-#define timeval_t time_value_t
-#define timeval time_value
-#define tv_sec seconds
-#define tv_usec microseconds
-
-#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
-#define timercmp(tvp, uvp, cmp) \
- ((tvp)->tv_sec cmp (uvp)->tv_sec || \
- (tvp)->tv_sec == (uvp)->tv_sec && (tvp)->tv_usec cmp (uvp)->tv_usec)
-#define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0
-
-#endif /* _MACH_SA_SYS_TIME_H_ */
diff --git a/ipc/.gitignore b/ipc/.gitignore
new file mode 100644
index 00000000..b7509324
--- /dev/null
+++ b/ipc/.gitignore
@@ -0,0 +1,2 @@
+notify.none.defs.c
+notify.none.msgids
diff --git a/ipc/ipc_entry.c b/ipc/ipc_entry.c
index 0414ba5f..f13c442f 100644
--- a/ipc/ipc_entry.c
+++ b/ipc/ipc_entry.c
@@ -67,7 +67,7 @@ struct kmem_cache ipc_entry_cache;
kern_return_t
ipc_entry_alloc(
ipc_space_t space,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_entry_t *entryp)
{
kern_return_t kr;
@@ -97,10 +97,10 @@ ipc_entry_alloc(
entry->ie_bits = 0;
entry->ie_object = IO_NULL;
entry->ie_request = 0;
- entry->ie_name = (mach_port_t) key;
+ entry->ie_name = (mach_port_name_t) key;
*entryp = entry;
- *namep = (mach_port_t) key;
+ *namep = (mach_port_name_t) key;
return KERN_SUCCESS;
}
@@ -121,13 +121,13 @@ ipc_entry_alloc(
kern_return_t
ipc_entry_alloc_name(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t *entryp)
{
kern_return_t kr;
ipc_entry_t entry, e, *prevp;
void **slot;
- assert(MACH_PORT_VALID(name));
+ assert(MACH_PORT_NAME_VALID(name));
if (!space->is_active) {
return KERN_INVALID_TASK;
@@ -198,12 +198,11 @@ ipc_entry_alloc_name(
ipc_entry_t
db_ipc_object_by_name(
const task_t task,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_space_t space = task->itk_space;
ipc_entry_t entry;
-
entry = ipc_entry_lookup(space, name);
if(entry != IE_NULL) {
iprintf("(task 0x%x, name 0x%x) ==> object 0x%x",
diff --git a/ipc/ipc_entry.h b/ipc/ipc_entry.h
index b429984b..9f7b593e 100644
--- a/ipc/ipc_entry.h
+++ b/ipc/ipc_entry.h
@@ -55,7 +55,7 @@ typedef unsigned int ipc_entry_bits_t;
typedef ipc_table_elems_t ipc_entry_num_t; /* number of entries */
typedef struct ipc_entry {
- mach_port_t ie_name;
+ mach_port_name_t ie_name;
ipc_entry_bits_t ie_bits;
struct ipc_object *ie_object;
union {
@@ -97,14 +97,14 @@ extern struct kmem_cache ipc_entry_cache;
#define ie_free(e) kmem_cache_free(&ipc_entry_cache, (vm_offset_t) (e))
extern kern_return_t
-ipc_entry_alloc(ipc_space_t space, mach_port_t *namep, ipc_entry_t *entryp);
+ipc_entry_alloc(ipc_space_t space, mach_port_name_t *namep, ipc_entry_t *entryp);
extern kern_return_t
-ipc_entry_alloc_name(ipc_space_t space, mach_port_t name, ipc_entry_t *entryp);
+ipc_entry_alloc_name(ipc_space_t space, mach_port_name_t name, ipc_entry_t *entryp);
ipc_entry_t
db_ipc_object_by_name(
- task_t task,
- mach_port_t name);
+ task_t task,
+ mach_port_name_t name);
#endif /* _IPC_IPC_ENTRY_H_ */
diff --git a/ipc/ipc_kmsg.c b/ipc/ipc_kmsg.c
index 28ed23c6..bd843804 100644
--- a/ipc/ipc_kmsg.c
+++ b/ipc/ipc_kmsg.c
@@ -42,7 +42,9 @@
#include <mach/message.h>
#include <mach/port.h>
#include <machine/locore.h>
+#include <machine/copy_user.h>
#include <kern/assert.h>
+#include <kern/debug.h>
#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
@@ -68,9 +70,6 @@
#include <ipc/ipc_print.h>
#endif
-#define is_misaligned(x) ( ((vm_offset_t)(x)) & (sizeof(vm_offset_t)-1) )
-#define ptr_align(x) \
- ( ( ((vm_offset_t)(x)) + (sizeof(vm_offset_t)-1) ) & ~(sizeof(vm_offset_t)-1) )
ipc_kmsg_t ipc_kmsg_cache[NCPUS];
@@ -214,7 +213,7 @@ ipc_kmsg_destroy(
* No locks held.
*/
-void
+static void
ipc_kmsg_clean_body(
vm_offset_t saddr,
vm_offset_t eaddr)
@@ -230,28 +229,23 @@ ipc_kmsg_clean_body(
type = (mach_msg_type_long_t *) saddr;
is_inline = ((mach_msg_type_t*)type)->msgt_inline;
if (((mach_msg_type_t*)type)->msgt_longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- saddr = ptr_align(saddr);
- continue;
- }
name = type->msgtl_name;
size = type->msgtl_size;
number = type->msgtl_number;
saddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
number = ((mach_msg_type_t*)type)->msgt_number;
saddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- saddr = ptr_align(saddr);
-
/* calculate length of data in bytes, rounding up */
length = ((number * size) + 7) >> 3;
@@ -284,9 +278,7 @@ ipc_kmsg_clean_body(
}
if (is_inline) {
- /* inline data sizes round up to int boundaries */
-
- saddr += (length + 3) &~ 3;
+ saddr += length;
} else {
vm_offset_t data = * (vm_offset_t *) saddr;
@@ -301,6 +293,7 @@ ipc_kmsg_clean_body(
saddr += sizeof(vm_offset_t);
}
+ saddr = mach_msg_kernel_align(saddr);
}
}
@@ -356,7 +349,7 @@ ipc_kmsg_clean(ipc_kmsg_t kmsg)
* Nothing locked.
*/
-void
+static void
ipc_kmsg_clean_partial(
ipc_kmsg_t kmsg,
vm_offset_t eaddr,
@@ -388,31 +381,26 @@ ipc_kmsg_clean_partial(
boolean_t is_inline, is_port;
vm_size_t length;
-xxx: type = (mach_msg_type_long_t *) eaddr;
+ type = (mach_msg_type_long_t *) eaddr;
is_inline = ((mach_msg_type_t*)type)->msgt_inline;
if (((mach_msg_type_t*)type)->msgt_longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- eaddr = ptr_align(eaddr);
- goto xxx;
- }
name = type->msgtl_name;
size = type->msgtl_size;
rnumber = type->msgtl_number;
eaddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ eaddr = mach_msg_kernel_align(eaddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
rnumber = ((mach_msg_type_t*)type)->msgt_number;
eaddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ eaddr = mach_msg_kernel_align(eaddr);
+ }
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- eaddr = ptr_align(eaddr);
-
/* calculate length of data in bytes, rounding up */
length = ((rnumber * size) + 7) >> 3;
@@ -496,39 +484,32 @@ ipc_kmsg_free(ipc_kmsg_t kmsg)
mach_msg_return_t
ipc_kmsg_get(
- mach_msg_header_t *msg,
+ mach_msg_user_header_t *msg,
mach_msg_size_t size,
ipc_kmsg_t *kmsgp)
{
ipc_kmsg_t kmsg;
+ mach_msg_size_t ksize = size * IKM_EXPAND_FACTOR;
- if ((size < sizeof(mach_msg_header_t)) || (size & 3))
+ if ((size < sizeof(mach_msg_user_header_t)) || mach_msg_user_is_misaligned(size))
return MACH_SEND_MSG_TOO_SMALL;
- if (size <= IKM_SAVED_MSG_SIZE) {
- kmsg = ikm_cache();
- if (kmsg != IKM_NULL) {
- ikm_cache() = IKM_NULL;
- ikm_check_initialized(kmsg, IKM_SAVED_KMSG_SIZE);
- } else {
- kmsg = ikm_alloc(IKM_SAVED_MSG_SIZE);
- if (kmsg == IKM_NULL)
- return MACH_SEND_NO_BUFFER;
- ikm_init(kmsg, IKM_SAVED_MSG_SIZE);
- }
+ if (ksize <= IKM_SAVED_MSG_SIZE) {
+ kmsg = ikm_cache_alloc();
+ if (kmsg == IKM_NULL)
+ return MACH_SEND_NO_BUFFER;
} else {
- kmsg = ikm_alloc(size);
+ kmsg = ikm_alloc(ksize);
if (kmsg == IKM_NULL)
return MACH_SEND_NO_BUFFER;
- ikm_init(kmsg, size);
+ ikm_init(kmsg, ksize);
}
- if (copyinmsg(msg, &kmsg->ikm_header, size)) {
+ if (copyinmsg(msg, &kmsg->ikm_header, size, kmsg->ikm_size)) {
ikm_free(kmsg);
return MACH_SEND_INVALID_DATA;
}
- kmsg->ikm_header.msgh_size = size;
*kmsgp = kmsg;
return MACH_MSG_SUCCESS;
}
@@ -555,7 +536,7 @@ ipc_kmsg_get_from_kernel(
ipc_kmsg_t kmsg;
assert(size >= sizeof(mach_msg_header_t));
- assert((size & 3) == 0);
+ assert(!mach_msg_kernel_is_misaligned(size));
kmsg = ikm_alloc(size);
if (kmsg == IKM_NULL)
@@ -585,7 +566,7 @@ ipc_kmsg_get_from_kernel(
mach_msg_return_t
ipc_kmsg_put(
- mach_msg_header_t *msg,
+ mach_msg_user_header_t *msg,
ipc_kmsg_t kmsg,
mach_msg_size_t size)
{
@@ -598,11 +579,7 @@ ipc_kmsg_put(
else
mr = MACH_MSG_SUCCESS;
- if ((kmsg->ikm_size == IKM_SAVED_KMSG_SIZE) &&
- (ikm_cache() == IKM_NULL))
- ikm_cache() = kmsg;
- else
- ikm_free(kmsg);
+ ikm_cache_free(kmsg);
return mr;
}
@@ -672,11 +649,21 @@ mach_msg_return_t
ipc_kmsg_copyin_header(
mach_msg_header_t *msg,
ipc_space_t space,
- mach_port_t notify)
+ mach_port_name_t notify)
{
mach_msg_bits_t mbits = msg->msgh_bits &~ MACH_MSGH_BITS_CIRCULAR;
- mach_port_t dest_name = msg->msgh_remote_port;
- mach_port_t reply_name = msg->msgh_local_port;
+ /*
+ * TODO: For 64 bits, msgh_remote_port as written by user space
+ * is 4 bytes long but here we assume it is the same size as a pointer.
+ * When copying the message to the kernel, we need to perform the
+ * conversion so that port names are parsed correctly.
+ *
+ * When copying the message out of the kernel to user space, we also need
+ * to be careful with the reverse translation.
+ */
+
+ mach_port_name_t dest_name = (mach_port_name_t)msg->msgh_remote_port;
+ mach_port_name_t reply_name = (mach_port_name_t)msg->msgh_local_port;
kern_return_t kr;
#ifndef MIGRATING_THREADS
@@ -699,7 +686,10 @@ ipc_kmsg_copyin_header(
entry = ipc_entry_lookup (space, dest_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, dest_name);
goto abort_async;
+ }
bits = entry->ie_bits;
/* check type bits */
@@ -751,7 +741,10 @@ ipc_kmsg_copyin_header(
entry = ipc_entry_lookup (space, dest_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, dest_name);
goto abort_request;
+ }
bits = entry->ie_bits;
/* check type bits */
@@ -765,7 +758,10 @@ ipc_kmsg_copyin_header(
entry = ipc_entry_lookup (space, reply_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, reply_name);
goto abort_request;
+ }
bits = entry->ie_bits;
/* check type bits */
@@ -832,7 +828,10 @@ ipc_kmsg_copyin_header(
entry = ipc_entry_lookup (space, dest_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, dest_name);
goto abort_reply;
+ }
bits = entry->ie_bits;
/* check and type bits */
@@ -905,6 +904,8 @@ ipc_kmsg_copyin_header(
if (((entry = ipc_entry_lookup(space, notify)) == IE_NULL) ||
((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0)) {
+ if (entry == IE_NULL)
+ ipc_entry_lookup_failed (msg, notify);
is_write_unlock(space);
return MACH_SEND_INVALID_NOTIFY;
}
@@ -914,7 +915,7 @@ ipc_kmsg_copyin_header(
if (dest_name == reply_name) {
ipc_entry_t entry;
- mach_port_t name = dest_name;
+ mach_port_name_t name = dest_name;
/*
* Destination and reply ports are the same!
@@ -929,8 +930,10 @@ ipc_kmsg_copyin_header(
*/
entry = ipc_entry_lookup(space, name);
- if (entry == IE_NULL)
+ if (entry == IE_NULL) {
+ ipc_entry_lookup_failed (msg, name);
goto invalid_dest;
+ }
assert(reply_type != 0); /* because name not null */
@@ -1071,7 +1074,7 @@ ipc_kmsg_copyin_header(
reply_soright = soright;
}
}
- } else if (!MACH_PORT_VALID(reply_name)) {
+ } else if (!MACH_PORT_NAME_VALID(reply_name)) {
ipc_entry_t entry;
/*
@@ -1080,8 +1083,10 @@ ipc_kmsg_copyin_header(
*/
entry = ipc_entry_lookup(space, dest_name);
- if (entry == IE_NULL)
+ if (entry == IE_NULL) {
+ ipc_entry_lookup_failed (msg, dest_name);
goto invalid_dest;
+ }
kr = ipc_right_copyin(space, dest_name, entry,
dest_type, FALSE,
@@ -1094,7 +1099,7 @@ ipc_kmsg_copyin_header(
if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
ipc_entry_dealloc(space, dest_name, entry);
- reply_port = (ipc_object_t) reply_name;
+ reply_port = (ipc_object_t) invalid_name_to_port(reply_name);
reply_soright = IP_NULL;
} else {
ipc_entry_t dest_entry, reply_entry;
@@ -1136,12 +1141,17 @@ ipc_kmsg_copyin_header(
*/
dest_entry = ipc_entry_lookup(space, dest_name);
- if (dest_entry == IE_NULL)
+ if (dest_entry == IE_NULL) {
+ ipc_entry_lookup_failed (msg, dest_name);
goto invalid_dest;
+ }
reply_entry = ipc_entry_lookup(space, reply_name);
if (reply_entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, reply_name);
goto invalid_reply;
+ }
assert(dest_entry != reply_entry); /* names are not equal */
assert(reply_type != 0); /* because reply_name not null */
@@ -1280,7 +1290,7 @@ ipc_kmsg_copyin_header(
return MACH_SEND_INVALID_REPLY;
}
-mach_msg_return_t
+static mach_msg_return_t
ipc_kmsg_copyin_body(
ipc_kmsg_t kmsg,
ipc_space_t space,
@@ -1299,6 +1309,10 @@ ipc_kmsg_copyin_body(
saddr = (vm_offset_t) (&kmsg->ikm_header + 1);
eaddr = (vm_offset_t) &kmsg->ikm_header + kmsg->ikm_header.msgh_size;
+ /* We make assumptions about the alignment of the header. */
+ _Static_assert(!mach_msg_kernel_is_misaligned(sizeof(mach_msg_header_t)),
+ "mach_msg_header_t needs to be MACH_MSG_KERNEL_ALIGNMENT aligned.");
+
while (saddr < eaddr) {
vm_offset_t taddr = saddr;
mach_msg_type_long_t *type;
@@ -1322,50 +1336,44 @@ ipc_kmsg_copyin_body(
is_inline = ((mach_msg_type_t*)type)->msgt_inline;
dealloc = ((mach_msg_type_t*)type)->msgt_deallocate;
if (longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- saddr = ptr_align(saddr);
- continue;
- }
name = type->msgtl_name;
size = type->msgtl_size;
number = type->msgtl_number;
saddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
number = ((mach_msg_type_t*)type)->msgt_number;
saddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
}
is_port = MACH_MSG_TYPE_PORT_ANY(name);
if ((is_port && (size != PORT_T_SIZE_IN_BITS)) ||
+#ifndef __x86_64__
(longform && ((type->msgtl_header.msgt_name != 0) ||
(type->msgtl_header.msgt_size != 0) ||
(type->msgtl_header.msgt_number != 0))) ||
+#endif
(((mach_msg_type_t*)type)->msgt_unused != 0) ||
(dealloc && is_inline)) {
ipc_kmsg_clean_partial(kmsg, taddr, FALSE, 0);
return MACH_SEND_INVALID_TYPE;
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- saddr = ptr_align(saddr);
-
/* calculate length of data in bytes, rounding up */
length = (((uint64_t) number * size) + 7) >> 3;
if (is_inline) {
- vm_size_t amount;
-
- /* inline data sizes round up to int boundaries */
+ vm_size_t amount = length;
- amount = (length + 3) &~ 3;
if ((eaddr - saddr) < amount) {
ipc_kmsg_clean_partial(kmsg, taddr, FALSE, 0);
return MACH_SEND_MSG_TOO_SMALL;
@@ -1376,9 +1384,6 @@ ipc_kmsg_copyin_body(
} else {
vm_offset_t addr;
- if (sizeof(vm_offset_t) > sizeof(mach_msg_type_t))
- saddr = ptr_align(saddr);
-
if ((eaddr - saddr) < sizeof(vm_offset_t)) {
ipc_kmsg_clean_partial(kmsg, taddr, FALSE, 0);
return MACH_SEND_MSG_TOO_SMALL;
@@ -1395,14 +1400,27 @@ ipc_kmsg_copyin_body(
if (data == 0)
goto invalid_memory;
- if (copyinmap(map, (char *) addr,
- (char *) data, length) ||
- (dealloc &&
- (vm_deallocate(map, addr, length) !=
- KERN_SUCCESS))) {
+ if (sizeof(mach_port_name_t) != sizeof(mach_port_t))
+ {
+ mach_port_name_t *src = (mach_port_name_t*)addr;
+ mach_port_t *dst = (mach_port_t*)data;
+ for (int i=0; i<number; i++) {
+ if (copyin_port(src + i, dst + i)) {
+ kfree(data, length);
+ goto invalid_memory;
+ }
+ }
+ } else if (copyinmap(map, (char *) addr,
+ (char *) data, length)) {
kfree(data, length);
goto invalid_memory;
}
+ if (dealloc &&
+ (vm_deallocate(map, addr, length) != KERN_SUCCESS)) {
+ kfree(data, length);
+ goto invalid_memory;
+ }
+
} else {
vm_map_copy_t copy;
@@ -1441,11 +1459,13 @@ ipc_kmsg_copyin_body(
((mach_msg_type_t*)type)->msgt_name = newname;
for (i = 0; i < number; i++) {
- mach_port_t port = (mach_port_t) objects[i];
+ mach_port_name_t port = ((mach_port_t*)data)[i];
ipc_object_t object;
- if (!MACH_PORT_VALID(port))
+ if (!MACH_PORT_NAME_VALID(port)) {
+ objects[i] = (ipc_object_t)invalid_name_to_port(port);
continue;
+ }
kr = ipc_object_copyin(space, port,
name, &object);
@@ -1467,6 +1487,7 @@ ipc_kmsg_copyin_body(
complex = TRUE;
}
+ saddr = mach_msg_kernel_align(saddr);
}
if (!complex)
@@ -1505,7 +1526,7 @@ ipc_kmsg_copyin(
ipc_kmsg_t kmsg,
ipc_space_t space,
vm_map_t map,
- mach_port_t notify)
+ mach_port_name_t notify)
{
mach_msg_return_t mr;
@@ -1589,28 +1610,23 @@ ipc_kmsg_copyin_from_kernel(ipc_kmsg_t kmsg)
longform = ((mach_msg_type_t*)type)->msgt_longform;
/* type->msgtl_header.msgt_deallocate not used */
if (longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- saddr = ptr_align(saddr);
- continue;
- }
name = type->msgtl_name;
size = type->msgtl_size;
number = type->msgtl_number;
saddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
number = ((mach_msg_type_t*)type)->msgt_number;
saddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- saddr = ptr_align(saddr);
-
/* calculate length of data in bytes, rounding up */
length = ((number * size) + 7) >> 3;
@@ -1618,10 +1634,8 @@ ipc_kmsg_copyin_from_kernel(ipc_kmsg_t kmsg)
is_port = MACH_MSG_TYPE_PORT_ANY(name);
if (is_inline) {
- /* inline data sizes round up to int boundaries */
-
data = saddr;
- saddr += (length + 3) &~ 3;
+ saddr += length;
} else {
/*
* The sender should supply ready-made memory
@@ -1658,6 +1672,7 @@ ipc_kmsg_copyin_from_kernel(ipc_kmsg_t kmsg)
MACH_MSGH_BITS_CIRCULAR;
}
}
+ saddr = mach_msg_kernel_align(saddr);
}
}
@@ -1698,7 +1713,7 @@ mach_msg_return_t
ipc_kmsg_copyout_header(
mach_msg_header_t *msg,
ipc_space_t space,
- mach_port_t notify)
+ mach_port_name_t notify)
{
mach_msg_bits_t mbits = msg->msgh_bits;
ipc_port_t dest = (ipc_port_t) msg->msgh_remote_port;
@@ -1710,9 +1725,9 @@ ipc_kmsg_copyout_header(
if (notify == MACH_PORT_NULL) switch (MACH_MSGH_BITS_PORTS(mbits)) {
case MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0): {
- mach_port_t dest_name;
+ mach_port_name_t dest_name;
ipc_port_t nsrequest;
- unsigned long payload;
+ rpc_uintptr_t payload;
/* receiving an asynchronous message */
@@ -1763,9 +1778,9 @@ ipc_kmsg_copyout_header(
MACH_MSG_TYPE_PORT_SEND_ONCE): {
ipc_entry_t entry;
ipc_port_t reply = (ipc_port_t) msg->msgh_local_port;
- mach_port_t dest_name, reply_name;
+ mach_port_name_t dest_name, reply_name;
ipc_port_t nsrequest;
- unsigned long payload;
+ rpc_uintptr_t payload;
/* receiving a request message */
@@ -1823,7 +1838,7 @@ ipc_kmsg_copyout_header(
entry->ie_bits = gen | (MACH_PORT_TYPE_SEND_ONCE | 1);
}
- assert(MACH_PORT_VALID(reply_name));
+ assert(MACH_PORT_NAME_VALID(reply_name));
entry->ie_object = (ipc_object_t) reply;
is_write_unlock(space);
@@ -1866,8 +1881,8 @@ ipc_kmsg_copyout_header(
}
case MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0): {
- mach_port_t dest_name;
- unsigned long payload;
+ mach_port_name_t dest_name;
+ rpc_uintptr_t payload;
/* receiving a reply message */
@@ -1920,8 +1935,8 @@ ipc_kmsg_copyout_header(
mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
ipc_port_t reply = (ipc_port_t) msg->msgh_local_port;
- mach_port_t dest_name, reply_name;
- unsigned long payload;
+ mach_port_name_t dest_name, reply_name;
+ rpc_uintptr_t payload;
if (IP_VALID(reply)) {
ipc_port_t notify_port;
@@ -1998,7 +2013,7 @@ ipc_kmsg_copyout_header(
is_write_unlock(space);
reply = IP_DEAD;
- reply_name = MACH_PORT_DEAD;
+ reply_name = MACH_PORT_NAME_DEAD;
goto copyout_dest;
}
@@ -2101,6 +2116,8 @@ ipc_kmsg_copyout_header(
if (((entry = ipc_entry_lookup(space, notify))
== IE_NULL) ||
((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0)) {
+ if (entry == IE_NULL)
+ ipc_entry_lookup_failed (msg, notify);
is_read_unlock(space);
return MACH_RCV_INVALID_NOTIFY;
}
@@ -2109,7 +2126,7 @@ ipc_kmsg_copyout_header(
ip_lock(dest);
is_read_unlock(space);
- reply_name = (mach_port_t) reply;
+ reply_name = invalid_port_to_name(msg->msgh_local_port);
}
/*
@@ -2178,12 +2195,12 @@ ipc_kmsg_copyout_header(
if (ip_active(reply) ||
IP_TIMESTAMP_ORDER(timestamp,
reply->ip_timestamp))
- dest_name = MACH_PORT_DEAD;
+ dest_name = MACH_PORT_NAME_DEAD;
else
- dest_name = MACH_PORT_NULL;
+ dest_name = MACH_PORT_NAME_NULL;
ip_unlock(reply);
} else
- dest_name = MACH_PORT_DEAD;
+ dest_name = MACH_PORT_NAME_DEAD;
}
if (IP_VALID(reply))
@@ -2228,10 +2245,10 @@ ipc_kmsg_copyout_object(
ipc_space_t space,
ipc_object_t object,
mach_msg_type_name_t msgt_name,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
if (!IO_VALID(object)) {
- *namep = (mach_port_t) object;
+ *namep = invalid_port_to_name((mach_port_t)object);
return MACH_MSG_SUCCESS;
}
@@ -2301,9 +2318,9 @@ ipc_kmsg_copyout_object(
ipc_object_destroy(object, msgt_name);
if (kr == KERN_INVALID_CAPABILITY)
- *namep = MACH_PORT_DEAD;
+ *namep = MACH_PORT_NAME_DEAD;
else {
- *namep = MACH_PORT_NULL;
+ *namep = MACH_PORT_NAME_NULL;
if (kr == KERN_RESOURCE_SHORTAGE)
return MACH_MSG_IPC_KERNEL;
@@ -2336,13 +2353,17 @@ ipc_kmsg_copyout_object(
mach_msg_return_t
ipc_kmsg_copyout_body(
- vm_offset_t saddr,
- vm_offset_t eaddr,
+ ipc_kmsg_t kmsg,
ipc_space_t space,
vm_map_t map)
{
mach_msg_return_t mr = MACH_MSG_SUCCESS;
kern_return_t kr;
+ vm_offset_t saddr, eaddr;
+
+ saddr = (vm_offset_t) (&kmsg->ikm_header + 1);
+ eaddr = (vm_offset_t) &kmsg->ikm_header +
+ kmsg->ikm_header.msgh_size;
while (saddr < eaddr) {
vm_offset_t taddr = saddr;
@@ -2358,28 +2379,23 @@ ipc_kmsg_copyout_body(
is_inline = ((mach_msg_type_t*)type)->msgt_inline;
longform = ((mach_msg_type_t*)type)->msgt_longform;
if (longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- saddr = ptr_align(saddr);
- continue;
- }
name = type->msgtl_name;
size = type->msgtl_size;
number = type->msgtl_number;
saddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
number = ((mach_msg_type_t*)type)->msgt_number;
saddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- saddr = ptr_align(saddr);
-
/* calculate length of data in bytes, rounding up */
length = (((uint64_t) number * size) + 7) >> 3;
@@ -2387,44 +2403,43 @@ ipc_kmsg_copyout_body(
is_port = MACH_MSG_TYPE_PORT_ANY(name);
if (is_port) {
- mach_port_t *objects;
+ ipc_object_t *objects;
mach_msg_type_number_t i;
if (!is_inline && (length != 0)) {
/* first allocate memory in the map */
+ uint64_t allocated = length;
+
+ _Static_assert(sizeof(mach_port_name_t) <= sizeof(mach_port_t),
+ "Size of mach_port_t should be equal or larger than mach_port_name_t.");
+ allocated -= (sizeof(mach_port_t) - sizeof(mach_port_name_t)) * number;
- kr = vm_allocate(map, &addr, length, TRUE);
+ kr = vm_allocate(map, &addr, allocated, TRUE);
if (kr != KERN_SUCCESS) {
ipc_kmsg_clean_body(taddr, saddr);
goto vm_copyout_failure;
}
}
- objects = (mach_port_t *)
+ objects = (ipc_object_t *)
(is_inline ? saddr : * (vm_offset_t *) saddr);
/* copyout port rights carried in the message */
for (i = 0; i < number; i++) {
- ipc_object_t object =
- (ipc_object_t) objects[i];
+ ipc_object_t object = objects[i];
- mr |= ipc_kmsg_copyout_object(space, object,
- name, &objects[i]);
+ mr |= ipc_kmsg_copyout_object_to_port(space, object,
+ name, (mach_port_t *)&objects[i]);
}
}
if (is_inline) {
- /* inline data sizes round up to int boundaries */
-
((mach_msg_type_t*)type)->msgt_deallocate = FALSE;
- saddr += (length + 3) &~ 3;
+ saddr += length;
} else {
vm_offset_t data;
- if (sizeof(vm_offset_t) > sizeof(mach_msg_type_t))
- saddr = ptr_align(saddr);
-
data = * (vm_offset_t *) saddr;
/* copyout memory carried in the message */
@@ -2435,8 +2450,19 @@ ipc_kmsg_copyout_body(
} else if (is_port) {
/* copyout to memory allocated above */
- (void) copyoutmap(map, (char *) data,
- (char *) addr, length);
+ if (sizeof(mach_port_name_t) != sizeof(mach_port_t)) {
+ mach_port_t *src = (mach_port_t*)data;
+ mach_port_name_t *dst = (mach_port_name_t*)addr;
+ for (int i=0; i<number; i++) {
+ if (copyout_port(src + i, dst + i)) {
+ kr = KERN_FAILURE;
+ goto vm_copyout_failure;
+ }
+ }
+ } else {
+ (void) copyoutmap(map, (char *) data,
+ (char *) addr, length);
+ }
kfree(data, length);
} else {
vm_map_copy_t copy = (vm_map_copy_t) data;
@@ -2464,6 +2490,9 @@ ipc_kmsg_copyout_body(
* (vm_offset_t *) saddr = addr;
saddr += sizeof(vm_offset_t);
}
+
+ /* Next element is always correctly aligned */
+ saddr = mach_msg_kernel_align(saddr);
}
return mr;
@@ -2492,7 +2521,7 @@ ipc_kmsg_copyout(
ipc_kmsg_t kmsg,
ipc_space_t space,
vm_map_t map,
- mach_port_t notify)
+ mach_port_name_t notify)
{
mach_msg_bits_t mbits = kmsg->ikm_header.msgh_bits;
mach_msg_return_t mr;
@@ -2502,13 +2531,7 @@ ipc_kmsg_copyout(
return mr;
if (mbits & MACH_MSGH_BITS_COMPLEX) {
- vm_offset_t saddr, eaddr;
-
- saddr = (vm_offset_t) (&kmsg->ikm_header + 1);
- eaddr = (vm_offset_t) &kmsg->ikm_header +
- kmsg->ikm_header.msgh_size;
-
- mr = ipc_kmsg_copyout_body(saddr, eaddr, space, map);
+ mr = ipc_kmsg_copyout_body(kmsg, space, map);
if (mr != MACH_MSG_SUCCESS)
mr |= MACH_RCV_BODY_ERROR;
}
@@ -2547,7 +2570,7 @@ ipc_kmsg_copyout_pseudo(
ipc_object_t reply = (ipc_object_t) kmsg->ikm_header.msgh_local_port;
mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
- mach_port_t dest_name, reply_name;
+ mach_port_name_t dest_name, reply_name;
mach_msg_return_t mr;
assert(IO_VALID(dest));
@@ -2560,13 +2583,7 @@ ipc_kmsg_copyout_pseudo(
kmsg->ikm_header.msgh_local_port = reply_name;
if (mbits & MACH_MSGH_BITS_COMPLEX) {
- vm_offset_t saddr, eaddr;
-
- saddr = (vm_offset_t) (&kmsg->ikm_header + 1);
- eaddr = (vm_offset_t) &kmsg->ikm_header +
- kmsg->ikm_header.msgh_size;
-
- mr |= ipc_kmsg_copyout_body(saddr, eaddr, space, map);
+ mr |= ipc_kmsg_copyout_body(kmsg, space, map);
}
return mr;
@@ -2591,7 +2608,7 @@ ipc_kmsg_copyout_dest(
ipc_object_t reply = (ipc_object_t) kmsg->ikm_header.msgh_local_port;
mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
- mach_port_t dest_name, reply_name;
+ mach_port_name_t dest_name, reply_name;
assert(IO_VALID(dest));
@@ -2602,14 +2619,14 @@ ipc_kmsg_copyout_dest(
} else {
io_release(dest);
io_check_unlock(dest);
- dest_name = MACH_PORT_DEAD;
+ dest_name = MACH_PORT_NAME_DEAD;
}
if (IO_VALID(reply)) {
ipc_object_destroy(reply, reply_type);
- reply_name = MACH_PORT_NULL;
+ reply_name = MACH_PORT_NAME_NULL;
} else
- reply_name = (mach_port_t) reply;
+ reply_name = invalid_port_to_name((mach_port_t)reply);
kmsg->ikm_header.msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) |
MACH_MSGH_BITS(reply_type, dest_type));
@@ -2629,7 +2646,7 @@ ipc_kmsg_copyout_dest(
#if MACH_KDB
-char *
+static char *
ipc_type_name(
int type_name,
boolean_t received)
@@ -2697,7 +2714,7 @@ ipc_type_name(
}
}
-void
+static void
ipc_print_type_name(
int type_name)
{
@@ -2801,21 +2818,21 @@ ipc_msg_print(mach_msg_header_t *msgh)
is_inline = ((mach_msg_type_t*)type)->msgt_inline;
dealloc = ((mach_msg_type_t*)type)->msgt_deallocate;
if (longform) {
- /* This must be aligned */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- (is_misaligned(type))) {
- saddr = ptr_align(saddr);
- continue;
- }
name = type->msgtl_name;
size = type->msgtl_size;
number = type->msgtl_number;
saddr += sizeof(mach_msg_type_long_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_long_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
} else {
name = ((mach_msg_type_t*)type)->msgt_name;
size = ((mach_msg_type_t*)type)->msgt_size;
number = ((mach_msg_type_t*)type)->msgt_number;
saddr += sizeof(mach_msg_type_t);
+ if (mach_msg_kernel_is_misaligned(sizeof(mach_msg_type_t))) {
+ saddr = mach_msg_kernel_align(saddr);
+ }
}
db_printf("-- type=");
@@ -2837,20 +2854,17 @@ ipc_msg_print(mach_msg_header_t *msgh)
is_port = MACH_MSG_TYPE_PORT_ANY(name);
if ((is_port && (size != PORT_T_SIZE_IN_BITS)) ||
+#ifndef __x86_64__
(longform && ((type->msgtl_header.msgt_name != 0) ||
(type->msgtl_header.msgt_size != 0) ||
(type->msgtl_header.msgt_number != 0))) ||
+#endif
(((mach_msg_type_t*)type)->msgt_unused != 0) ||
(dealloc && is_inline)) {
db_printf("*** invalid type\n");
return;
}
- /* padding (ptrs and ports) ? */
- if ((sizeof(natural_t) > sizeof(mach_msg_type_t)) &&
- ((size >> 3) == sizeof(natural_t)))
- saddr = ptr_align(saddr);
-
/* calculate length of data in bytes, rounding up */
length = ((number * size) + 7) >> 3;
@@ -2859,7 +2873,7 @@ ipc_msg_print(mach_msg_header_t *msgh)
vm_size_t amount;
unsigned i, numwords;
- /* inline data sizes round up to int boundaries */
+ /* round up to int boundaries for printing */
amount = (length + 3) &~ 3;
if ((eaddr - saddr) < amount) {
db_printf("*** too small\n");
@@ -2884,6 +2898,7 @@ ipc_msg_print(mach_msg_header_t *msgh)
db_printf("0x%x\n", * (vm_offset_t *) saddr);
saddr += sizeof(vm_offset_t);
}
+ saddr = mach_msg_kernel_align(saddr);
}
}
#endif /* MACH_KDB */
diff --git a/ipc/ipc_kmsg.h b/ipc/ipc_kmsg.h
index c6cd77f0..9ee1aa4a 100644
--- a/ipc/ipc_kmsg.h
+++ b/ipc/ipc_kmsg.h
@@ -102,6 +102,46 @@ extern ipc_kmsg_t ipc_kmsg_cache[NCPUS];
#define ikm_cache() ipc_kmsg_cache[cpu_number()]
+#define ikm_cache_alloc_try() \
+MACRO_BEGIN \
+ ipc_kmsg_t __kmsg = ikm_cache(); \
+ if (__kmsg != IKM_NULL) { \
+ ikm_cache() = IKM_NULL; \
+ ikm_check_initialized(__kmsg, IKM_SAVED_KMSG_SIZE); \
+ } \
+ __kmsg; \
+MACRO_END
+
+#define ikm_cache_alloc() \
+MACRO_BEGIN \
+ ipc_kmsg_t __kmsg = ikm_cache_alloc_try(); \
+ if (!__kmsg) { \
+ __kmsg = ikm_alloc(IKM_SAVED_MSG_SIZE); \
+ if (__kmsg != IKM_NULL) \
+ ikm_init(__kmsg, IKM_SAVED_MSG_SIZE); \
+ } \
+ __kmsg; \
+MACRO_END
+
+#define ikm_cache_free_try(kmsg) \
+MACRO_BEGIN \
+ int __success = 0; \
+ if (ikm_cache() == IKM_NULL) { \
+ ikm_cache() = (kmsg); \
+ __success = 1; \
+ } \
+ __success; \
+MACRO_END
+
+#define ikm_cache_free(kmsg) \
+MACRO_BEGIN \
+ if (((kmsg)->ikm_size == IKM_SAVED_KMSG_SIZE) && \
+ (ikm_cache() == IKM_NULL)) \
+ ikm_cache() = (kmsg); \
+ else \
+ ikm_free(kmsg); \
+MACRO_END
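
These helpers wrap the per-CPU, one-slot kmsg cache that ipc_kmsg_get() and ipc_kmsg_put() now use (see the ipc_kmsg.c hunks above): ikm_cache_alloc() reuses the cached buffer or falls back to ikm_alloc(IKM_SAVED_MSG_SIZE), while ikm_cache_free() parks a buffer of exactly IKM_SAVED_KMSG_SIZE back in the slot and frees anything else. A hedged sketch of the intended pairing:

	ipc_kmsg_t kmsg = ikm_cache_alloc();	/* per-CPU slot, or a fresh allocation */
	if (kmsg == IKM_NULL)
		return MACH_SEND_NO_BUFFER;
	/* ... fill in and use the kernel message ... */
	ikm_cache_free(kmsg);			/* back into the slot, or ikm_free() */
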
+
/*
* The size of the kernel message buffers that will be cached.
* IKM_SAVED_KMSG_SIZE includes overhead; IKM_SAVED_MSG_SIZE doesn't.
@@ -116,6 +156,16 @@ extern ipc_kmsg_t ipc_kmsg_cache[NCPUS];
#define ikm_alloc(size) \
((ipc_kmsg_t) kalloc(ikm_plus_overhead(size)))
+/*
+ * The conversion between userland and kernel-land has to convert from port
+ * names to ports. This may increase the size that needs to be allocated
+ * on the kernel side. At worst the message is full of port names to be
+ * converted.
+ */
+#define IKM_EXPAND_FACTOR ((sizeof(mach_port_t) + sizeof(mach_port_name_t) - 1) / sizeof(mach_port_name_t))
+/* But make sure it is never the other way around. */
+_Static_assert(sizeof(mach_port_t) >= sizeof(mach_port_name_t), "mach_port_t must be at least as large as mach_port_name_t");
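
The factor is a ceiling division of the kernel port size by the user port-name size. Assuming the sizes this series targets (pointer-wide mach_port_t in the kernel, 32-bit mach_port_name_t in user space), the arithmetic works out as:

	/* x86_64: (8 + 4 - 1) / 4 == 2, so a user message may at most double
	 * when its 32-bit port names are widened to kernel ports.
	 * i386:   (4 + 4 - 1) / 4 == 1, i.e. no expansion. */
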
+
#define ikm_init(kmsg, size) \
MACRO_BEGIN \
ikm_init_special((kmsg), ikm_plus_overhead(size)); \
@@ -242,38 +292,49 @@ extern void
ipc_kmsg_free(ipc_kmsg_t);
extern mach_msg_return_t
-ipc_kmsg_get(mach_msg_header_t *, mach_msg_size_t, ipc_kmsg_t *);
+ipc_kmsg_get(mach_msg_user_header_t *, mach_msg_size_t, ipc_kmsg_t *);
extern mach_msg_return_t
ipc_kmsg_get_from_kernel(mach_msg_header_t *, mach_msg_size_t, ipc_kmsg_t *);
extern mach_msg_return_t
-ipc_kmsg_put(mach_msg_header_t *, ipc_kmsg_t, mach_msg_size_t);
+ipc_kmsg_put(mach_msg_user_header_t *, ipc_kmsg_t, mach_msg_size_t);
extern void
ipc_kmsg_put_to_kernel(mach_msg_header_t *, ipc_kmsg_t, mach_msg_size_t);
extern mach_msg_return_t
-ipc_kmsg_copyin_header(mach_msg_header_t *, ipc_space_t, mach_port_t);
+ipc_kmsg_copyin_header(mach_msg_header_t *, ipc_space_t, mach_port_name_t);
extern mach_msg_return_t
-ipc_kmsg_copyin(ipc_kmsg_t, ipc_space_t, vm_map_t, mach_port_t);
+ipc_kmsg_copyin(ipc_kmsg_t, ipc_space_t, vm_map_t, mach_port_name_t);
extern void
ipc_kmsg_copyin_from_kernel(ipc_kmsg_t);
extern mach_msg_return_t
-ipc_kmsg_copyout_header(mach_msg_header_t *, ipc_space_t, mach_port_t);
+ipc_kmsg_copyout_header(mach_msg_header_t *, ipc_space_t, mach_port_name_t);
extern mach_msg_return_t
ipc_kmsg_copyout_object(ipc_space_t, ipc_object_t,
- mach_msg_type_name_t, mach_port_t *);
+ mach_msg_type_name_t, mach_port_name_t *);
+
+static inline mach_msg_return_t
+ipc_kmsg_copyout_object_to_port(ipc_space_t space, ipc_object_t object,
+ mach_msg_type_name_t msgt_name, mach_port_t *portp)
+{
+ mach_port_name_t name;
+ mach_msg_return_t mr;
+ mr = ipc_kmsg_copyout_object(space, object, msgt_name, &name);
+ *portp = (mach_port_t)name;
+ return mr;
+}
extern mach_msg_return_t
-ipc_kmsg_copyout_body(vm_offset_t, vm_offset_t, ipc_space_t, vm_map_t);
+ipc_kmsg_copyout_body(ipc_kmsg_t, ipc_space_t, vm_map_t);
extern mach_msg_return_t
-ipc_kmsg_copyout(ipc_kmsg_t, ipc_space_t, vm_map_t, mach_port_t);
+ipc_kmsg_copyout(ipc_kmsg_t, ipc_space_t, vm_map_t, mach_port_name_t);
extern mach_msg_return_t
ipc_kmsg_copyout_pseudo(ipc_kmsg_t, ipc_space_t, vm_map_t);
diff --git a/ipc/ipc_machdep.h b/ipc/ipc_machdep.h
index c205ba45..2871fc31 100755
--- a/ipc/ipc_machdep.h
+++ b/ipc/ipc_machdep.h
@@ -27,18 +27,13 @@
#ifndef _IPC_IPC_MACHDEP_H_
#define _IPC_IPC_MACHDEP_H_
+#include <mach/message.h>
+
/*
* At times, we need to know the size of a port in bits
*/
-/* 64 bit machines */
-#if defined(__alpha)
-#define PORT_T_SIZE_IN_BITS 64
-#endif
-
-/* default, 32 bit machines */
-#if !defined(PORT_T_SIZE_IN_BITS)
-#define PORT_T_SIZE_IN_BITS 32
-#endif
+#define PORT_T_SIZE_IN_BITS (sizeof(mach_port_t)*8)
+#define PORT_NAME_T_SIZE_IN_BITS (sizeof(mach_port_name_t)*8)
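
Deriving both constants from sizeof keeps them honest across ABIs. Illustrative values (an assumption about the configuration, not a guarantee):

	/* On a 64-bit kernel with 32-bit user port names:
	 *   PORT_T_SIZE_IN_BITS      == 64
	 *   PORT_NAME_T_SIZE_IN_BITS == 32
	 * On i386 both evaluate to 32, matching the old hard-coded default;
	 * the ipc_notify.c hunks below switch the port-name notification
	 * templates to PORT_NAME_T_SIZE_IN_BITS accordingly. */
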
#endif /* _IPC_IPC_MACHDEP_H_ */
diff --git a/ipc/ipc_marequest.c b/ipc/ipc_marequest.c
index 736db838..c096fe24 100644
--- a/ipc/ipc_marequest.c
+++ b/ipc/ipc_marequest.c
@@ -161,12 +161,12 @@ ipc_marequest_init(void)
mach_msg_return_t
ipc_marequest_create(
- ipc_space_t space,
- ipc_port_t port,
- mach_port_t notify,
- ipc_marequest_t *marequestp)
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_port_name_t notify,
+ ipc_marequest_t *marequestp)
{
- mach_port_t name;
+ mach_port_name_t name;
ipc_entry_t entry;
ipc_port_t soright;
ipc_marequest_t marequest;
@@ -258,7 +258,7 @@ ipc_marequest_create(
void
ipc_marequest_cancel(
ipc_space_t space,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_marequest_bucket_t bucket;
ipc_marequest_t marequest, *last;
@@ -279,7 +279,7 @@ ipc_marequest_cancel(
*last = marequest->imar_next;
imarb_unlock(bucket);
- marequest->imar_name = MACH_PORT_NULL;
+ marequest->imar_name = MACH_PORT_NAME_NULL;
}
/*
@@ -294,8 +294,8 @@ ipc_marequest_cancel(
void
ipc_marequest_rename(
ipc_space_t space,
- mach_port_t old,
- mach_port_t new)
+ mach_port_name_t old,
+ mach_port_name_t new)
{
ipc_marequest_bucket_t bucket;
ipc_marequest_t marequest, *last;
@@ -340,7 +340,7 @@ void
ipc_marequest_destroy(ipc_marequest_t marequest)
{
ipc_space_t space = marequest->imar_space;
- mach_port_t name;
+ mach_port_name_t name;
ipc_port_t soright;
is_write_lock(space);
@@ -377,7 +377,7 @@ ipc_marequest_destroy(ipc_marequest_t marequest)
entry->ie_bits &= ~IE_BITS_MAREQUEST;
} else
- name = MACH_PORT_NULL;
+ name = MACH_PORT_NAME_NULL;
}
is_write_unlock(space);
diff --git a/ipc/ipc_marequest.h b/ipc/ipc_marequest.h
index 4f6f7584..a55d4e23 100644
--- a/ipc/ipc_marequest.h
+++ b/ipc/ipc_marequest.h
@@ -63,7 +63,7 @@
typedef struct ipc_marequest {
struct ipc_space *imar_space;
- mach_port_t imar_name;
+ mach_port_name_t imar_name;
struct ipc_port *imar_soright;
struct ipc_marequest *imar_next;
} *ipc_marequest_t;
@@ -84,14 +84,14 @@ ipc_marequest_info(unsigned int *, hash_info_bucket_t *, unsigned int);
extern mach_msg_return_t
ipc_marequest_create(ipc_space_t space, ipc_port_t port,
- mach_port_t notify, ipc_marequest_t *marequestp);
+ mach_port_name_t notify, ipc_marequest_t *marequestp);
extern void
-ipc_marequest_cancel(ipc_space_t space, mach_port_t name);
+ipc_marequest_cancel(ipc_space_t space, mach_port_name_t name);
extern void
ipc_marequest_rename(ipc_space_t space,
- mach_port_t old, mach_port_t new);
+ mach_port_name_t old, mach_port_name_t new);
extern void
ipc_marequest_destroy(ipc_marequest_t marequest);
diff --git a/ipc/ipc_mqueue.c b/ipc/ipc_mqueue.c
index 9138aec4..44e1eb98 100644
--- a/ipc/ipc_mqueue.c
+++ b/ipc/ipc_mqueue.c
@@ -36,6 +36,7 @@
#include <mach/port.h>
#include <mach/message.h>
+#include <machine/copy_user.h>
#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/debug.h>
@@ -259,7 +260,7 @@ ipc_mqueue_send(
ip_unlock(port);
counter(c_ipc_mqueue_send_block++);
- thread_block((void (*)(void)) 0);
+ thread_block(thread_no_continuation);
ip_lock(port);
/* why did we wake up? */
@@ -399,7 +400,7 @@ ipc_mqueue_send(
mach_msg_return_t
ipc_mqueue_copyin(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_mqueue_t *mqueuep,
ipc_object_t *objectp)
{
@@ -520,7 +521,7 @@ ipc_mqueue_receive(
mach_msg_size_t max_size,
mach_msg_timeout_t time_out,
boolean_t resume,
- void (*continuation)(void),
+ continuation_t continuation,
ipc_kmsg_t *kmsgp,
mach_port_seqno_t *seqnop)
{
@@ -540,7 +541,7 @@ ipc_mqueue_receive(
if (kmsg != IKM_NULL) {
/* check space requirements */
- if (kmsg->ikm_header.msgh_size > max_size) {
+ if (msg_usize(&kmsg->ikm_header) > max_size) {
* (mach_msg_size_t *) kmsgp =
kmsg->ikm_header.msgh_size;
imq_unlock(mqueue);
@@ -649,7 +650,7 @@ ipc_mqueue_receive(
/* we have a kmsg; unlock the msg queue */
imq_unlock(mqueue);
- assert(kmsg->ikm_header.msgh_size <= max_size);
+ assert(msg_usize(&kmsg->ikm_header) <= max_size);
}
{
diff --git a/ipc/ipc_mqueue.h b/ipc/ipc_mqueue.h
index 2af5e02e..dfac7456 100644
--- a/ipc/ipc_mqueue.h
+++ b/ipc/ipc_mqueue.h
@@ -68,14 +68,14 @@ extern mach_msg_return_t
ipc_mqueue_send(ipc_kmsg_t, mach_msg_option_t, mach_msg_timeout_t);
extern mach_msg_return_t
-ipc_mqueue_copyin(ipc_space_t, mach_port_t, ipc_mqueue_t *, ipc_object_t *);
+ipc_mqueue_copyin(ipc_space_t, mach_port_name_t, ipc_mqueue_t *, ipc_object_t *);
#define IMQ_NULL_CONTINUE ((void (*)()) 0)
extern mach_msg_return_t
ipc_mqueue_receive(ipc_mqueue_t, mach_msg_option_t,
mach_msg_size_t, mach_msg_timeout_t,
- boolean_t, void (*)(),
+ boolean_t, continuation_t,
ipc_kmsg_t *, mach_port_seqno_t *);
/*
diff --git a/ipc/ipc_notify.c b/ipc/ipc_notify.c
index df5f68bc..d0b71cf2 100644
--- a/ipc/ipc_notify.c
+++ b/ipc/ipc_notify.c
@@ -58,7 +58,7 @@ mach_dead_name_notification_t ipc_notify_dead_name_template;
* Initialize a template for port-deleted notifications.
*/
-void
+static void
ipc_notify_init_port_deleted(mach_port_deleted_notification_t *n)
{
mach_msg_header_t *m = &n->not_header;
@@ -72,7 +72,7 @@ ipc_notify_init_port_deleted(mach_port_deleted_notification_t *n)
m->msgh_id = MACH_NOTIFY_PORT_DELETED;
t->msgt_name = MACH_MSG_TYPE_PORT_NAME;
- t->msgt_size = PORT_T_SIZE_IN_BITS;
+ t->msgt_size = PORT_NAME_T_SIZE_IN_BITS;
t->msgt_number = 1;
t->msgt_inline = TRUE;
t->msgt_longform = FALSE;
@@ -88,7 +88,7 @@ ipc_notify_init_port_deleted(mach_port_deleted_notification_t *n)
* Initialize a template for msg-accepted notifications.
*/
-void
+static void
ipc_notify_init_msg_accepted(mach_msg_accepted_notification_t *n)
{
mach_msg_header_t *m = &n->not_header;
@@ -102,7 +102,7 @@ ipc_notify_init_msg_accepted(mach_msg_accepted_notification_t *n)
m->msgh_id = MACH_NOTIFY_MSG_ACCEPTED;
t->msgt_name = MACH_MSG_TYPE_PORT_NAME;
- t->msgt_size = PORT_T_SIZE_IN_BITS;
+ t->msgt_size = PORT_NAME_T_SIZE_IN_BITS;
t->msgt_number = 1;
t->msgt_inline = TRUE;
t->msgt_longform = FALSE;
@@ -118,7 +118,7 @@ ipc_notify_init_msg_accepted(mach_msg_accepted_notification_t *n)
* Initialize a template for port-destroyed notifications.
*/
-void
+static void
ipc_notify_init_port_destroyed(mach_port_destroyed_notification_t *n)
{
mach_msg_header_t *m = &n->not_header;
@@ -149,7 +149,7 @@ ipc_notify_init_port_destroyed(mach_port_destroyed_notification_t *n)
* Initialize a template for no-senders notifications.
*/
-void
+static void
ipc_notify_init_no_senders(
mach_no_senders_notification_t *n)
{
@@ -164,7 +164,7 @@ ipc_notify_init_no_senders(
m->msgh_id = MACH_NOTIFY_NO_SENDERS;
t->msgt_name = MACH_MSG_TYPE_INTEGER_32;
- t->msgt_size = PORT_T_SIZE_IN_BITS;
+ t->msgt_size = 32;
t->msgt_number = 1;
t->msgt_inline = TRUE;
t->msgt_longform = FALSE;
@@ -180,7 +180,7 @@ ipc_notify_init_no_senders(
* Initialize a template for send-once notifications.
*/
-void
+static void
ipc_notify_init_send_once(
mach_send_once_notification_t *n)
{
@@ -200,7 +200,7 @@ ipc_notify_init_send_once(
* Initialize a template for dead-name notifications.
*/
-void
+static void
ipc_notify_init_dead_name(
mach_dead_name_notification_t *n)
{
@@ -215,7 +215,7 @@ ipc_notify_init_dead_name(
m->msgh_id = MACH_NOTIFY_DEAD_NAME;
t->msgt_name = MACH_MSG_TYPE_PORT_NAME;
- t->msgt_size = PORT_T_SIZE_IN_BITS;
+ t->msgt_size = PORT_NAME_T_SIZE_IN_BITS;
t->msgt_number = 1;
t->msgt_inline = TRUE;
t->msgt_longform = FALSE;
@@ -253,15 +253,15 @@ ipc_notify_init(void)
void
ipc_notify_port_deleted(
- ipc_port_t port,
- mach_port_t name)
+ ipc_port_t port,
+ mach_port_name_t name)
{
ipc_kmsg_t kmsg;
mach_port_deleted_notification_t *n;
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped port-deleted (0x%p, 0x%lx)\n", port, name);
+ printf("dropped port-deleted (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
@@ -287,15 +287,15 @@ ipc_notify_port_deleted(
void
ipc_notify_msg_accepted(
- ipc_port_t port,
- mach_port_t name)
+ ipc_port_t port,
+ mach_port_name_t name)
{
ipc_kmsg_t kmsg;
mach_msg_accepted_notification_t *n;
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped msg-accepted (0x%p, 0x%lx)\n", port, name);
+ printf("dropped msg-accepted (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
@@ -425,15 +425,15 @@ ipc_notify_send_once(ipc_port_t port)
void
ipc_notify_dead_name(
- ipc_port_t port,
- mach_port_t name)
+ ipc_port_t port,
+ mach_port_name_t name)
{
ipc_kmsg_t kmsg;
mach_dead_name_notification_t *n;
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped dead-name (0x%p, 0x%lx)\n", port, name);
+ printf("dropped dead-name (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
diff --git a/ipc/ipc_notify.h b/ipc/ipc_notify.h
index 789bd23c..8940f382 100644
--- a/ipc/ipc_notify.h
+++ b/ipc/ipc_notify.h
@@ -38,10 +38,10 @@ extern void
ipc_notify_init(void);
extern void
-ipc_notify_port_deleted(ipc_port_t, mach_port_t);
+ipc_notify_port_deleted(ipc_port_t, mach_port_name_t);
extern void
-ipc_notify_msg_accepted(ipc_port_t, mach_port_t);
+ipc_notify_msg_accepted(ipc_port_t, mach_port_name_t);
extern void
ipc_notify_port_destroyed(ipc_port_t, ipc_port_t);
@@ -53,6 +53,6 @@ extern void
ipc_notify_send_once(ipc_port_t);
extern void
-ipc_notify_dead_name(ipc_port_t, mach_port_t);
+ipc_notify_dead_name(ipc_port_t, mach_port_name_t);
#endif /* _IPC_IPC_NOTIFY_H_ */
diff --git a/ipc/ipc_object.c b/ipc/ipc_object.c
index a6457c37..1074fb2c 100644
--- a/ipc/ipc_object.c
+++ b/ipc/ipc_object.c
@@ -106,7 +106,7 @@ ipc_object_release(
kern_return_t
ipc_object_translate(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_right_t right,
ipc_object_t *objectp)
{
@@ -150,7 +150,7 @@ ipc_object_translate(
kern_return_t
ipc_object_alloc_dead(
ipc_space_t space,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
ipc_entry_t entry;
kern_return_t kr;
@@ -187,7 +187,7 @@ ipc_object_alloc_dead(
kern_return_t
ipc_object_alloc_dead_name(
ipc_space_t space,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_entry_t entry;
kern_return_t kr;
@@ -231,7 +231,7 @@ ipc_object_alloc(
ipc_object_type_t otype,
mach_port_type_t type,
mach_port_urefs_t urefs,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_object_t *objectp)
{
ipc_object_t object;
@@ -298,7 +298,7 @@ ipc_object_alloc_name(
ipc_object_type_t otype,
mach_port_type_t type,
mach_port_urefs_t urefs,
- mach_port_t name,
+ mach_port_name_t name,
ipc_object_t *objectp)
{
ipc_object_t object;
@@ -405,7 +405,7 @@ ipc_object_copyin_type(
kern_return_t
ipc_object_copyin(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_msg_type_name_t msgt_name,
ipc_object_t *objectp)
{
@@ -608,9 +608,9 @@ ipc_object_copyout(
ipc_object_t object,
mach_msg_type_name_t msgt_name,
boolean_t overflow,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
- mach_port_t name;
+ mach_port_name_t name;
ipc_entry_t entry;
kern_return_t kr;
@@ -666,62 +666,6 @@ ipc_object_copyout(
return kr;
}
-#if 0
-/* XXX same, but don't check for already-existing send rights */
-kern_return_t
-ipc_object_copyout_multiname(space, object, namep)
- ipc_space_t space;
- ipc_object_t object;
- mach_port_t *namep;
-{
- mach_port_t name;
- ipc_entry_t entry;
- kern_return_t kr;
-
- assert(IO_VALID(object));
- assert(io_otype(object) == IOT_PORT);
-
- is_write_lock(space);
-
- for (;;) {
- if (!space->is_active) {
- is_write_unlock(space);
- return KERN_INVALID_TASK;
- }
-
- kr = ipc_entry_alloc(space, &name, &entry);
- if (kr != KERN_SUCCESS) {
- is_write_unlock(space);
- return kr; /* space is unlocked */
- }
-
- assert(IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE);
- assert(entry->ie_object == IO_NULL);
-
- io_lock(object);
- if (!io_active(object)) {
- io_unlock(object);
- ipc_entry_dealloc(space, name, entry);
- is_write_unlock(space);
- return KERN_INVALID_CAPABILITY;
- }
-
- entry->ie_object = object;
- break;
- }
-
- /* space is write-locked and active, object is locked and active */
-
- kr = ipc_right_copyout_multiname(space, name, entry, object);
- /* object is unlocked */
- is_write_unlock(space);
-
- if (kr == KERN_SUCCESS)
- *namep = name;
- return kr;
-}
-#endif /* 0 */
-
/*
* Routine: ipc_object_copyout_name
* Purpose:
@@ -747,9 +691,9 @@ ipc_object_copyout_name(
ipc_object_t object,
mach_msg_type_name_t msgt_name,
boolean_t overflow,
- mach_port_t name)
+ mach_port_name_t name)
{
- mach_port_t oname;
+ mach_port_name_t oname;
ipc_entry_t oentry;
ipc_entry_t entry;
kern_return_t kr;
@@ -826,9 +770,9 @@ ipc_object_copyout_dest(
ipc_space_t space,
ipc_object_t object,
mach_msg_type_name_t msgt_name,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
- mach_port_t name;
+ mach_port_name_t name;
assert(IO_VALID(object));
assert(io_active(object));
@@ -860,7 +804,7 @@ ipc_object_copyout_dest(
if (port->ip_receiver == space)
name = port->ip_receiver_name;
else
- name = MACH_PORT_NULL;
+ name = MACH_PORT_NAME_NULL;
ip_unlock(port);
@@ -895,7 +839,7 @@ ipc_object_copyout_dest(
ip_unlock(port);
ipc_notify_send_once(port);
- name = MACH_PORT_NULL;
+ name = MACH_PORT_NAME_NULL;
}
break;
@@ -930,8 +874,8 @@ ipc_object_copyout_dest(
kern_return_t
ipc_object_rename(
ipc_space_t space,
- mach_port_t oname,
- mach_port_t nname)
+ mach_port_name_t oname,
+ mach_port_name_t nname)
{
ipc_entry_t oentry, nentry;
kern_return_t kr;
diff --git a/ipc/ipc_object.h b/ipc/ipc_object.h
index be5bea71..209fae18 100644
--- a/ipc/ipc_object.h
+++ b/ipc/ipc_object.h
@@ -116,30 +116,30 @@ extern void
ipc_object_release(ipc_object_t);
extern kern_return_t
-ipc_object_translate(ipc_space_t, mach_port_t,
+ipc_object_translate(ipc_space_t, mach_port_name_t,
mach_port_right_t, ipc_object_t *);
extern kern_return_t
-ipc_object_alloc_dead(ipc_space_t, mach_port_t *);
+ipc_object_alloc_dead(ipc_space_t, mach_port_name_t *);
extern kern_return_t
-ipc_object_alloc_dead_name(ipc_space_t, mach_port_t);
+ipc_object_alloc_dead_name(ipc_space_t, mach_port_name_t);
extern kern_return_t
ipc_object_alloc(ipc_space_t, ipc_object_type_t,
mach_port_type_t, mach_port_urefs_t,
- mach_port_t *, ipc_object_t *);
+ mach_port_name_t *, ipc_object_t *);
extern kern_return_t
ipc_object_alloc_name(ipc_space_t, ipc_object_type_t,
mach_port_type_t, mach_port_urefs_t,
- mach_port_t, ipc_object_t *);
+ mach_port_name_t, ipc_object_t *);
extern mach_msg_type_name_t
ipc_object_copyin_type(mach_msg_type_name_t);
extern kern_return_t
-ipc_object_copyin(ipc_space_t, mach_port_t,
+ipc_object_copyin(ipc_space_t, mach_port_name_t,
mach_msg_type_name_t, ipc_object_t *);
extern void
@@ -150,18 +150,18 @@ ipc_object_destroy(ipc_object_t, mach_msg_type_name_t);
extern kern_return_t
ipc_object_copyout(ipc_space_t, ipc_object_t,
- mach_msg_type_name_t, boolean_t, mach_port_t *);
+ mach_msg_type_name_t, boolean_t, mach_port_name_t *);
extern kern_return_t
ipc_object_copyout_name(ipc_space_t, ipc_object_t,
- mach_msg_type_name_t, boolean_t, mach_port_t);
+ mach_msg_type_name_t, boolean_t, mach_port_name_t);
extern void
ipc_object_copyout_dest(ipc_space_t, ipc_object_t,
- mach_msg_type_name_t, mach_port_t *);
+ mach_msg_type_name_t, mach_port_name_t *);
extern kern_return_t
-ipc_object_rename(ipc_space_t, mach_port_t, mach_port_t);
+ipc_object_rename(ipc_space_t, mach_port_name_t, mach_port_name_t);
extern void
ipc_object_print(ipc_object_t);
diff --git a/ipc/ipc_port.c b/ipc/ipc_port.c
index 86a4ee2a..e959f674 100644
--- a/ipc/ipc_port.c
+++ b/ipc/ipc_port.c
@@ -57,9 +57,9 @@
#endif /* MACH_KDB */
-decl_simple_lock_data(, ipc_port_multiple_lock_data)
+def_simple_lock_data(, ipc_port_multiple_lock_data)
-decl_simple_lock_data(, ipc_port_timestamp_lock_data)
+def_simple_lock_data(, ipc_port_timestamp_lock_data)
ipc_port_timestamp_t ipc_port_timestamp_data;
/*
@@ -96,7 +96,7 @@ ipc_port_timestamp(void)
kern_return_t
ipc_port_dnrequest(
ipc_port_t port,
- mach_port_t name,
+ mach_port_name_t name,
ipc_port_t soright,
ipc_port_request_index_t *indexp)
{
@@ -234,7 +234,7 @@ ipc_port_dngrow(ipc_port_t port)
ipc_port_t
ipc_port_dncancel(
ipc_port_t port,
- mach_port_t name,
+ mach_port_name_t name,
ipc_port_request_index_t index)
{
ipc_port_request_t ipr, table;
@@ -431,7 +431,7 @@ ipc_port_set_seqno(
*/
void
-ipc_port_set_protected_payload(ipc_port_t port, unsigned long payload)
+ipc_port_set_protected_payload(ipc_port_t port, rpc_uintptr_t payload)
{
ipc_mqueue_t mqueue;
@@ -506,9 +506,9 @@ ipc_port_clear_receiver(
void
ipc_port_init(
- ipc_port_t port,
- ipc_space_t space,
- mach_port_t name)
+ ipc_port_t port,
+ ipc_space_t space,
+ mach_port_name_t name)
{
/* port->ip_kobject doesn't have to be initialized */
@@ -553,11 +553,11 @@ ipc_port_init(
kern_return_t
ipc_port_alloc(
ipc_space_t space,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_port_t *portp)
{
ipc_port_t port;
- mach_port_t name;
+ mach_port_name_t name;
kern_return_t kr;
kr = ipc_object_alloc(space, IOT_PORT,
@@ -593,7 +593,7 @@ ipc_port_alloc(
kern_return_t
ipc_port_alloc_name(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_port_t *portp)
{
ipc_port_t port;
@@ -734,7 +734,7 @@ ipc_port_destroy(
for (index = 1; index < size; index++) {
ipc_port_request_t ipr = &dnrequests[index];
- mach_port_t name = ipr->ipr_name;
+ mach_port_name_t name = ipr->ipr_name;
ipc_port_t soright;
if (name == MACH_PORT_NULL)
@@ -911,7 +911,7 @@ ipc_port_check_circularity(
ipc_port_t
ipc_port_lookup_notify(
ipc_space_t space,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_port_t port;
ipc_entry_t entry;
@@ -1008,12 +1008,12 @@ ipc_port_copy_send(
* Nothing locked.
*/
-mach_port_t
+mach_port_name_t
ipc_port_copyout_send(
ipc_port_t sright,
ipc_space_t space)
{
- mach_port_t name;
+ mach_port_name_t name;
if (IP_VALID(sright)) {
kern_return_t kr;
@@ -1024,12 +1024,12 @@ ipc_port_copyout_send(
ipc_port_release_send(sright);
if (kr == KERN_INVALID_CAPABILITY)
- name = MACH_PORT_DEAD;
+ name = MACH_PORT_NAME_DEAD;
else
- name = MACH_PORT_NULL;
+ name = MACH_PORT_NAME_NULL;
}
} else
- name = (mach_port_t) sright;
+ name = invalid_port_to_name((mach_port_t)sright);
return name;
}
@@ -1197,7 +1197,7 @@ ipc_port_alloc_special(ipc_space_t space)
* the fast rpc path).
*/
- ipc_port_init(port, space, (mach_port_t)port);
+ ipc_port_init(port, space, (mach_port_name_t)port);
return port;
}
@@ -1249,8 +1249,7 @@ ipc_port_dealloc_special(
*/
void
-ipc_port_print(port)
- const ipc_port_t port;
+ipc_port_print(const ipc_port_t port)
{
printf("port 0x%x\n", port);
@@ -1283,7 +1282,7 @@ ipc_port_print(port)
printf(", sndrs=0x%x", port->ip_blocked.ithq_base);
printf(", kobj=0x%x\n", port->ip_kobject);
- iprintf("protected_payload=%p\n", (void *) port->ip_protected_payload);
+ iprintf("protected_payload=%p\n", (void *) (vm_offset_t) port->ip_protected_payload);
indent -= 2;
}
diff --git a/ipc/ipc_port.h b/ipc/ipc_port.h
index ade69679..192d8806 100644
--- a/ipc/ipc_port.h
+++ b/ipc/ipc_port.h
@@ -50,7 +50,6 @@
#include <ipc/ipc_thread.h>
#include <ipc/ipc_object.h>
#include "ipc_target.h"
-#include <mach/rpc.h>
/*
* A receive right (port) can be in four states:
@@ -97,7 +96,7 @@ struct ipc_port {
mach_port_msgcount_t ip_msgcount;
mach_port_msgcount_t ip_qlimit;
struct ipc_thread_queue ip_blocked;
- unsigned long ip_protected_payload;
+ rpc_uintptr_t ip_protected_payload;
};
#define ip_object ip_target.ipt_object
@@ -137,7 +136,7 @@ typedef struct ipc_port_request {
} notify;
union {
- mach_port_t name;
+ mach_port_name_t name;
struct ipc_table_size *size;
} name;
} *ipc_port_request_t;
@@ -208,14 +207,14 @@ ipc_port_timestamp(void);
(ipc_object_t *) (portp))
extern kern_return_t
-ipc_port_dnrequest(ipc_port_t, mach_port_t, ipc_port_t,
+ipc_port_dnrequest(ipc_port_t, mach_port_name_t, ipc_port_t,
ipc_port_request_index_t *);
extern kern_return_t
ipc_port_dngrow(ipc_port_t);
extern ipc_port_t
-ipc_port_dncancel(ipc_port_t, mach_port_t, ipc_port_request_index_t);
+ipc_port_dncancel(ipc_port_t, mach_port_name_t, ipc_port_request_index_t);
#define ipc_port_dnrename(port, index, oname, nname) \
MACRO_BEGIN \
@@ -264,7 +263,7 @@ extern void
ipc_port_set_seqno(ipc_port_t, mach_port_seqno_t);
extern void
-ipc_port_set_protected_payload(ipc_port_t, unsigned long);
+ipc_port_set_protected_payload(ipc_port_t, rpc_uintptr_t);
extern void
ipc_port_clear_protected_payload(ipc_port_t);
@@ -273,13 +272,13 @@ extern void
ipc_port_clear_receiver(ipc_port_t);
extern void
-ipc_port_init(ipc_port_t, ipc_space_t, mach_port_t);
+ipc_port_init(ipc_port_t, ipc_space_t, mach_port_name_t);
extern kern_return_t
-ipc_port_alloc(ipc_space_t, mach_port_t *, ipc_port_t *);
+ipc_port_alloc(ipc_space_t, mach_port_name_t *, ipc_port_t *);
extern kern_return_t
-ipc_port_alloc_name(ipc_space_t, mach_port_t, ipc_port_t *);
+ipc_port_alloc_name(ipc_space_t, mach_port_name_t, ipc_port_t *);
extern void
ipc_port_destroy(ipc_port_t);
@@ -288,7 +287,7 @@ extern boolean_t
ipc_port_check_circularity(ipc_port_t, ipc_port_t);
extern ipc_port_t
-ipc_port_lookup_notify(ipc_space_t, mach_port_t);
+ipc_port_lookup_notify(ipc_space_t, mach_port_name_t);
extern ipc_port_t
ipc_port_make_send(ipc_port_t);
@@ -296,7 +295,7 @@ ipc_port_make_send(ipc_port_t);
extern ipc_port_t
ipc_port_copy_send(ipc_port_t);
-extern mach_port_t
+extern mach_port_name_t
ipc_port_copyout_send(ipc_port_t, ipc_space_t);
extern void
diff --git a/ipc/ipc_pset.c b/ipc/ipc_pset.c
index 884e8972..30c12a2b 100644
--- a/ipc/ipc_pset.c
+++ b/ipc/ipc_pset.c
@@ -69,11 +69,11 @@
kern_return_t
ipc_pset_alloc(
ipc_space_t space,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_pset_t *psetp)
{
ipc_pset_t pset;
- mach_port_t name;
+ mach_port_name_t name;
kern_return_t kr;
kr = ipc_object_alloc(space, IOT_PORT_SET,
@@ -107,13 +107,12 @@ ipc_pset_alloc(
kern_return_t
ipc_pset_alloc_name(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_pset_t *psetp)
{
ipc_pset_t pset;
kern_return_t kr;
-
kr = ipc_object_alloc_name(space, IOT_PORT_SET,
MACH_PORT_TYPE_PORT_SET, 0,
name, (ipc_object_t *) &pset);
diff --git a/ipc/ipc_pset.h b/ipc/ipc_pset.h
index e9936fef..3f94be53 100644
--- a/ipc/ipc_pset.h
+++ b/ipc/ipc_pset.h
@@ -66,10 +66,10 @@ typedef struct ipc_pset {
#define ips_release(pset) io_release(&(pset)->ips_object)
extern kern_return_t
-ipc_pset_alloc(ipc_space_t, mach_port_t *, ipc_pset_t *);
+ipc_pset_alloc(ipc_space_t, mach_port_name_t *, ipc_pset_t *);
extern kern_return_t
-ipc_pset_alloc_name(ipc_space_t, mach_port_t, ipc_pset_t *);
+ipc_pset_alloc_name(ipc_space_t, mach_port_name_t, ipc_pset_t *);
extern void
ipc_pset_add(ipc_pset_t, ipc_port_t);
diff --git a/ipc/ipc_right.c b/ipc/ipc_right.c
index 773b3b10..79f70c3d 100644
--- a/ipc/ipc_right.c
+++ b/ipc/ipc_right.c
@@ -65,9 +65,9 @@
kern_return_t
ipc_right_lookup_write(
- ipc_space_t space,
- mach_port_t name,
- ipc_entry_t *entryp)
+ ipc_space_t space,
+ mach_port_name_t name,
+ ipc_entry_t *entryp)
{
ipc_entry_t entry;
@@ -105,11 +105,11 @@ boolean_t
ipc_right_reverse(
ipc_space_t space,
ipc_object_t object,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_entry_t *entryp)
{
ipc_port_t port;
- mach_port_t name;
+ mach_port_name_t name;
ipc_entry_t entry;
/* would switch on io_otype to handle multiple types of object */
@@ -184,11 +184,11 @@ ipc_right_reverse(
kern_return_t
ipc_right_dnrequest(
- ipc_space_t space,
- mach_port_t name,
- boolean_t immediate,
- ipc_port_t notify,
- ipc_port_t *previousp)
+ ipc_space_t space,
+ mach_port_name_t name,
+ boolean_t immediate,
+ ipc_port_t notify,
+ ipc_port_t *previousp)
{
ipc_port_t previous;
@@ -304,10 +304,10 @@ ipc_right_dnrequest(
ipc_port_t
ipc_right_dncancel(
- ipc_space_t space,
- ipc_port_t port,
- mach_port_t name,
- ipc_entry_t entry)
+ ipc_space_t space,
+ ipc_port_t port,
+ mach_port_name_t name,
+ ipc_entry_t entry)
{
ipc_port_t dnrequest;
@@ -333,7 +333,7 @@ ipc_right_dncancel(
boolean_t
ipc_right_inuse(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry)
{
ipc_entry_bits_t bits = entry->ie_bits;
@@ -362,7 +362,7 @@ boolean_t
ipc_right_check(
ipc_space_t space,
ipc_port_t port,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry)
{
ipc_entry_bits_t bits;
@@ -431,7 +431,7 @@ ipc_right_check(
void
ipc_right_clean(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry)
{
ipc_entry_bits_t bits = entry->ie_bits;
@@ -555,9 +555,9 @@ ipc_right_clean(
kern_return_t
ipc_right_destroy(
- ipc_space_t space,
- mach_port_t name,
- ipc_entry_t entry)
+ ipc_space_t space,
+ mach_port_name_t name,
+ ipc_entry_t entry)
{
ipc_entry_bits_t bits = entry->ie_bits;
mach_port_type_t type = IE_BITS_TYPE(bits);
@@ -698,7 +698,7 @@ ipc_right_destroy(
kern_return_t
ipc_right_dealloc(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry)
{
ipc_entry_bits_t bits = entry->ie_bits;
@@ -874,7 +874,7 @@ ipc_right_dealloc(
kern_return_t
ipc_right_delta(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_port_right_t right,
mach_port_delta_t delta)
@@ -1196,7 +1196,7 @@ ipc_right_delta(
kern_return_t
ipc_right_info(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_port_type_t *typep,
mach_port_urefs_t *urefsp)
@@ -1239,7 +1239,7 @@ ipc_right_info(
boolean_t
ipc_right_copyin_check(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name)
{
@@ -1327,7 +1327,7 @@ ipc_right_copyin_check(
kern_return_t
ipc_right_copyin(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
boolean_t deadok,
@@ -1660,7 +1660,7 @@ ipc_right_copyin(
void
ipc_right_copyin_undo(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
ipc_object_t object,
@@ -1746,11 +1746,11 @@ ipc_right_copyin_undo(
kern_return_t
ipc_right_copyin_two(
- ipc_space_t space,
- mach_port_t name,
- ipc_entry_t entry,
- ipc_object_t *objectp,
- ipc_port_t *sorightp)
+ ipc_space_t space,
+ mach_port_name_t name,
+ ipc_entry_t entry,
+ ipc_object_t *objectp,
+ ipc_port_t *sorightp)
{
ipc_entry_bits_t bits = entry->ie_bits;
mach_port_urefs_t urefs;
@@ -1846,7 +1846,7 @@ ipc_right_copyin_two(
kern_return_t
ipc_right_copyout(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry,
mach_msg_type_name_t msgt_name,
boolean_t overflow,
@@ -1978,41 +1978,6 @@ ipc_right_copyout(
return KERN_SUCCESS;
}
-#if 0
-/*XXX same, but allows multiple duplicate send rights */
-kern_return_t
-ipc_right_copyout_multiname(space, name, entry, object)
- ipc_space_t space;
- mach_port_t name;
- ipc_entry_t entry;
- ipc_object_t object;
-{
- ipc_entry_bits_t bits = entry->ie_bits;
- ipc_port_t port;
-
- assert(IO_VALID(object));
- assert(io_otype(object) == IOT_PORT);
- assert(io_active(object));
- assert(entry->ie_object == object);
-
- port = (ipc_port_t) object;
-
- assert(port->ip_srights > 0);
-
- assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_NONE);
- assert(IE_BITS_UREFS(bits) == 0);
-
- /* transfer send right and ref to entry */
- ip_unlock(port);
-
- /* entry is locked holding ref, so can use port */
-
- entry->ie_bits = (bits | MACH_PORT_TYPE_SEND) + 1;
-
- return KERN_SUCCESS;
-}
-#endif
-
/*
* Routine: ipc_right_rename
* Purpose:
@@ -2028,11 +1993,11 @@ ipc_right_copyout_multiname(space, name, entry, object)
kern_return_t
ipc_right_rename(
- ipc_space_t space,
- mach_port_t oname,
- ipc_entry_t oentry,
- mach_port_t nname,
- ipc_entry_t nentry)
+ ipc_space_t space,
+ mach_port_name_t oname,
+ ipc_entry_t oentry,
+ mach_port_name_t nname,
+ ipc_entry_t nentry)
{
ipc_entry_bits_t bits = oentry->ie_bits;
ipc_port_request_index_t request = oentry->ie_request;
diff --git a/ipc/ipc_right.h b/ipc/ipc_right.h
index 03642f86..6802abb6 100644
--- a/ipc/ipc_right.h
+++ b/ipc/ipc_right.h
@@ -44,69 +44,69 @@
#define ipc_right_lookup_read ipc_right_lookup_write
extern kern_return_t
-ipc_right_lookup_write(ipc_space_t, mach_port_t, ipc_entry_t *);
+ipc_right_lookup_write(ipc_space_t, mach_port_name_t, ipc_entry_t *);
extern boolean_t
ipc_right_reverse(ipc_space_t, ipc_object_t,
- mach_port_t *, ipc_entry_t *);
+ mach_port_name_t *, ipc_entry_t *);
extern kern_return_t
-ipc_right_dnrequest(ipc_space_t, mach_port_t, boolean_t,
+ipc_right_dnrequest(ipc_space_t, mach_port_name_t, boolean_t,
ipc_port_t, ipc_port_t *);
extern ipc_port_t
-ipc_right_dncancel(ipc_space_t, ipc_port_t, mach_port_t, ipc_entry_t);
+ipc_right_dncancel(ipc_space_t, ipc_port_t, mach_port_name_t, ipc_entry_t);
#define ipc_right_dncancel_macro(space, port, name, entry) \
(((entry)->ie_request == 0) ? IP_NULL : \
ipc_right_dncancel((space), (port), (name), (entry)))
extern boolean_t
-ipc_right_inuse(ipc_space_t, mach_port_t, ipc_entry_t);
+ipc_right_inuse(ipc_space_t, mach_port_name_t, ipc_entry_t);
extern boolean_t
-ipc_right_check(ipc_space_t, ipc_port_t, mach_port_t, ipc_entry_t);
+ipc_right_check(ipc_space_t, ipc_port_t, mach_port_name_t, ipc_entry_t);
extern void
-ipc_right_clean(ipc_space_t, mach_port_t, ipc_entry_t);
+ipc_right_clean(ipc_space_t, mach_port_name_t, ipc_entry_t);
extern kern_return_t
-ipc_right_destroy(ipc_space_t, mach_port_t, ipc_entry_t);
+ipc_right_destroy(ipc_space_t, mach_port_name_t, ipc_entry_t);
extern kern_return_t
-ipc_right_dealloc(ipc_space_t, mach_port_t, ipc_entry_t);
+ipc_right_dealloc(ipc_space_t, mach_port_name_t, ipc_entry_t);
extern kern_return_t
-ipc_right_delta(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_delta(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_port_right_t, mach_port_delta_t);
extern kern_return_t
-ipc_right_info(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_info(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_port_type_t *, mach_port_urefs_t *);
extern boolean_t
-ipc_right_copyin_check(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_copyin_check(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_msg_type_name_t);
extern kern_return_t
-ipc_right_copyin(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_copyin(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_msg_type_name_t, boolean_t,
ipc_object_t *, ipc_port_t *);
extern void
-ipc_right_copyin_undo(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_copyin_undo(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_msg_type_name_t, ipc_object_t, ipc_port_t);
extern kern_return_t
-ipc_right_copyin_two(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_copyin_two(ipc_space_t, mach_port_name_t, ipc_entry_t,
ipc_object_t *, ipc_port_t *);
extern kern_return_t
-ipc_right_copyout(ipc_space_t, mach_port_t, ipc_entry_t,
+ipc_right_copyout(ipc_space_t, mach_port_name_t, ipc_entry_t,
mach_msg_type_name_t, boolean_t, ipc_object_t);
extern kern_return_t
-ipc_right_rename(ipc_space_t, mach_port_t, ipc_entry_t,
- mach_port_t, ipc_entry_t);
+ipc_right_rename(ipc_space_t, mach_port_name_t, ipc_entry_t,
+ mach_port_name_t, ipc_entry_t);
#endif /* _IPC_IPC_RIGHT_H_ */
diff --git a/ipc/ipc_space.c b/ipc/ipc_space.c
index 894cf58e..77040d13 100644
--- a/ipc/ipc_space.c
+++ b/ipc/ipc_space.c
@@ -194,7 +194,7 @@ ipc_space_destroy(
mach_port_type_t type = IE_BITS_TYPE(entry->ie_bits);
if (type != MACH_PORT_TYPE_NONE) {
- mach_port_t name =
+ mach_port_name_t name =
MACH_PORT_MAKEB(entry->ie_name, entry->ie_bits);
ipc_right_clean(space, name, entry);
diff --git a/ipc/ipc_space.h b/ipc/ipc_space.h
index a2aac40a..96d58942 100644
--- a/ipc/ipc_space.h
+++ b/ipc/ipc_space.h
@@ -47,7 +47,9 @@
#include <kern/lock.h>
#include <kern/rdxtree.h>
#include <kern/slab.h>
+#include <kern/printf.h>
#include <ipc/ipc_entry.h>
+#include <ipc/ipc_port.h>
#include <ipc/ipc_types.h>
/*
@@ -142,7 +144,7 @@ void ipc_space_destroy(struct ipc_space *);
static inline ipc_entry_t
ipc_entry_lookup(
ipc_space_t space,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_entry_t entry;
@@ -155,6 +157,18 @@ ipc_entry_lookup(
return entry;
}
+extern volatile boolean_t mach_port_deallocate_debug;
+
+static inline void
+ipc_entry_lookup_failed(mach_msg_header_t *msg, mach_port_name_t name)
+{
+ if (name == MACH_PORT_NAME_NULL || name == MACH_PORT_NAME_DEAD)
+ return;
+ printf("task %.*s looked up a bogus port %lu for %d, most probably a bug.\n", (int) sizeof current_task()->name, current_task()->name, (unsigned long) name, msg->msgh_id);
+ if (mach_port_deallocate_debug)
+ SoftDebugger("ipc_entry_lookup");
+}
+
/*
* Routine: ipc_entry_get
* Purpose:
@@ -170,10 +184,10 @@ ipc_entry_lookup(
static inline kern_return_t
ipc_entry_get(
ipc_space_t space,
- mach_port_t *namep,
+ mach_port_name_t *namep,
ipc_entry_t *entryp)
{
- mach_port_t new_name;
+ mach_port_name_t new_name;
ipc_entry_t free_entry;
assert(space->is_active);
@@ -208,7 +222,7 @@ ipc_entry_get(
* (See comment in ipc/ipc_table.h.)
*/
- assert(MACH_PORT_VALID(new_name));
+ assert(MACH_PORT_NAME_VALID(new_name));
assert(free_entry->ie_object == IO_NULL);
space->is_size += 1;
@@ -229,7 +243,7 @@ ipc_entry_get(
static inline void
ipc_entry_dealloc(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_entry_t entry)
{
assert(space->is_active);
diff --git a/ipc/ipc_target.c b/ipc/ipc_target.c
index aa35a595..94c5d407 100644
--- a/ipc/ipc_target.c
+++ b/ipc/ipc_target.c
@@ -26,7 +26,7 @@
#include "ipc_target.h"
void
-ipc_target_init(struct ipc_target *ipt, mach_port_t name)
+ipc_target_init(struct ipc_target *ipt, mach_port_name_t name)
{
ipt->ipt_name = name;
ipc_mqueue_init(&ipt->ipt_messages);
diff --git a/ipc/ipc_target.h b/ipc/ipc_target.h
index a66e6875..c2cc9241 100644
--- a/ipc/ipc_target.h
+++ b/ipc/ipc_target.h
@@ -27,13 +27,12 @@
#include "ipc_mqueue.h"
#include "ipc_object.h"
-#include <mach/rpc.h>
typedef struct ipc_target {
struct ipc_object ipt_object;
- mach_port_t ipt_name;
+ mach_port_name_t ipt_name;
struct ipc_mqueue ipt_messages;
#ifdef MIGRATING_THREADS
@@ -56,7 +55,7 @@ typedef struct ipc_target {
#define IPT_TYPE_MESSAGE_RPC 1
#define IPT_TYPE_MIGRATE_RPC 2
-void ipc_target_init(struct ipc_target *ipt, mach_port_t name);
+void ipc_target_init(struct ipc_target *ipt, mach_port_name_t name);
void ipc_target_terminate(struct ipc_target *ipt);
#define ipt_lock(ipt) io_lock(&(ipt)->ipt_object)
diff --git a/ipc/mach_debug.c b/ipc/mach_debug.c
index 6ddc89b2..7dca4b6b 100644
--- a/ipc/mach_debug.c
+++ b/ipc/mach_debug.c
@@ -39,9 +39,9 @@
#include <mach/port.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
-#include <mach_debug/ipc_info.h>
#include <mach_debug/hash_info.h>
#include <kern/host.h>
+#include <kern/mach_debug.server.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_space.h>
@@ -70,7 +70,7 @@
kern_return_t
mach_port_get_srights(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_rights_t *srightsp)
{
ipc_port_t port;
@@ -190,7 +190,7 @@ host_ipc_marequest_info(
kern_return_t
mach_port_dnrequest_info(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
unsigned int *totalp,
unsigned int *usedp)
{
@@ -249,7 +249,7 @@ mach_port_dnrequest_info(
kern_return_t
mach_port_kernel_object(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
unsigned int *typep,
vm_offset_t *addrp)
{
diff --git a/ipc/mach_msg.c b/ipc/mach_msg.c
index fe0c43e3..6194ef7b 100644
--- a/ipc/mach_msg.c
+++ b/ipc/mach_msg.c
@@ -39,6 +39,7 @@
#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/message.h>
+#include <machine/copy_user.h>
#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/debug.h>
@@ -89,11 +90,11 @@
mach_msg_return_t
mach_msg_send(
- mach_msg_header_t *msg,
+ mach_msg_user_header_t *msg,
mach_msg_option_t option,
mach_msg_size_t send_size,
mach_msg_timeout_t time_out,
- mach_port_t notify)
+ mach_port_name_t notify)
{
ipc_space_t space = current_space();
vm_map_t map = current_map();
@@ -171,12 +172,12 @@ mach_msg_send(
mach_msg_return_t
mach_msg_receive(
- mach_msg_header_t *msg,
+ mach_msg_user_header_t *msg,
mach_msg_option_t option,
mach_msg_size_t rcv_size,
- mach_port_t rcv_name,
+ mach_port_name_t rcv_name,
mach_msg_timeout_t time_out,
- mach_port_t notify)
+ mach_port_name_t notify)
{
ipc_thread_t self = current_thread();
ipc_space_t space = current_space();
@@ -241,7 +242,7 @@ mach_msg_receive(
return mr;
kmsg->ikm_header.msgh_seqno = seqno;
- if (kmsg->ikm_header.msgh_size > rcv_size) {
+ if (msg_usize(&kmsg->ikm_header) > rcv_size) {
ipc_kmsg_copyout_dest(kmsg, space);
(void) ipc_kmsg_put(msg, kmsg, sizeof *msg);
return MACH_RCV_TOO_LARGE;
@@ -286,11 +287,11 @@ mach_msg_receive_continue(void)
ipc_thread_t self = current_thread();
ipc_space_t space = current_space();
vm_map_t map = current_map();
- mach_msg_header_t *msg = self->ith_msg;
+ mach_msg_user_header_t *msg = self->ith_msg;
mach_msg_option_t option = self->ith_option;
mach_msg_size_t rcv_size = self->ith_rcv_size;
mach_msg_timeout_t time_out = self->ith_timeout;
- mach_port_t notify = self->ith_notify;
+ mach_port_name_t notify = self->ith_notify;
ipc_object_t object = self->ith_object;
ipc_mqueue_t mqueue = self->ith_mqueue;
ipc_kmsg_t kmsg;
@@ -321,7 +322,7 @@ mach_msg_receive_continue(void)
}
kmsg->ikm_header.msgh_seqno = seqno;
- assert(kmsg->ikm_header.msgh_size <= rcv_size);
+ assert(msg_usize(&kmsg->ikm_header) <= rcv_size);
} else {
mr = ipc_mqueue_receive(mqueue, option & MACH_RCV_TIMEOUT,
MACH_MSG_SIZE_MAX, time_out,
@@ -335,7 +336,7 @@ mach_msg_receive_continue(void)
}
kmsg->ikm_header.msgh_seqno = seqno;
- if (kmsg->ikm_header.msgh_size > rcv_size) {
+ if (msg_usize(&kmsg->ikm_header) > rcv_size) {
ipc_kmsg_copyout_dest(kmsg, space);
(void) ipc_kmsg_put(msg, kmsg, sizeof *msg);
thread_syscall_return(MACH_RCV_TOO_LARGE);
@@ -380,13 +381,13 @@ mach_msg_receive_continue(void)
mach_msg_return_t
mach_msg_trap(
- mach_msg_header_t *msg,
+ mach_msg_user_header_t *msg,
mach_msg_option_t option,
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
- mach_port_t rcv_name,
+ mach_port_name_t rcv_name,
mach_msg_timeout_t time_out,
- mach_port_t notify)
+ mach_port_name_t notify)
{
mach_msg_return_t mr;
@@ -449,23 +450,21 @@ mach_msg_trap(
* We must clear ikm_cache before copyinmsg.
*/
- if ((send_size > IKM_SAVED_MSG_SIZE) ||
- (send_size < sizeof(mach_msg_header_t)) ||
- (send_size & 3) ||
- ((kmsg = ikm_cache()) == IKM_NULL))
+ if (((send_size * IKM_EXPAND_FACTOR) > IKM_SAVED_MSG_SIZE) ||
+ (send_size < sizeof(mach_msg_user_header_t)) ||
+ (send_size & 3))
goto slow_get;
- ikm_cache() = IKM_NULL;
- ikm_check_initialized(kmsg, IKM_SAVED_KMSG_SIZE);
+ kmsg = ikm_cache_alloc_try();
+ if (kmsg == IKM_NULL)
+ goto slow_get;
if (copyinmsg(msg, &kmsg->ikm_header,
- send_size)) {
+ send_size, kmsg->ikm_size)) {
ikm_free(kmsg);
goto slow_get;
}
- kmsg->ikm_header.msgh_size = send_size;
-
fast_copyin:
/*
* optimized ipc_kmsg_copyin/ipc_mqueue_copyin
@@ -484,7 +483,7 @@ mach_msg_trap(
MACH_MSG_TYPE_MAKE_SEND_ONCE): {
ipc_port_t reply_port;
{
- mach_port_t reply_name =
+ mach_port_name_t reply_name =
kmsg->ikm_header.msgh_local_port;
if (reply_name != rcv_name)
@@ -496,20 +495,26 @@ mach_msg_trap(
ipc_entry_t entry;
entry = ipc_entry_lookup (space, reply_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, reply_name);
goto abort_request_copyin;
+ }
reply_port = (ipc_port_t) entry->ie_object;
assert(reply_port != IP_NULL);
}
{
- mach_port_t dest_name =
+ mach_port_name_t dest_name =
kmsg->ikm_header.msgh_remote_port;
ipc_entry_t entry;
ipc_entry_bits_t bits;
entry = ipc_entry_lookup (space, dest_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, dest_name);
goto abort_request_copyin;
+ }
bits = entry->ie_bits;
/* check type bits */
@@ -604,7 +609,7 @@ mach_msg_trap(
/* sending a reply message */
{
- mach_port_t reply_name =
+ mach_port_name_t reply_name =
kmsg->ikm_header.msgh_local_port;
if (reply_name != MACH_PORT_NULL)
@@ -616,12 +621,15 @@ mach_msg_trap(
{
ipc_entry_t entry;
- mach_port_t dest_name =
+ mach_port_name_t dest_name =
kmsg->ikm_header.msgh_remote_port;
entry = ipc_entry_lookup (space, dest_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, dest_name);
goto abort_reply_dest_copyin;
+ }
/* check type bits */
if (IE_BITS_TYPE (entry->ie_bits) !=
@@ -669,7 +677,10 @@ mach_msg_trap(
ipc_entry_bits_t bits;
entry = ipc_entry_lookup (space, rcv_name);
if (entry == IE_NULL)
+ {
+ ipc_entry_lookup_failed (msg, rcv_name);
goto abort_reply_rcv_copyin;
+ }
bits = entry->ie_bits;
/* check type bits; looking for receive or set */
@@ -791,7 +802,7 @@ mach_msg_trap(
self->ith_object = rcv_object;
self->ith_mqueue = rcv_mqueue;
- if ((receiver->swap_func == (void (*)()) mach_msg_continue) &&
+ if ((receiver->swap_func == mach_msg_continue) &&
thread_handoff(self, mach_msg_continue, receiver)) {
assert(current_thread() == receiver);
@@ -800,7 +811,7 @@ mach_msg_trap(
* because the receiver is using no options.
*/
} else if ((receiver->swap_func ==
- (void (*)()) exception_raise_continue) &&
+ exception_raise_continue) &&
thread_handoff(self, mach_msg_continue, receiver)) {
counter(c_mach_msg_trap_block_exc++);
assert(current_thread() == receiver);
@@ -832,7 +843,7 @@ mach_msg_trap(
assert(current_thread() == receiver);
if ((receiver->swap_func ==
- (void (*)()) mach_msg_receive_continue) &&
+ mach_msg_receive_continue) &&
((receiver->ith_option & MACH_RCV_NOTIFY) == 0)) {
/*
* We can still use the optimized code.
@@ -942,7 +953,7 @@ mach_msg_trap(
== dest_port);
reply_size = kmsg->ikm_header.msgh_size;
- if (rcv_size < reply_size)
+ if (rcv_size < msg_usize(&kmsg->ikm_header))
goto slow_copyout;
/* optimized ipc_kmsg_copyout/ipc_kmsg_copyout_header */
@@ -952,8 +963,8 @@ mach_msg_trap(
MACH_MSG_TYPE_PORT_SEND_ONCE): {
ipc_port_t reply_port =
(ipc_port_t) kmsg->ikm_header.msgh_local_port;
- mach_port_t dest_name, reply_name;
- unsigned long payload;
+ mach_port_name_t dest_name, reply_name;
+ rpc_uintptr_t payload;
/* receiving a request message */
@@ -1003,7 +1014,7 @@ mach_msg_trap(
entry->ie_bits = gen | (MACH_PORT_TYPE_SEND_ONCE | 1);
}
- assert(MACH_PORT_VALID(reply_name));
+ assert(MACH_PORT_NAME_VALID(reply_name));
entry->ie_object = (ipc_object_t) reply_port;
is_write_unlock(space);
}
@@ -1057,8 +1068,8 @@ mach_msg_trap(
}
case MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0): {
- mach_port_t dest_name;
- unsigned long payload;
+ mach_port_name_t dest_name;
+ rpc_uintptr_t payload;
/* receiving a reply message */
@@ -1102,8 +1113,8 @@ mach_msg_trap(
case MACH_MSGH_BITS_COMPLEX|
MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0): {
- mach_port_t dest_name;
- unsigned long payload;
+ mach_port_name_t dest_name;
+ rpc_uintptr_t payload;
/* receiving a complex reply message */
@@ -1148,9 +1159,7 @@ mach_msg_trap(
kmsg->ikm_header.msgh_remote_port = MACH_PORT_NULL;
mr = ipc_kmsg_copyout_body(
- (vm_offset_t) (&kmsg->ikm_header + 1),
- (vm_offset_t) &kmsg->ikm_header
- + kmsg->ikm_header.msgh_size,
+ kmsg,
space,
current_map());
@@ -1179,11 +1188,12 @@ mach_msg_trap(
if ((kmsg->ikm_size != IKM_SAVED_KMSG_SIZE) ||
copyoutmsg(&kmsg->ikm_header, msg,
- reply_size) ||
- (ikm_cache() != IKM_NULL))
+ reply_size))
+ goto slow_put;
+
+ if (!ikm_cache_free_try(kmsg))
goto slow_put;
- ikm_cache() = kmsg;
thread_syscall_return(MACH_MSG_SUCCESS);
/*NOTREACHED*/
return MACH_MSG_SUCCESS; /* help for the compiler */
@@ -1454,7 +1464,7 @@ mach_msg_trap(
*/
reply_size = kmsg->ikm_header.msgh_size;
- if (rcv_size < reply_size) {
+ if (rcv_size < msg_usize(&kmsg->ikm_header)) {
ipc_kmsg_copyout_dest(kmsg, space);
(void) ipc_kmsg_put(msg, kmsg, sizeof *msg);
thread_syscall_return(MACH_RCV_TOO_LARGE);
@@ -1548,7 +1558,7 @@ mach_msg_trap(
return mr;
kmsg->ikm_header.msgh_seqno = seqno;
- if (rcv_size < kmsg->ikm_header.msgh_size) {
+ if (rcv_size < msg_usize(&kmsg->ikm_header)) {
ipc_kmsg_copyout_dest(kmsg, space);
(void) ipc_kmsg_put(msg, kmsg, sizeof *msg);
return MACH_RCV_TOO_LARGE;
@@ -1613,7 +1623,7 @@ mach_msg_continue(void)
task_t task = thread->task;
ipc_space_t space = task->itk_space;
vm_map_t map = task->map;
- mach_msg_header_t *msg = thread->ith_msg;
+ mach_msg_user_header_t *msg = thread->ith_msg;
mach_msg_size_t rcv_size = thread->ith_rcv_size;
ipc_object_t object = thread->ith_object;
ipc_mqueue_t mqueue = thread->ith_mqueue;
@@ -1632,7 +1642,7 @@ mach_msg_continue(void)
}
kmsg->ikm_header.msgh_seqno = seqno;
- if (kmsg->ikm_header.msgh_size > rcv_size) {
+ if (msg_usize(&kmsg->ikm_header) > rcv_size) {
ipc_kmsg_copyout_dest(kmsg, space);
(void) ipc_kmsg_put(msg, kmsg, sizeof *msg);
thread_syscall_return(MACH_RCV_TOO_LARGE);
@@ -1673,8 +1683,8 @@ mach_msg_interrupt(thread_t thread)
{
ipc_mqueue_t mqueue;
- assert((thread->swap_func == (void (*)()) mach_msg_continue) ||
- (thread->swap_func == (void (*)()) mach_msg_receive_continue));
+ assert((thread->swap_func == mach_msg_continue) ||
+ (thread->swap_func == mach_msg_receive_continue));
mqueue = thread->ith_mqueue;
imq_lock(mqueue);
diff --git a/ipc/mach_msg.h b/ipc/mach_msg.h
index ce0fe4d4..2951bcea 100644
--- a/ipc/mach_msg.h
+++ b/ipc/mach_msg.h
@@ -40,13 +40,13 @@
#include <mach/message.h>
extern mach_msg_return_t
-mach_msg_send(mach_msg_header_t *, mach_msg_option_t,
- mach_msg_size_t, mach_msg_timeout_t, mach_port_t);
+mach_msg_send(mach_msg_user_header_t *, mach_msg_option_t,
+ mach_msg_size_t, mach_msg_timeout_t, mach_port_name_t);
extern mach_msg_return_t
-mach_msg_receive(mach_msg_header_t *, mach_msg_option_t,
- mach_msg_size_t, mach_port_t,
- mach_msg_timeout_t, mach_port_t);
+mach_msg_receive(mach_msg_user_header_t *, mach_msg_option_t,
+ mach_msg_size_t, mach_port_name_t,
+ mach_msg_timeout_t, mach_port_name_t);
extern void
mach_msg_receive_continue(void);
diff --git a/ipc/mach_port.c b/ipc/mach_port.c
index 0757bb84..d8696e23 100644
--- a/ipc/mach_port.c
+++ b/ipc/mach_port.c
@@ -45,7 +45,6 @@
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#ifdef MIGRATING_THREADS
-#include <mach/rpc.h>
#include <kern/task.h>
#include <kern/act.h>
#endif /* MIGRATING_THREADS */
@@ -60,7 +59,7 @@
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
#include <ipc/mach_port.h>
-
+#include <ipc/mach_port.server.h>
/*
@@ -69,12 +68,12 @@
* A helper function for mach_port_names.
*/
-void
+static void
mach_port_names_helper(
ipc_port_timestamp_t timestamp,
ipc_entry_t entry,
- mach_port_t name,
- mach_port_t *names,
+ mach_port_name_t name,
+ mach_port_name_t *names,
mach_port_type_t *types,
ipc_entry_num_t *actualp)
{
@@ -145,14 +144,14 @@ mach_port_names_helper(
kern_return_t
mach_port_names(
ipc_space_t space,
- mach_port_t **namesp,
+ mach_port_name_t **namesp,
mach_msg_type_number_t *namesCnt,
mach_port_type_t **typesp,
mach_msg_type_number_t *typesCnt)
{
ipc_entry_num_t actual; /* this many names */
ipc_port_timestamp_t timestamp; /* logical time of this operation */
- mach_port_t *names;
+ mach_port_name_t *names;
mach_port_type_t *types;
kern_return_t kr;
@@ -161,9 +160,10 @@ mach_port_names(
vm_offset_t addr2; /* allocated memory, for types */
vm_map_copy_t memory1; /* copied-in memory, for names */
vm_map_copy_t memory2; /* copied-in memory, for types */
+ ipc_entry_num_t bound;
/* safe simplifying assumption */
- assert_static(sizeof(mach_port_t) == sizeof(mach_port_type_t));
+ assert_static(sizeof(mach_port_name_t) == sizeof(mach_port_type_t));
if (space == IS_NULL)
return KERN_INVALID_TASK;
@@ -171,7 +171,6 @@ mach_port_names(
size = 0;
for (;;) {
- ipc_entry_num_t bound;
vm_size_t size_needed;
is_read_lock(space);
@@ -187,7 +186,7 @@ mach_port_names(
/* upper bound on number of names in the space */
bound = space->is_size;
- size_needed = round_page(bound * sizeof(mach_port_t));
+ size_needed = round_page(bound * sizeof(mach_port_name_t));
if (size_needed <= size)
break;
@@ -225,7 +224,7 @@ mach_port_names(
}
/* space is read-locked and active */
- names = (mach_port_t *) addr1;
+ names = (mach_port_name_t *) addr1;
types = (mach_port_type_t *) addr2;
actual = 0;
@@ -241,6 +240,7 @@ mach_port_names(
names, types, &actual);
}
}
+ assert(actual < bound);
is_read_unlock(space);
if (actual == 0) {
@@ -254,7 +254,7 @@ mach_port_names(
} else {
vm_size_t size_used;
- size_used = round_page(actual * sizeof(mach_port_t));
+ size_used = round_page(actual * sizeof(mach_port_name_t));
/*
* Make used memory pageable and get it into
@@ -287,7 +287,7 @@ mach_port_names(
}
}
- *namesp = (mach_port_t *) memory1;
+ *namesp = (mach_port_name_t *) memory1;
*namesCnt = actual;
*typesp = (mach_port_type_t *) memory2;
*typesCnt = actual;
@@ -327,7 +327,7 @@ mach_port_names(
kern_return_t
mach_port_type(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_type_t *typep)
{
mach_port_urefs_t urefs;
@@ -368,14 +368,14 @@ mach_port_type(
kern_return_t
mach_port_rename(
- ipc_space_t space,
- mach_port_t oname,
- mach_port_t nname)
+ ipc_space_t space,
+ mach_port_name_t oname,
+ mach_port_name_t nname)
{
if (space == IS_NULL)
return KERN_INVALID_TASK;
- if (!MACH_PORT_VALID(nname))
+ if (!MACH_PORT_NAME_VALID(nname))
return KERN_INVALID_VALUE;
return ipc_object_rename(space, oname, nname);
@@ -416,14 +416,14 @@ kern_return_t
mach_port_allocate_name(
ipc_space_t space,
mach_port_right_t right,
- mach_port_t name)
+ mach_port_name_t name)
{
kern_return_t kr;
if (space == IS_NULL)
return KERN_INVALID_TASK;
- if (!MACH_PORT_VALID(name))
+ if (!MACH_PORT_NAME_VALID(name))
return KERN_INVALID_VALUE;
switch (right) {
@@ -479,7 +479,7 @@ kern_return_t
mach_port_allocate(
ipc_space_t space,
mach_port_right_t right,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
kern_return_t kr;
@@ -534,12 +534,12 @@ mach_port_allocate(
* KERN_INVALID_NAME The name doesn't denote a right.
*/
-static volatile boolean_t mach_port_deallocate_debug = FALSE;
+volatile boolean_t mach_port_deallocate_debug = FALSE;
kern_return_t
mach_port_destroy(
- ipc_space_t space,
- mach_port_t name)
+ ipc_space_t space,
+ mach_port_name_t name)
{
ipc_entry_t entry;
kern_return_t kr;
@@ -549,7 +549,7 @@ mach_port_destroy(
kr = ipc_right_lookup_write(space, name, &entry);
if (kr != KERN_SUCCESS) {
- if (MACH_PORT_VALID (name) && space == current_space()) {
+ if (MACH_PORT_NAME_VALID (name) && space == current_space()) {
printf("task %.*s destroying a bogus port %lu, most probably a bug.\n", (int) sizeof current_task()->name, current_task()->name, (unsigned long) name);
if (mach_port_deallocate_debug)
SoftDebugger("mach_port_deallocate");
@@ -582,8 +582,8 @@ mach_port_destroy(
kern_return_t
mach_port_deallocate(
- ipc_space_t space,
- mach_port_t name)
+ ipc_space_t space,
+ mach_port_name_t name)
{
ipc_entry_t entry;
kern_return_t kr;
@@ -593,7 +593,7 @@ mach_port_deallocate(
kr = ipc_right_lookup_write(space, name, &entry);
if (kr != KERN_SUCCESS) {
- if (MACH_PORT_VALID (name) && space == current_space()) {
+ if (MACH_PORT_NAME_VALID (name) && space == current_space()) {
printf("task %.*s deallocating a bogus port %lu, most probably a bug.\n", (int) sizeof current_task()->name, current_task()->name, (unsigned long) name);
if (mach_port_deallocate_debug)
SoftDebugger("mach_port_deallocate");
@@ -626,7 +626,7 @@ mach_port_deallocate(
kern_return_t
mach_port_get_refs(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_right_t right,
mach_port_urefs_t *urefsp)
{
@@ -700,7 +700,7 @@ mach_port_get_refs(
kern_return_t
mach_port_mod_refs(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_right_t right,
mach_port_delta_t delta)
{
@@ -715,9 +715,9 @@ mach_port_mod_refs(
kr = ipc_right_lookup_write(space, name, &entry);
if (kr != KERN_SUCCESS) {
- if (MACH_PORT_VALID (name) && space == current_space()) {
+ if (MACH_PORT_NAME_VALID (name) && space == current_space()) {
printf("task %.*s %screasing a bogus port "
- "%lu by %d, most probably a bug.\n",
+ "%u by %d, most probably a bug.\n",
(int) (sizeof current_task()->name),
current_task()->name,
delta < 0 ? "de" : "in", name,
@@ -753,7 +753,7 @@ mach_port_mod_refs(
kern_return_t
mach_port_set_qlimit(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_msgcount_t qlimit)
{
ipc_port_t port;
@@ -793,7 +793,7 @@ mach_port_set_qlimit(
kern_return_t
mach_port_set_mscount(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_mscount_t mscount)
{
ipc_port_t port;
@@ -830,7 +830,7 @@ mach_port_set_mscount(
kern_return_t
mach_port_set_seqno(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_seqno_t seqno)
{
ipc_port_t port;
@@ -856,16 +856,16 @@ mach_port_set_seqno(
* A helper function for mach_port_get_set_status.
*/
-void
+static void
mach_port_gst_helper(
ipc_pset_t pset,
ipc_port_t port,
ipc_entry_num_t maxnames,
- mach_port_t *names,
+ mach_port_name_t *names,
ipc_entry_num_t *actualp)
{
ipc_pset_t ip_pset;
- mach_port_t name;
+ mach_port_name_t name;
assert(port != IP_NULL);
@@ -907,8 +907,8 @@ mach_port_gst_helper(
kern_return_t
mach_port_get_set_status(
ipc_space_t space,
- mach_port_t name,
- mach_port_t **members,
+ mach_port_name_t name,
+ mach_port_name_t **members,
mach_msg_type_number_t *membersCnt)
{
ipc_entry_num_t actual; /* this many members */
@@ -926,7 +926,7 @@ mach_port_get_set_status(
for (;;) {
ipc_entry_t entry;
- mach_port_t *names;
+ mach_port_name_t *names;
ipc_pset_t pset;
kr = vm_allocate(ipc_kernel_map, &addr, size, TRUE);
@@ -958,8 +958,8 @@ mach_port_get_set_status(
assert(pset != IPS_NULL);
/* the port set must be active */
- names = (mach_port_t *) addr;
- maxnames = size / sizeof(mach_port_t);
+ names = (mach_port_name_t *) addr;
+ maxnames = size / sizeof(mach_port_name_t);
actual = 0;
ipc_entry_t ientry;
@@ -984,7 +984,7 @@ mach_port_get_set_status(
/* didn't have enough memory; allocate more */
kmem_free(ipc_kernel_map, addr, size);
- size = round_page(actual * sizeof(mach_port_t)) + PAGE_SIZE;
+ size = round_page(actual * sizeof(mach_port_name_t)) + PAGE_SIZE;
}
if (actual == 0) {
@@ -994,7 +994,7 @@ mach_port_get_set_status(
} else {
vm_size_t size_used;
- size_used = round_page(actual * sizeof(mach_port_t));
+ size_used = round_page(actual * sizeof(mach_port_name_t));
/*
* Make used memory pageable and get it into
@@ -1015,7 +1015,7 @@ mach_port_get_set_status(
addr + size_used, size - size_used);
}
- *members = (mach_port_t *) memory;
+ *members = (mach_port_name_t *) memory;
*membersCnt = actual;
return KERN_SUCCESS;
}
@@ -1044,8 +1044,8 @@ mach_port_get_set_status(
kern_return_t
mach_port_move_member(
ipc_space_t space,
- mach_port_t member,
- mach_port_t after)
+ mach_port_name_t member,
+ mach_port_name_t after)
{
ipc_entry_t entry;
ipc_port_t port;
@@ -1138,7 +1138,7 @@ mach_port_move_member(
kern_return_t
mach_port_request_notification(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_msg_id_t id,
mach_port_mscount_t sync,
ipc_port_t notify,
@@ -1222,14 +1222,14 @@ mach_port_request_notification(
kern_return_t
mach_port_insert_right(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
ipc_port_t poly,
mach_msg_type_name_t polyPoly)
{
if (space == IS_NULL)
return KERN_INVALID_TASK;
- if (!MACH_PORT_VALID(name) ||
+ if (!MACH_PORT_NAME_VALID(name) ||
!MACH_MSG_TYPE_PORT_ANY_RIGHT(polyPoly))
return KERN_INVALID_VALUE;
@@ -1259,7 +1259,7 @@ mach_port_insert_right(
kern_return_t
mach_port_extract_right(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_msg_type_name_t msgt_name,
ipc_port_t *poly,
mach_msg_type_name_t *polyPoly)
@@ -1296,7 +1296,7 @@ mach_port_extract_right(
kern_return_t
mach_port_get_receive_status(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
mach_port_status_t *statusp)
{
ipc_port_t port;
@@ -1324,7 +1324,7 @@ mach_port_get_receive_status(
statusp->mps_seqno = port->ip_seqno;
imq_unlock(&pset->ips_messages);
ips_unlock(pset);
- assert(MACH_PORT_VALID(statusp->mps_pset));
+ assert(MACH_PORT_NAME_VALID(statusp->mps_pset));
}
} else {
no_port_set:
@@ -1350,7 +1350,7 @@ mach_port_get_receive_status(
kern_return_t
mach_port_set_rpcinfo(
ipc_space_t space,
- mach_port_t name,
+ mach_port_name_t name,
void *rpc_info,
unsigned int rpc_info_count)
{
@@ -1394,7 +1394,7 @@ void sact_count(void)
kern_return_t
mach_port_create_act(
task_t task,
- mach_port_t name,
+ mach_port_name_t name,
vm_offset_t user_stack,
vm_offset_t user_rbuf,
vm_size_t user_rbuf_size,
@@ -1467,7 +1467,7 @@ mach_port_create_act(
kern_return_t
mach_port_set_syscall_right(
task_t task,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_entry_t entry;
kern_return_t kr;
@@ -1511,8 +1511,8 @@ mach_port_set_syscall_right(
kern_return_t
mach_port_set_protected_payload(
ipc_space_t space,
- mach_port_t name,
- unsigned long payload)
+ mach_port_name_t name,
+ rpc_uintptr_t payload)
{
ipc_port_t port;
kern_return_t kr;
@@ -1548,7 +1548,7 @@ mach_port_set_protected_payload(
kern_return_t
mach_port_clear_protected_payload(
ipc_space_t space,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_port_t port;
kern_return_t kr;
diff --git a/ipc/mach_port.h b/ipc/mach_port.h
index 073f7946..e91e4952 100644
--- a/ipc/mach_port.h
+++ b/ipc/mach_port.h
@@ -30,41 +30,6 @@
#include <ipc/ipc_types.h>
#include <ipc/ipc_entry.h>
-extern kern_return_t
-mach_port_allocate_name (
- ipc_space_t space,
- mach_port_right_t right,
- mach_port_t name);
-
-extern kern_return_t
-mach_port_allocate (
- ipc_space_t space,
- mach_port_right_t right,
- mach_port_t *namep);
-
-extern kern_return_t
-mach_port_destroy(
- ipc_space_t space,
- mach_port_t name);
-
-extern kern_return_t
-mach_port_deallocate(
- ipc_space_t space,
- mach_port_t name);
-
-extern kern_return_t
-mach_port_insert_right(
- ipc_space_t space,
- mach_port_t name,
- ipc_port_t poly,
- mach_msg_type_name_t polyPoly);
-
-kern_return_t
-mach_port_get_receive_status(
- ipc_space_t space,
- mach_port_t name,
- mach_port_status_t *statusp);
-
#if MACH_KDB
void db_debug_port_references (boolean_t enable);
#endif /* MACH_KDB */
diff --git a/ipc/mach_rpc.c b/ipc/mach_rpc.c
deleted file mode 100644
index 6ca46cc9..00000000
--- a/ipc/mach_rpc.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 1994 The University of Utah and
- * the Computer Systems Laboratory (CSL). All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the
- * Computer Systems Laboratory at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- */
-
-#ifdef MIGRATING_THREADS
-
-#include <kern/printf.h>
-#include <mach/kern_return.h>
-#include <mach/port.h>
-#include <mach/rpc.h>
-#include <mach/notify.h>
-#include <mach/mach_param.h>
-#include <mach/vm_param.h>
-#include <mach/vm_prot.h>
-#include <kern/task.h>
-#include <kern/act.h>
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_user.h>
-#include <ipc/ipc_entry.h>
-#include <ipc/ipc_space.h>
-#include <ipc/ipc_object.h>
-#include <ipc/ipc_notify.h>
-#include <ipc/ipc_port.h>
-#include <ipc/ipc_pset.h>
-#include <ipc/ipc_right.h>
-
-#undef DEBUG_MPRC
-
-/*
- * XXX need to identify if one endpoint of an RPC is the kernel to
- * ensure proper port name translation (or lack of). This is bogus.
- */
-#define ISKERNELACT(act) ((act)->task == kernel_task)
-
-/*
- * Copy the indicated port from the task associated with the source
- * activation into the task associated with the destination activation.
- *
- * XXX on errors we should probably clear the portp to avoid leaking
- * info to the other side.
- */
-kern_return_t
-mach_port_rpc_copy(
- struct rpc_port_desc *portp,
- struct Act *sact,
- struct Act *dact)
-{
- ipc_space_t sspace, dspace;
- mach_msg_type_name_t tname;
- ipc_object_t iname;
- kern_return_t kr;
-
-#ifdef DEBUG_MPRC
- printf("m_p_rpc_copy(portp=%x/%x, sact=%x, dact=%x): ",
- portp->name, portp->msgt_name, sact, dact);
-#endif
- sspace = sact->task->itk_space;
- dspace = dact->task->itk_space;
- if (sspace == IS_NULL || dspace == IS_NULL) {
-#ifdef DEBUG_MPRC
- printf("bogus src (%x) or dst (%x) space\n", sspace, dspace);
-#endif
- return KERN_INVALID_TASK;
- }
-
- if (!MACH_MSG_TYPE_PORT_ANY(portp->msgt_name)) {
-#ifdef DEBUG_MPRC
- printf("invalid port type\n");
-#endif
- return KERN_INVALID_VALUE;
- }
-
- if (ISKERNELACT(sact)) {
- iname = (ipc_object_t) portp->name;
- ipc_object_copyin_from_kernel(iname, portp->msgt_name);
- kr = KERN_SUCCESS;
- } else {
- kr = ipc_object_copyin(sspace, portp->name, portp->msgt_name,
- &iname);
- }
- if (kr != KERN_SUCCESS) {
-#ifdef DEBUG_MPRC
- printf("copyin returned %x\n", kr);
-#endif
- return kr;
- }
-
- tname = ipc_object_copyin_type(portp->msgt_name);
- if (!IO_VALID(iname)) {
- portp->name = (mach_port_t) iname;
- portp->msgt_name = tname;
-#ifdef DEBUG_MPRC
- printf("iport %x invalid\n", iname);
-#endif
- return KERN_SUCCESS;
- }
-
- if (ISKERNELACT(dact)) {
- portp->name = (mach_port_t) iname;
- kr = KERN_SUCCESS;
- } else {
- kr = ipc_object_copyout(dspace, iname, tname, TRUE,
- &portp->name);
- }
- if (kr != KERN_SUCCESS) {
- ipc_object_destroy(iname, tname);
-
- if (kr == KERN_INVALID_CAPABILITY)
- portp->name = MACH_PORT_DEAD;
- else {
- portp->name = MACH_PORT_NULL;
-#ifdef DEBUG_MPRC
- printf("copyout iport %x returned %x\n", iname);
-#endif
- return kr;
- }
- }
-
- portp->msgt_name = tname;
-#ifdef DEBUG_MPRC
- printf("portp=%x/%x, iname=%x\n", portp->name, portp->msgt_name, iname);
-#endif
- return KERN_SUCCESS;
-}
-
-kern_return_t
-mach_port_rpc_sig(const ipc_space_t space, const char *name, const char *buffer, unsigned int buflen)
-{
- return KERN_FAILURE;
-}
-
-#endif /* MIGRATING_THREADS */
diff --git a/ipc/port.h b/ipc/port.h
index 49af6e2c..c85685d7 100644
--- a/ipc/port.h
+++ b/ipc/port.h
@@ -39,13 +39,14 @@
#ifndef _IPC_PORT_H_
#define _IPC_PORT_H_
+#include <kern/debug.h>
#include <mach/port.h>
/*
- * mach_port_t must be an unsigned type. Port values
+ * mach_port_name_t must be an unsigned type. Port values
* have two parts, a generation number and an index.
* These macros encapsulate all knowledge of how
- * a mach_port_t is laid out.
+ * a mach_port_name_t is laid out.
*
* If the size of generation numbers changes,
* be sure to update IE_BITS_GEN_MASK and friends
@@ -67,11 +68,10 @@
/*
* Typedefs for code cleanliness. These must all have
- * the same (unsigned) type as mach_port_t.
+ * the same (unsigned) type as mach_port_name_t.
*/
-typedef mach_port_t mach_port_index_t; /* index values */
-typedef mach_port_t mach_port_gen_t; /* generation numbers */
+typedef mach_port_name_t mach_port_gen_t; /* generation numbers */
#define MACH_PORT_UREFS_MAX ((mach_port_urefs_t) ((1 << 16) - 1))
@@ -84,4 +84,23 @@ typedef mach_port_t mach_port_gen_t; /* generation numbers */
#define MACH_PORT_UREFS_UNDERFLOW(urefs, delta) \
(((delta) < 0) && (-(delta) > (urefs)))
+
+static inline mach_port_t invalid_name_to_port(mach_port_name_t name)
+{
+ if (name == MACH_PORT_NAME_NULL)
+ return MACH_PORT_NULL;
+ if (name == MACH_PORT_NAME_DEAD)
+ return MACH_PORT_DEAD;
+ panic("invalid_name_to_port() called with a valid port");
+}
+
+static inline mach_port_name_t invalid_port_to_name(mach_port_t port)
+{
+ if (port == MACH_PORT_NULL)
+ return MACH_PORT_NAME_NULL;
+ if (port == MACH_PORT_DEAD)
+ return MACH_PORT_NAME_DEAD;
+ panic("invalid_port_to_name() called with a valid name");
+}
+
#endif /* _IPC_PORT_H_ */
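The two inline helpers added above centralize the mapping between the invalid port names (MACH_PORT_NAME_NULL/MACH_PORT_NAME_DEAD) and the corresponding port pointers. A minimal sketch of the intended call-site pattern, following the kern/ipc_mig.c hunks later in this patch; MACH_PORT_NAME_VALID, ipc_object_copyin and the ipc types are existing kernel interfaces, while the function itself is only illustrative:

/* Copy in a send right given a port name from user space; names that do
 * not denote an entry (NULL/DEAD) are translated to the pointer sentinels
 * instead of going through the copyin path. */
static kern_return_t
example_copyin_send_right(mach_port_name_t name, ipc_port_t *portp)
{
	kern_return_t kr;

	if (MACH_PORT_NAME_VALID(name)) {
		kr = ipc_object_copyin(current_space(), name,
				       MACH_MSG_TYPE_COPY_SEND,
				       (ipc_object_t *) portp);
		if (kr != KERN_SUCCESS)
			return kr;
	} else
		*portp = (ipc_port_t) invalid_name_to_port(name);

	return KERN_SUCCESS;
}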
diff --git a/kern/.gitignore b/kern/.gitignore
new file mode 100644
index 00000000..72bccc6b
--- /dev/null
+++ b/kern/.gitignore
@@ -0,0 +1,2 @@
+exc.none.defs.c
+exc.none.msgids
diff --git a/kern/ast.c b/kern/ast.c
index d2289344..8c514b3c 100644
--- a/kern/ast.c
+++ b/kern/ast.c
@@ -203,7 +203,7 @@ ast_check(void)
/*
* Need to recheck and possibly update hint.
*/
- simple_lock(&rq->lock);
+ runq_lock(rq);
q = rq->runq + rq->low;
if (rq->count > 0) {
for (i = rq->low; i < NRQS; i++) {
@@ -213,7 +213,7 @@ ast_check(void)
}
rq->low = i;
}
- simple_unlock(&rq->lock);
+ runq_unlock(rq);
}
if (rq->low <= thread->sched_pri) {
diff --git a/kern/ast.h b/kern/ast.h
index c8cbd04f..aded1677 100644
--- a/kern/ast.h
+++ b/kern/ast.h
@@ -40,7 +40,6 @@
* a set of reasons for an AST, and passing this set to ast_taken.
*/
-#include "cpu_number.h"
#include <kern/kern_types.h>
#include <kern/macros.h>
#include <machine/ast.h>
diff --git a/kern/boot_script.c b/kern/boot_script.c
index 9e8f60a7..07ce4b35 100644
--- a/kern/boot_script.c
+++ b/kern/boot_script.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <kern/printf.h>
#include "boot_script.h"
+#include "bootstrap.h"
/* This structure describes a symbol. */
@@ -547,7 +548,7 @@ boot_script_exec (void)
{
char *p, buf[50];
int len;
- mach_port_t name;
+ mach_port_name_t name;
if (arg->type == VAL_SYM)
{
@@ -749,7 +750,7 @@ main (int argc, char **argv)
char buf[500], *p;
int len;
FILE *fp;
- mach_port_t host_port, device_port;
+ mach_port_name_t host_port, device_port;
if (argc < 2)
{
diff --git a/kern/boot_script.h b/kern/boot_script.h
index c007d777..d1f968d6 100644
--- a/kern/boot_script.h
+++ b/kern/boot_script.h
@@ -74,8 +74,8 @@ int boot_script_exec_cmd (void *hook,
int boot_script_task_create (struct cmd *); /* task_create + task_suspend */
int boot_script_task_resume (struct cmd *);
int boot_script_prompt_task_resume (struct cmd *);
-int boot_script_insert_right (struct cmd *, mach_port_t, mach_port_t *namep);
-int boot_script_insert_task_port (struct cmd *, task_t, mach_port_t *namep);
+int boot_script_insert_right (struct cmd *, mach_port_t, mach_port_name_t *namep);
+int boot_script_insert_task_port (struct cmd *, task_t, mach_port_name_t *namep);
/* The user must define this function to clean up the `task_t'
returned by boot_script_task_create. */
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
index 60e1ad58..49358ac6 100644
--- a/kern/bootstrap.c
+++ b/kern/bootstrap.c
@@ -39,7 +39,8 @@
#include <machine/vm_param.h>
#include <machine/pcb.h>
#include <ipc/ipc_port.h>
-#include <ipc/mach_port.h>
+#include <ipc/mach_port.server.h>
+#include <kern/bootstrap.h>
#include <kern/debug.h>
#include <kern/host.h>
#include <kern/printf.h>
@@ -70,15 +71,15 @@
#include <mach/xen.h>
extern struct start_info boot_info; /* XXX put this in a header! */
#else /* MACH_XEN */
-extern struct multiboot_info boot_info; /* XXX put this in a header! */
+extern struct multiboot_raw_info boot_info; /* XXX put this in a header! */
#endif /* MACH_XEN */
#endif
#include "boot_script.h"
-static mach_port_t boot_device_port; /* local name */
-static mach_port_t boot_host_port; /* local name */
+static mach_port_name_t boot_device_port; /* local name */
+static mach_port_name_t boot_host_port; /* local name */
extern char *kernel_cmdline;
@@ -87,12 +88,12 @@ static void user_bootstrap_compat(void); /* forward */
static void bootstrap_exec_compat(void *exec_data); /* forward */
static void get_compat_strings(char *flags_str, char *root_str); /* forward */
-static mach_port_t
+static mach_port_name_t
task_insert_send_right(
task_t task,
ipc_port_t port)
{
- mach_port_t name;
+ mach_port_name_t name;
for (name = 1;; name++) {
kern_return_t kr;
@@ -155,9 +156,25 @@ void bootstrap_create(void)
boot_info.mods_count = n;
boot_info.flags |= MULTIBOOT_MODS;
#else /* MACH_XEN */
+#ifdef __x86_64__
+ struct multiboot_raw_module *bmods32 = ((struct multiboot_raw_module *)
+ phystokv(boot_info.mods_addr));
+ struct multiboot_module *bmods=NULL;
+ if (bmods32)
+ {
+ int i;
+ bmods = alloca(boot_info.mods_count * sizeof(*bmods));
+ for (i=0; i<boot_info.mods_count; i++)
+ {
+ bmods[i].mod_start = bmods32[i].mod_start;
+ bmods[i].mod_end = bmods32[i].mod_end;
+ bmods[i].string = bmods32[i].string;
+ }
+ }
+#else
struct multiboot_module *bmods = ((struct multiboot_module *)
phystokv(boot_info.mods_addr));
-
+#endif
#endif /* MACH_XEN */
if (!(boot_info.flags & MULTIBOOT_MODS)
|| (boot_info.mods_count == 0))
@@ -562,12 +579,12 @@ build_args_and_stack(struct exec_info *boot_exec_info,
vm_offset_t stack_base;
vm_size_t stack_size;
char * arg_ptr;
- int arg_count, envc;
+ long arg_count, envc;
int arg_len;
char * arg_pos;
int arg_item_len;
char * string_pos;
- char * zero = (char *)0;
+ rpc_vm_offset_t zero = 0;
int i;
#define STACK_SIZE (2*64*1024)
@@ -593,17 +610,16 @@ build_args_and_stack(struct exec_info *boot_exec_info,
* trailing 0 pointer
* pointers to environment variables
* trailing 0 pointer
- * and align to integer boundary
*/
- arg_len += (sizeof(integer_t)
- + (arg_count + 1 + envc + 1) * sizeof(char *));
- arg_len = (arg_len + sizeof(integer_t) - 1) & ~(sizeof(integer_t)-1);
+ arg_len += (sizeof(rpc_vm_offset_t)
+ + (arg_count + 1 + envc + 1) * sizeof(rpc_vm_offset_t));
/*
* Allocate the stack.
*/
stack_size = round_page(STACK_SIZE);
stack_base = user_stack_low(stack_size);
+
(void) vm_allocate(current_task()->map,
&stack_base,
stack_size,
@@ -616,29 +632,28 @@ build_args_and_stack(struct exec_info *boot_exec_info,
* Start the strings after the arg-count and pointers
*/
string_pos = (arg_pos
- + sizeof(integer_t)
- + (arg_count + 1 + envc + 1) * sizeof(char *));
+ + sizeof(rpc_vm_offset_t)
+ + (arg_count + 1 + envc + 1) * sizeof(rpc_vm_offset_t));
/*
* first the argument count
*/
(void) copyout(&arg_count,
arg_pos,
- sizeof(integer_t));
- arg_pos += sizeof(integer_t);
+ sizeof(rpc_vm_offset_t));
+ arg_pos += sizeof(rpc_vm_offset_t);
/*
* Then the strings and string pointers for each argument
*/
for (i = 0; i < arg_count; ++i) {
+ rpc_vm_offset_t pos = convert_vm_to_user((vm_offset_t) string_pos);
arg_ptr = argv[i];
arg_item_len = strlen(arg_ptr) + 1; /* include trailing 0 */
/* set string pointer */
- (void) copyout(&string_pos,
- arg_pos,
- sizeof (char *));
- arg_pos += sizeof(char *);
+ (void) copyout(&pos, arg_pos, sizeof (rpc_vm_offset_t));
+ arg_pos += sizeof(rpc_vm_offset_t);
/* copy string */
(void) copyout(arg_ptr, string_pos, arg_item_len);
@@ -648,21 +663,20 @@ build_args_and_stack(struct exec_info *boot_exec_info,
/*
* Null terminator for argv.
*/
- (void) copyout(&zero, arg_pos, sizeof(char *));
- arg_pos += sizeof(char *);
+ (void) copyout(&zero, arg_pos, sizeof(rpc_vm_offset_t));
+ arg_pos += sizeof(rpc_vm_offset_t);
/*
* Then the strings and string pointers for each environment variable
*/
for (i = 0; i < envc; ++i) {
+ rpc_vm_offset_t pos = convert_vm_to_user((vm_offset_t) string_pos);
arg_ptr = envp[i];
arg_item_len = strlen(arg_ptr) + 1; /* include trailing 0 */
/* set string pointer */
- (void) copyout(&string_pos,
- arg_pos,
- sizeof (char *));
- arg_pos += sizeof(char *);
+ (void) copyout(&pos, arg_pos, sizeof (rpc_vm_offset_t));
+ arg_pos += sizeof(rpc_vm_offset_t);
/* copy string */
(void) copyout(arg_ptr, string_pos, arg_item_len);
@@ -672,7 +686,7 @@ build_args_and_stack(struct exec_info *boot_exec_info,
/*
* Null terminator for envp.
*/
- (void) copyout(&zero, arg_pos, sizeof(char *));
+ (void) copyout(&zero, arg_pos, sizeof(rpc_vm_offset_t));
}
@@ -888,7 +902,7 @@ boot_script_free_task (task_t task, int aborting)
}
int
-boot_script_insert_right (struct cmd *cmd, mach_port_t port, mach_port_t *name)
+boot_script_insert_right (struct cmd *cmd, mach_port_t port, mach_port_name_t *name)
{
*name = task_insert_send_right (cmd->task,
ipc_port_make_send((ipc_port_t) port));
@@ -896,7 +910,7 @@ boot_script_insert_right (struct cmd *cmd, mach_port_t port, mach_port_t *name)
}
int
-boot_script_insert_task_port (struct cmd *cmd, task_t task, mach_port_t *name)
+boot_script_insert_task_port (struct cmd *cmd, task_t task, mach_port_name_t *name)
{
*name = task_insert_send_right (cmd->task,
ipc_port_make_send(task->itk_sself));
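The build_args_and_stack() hunks above switch the user stack image from native char * slots to fixed-width rpc_vm_offset_t slots, and drop the separate alignment round-up since the slot size already provides it. A small standalone model of the size computation (compilable by itself; rpc_vm_offset_t is assumed to be 32 bits here, as on USER32 configurations, and the argv/envp values are made up for the example):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t rpc_vm_offset_t;	/* assumption: 32-bit user offsets */

/* Bytes needed for: argc slot, argv pointers + NULL, envp pointers + NULL,
 * then the argument and environment strings themselves. */
static size_t
stack_image_size(int argc, char **argv, int envc, char **envp)
{
	size_t len = 0;
	int i;

	for (i = 0; i < argc; i++)
		len += strlen(argv[i]) + 1;	/* include trailing 0 */
	for (i = 0; i < envc; i++)
		len += strlen(envp[i]) + 1;

	len += sizeof(rpc_vm_offset_t)			/* argument count */
	     + (argc + 1 + envc + 1) * sizeof(rpc_vm_offset_t);
	return len;
}

int main(void)
{
	char *argv[] = { "/hurd/ext2fs.static", "--readonly" };
	char *envp[] = { "TERM=mach" };

	printf("stack image: %zu bytes\n",
	       stack_image_size(2, argv, 1, envp));
	return 0;
}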
diff --git a/kern/bootstrap.h b/kern/bootstrap.h
index b8ed8d9f..309a63f6 100644
--- a/kern/bootstrap.h
+++ b/kern/bootstrap.h
@@ -19,6 +19,8 @@
#ifndef _KERN_BOOTSTRAP_H_
#define _KERN_BOOTSTRAP_H_
-extern void bootstrap_create(void);
+#include <kern/boot_script.h>
+
+void bootstrap_create(void);
#endif /* _KERN_BOOTSTRAP_H_ */
diff --git a/kern/cpu_number.h b/kern/cpu_number.h
index 0be2d338..1abe3dbb 100644
--- a/kern/cpu_number.h
+++ b/kern/cpu_number.h
@@ -37,7 +37,8 @@ extern int master_cpu; /* 'master' processor - keeps time */
#if (NCPUS == 1)
/* cpu number is always 0 on a single processor system */
-#define cpu_number() (0)
+#define cpu_number() (0)
+#define cpu_number_slow() (0)
#endif /* NCPUS == 1 */
diff --git a/kern/debug.c b/kern/debug.c
index 78c55f81..eec2f148 100644
--- a/kern/debug.c
+++ b/kern/debug.c
@@ -41,7 +41,7 @@
#include <device/cons.h>
#if NCPUS>1
-simple_lock_data_t Assert_print_lock; /* uninited, we take our chances */
+simple_lock_irq_data_t Assert_print_lock; /* uninited, we take our chances */
#endif
static void
@@ -54,10 +54,10 @@ void
Assert(const char *exp, const char *file, int line, const char *fun)
{
#if NCPUS > 1
- simple_lock(&Assert_print_lock);
+ spl_t s = simple_lock_irq(&Assert_print_lock);
printf("{cpu%d} %s:%d: %s: Assertion `%s' failed.",
cpu_number(), file, line, fun, exp);
- simple_unlock(&Assert_print_lock);
+ simple_unlock_irq(s, &Assert_print_lock);
#else
printf("%s:%d: %s: Assertion `%s' failed.",
file, line, fun, exp);
@@ -66,8 +66,7 @@ Assert(const char *exp, const char *file, int line, const char *fun)
Debugger("assertion failure");
}
-void SoftDebugger(message)
- const char *message;
+void SoftDebugger(const char *message)
{
printf("Debugger invoked: %s\n", message);
@@ -98,8 +97,7 @@ void SoftDebugger(message)
#endif
}
-void Debugger(message)
- const char *message;
+void Debugger(const char *message)
{
#if !MACH_KDB
panic("Debugger invoked, but there isn't one!");
@@ -114,8 +112,7 @@ void Debugger(message)
even before panic_init() gets called from the "normal" place in kern/startup.c.
(panic_init() still needs to be called from there
to make sure we get initialized before starting multiple processors.) */
-boolean_t panic_lock_initialized = FALSE;
-decl_simple_lock_data(, panic_lock)
+def_simple_lock_irq_data(static, panic_lock)
const char *panicstr;
int paniccpu;
@@ -123,11 +120,6 @@ int paniccpu;
void
panic_init(void)
{
- if (!panic_lock_initialized)
- {
- panic_lock_initialized = TRUE;
- simple_lock_init(&panic_lock);
- }
}
#if ! MACH_KBD
@@ -139,13 +131,14 @@ void
Panic(const char *file, int line, const char *fun, const char *s, ...)
{
va_list listp;
+ spl_t spl;
panic_init();
- simple_lock(&panic_lock);
+ spl = simple_lock_irq(&panic_lock);
if (panicstr) {
if (cpu_number() != paniccpu) {
- simple_unlock(&panic_lock);
+ simple_unlock_irq(spl, &panic_lock);
halt_cpu();
/* NOTREACHED */
}
@@ -154,7 +147,7 @@ Panic(const char *file, int line, const char *fun, const char *s, ...)
panicstr = s;
paniccpu = cpu_number();
}
- simple_unlock(&panic_lock);
+ simple_unlock_irq(spl, &panic_lock);
printf("panic ");
#if NCPUS > 1
printf("{cpu%d} ", paniccpu);
@@ -205,6 +198,8 @@ unsigned char __stack_chk_guard [ sizeof (vm_offset_t) ] =
[ sizeof (vm_offset_t) - 1 ] = 0xff,
};
+void __stack_chk_fail (void);
+
void
__stack_chk_fail (void)
{
diff --git a/kern/elf-load.c b/kern/elf-load.c
index 3e80edfe..ce86327c 100644
--- a/kern/elf-load.c
+++ b/kern/elf-load.c
@@ -31,8 +31,8 @@ int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
void *handle, exec_info_t *out_info)
{
vm_size_t actual;
- Elf32_Ehdr x;
- Elf32_Phdr *phdr, *ph;
+ Elf_Ehdr x;
+ Elf_Phdr *phdr, *ph;
vm_size_t phsize;
int i;
int result;
@@ -51,7 +51,7 @@ int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
return EX_NOT_EXECUTABLE;
/* Make sure the file is of the right architecture. */
- if ((x.e_ident[EI_CLASS] != ELFCLASS32) ||
+ if ((x.e_ident[EI_CLASS] != MY_ELF_CLASS) ||
(x.e_ident[EI_DATA] != MY_EI_DATA) ||
(x.e_machine != MY_E_MACHINE))
return EX_WRONG_ARCH;
@@ -65,7 +65,7 @@ int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
out_info->entry = (vm_offset_t) x.e_entry + loadbase;
phsize = x.e_phnum * x.e_phentsize;
- phdr = (Elf32_Phdr *)alloca(phsize);
+ phdr = (Elf_Phdr *)alloca(phsize);
result = (*read)(handle, x.e_phoff, phdr, phsize, &actual);
if (result)
@@ -75,7 +75,7 @@ int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
for (i = 0; i < x.e_phnum; i++)
{
- ph = (Elf32_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize);
+ ph = (Elf_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize);
if (ph->p_type == PT_LOAD)
{
exec_sectype_t type = EXEC_SECTYPE_ALLOC |
diff --git a/kern/eventcount.c b/kern/eventcount.c
index a9d7bd41..1cbc15a2 100644
--- a/kern/eventcount.c
+++ b/kern/eventcount.c
@@ -244,7 +244,7 @@ evc_signal(evc_t ev)
#if (NCPUS > 1)
retry:
while((thread->state & TH_RUN) || thread->lock.lock_data)
- ;
+ cpu_pause();
#endif
thread_lock(thread);
@@ -260,12 +260,12 @@ evc_signal(evc_t ev)
* on run queue.
*/
thread->state = (state &~ TH_WAIT) | TH_RUN;
- thread_unlock(thread);
#if NCPUS > 1
thread_setrun(thread, TRUE);
#else
simpler_thread_setrun(thread, TRUE);
#endif
+ thread_unlock(thread);
break;
case TH_RUN | TH_WAIT:
@@ -339,7 +339,7 @@ simpler_thread_setrun(
ast_on(cpu_number(), AST_BLOCK);
whichq = (th)->sched_pri;
- simple_lock(&(rq)->lock); /* lock the run queue */
+ runq_lock(rq); /* lock the run queue */
enqueue_head(&(rq)->runq[whichq], &((th)->links));
if (whichq < (rq)->low || (rq)->count == 0)
@@ -350,7 +350,7 @@ simpler_thread_setrun(
#else
(th)->runq = (rq);
#endif
- simple_unlock(&(rq)->lock);
+ runq_unlock(rq);
/*
* Turn off first_quantum to allow context switch.
diff --git a/kern/eventcount.h b/kern/eventcount.h
index 7cc82207..598d7e02 100644
--- a/kern/eventcount.h
+++ b/kern/eventcount.h
@@ -35,6 +35,8 @@
#ifndef _KERN_EVENTCOUNT_H_
#define _KERN_EVENTCOUNT_H_ 1
+#include <kern/lock.h>
+
/* kernel visible only */
typedef struct evc {
diff --git a/kern/exception.c b/kern/exception.c
index 246c1419..15f29705 100644
--- a/kern/exception.c
+++ b/kern/exception.c
@@ -45,6 +45,7 @@
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/processor.h>
+#include <kern/printf.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/exception.h>
@@ -84,9 +85,9 @@ boolean_t debug_user_with_kdb = FALSE;
void
exception(
- integer_t _exception,
- integer_t code,
- integer_t subcode)
+ integer_t _exception,
+ integer_t code,
+ long_integer_t subcode)
{
ipc_thread_t self = current_thread();
ipc_port_t exc_port;
@@ -156,9 +157,9 @@ exception(
void
exception_try_task(
- integer_t _exception,
- integer_t code,
- integer_t subcode)
+ integer_t _exception,
+ integer_t code,
+ long_integer_t subcode)
{
ipc_thread_t self = current_thread();
task_t task = self->task;
@@ -276,31 +277,47 @@ struct mach_exception {
mach_msg_type_t codeType;
integer_t code;
mach_msg_type_t subcodeType;
- integer_t subcode;
+ rpc_long_integer_t subcode;
};
#define INTEGER_T_SIZE_IN_BITS (8 * sizeof(integer_t))
#define INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_T
+#define RPC_LONG_INTEGER_T_SIZE_IN_BITS (8 * sizeof(rpc_long_integer_t))
+#if defined(__x86_64__) && !defined(USER32)
+#define RPC_LONG_INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_64
+#else
+#define RPC_LONG_INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_32
+#endif
/* in mach/machine/vm_types.h */
mach_msg_type_t exc_port_proto = {
- /* msgt_name = */ MACH_MSG_TYPE_PORT_SEND,
- /* msgt_size = */ PORT_T_SIZE_IN_BITS,
- /* msgt_number = */ 1,
- /* msgt_inline = */ TRUE,
- /* msgt_longform = */ FALSE,
- /* msgt_deallocate = */ FALSE,
- /* msgt_unused = */ 0
+ .msgt_name = MACH_MSG_TYPE_PORT_SEND,
+ .msgt_size = PORT_T_SIZE_IN_BITS,
+ .msgt_number = 1,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
mach_msg_type_t exc_code_proto = {
- /* msgt_name = */ INTEGER_T_TYPE,
- /* msgt_size = */ INTEGER_T_SIZE_IN_BITS,
- /* msgt_number = */ 1,
- /* msgt_inline = */ TRUE,
- /* msgt_longform = */ FALSE,
- /* msgt_deallocate = */ FALSE,
- /* msgt_unused = */ 0
+ .msgt_name = INTEGER_T_TYPE,
+ .msgt_size = INTEGER_T_SIZE_IN_BITS,
+ .msgt_number = 1,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
+};
+
+mach_msg_type_t exc_subcode_proto = {
+ .msgt_name = RPC_LONG_INTEGER_T_TYPE,
+ .msgt_size = RPC_LONG_INTEGER_T_SIZE_IN_BITS,
+ .msgt_number = 1,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
/*
@@ -328,9 +345,9 @@ exception_raise(
ipc_port_t dest_port,
ipc_port_t thread_port,
ipc_port_t task_port,
- integer_t _exception,
- integer_t code,
- integer_t subcode)
+ integer_t _exception,
+ integer_t code,
+ long_integer_t subcode)
{
ipc_thread_t self = current_thread();
ipc_thread_t receiver;
@@ -349,16 +366,9 @@ exception_raise(
* and it will give the buffer back with its reply.
*/
- kmsg = ikm_cache();
- if (kmsg != IKM_NULL) {
- ikm_cache() = IKM_NULL;
- ikm_check_initialized(kmsg, IKM_SAVED_KMSG_SIZE);
- } else {
- kmsg = ikm_alloc(IKM_SAVED_MSG_SIZE);
- if (kmsg == IKM_NULL)
- panic("exception_raise");
- ikm_init(kmsg, IKM_SAVED_MSG_SIZE);
- }
+ kmsg = ikm_cache_alloc();
+ if (kmsg == IKM_NULL)
+ panic("exception_raise");
/*
* We need a reply port for the RPC.
@@ -448,9 +458,8 @@ exception_raise(
receiver = ipc_thread_queue_first(&dest_mqueue->imq_threads);
if ((receiver == ITH_NULL) ||
- !((receiver->swap_func == (void (*)()) mach_msg_continue) ||
- ((receiver->swap_func ==
- (void (*)()) mach_msg_receive_continue) &&
+ !((receiver->swap_func == mach_msg_continue) ||
+ ((receiver->swap_func == mach_msg_receive_continue) &&
(sizeof(struct mach_exception) <= receiver->ith_msize) &&
((receiver->ith_option & MACH_RCV_NOTIFY) == 0))) ||
!thread_handoff(self, exception_raise_continue, receiver)) {
@@ -521,7 +530,7 @@ exception_raise(
exc->exception = _exception;
exc->codeType = exc_code_proto;
exc->code = code;
- exc->subcodeType = exc_code_proto;
+ exc->subcodeType = exc_subcode_proto;
exc->subcode = subcode;
/*
@@ -606,10 +615,12 @@ exception_raise(
{
kern_return_t kr;
ipc_entry_t entry;
+ mach_port_name_t port_name;
- kr = ipc_entry_get (space, &exc->Head.msgh_remote_port, &entry);
+ kr = ipc_entry_get (space, &port_name, &entry);
if (kr)
goto abort_copyout;
+ exc->Head.msgh_remote_port = (mach_port_t) port_name;
{
mach_port_gen_t gen;
@@ -656,10 +667,10 @@ exception_raise(
* to handle the two ports in the body.
*/
- mr = (ipc_kmsg_copyout_object(space, (ipc_object_t) thread_port,
- MACH_MSG_TYPE_PORT_SEND, &exc->thread) |
- ipc_kmsg_copyout_object(space, (ipc_object_t) task_port,
- MACH_MSG_TYPE_PORT_SEND, &exc->task));
+ mr = (ipc_kmsg_copyout_object_to_port(space, (ipc_object_t) thread_port,
+ MACH_MSG_TYPE_PORT_SEND, &exc->thread) |
+ ipc_kmsg_copyout_object_to_port(space, (ipc_object_t) task_port,
+ MACH_MSG_TYPE_PORT_SEND, &exc->task));
if (mr != MACH_MSG_SUCCESS) {
(void) ipc_kmsg_put(receiver->ith_msg, kmsg,
kmsg->ikm_header.msgh_size);
@@ -677,15 +688,20 @@ exception_raise(
assert(kmsg->ikm_size == IKM_SAVED_KMSG_SIZE);
if (copyoutmsg(&kmsg->ikm_header, receiver->ith_msg,
- sizeof(struct mach_exception)) ||
- (ikm_cache() != IKM_NULL)) {
+ sizeof(struct mach_exception))) {
+ mr = ipc_kmsg_put(receiver->ith_msg, kmsg,
+ kmsg->ikm_header.msgh_size);
+ thread_syscall_return(mr);
+ /*NOTREACHED*/
+ }
+
+ if (!ikm_cache_free_try(kmsg)) {
mr = ipc_kmsg_put(receiver->ith_msg, kmsg,
kmsg->ikm_header.msgh_size);
thread_syscall_return(mr);
/*NOTREACHED*/
}
- ikm_cache() = kmsg;
thread_syscall_return(MACH_MSG_SUCCESS);
/*NOTREACHED*/
#ifndef __GNUC__
@@ -723,7 +739,7 @@ exception_raise(
exc->exception = _exception;
exc->codeType = exc_code_proto;
exc->code = code;
- exc->subcodeType = exc_code_proto;
+ exc->subcodeType = exc_subcode_proto;
exc->subcode = subcode;
ipc_mqueue_send_always(kmsg);
@@ -762,13 +778,13 @@ exception_raise(
/* Type descriptor for the return code. */
mach_msg_type_t exc_RetCode_proto = {
- /* msgt_name = */ MACH_MSG_TYPE_INTEGER_32,
- /* msgt_size = */ 32,
- /* msgt_number = */ 1,
- /* msgt_inline = */ TRUE,
- /* msgt_longform = */ FALSE,
- /* msgt_deallocate = */ FALSE,
- /* msgt_unused = */ 0
+ .msgt_name = MACH_MSG_TYPE_INTEGER_32,
+ .msgt_size = 32,
+ .msgt_number = 1,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
/*
@@ -805,11 +821,7 @@ exception_parse_reply(ipc_kmsg_t kmsg)
kr = msg->RetCode;
- if ((kmsg->ikm_size == IKM_SAVED_KMSG_SIZE) &&
- (ikm_cache() == IKM_NULL))
- ikm_cache() = kmsg;
- else
- ikm_free(kmsg);
+ ikm_cache_free(kmsg);
return kr;
}
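The exception_raise() and exception_parse_reply() hunks above drop the open-coded manipulation of the per-cpu ikm_cache() slot in favour of the ikm_cache_alloc()/ikm_cache_free()/ikm_cache_free_try() helpers. A condensed sketch of the resulting allocate/release pattern; the helper names are exactly the ones used in the diff, while the surrounding function and its comments are illustrative and assume the helpers keep the old caching behaviour:

static void
example_kernel_message(void)
{
	ipc_kmsg_t kmsg;

	kmsg = ikm_cache_alloc();	/* reuse the cached kmsg buffer,
					   or allocate a fresh one */
	if (kmsg == IKM_NULL)
		panic("example_kernel_message");

	/* ... build the message header and body, send it, wait ... */

	ikm_cache_free(kmsg);		/* presumably re-caches a buffer of
					   IKM_SAVED_KMSG_SIZE if the slot is
					   empty, otherwise frees it */
}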
diff --git a/kern/exception.h b/kern/exception.h
index 55902dd1..36138da8 100644
--- a/kern/exception.h
+++ b/kern/exception.h
@@ -26,13 +26,13 @@ extern void
exception(
integer_t _exception,
integer_t code,
- integer_t subcode) __attribute__ ((noreturn));
+ long_integer_t subcode) __attribute__ ((noreturn));
extern void
exception_try_task(
integer_t _exception,
integer_t code,
- integer_t subcode) __attribute__ ((noreturn));
+ long_integer_t subcode) __attribute__ ((noreturn));
extern void
exception_no_server(void) __attribute__ ((noreturn));
@@ -44,7 +44,7 @@ exception_raise(
ipc_port_t task_port,
integer_t _exception,
integer_t code,
- integer_t subcode) __attribute__ ((noreturn));
+ long_integer_t subcode) __attribute__ ((noreturn));
extern kern_return_t
exception_parse_reply(ipc_kmsg_t kmsg);
diff --git a/kern/gsync.c b/kern/gsync.c
index e73a6cf0..31b564ca 100644
--- a/kern/gsync.c
+++ b/kern/gsync.c
@@ -134,11 +134,12 @@ probe_address (vm_map_t map, vm_offset_t addr,
vm_prot_t rprot;
boolean_t wired_p;
- if (vm_map_lookup (&map, addr, prot, &ver,
+ if (vm_map_lookup (&map, addr, prot, TRUE, &ver,
&vap->obj, &vap->off, &rprot, &wired_p) != KERN_SUCCESS)
return (-1);
else if ((rprot & prot) != prot)
{
+ vm_map_unlock_read (map);
vm_object_unlock (vap->obj);
return (-1);
}
@@ -227,18 +228,13 @@ kern_return_t gsync_wait (task_t task, vm_offset_t addr,
else if (addr % sizeof (int) != 0)
return (KERN_INVALID_ADDRESS);
- vm_map_lock_read (task->map);
-
struct gsync_waiter w;
struct vm_args va;
boolean_t remote = task != current_task ();
int bucket = gsync_prepare_key (task, addr, flags, &w.key, &va);
if (bucket < 0)
- {
- vm_map_unlock_read (task->map);
- return (KERN_INVALID_ADDRESS);
- }
+ return (KERN_INVALID_ADDRESS);
else if (remote)
/* The VM object is returned locked. However, we are about to acquire
* a sleeping lock for a bucket, so we must not hold any simple
@@ -354,17 +350,12 @@ kern_return_t gsync_wake (task_t task,
else if (addr % sizeof (int) != 0)
return (KERN_INVALID_ADDRESS);
- vm_map_lock_read (task->map);
-
union gsync_key key;
struct vm_args va;
int bucket = gsync_prepare_key (task, addr, flags, &key, &va);
if (bucket < 0)
- {
- vm_map_unlock_read (task->map);
- return (KERN_INVALID_ADDRESS);
- }
+ return (KERN_INVALID_ADDRESS);
else if (current_task () != task && (flags & GSYNC_MUTATE) != 0)
/* See above on why we do this. */
vm_object_reference_locked (va.obj);
@@ -437,6 +428,7 @@ kern_return_t gsync_requeue (task_t task, vm_offset_t src,
int src_bkt = gsync_prepare_key (task, src, flags, &src_k, &va);
if (src_bkt < 0)
return (KERN_INVALID_ADDRESS);
+ vm_map_unlock_read (task->map);
/* Unlock the VM object before the second lookup. */
vm_object_unlock (va.obj);
@@ -444,6 +436,7 @@ kern_return_t gsync_requeue (task_t task, vm_offset_t src,
int dst_bkt = gsync_prepare_key (task, dst, flags, &dst_k, &va);
if (dst_bkt < 0)
return (KERN_INVALID_ADDRESS);
+ vm_map_unlock_read (task->map);
/* We never create any temporary mappings in 'requeue', so we
* can unlock the VM object right now. */
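The gsync hunks follow a change in the vm_map_lookup() protocol: the call now takes an extra boolean (assumed here to mean "keep the map read-locked"), so callers no longer bracket the whole operation with vm_map_lock_read()/vm_map_unlock_read() but must drop the read lock themselves on every path where it is still held. A sketch of the adjusted probe, modelled on the probe_address() and gsync_requeue() hunks above; the vm_map_lookup() arguments are taken from the diff, the rest is illustrative:

/* Look up the object/offset backing addr.  On success the map stays
 * read-locked and the object locked; the caller releases both. */
static int
example_probe(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
	      vm_object_t *objp, vm_offset_t *offp)
{
	vm_map_version_t ver;
	vm_prot_t rprot;
	boolean_t wired_p;

	if (vm_map_lookup(&map, addr, prot, TRUE /* keep map locked */,
			  &ver, objp, offp, &rprot, &wired_p) != KERN_SUCCESS)
		return -1;

	if ((rprot & prot) != prot) {
		/* Insufficient protection: release what the lookup left held. */
		vm_map_unlock_read(map);
		vm_object_unlock(*objp);
		return -1;
	}
	return 0;
}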
diff --git a/kern/host.c b/kern/host.c
index 3271b0cd..69394374 100644
--- a/kern/host.c
+++ b/kern/host.c
@@ -42,6 +42,7 @@
#include <kern/processor.h>
#include <kern/ipc_host.h>
#include <kern/mach_clock.h>
+#include <kern/mach_host.server.h>
#include <mach/vm_param.h>
host_data_t realhost;
@@ -204,7 +205,7 @@ kern_return_t host_info(
* wanted to know about what version of the kernel this is).
*/
-kern_return_t host_kernel_version(
+kern_return_t host_get_kernel_version(
const host_t host,
kernel_version_t out_version)
{
@@ -218,6 +219,16 @@ kern_return_t host_kernel_version(
return KERN_SUCCESS;
}
+#if !defined(__x86_64__) || defined(USER32)
+/* Same as above, but does not exist for x86_64. */
+kern_return_t host_kernel_version(
+ const host_t host,
+ kernel_version_t out_version)
+{
+ return host_get_kernel_version(host, out_version);
+}
+#endif
+
/*
* host_processor_sets:
*
diff --git a/kern/ipc_host.c b/kern/ipc_host.c
index a02eb6f6..6b818623 100644
--- a/kern/ipc_host.c
+++ b/kern/ipc_host.c
@@ -35,6 +35,7 @@
#include <mach/message.h>
#include <kern/debug.h>
#include <kern/host.h>
+#include <kern/mach_host.server.h>
#include <kern/processor.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -42,6 +43,7 @@
#include <kern/ipc_kobject.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
+#include <mach/mach_traps.h>
#include <machine/machspl.h> /* for spl */
@@ -94,7 +96,7 @@ void ipc_host_init(void)
* or other errors.
*/
-mach_port_t
+mach_port_name_t
mach_host_self(void)
{
ipc_port_t sright;
diff --git a/kern/ipc_kobject.c b/kern/ipc_kobject.c
index 5ced4037..0a815953 100644
--- a/kern/ipc_kobject.c
+++ b/kern/ipc_kobject.c
@@ -78,8 +78,7 @@
*/
ipc_kmsg_t
-ipc_kobject_server(request)
- ipc_kmsg_t request;
+ipc_kobject_server(ipc_kmsg_t request)
{
mach_msg_size_t reply_size = ikm_less_overhead(8192);
ipc_kmsg_t reply;
@@ -102,13 +101,14 @@ ipc_kobject_server(request)
#define InP ((mach_msg_header_t *) &request->ikm_header)
#define OutP ((mig_reply_header_t *) &reply->ikm_header)
- static mach_msg_type_t RetCodeType = {
- /* msgt_name = */ MACH_MSG_TYPE_INTEGER_32,
- /* msgt_size = */ 32,
- /* msgt_number = */ 1,
- /* msgt_inline = */ TRUE,
- /* msgt_longform = */ FALSE,
- /* msgt_unused = */ 0
+ static const mach_msg_type_t RetCodeType = {
+ .msgt_name = MACH_MSG_TYPE_INTEGER_32,
+ .msgt_size = 32,
+ .msgt_number = 1,
+ .msgt_inline = TRUE,
+ .msgt_longform = FALSE,
+ .msgt_deallocate = FALSE,
+ .msgt_unused = 0
};
OutP->Head.msgh_bits =
MACH_MSGH_BITS(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0);
@@ -238,11 +238,7 @@ ipc_kobject_server(request)
/* like ipc_kmsg_put, but without the copyout */
ikm_check_initialized(request, request->ikm_size);
- if ((request->ikm_size == IKM_SAVED_KMSG_SIZE) &&
- (ikm_cache() == IKM_NULL))
- ikm_cache() = request;
- else
- ikm_free(request);
+ ikm_cache_free(request);
} else {
/*
* The message contents of the request are intact.
@@ -286,10 +282,7 @@ ipc_kobject_server(request)
*/
void
-ipc_kobject_set(port, kobject, type)
- ipc_port_t port;
- ipc_kobject_t kobject;
- ipc_kobject_type_t type;
+ipc_kobject_set(ipc_port_t port, ipc_kobject_t kobject, ipc_kobject_type_t type)
{
ip_lock(port);
assert(ip_active(port));
@@ -327,7 +320,7 @@ ipc_kobject_destroy(
default:
#if MACH_ASSERT
- printf("ipc_kobject_destroy: port 0x%p, kobj 0x%lx, type %d\n",
+ printf("ipc_kobject_destroy: port 0x%p, kobj 0x%zd, type %d\n",
port, port->ip_kobject, ip_kotype(port));
#endif /* MACH_ASSERT */
break;
@@ -341,9 +334,8 @@ ipc_kobject_destroy(
*/
boolean_t
-ipc_kobject_notify(request_header, reply_header)
- mach_msg_header_t *request_header;
- mach_msg_header_t *reply_header;
+ipc_kobject_notify(mach_msg_header_t *request_header,
+ mach_msg_header_t *reply_header)
{
ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
diff --git a/kern/ipc_mig.c b/kern/ipc_mig.c
index 22dac420..d26d2c6d 100644
--- a/kern/ipc_mig.c
+++ b/kern/ipc_mig.c
@@ -27,8 +27,10 @@
#include <mach/boolean.h>
#include <mach/port.h>
#include <mach/message.h>
+#include <mach/mig_support.h>
#include <mach/thread_status.h>
#include <machine/locore.h>
+#include <machine/copy_user.h>
#include <kern/ast.h>
#include <kern/debug.h>
#include <kern/ipc_tt.h>
@@ -49,7 +51,7 @@
#include <ipc/ipc_port.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_thread.h>
-#include <ipc/mach_port.h>
+#include <ipc/mach_port.server.h>
#include <device/dev_hdr.h>
#include <device/device_types.h>
#include <device/ds_routines.h>
@@ -91,10 +93,9 @@ mach_msg_send_from_kernel(
}
mach_msg_return_t
-mach_msg_rpc_from_kernel(msg, send_size, reply_size)
- const mach_msg_header_t *msg;
- mach_msg_size_t send_size;
- mach_msg_size_t reply_size;
+mach_msg_rpc_from_kernel(const mach_msg_header_t *msg,
+ mach_msg_size_t send_size,
+ mach_msg_size_t reply_size)
{
panic("mach_msg_rpc_from_kernel"); /*XXX*/
}
@@ -145,9 +146,9 @@ mach_msg(
mach_msg_option_t option,
mach_msg_size_t send_size,
mach_msg_size_t rcv_size,
- mach_port_t rcv_name,
+ mach_port_name_t rcv_name,
mach_msg_timeout_t time_out,
- mach_port_t notify)
+ mach_port_name_t notify)
{
ipc_space_t space = current_space();
vm_map_t map = current_map();
@@ -197,7 +198,7 @@ mach_msg(
kmsg->ikm_header.msgh_seqno = seqno;
- if (rcv_size < kmsg->ikm_header.msgh_size) {
+ if (rcv_size < msg_usize(&kmsg->ikm_header)) {
ipc_kmsg_copyout_dest(kmsg, space);
ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg);
return MACH_RCV_TOO_LARGE;
@@ -230,7 +231,7 @@ mach_msg(
* mach_msg() calls which are kernel calls.
*/
-mach_port_t
+mach_port_name_t
mig_get_reply_port(void)
{
ipc_thread_t self = current_thread();
@@ -285,10 +286,7 @@ mig_put_reply_port(
* len - Length of destination buffer.
*/
vm_size_t
-mig_strncpy(dest, src, len)
- char *dest;
- const char *src;
- int len;
+mig_strncpy(char *dest, const char *src, vm_size_t len)
{
char *dest_ = dest;
int i;
@@ -306,6 +304,16 @@ mig_strncpy(dest, src, len)
return dest - dest_;
}
+/* Called by MiG to deallocate memory, which in this case happens
+ * to be kernel memory. */
+void
+mig_deallocate(vm_address_t addr, vm_size_t size)
+{
+ (void) size;
+ /* We do the same thing as in ipc_kmsg_clean_body. */
+ vm_map_copy_discard((vm_map_copy_t) addr);
+}
+
#define fast_send_right_lookup(name, port, abort) \
MACRO_BEGIN \
ipc_space_t space = current_space(); \
@@ -333,8 +341,8 @@ MACRO_BEGIN \
is_read_unlock(space); \
MACRO_END
-device_t
-port_name_to_device(mach_port_t name)
+static device_t
+port_name_to_device(mach_port_name_t name)
{
ipc_port_t port;
device_t device;
@@ -376,8 +384,8 @@ port_name_to_device(mach_port_t name)
}
}
-thread_t
-port_name_to_thread(mach_port_t name)
+static thread_t
+port_name_to_thread(mach_port_name_t name)
{
ipc_port_t port;
@@ -421,8 +429,8 @@ port_name_to_thread(mach_port_t name)
}
}
-task_t
-port_name_to_task(mach_port_t name)
+static task_t
+port_name_to_task(mach_port_name_t name)
{
ipc_port_t port;
@@ -468,9 +476,9 @@ port_name_to_task(mach_port_t name)
}
}
-vm_map_t
+static vm_map_t
port_name_to_map(
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_port_t port;
@@ -516,8 +524,8 @@ port_name_to_map(
}
}
-ipc_space_t
-port_name_to_space(mach_port_t name)
+static ipc_space_t
+port_name_to_space(mach_port_name_t name)
{
ipc_port_t port;
@@ -564,47 +572,6 @@ port_name_to_space(mach_port_t name)
}
/*
- * Hack to translate a thread port to a thread pointer for calling
- * thread_get_state and thread_set_state. This is only necessary
- * because the IPC message for these two operations overflows the
- * kernel stack.
- *
- * AARGH!
- */
-
-kern_return_t thread_get_state_KERNEL(
- mach_port_t thread_port, /* port right for thread */
- int flavor,
- thread_state_t old_state, /* pointer to OUT array */
- natural_t *old_state_count) /* IN/OUT */
-{
- thread_t thread;
- kern_return_t result;
-
- thread = port_name_to_thread(thread_port);
- result = thread_get_state(thread, flavor, old_state, old_state_count);
- thread_deallocate(thread);
-
- return result;
-}
-
-kern_return_t thread_set_state_KERNEL(
- mach_port_t thread_port, /* port right for thread */
- int flavor,
- thread_state_t new_state,
- natural_t new_state_count)
-{
- thread_t thread;
- kern_return_t result;
-
- thread = port_name_to_thread(thread_port);
- result = thread_set_state(thread, flavor, new_state, new_state_count);
- thread_deallocate(thread);
-
- return result;
-}
-
-/*
* Things to keep in mind:
*
* The idea here is to duplicate the semantics of the true kernel RPC.
@@ -619,13 +586,13 @@ kern_return_t thread_set_state_KERNEL(
kern_return_t
syscall_vm_map(
- mach_port_t target_map,
- vm_offset_t *address,
- vm_size_t size,
- vm_offset_t mask,
+ mach_port_name_t target_map,
+ rpc_vm_offset_t *address,
+ rpc_vm_size_t size,
+ rpc_vm_offset_t mask,
boolean_t anywhere,
- mach_port_t memory_object,
- vm_offset_t offset,
+ mach_port_name_t memory_object,
+ rpc_vm_offset_t offset,
boolean_t copy,
vm_prot_t cur_protection,
vm_prot_t max_protection,
@@ -640,7 +607,7 @@ syscall_vm_map(
if (map == VM_MAP_NULL)
return MACH_SEND_INTERRUPTED;
- if (MACH_PORT_VALID(memory_object)) {
+ if (MACH_PORT_NAME_VALID(memory_object)) {
result = ipc_object_copyin(current_space(), memory_object,
MACH_MSG_TYPE_COPY_SEND,
(ipc_object_t *) &port);
@@ -649,14 +616,14 @@ syscall_vm_map(
return result;
}
} else
- port = (ipc_port_t) memory_object;
+ port = (ipc_port_t)invalid_name_to_port(memory_object);
- copyin(address, &addr, sizeof(vm_offset_t));
+ copyin_address(address, &addr);
result = vm_map(map, &addr, size, mask, anywhere,
port, offset, copy,
cur_protection, max_protection, inheritance);
if (result == KERN_SUCCESS)
- copyout(&addr, address, sizeof(vm_offset_t));
+ copyout_address(&addr, address);
if (IP_VALID(port))
ipc_port_release_send(port);
vm_map_deallocate(map);
@@ -665,9 +632,9 @@ syscall_vm_map(
}
kern_return_t syscall_vm_allocate(
- mach_port_t target_map,
- vm_offset_t *address,
- vm_size_t size,
+ mach_port_name_t target_map,
+ rpc_vm_offset_t *address,
+ rpc_vm_size_t size,
boolean_t anywhere)
{
vm_map_t map;
@@ -678,19 +645,19 @@ kern_return_t syscall_vm_allocate(
if (map == VM_MAP_NULL)
return MACH_SEND_INTERRUPTED;
- copyin(address, &addr, sizeof(vm_offset_t));
+ copyin_address(address, &addr);
result = vm_allocate(map, &addr, size, anywhere);
if (result == KERN_SUCCESS)
- copyout(&addr, address, sizeof(vm_offset_t));
+ copyout_address(&addr, address);
vm_map_deallocate(map);
return result;
}
kern_return_t syscall_vm_deallocate(
- mach_port_t target_map,
- vm_offset_t start,
- vm_size_t size)
+ mach_port_name_t target_map,
+ rpc_vm_offset_t start,
+ rpc_vm_size_t size)
{
vm_map_t map;
kern_return_t result;
@@ -706,13 +673,13 @@ kern_return_t syscall_vm_deallocate(
}
kern_return_t syscall_task_create(
- mach_port_t parent_task,
- boolean_t inherit_memory,
- mach_port_t *child_task) /* OUT */
+ mach_port_name_t parent_task,
+ boolean_t inherit_memory,
+ mach_port_name_t *child_task) /* OUT */
{
task_t t, c;
ipc_port_t port;
- mach_port_t name;
+ mach_port_name_t name;
kern_return_t result;
t = port_name_to_task(parent_task);
@@ -726,15 +693,14 @@ kern_return_t syscall_task_create(
(void) ipc_kmsg_copyout_object(current_space(),
(ipc_object_t) port,
MACH_MSG_TYPE_PORT_SEND, &name);
- copyout(&name, child_task,
- sizeof(mach_port_t));
+ copyout(&name, child_task, sizeof(mach_port_name_t));
}
task_deallocate(t);
return result;
}
-kern_return_t syscall_task_terminate(mach_port_t task)
+kern_return_t syscall_task_terminate(mach_port_name_t task)
{
task_t t;
kern_return_t result;
@@ -749,7 +715,7 @@ kern_return_t syscall_task_terminate(mach_port_t task)
return result;
}
-kern_return_t syscall_task_suspend(mach_port_t task)
+kern_return_t syscall_task_suspend(mach_port_name_t task)
{
task_t t;
kern_return_t result;
@@ -765,9 +731,9 @@ kern_return_t syscall_task_suspend(mach_port_t task)
}
kern_return_t syscall_task_set_special_port(
- mach_port_t task,
+ mach_port_name_t task,
int which_port,
- mach_port_t port_name)
+ mach_port_name_t port_name)
{
task_t t;
ipc_port_t port;
@@ -777,7 +743,7 @@ kern_return_t syscall_task_set_special_port(
if (t == TASK_NULL)
return MACH_SEND_INTERRUPTED;
- if (MACH_PORT_VALID(port_name)) {
+ if (MACH_PORT_NAME_VALID(port_name)) {
result = ipc_object_copyin(current_space(), port_name,
MACH_MSG_TYPE_COPY_SEND,
(ipc_object_t *) &port);
@@ -786,7 +752,7 @@ kern_return_t syscall_task_set_special_port(
return result;
}
} else
- port = (ipc_port_t) port_name;
+ port = (ipc_port_t)invalid_name_to_port(port_name);
result = task_set_special_port(t, which_port, port);
if ((result != KERN_SUCCESS) && IP_VALID(port))
@@ -798,12 +764,12 @@ kern_return_t syscall_task_set_special_port(
kern_return_t
syscall_mach_port_allocate(
- mach_port_t task,
+ mach_port_name_t task,
mach_port_right_t right,
- mach_port_t *namep)
+ mach_port_name_t *namep)
{
ipc_space_t space;
- mach_port_t name;
+ mach_port_name_t name;
kern_return_t kr;
space = port_name_to_space(task);
@@ -812,7 +778,9 @@ syscall_mach_port_allocate(
kr = mach_port_allocate(space, right, &name);
if (kr == KERN_SUCCESS)
- copyout(&name, namep, sizeof(mach_port_t));
+ {
+ copyout(&name, namep, sizeof(mach_port_name_t));
+ }
is_release(space);
return kr;
@@ -820,9 +788,9 @@ syscall_mach_port_allocate(
kern_return_t
syscall_mach_port_allocate_name(
- mach_port_t task,
+ mach_port_name_t task,
mach_port_right_t right,
- mach_port_t name)
+ mach_port_name_t name)
{
ipc_space_t space;
kern_return_t kr;
@@ -839,8 +807,8 @@ syscall_mach_port_allocate_name(
kern_return_t
syscall_mach_port_deallocate(
- mach_port_t task,
- mach_port_t name)
+ mach_port_name_t task,
+ mach_port_name_t name)
{
ipc_space_t space;
kern_return_t kr;
@@ -857,9 +825,9 @@ syscall_mach_port_deallocate(
kern_return_t
syscall_mach_port_insert_right(
- mach_port_t task,
- mach_port_t name,
- mach_port_t right,
+ mach_port_name_t task,
+ mach_port_name_t name,
+ mach_port_name_t right,
mach_msg_type_name_t rightType)
{
ipc_space_t space;
@@ -876,7 +844,7 @@ syscall_mach_port_insert_right(
return KERN_INVALID_VALUE;
}
- if (MACH_PORT_VALID(right)) {
+ if (MACH_PORT_NAME_VALID(right)) {
kr = ipc_object_copyin(current_space(), right, rightType,
&object);
if (kr != KERN_SUCCESS) {
@@ -884,7 +852,7 @@ syscall_mach_port_insert_right(
return kr;
}
} else
- object = (ipc_object_t) right;
+ object = (ipc_object_t)invalid_name_to_port(right);
newtype = ipc_object_copyin_type(rightType);
kr = mach_port_insert_right(space, name, (ipc_port_t) object, newtype);
@@ -895,7 +863,7 @@ syscall_mach_port_insert_right(
return kr;
}
-kern_return_t syscall_thread_depress_abort(mach_port_t thread)
+kern_return_t syscall_thread_depress_abort(mach_port_name_t thread)
{
thread_t t;
kern_return_t result;
@@ -914,12 +882,12 @@ kern_return_t syscall_thread_depress_abort(mach_port_t thread)
* Device traps -- these are way experimental.
*/
io_return_t
-syscall_device_write_request(mach_port_t device_name,
- mach_port_t reply_name,
+syscall_device_write_request(mach_port_name_t device_name,
+ mach_port_name_t reply_name,
dev_mode_t mode,
- recnum_t recnum,
- vm_offset_t data,
- vm_size_t data_count)
+ rpc_recnum_t recnum,
+ rpc_vm_offset_t data,
+ rpc_vm_size_t data_count)
{
device_t dev;
/*ipc_port_t reply_port;*/
@@ -965,12 +933,12 @@ syscall_device_write_request(mach_port_t device_name,
}
io_return_t
-syscall_device_writev_request(mach_port_t device_name,
- mach_port_t reply_name,
+syscall_device_writev_request(mach_port_name_t device_name,
+ mach_port_name_t reply_name,
dev_mode_t mode,
- recnum_t recnum,
- io_buf_vec_t *iovec,
- vm_size_t iocount)
+ rpc_recnum_t recnum,
+ rpc_io_buf_vec_t *iovec,
+ rpc_vm_size_t iocount)
{
device_t dev;
/*ipc_port_t reply_port;*/
diff --git a/kern/ipc_mig.h b/kern/ipc_mig.h
index 6f063eca..422e8d84 100644
--- a/kern/ipc_mig.h
+++ b/kern/ipc_mig.h
@@ -28,6 +28,7 @@
#include <mach/std_types.h>
#include <device/device_types.h>
+#include <ipc/ipc_thread.h>
/*
* Routine: mach_msg_send_from_kernel
@@ -64,79 +65,79 @@ extern mach_msg_return_t mach_msg_rpc_from_kernel(
mach_msg_size_t reply_size);
extern kern_return_t syscall_vm_map(
- mach_port_t target_map,
- vm_offset_t *address,
- vm_size_t size,
- vm_offset_t mask,
+ mach_port_name_t target_map,
+ rpc_vm_offset_t *address,
+ rpc_vm_size_t size,
+ rpc_vm_offset_t mask,
boolean_t anywhere,
- mach_port_t memory_object,
- vm_offset_t offset,
+ mach_port_name_t memory_object,
+ rpc_vm_offset_t offset,
boolean_t copy,
vm_prot_t cur_protection,
vm_prot_t max_protection,
vm_inherit_t inheritance);
extern kern_return_t syscall_vm_allocate(
- mach_port_t target_map,
- vm_offset_t *address,
- vm_size_t size,
+ mach_port_name_t target_map,
+ rpc_vm_offset_t *address,
+ rpc_vm_size_t size,
boolean_t anywhere);
extern kern_return_t syscall_vm_deallocate(
- mach_port_t target_map,
- vm_offset_t start,
- vm_size_t size);
+ mach_port_name_t target_map,
+ rpc_vm_offset_t start,
+ rpc_vm_size_t size);
extern kern_return_t syscall_task_create(
- mach_port_t parent_task,
- boolean_t inherit_memory,
- mach_port_t *child_task);
+ mach_port_name_t parent_task,
+ boolean_t inherit_memory,
+ mach_port_name_t *child_task);
-extern kern_return_t syscall_task_terminate(mach_port_t task);
+extern kern_return_t syscall_task_terminate(mach_port_name_t task);
-extern kern_return_t syscall_task_suspend(mach_port_t task);
+extern kern_return_t syscall_task_suspend(mach_port_name_t task);
extern kern_return_t syscall_task_set_special_port(
- mach_port_t task,
+ mach_port_name_t task,
int which_port,
- mach_port_t port_name);
+ mach_port_name_t port_name);
extern kern_return_t syscall_mach_port_allocate(
- mach_port_t task,
- mach_port_right_t right,
- mach_port_t *namep);
+ mach_port_name_t task,
+ mach_port_right_t right,
+ mach_port_name_t *namep);
extern kern_return_t syscall_mach_port_deallocate(
- mach_port_t task,
- mach_port_t name);
+ mach_port_name_t task,
+ mach_port_name_t name);
extern kern_return_t syscall_mach_port_insert_right(
- mach_port_t task,
- mach_port_t name,
- mach_port_t right,
+ mach_port_name_t task,
+ mach_port_name_t name,
+ mach_port_name_t right,
mach_msg_type_name_t rightType);
extern kern_return_t syscall_mach_port_allocate_name(
- mach_port_t task,
- mach_port_right_t right,
- mach_port_t name);
+ mach_port_name_t task,
+ mach_port_right_t right,
+ mach_port_name_t name);
-extern kern_return_t syscall_thread_depress_abort(mach_port_t thread);
+extern kern_return_t syscall_thread_depress_abort(mach_port_name_t thread);
extern io_return_t syscall_device_write_request(
- mach_port_t device_name,
- mach_port_t reply_name,
+ mach_port_name_t device_name,
+ mach_port_name_t reply_name,
dev_mode_t mode,
- recnum_t recnum,
- vm_offset_t data,
- vm_size_t data_count);
+ rpc_recnum_t recnum,
+ rpc_vm_offset_t data,
+ rpc_vm_size_t data_count);
io_return_t syscall_device_writev_request(
- mach_port_t device_name,
- mach_port_t reply_name,
+ mach_port_name_t device_name,
+ mach_port_name_t reply_name,
dev_mode_t mode,
- recnum_t recnum,
- io_buf_vec_t *iovec,
- vm_size_t iocount);
+ rpc_recnum_t recnum,
+ rpc_io_buf_vec_t *iovec,
+ rpc_vm_size_t iocount);
#endif /* _IPC_MIG_H_ */
diff --git a/kern/ipc_sched.c b/kern/ipc_sched.c
index be82971b..4519c654 100644
--- a/kern/ipc_sched.c
+++ b/kern/ipc_sched.c
@@ -233,7 +233,7 @@ thread_handoff(
/*
* stack_handoff is machine-dependent. It does the
* machine-dependent components of a context-switch, like
- * changing address spaces. It updates active_threads.
+ * changing address spaces. It updates active_thread.
*/
stack_handoff(old, new);
diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c
index 04a5a92f..7c9a0b8d 100644
--- a/kern/ipc_tt.c
+++ b/kern/ipc_tt.c
@@ -32,6 +32,7 @@
#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <mach/mach_param.h>
+#include <mach/mach_traps.h>
#include <mach/task_special_ports.h>
#include <mach/thread_special_ports.h>
#include <vm/vm_kern.h>
@@ -41,6 +42,7 @@
#include <kern/thread.h>
#include <kern/ipc_kobject.h>
#include <kern/ipc_tt.h>
+#include <kern/mach.server.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_table.h>
#include <ipc/ipc_port.h>
@@ -511,7 +513,7 @@ retrieve_thread_exception(thread)
* or other errors.
*/
-mach_port_t
+mach_port_name_t
mach_task_self(void)
{
task_t task = current_task();
@@ -532,7 +534,7 @@ mach_task_self(void)
* or other errors.
*/
-mach_port_t
+mach_port_name_t
mach_thread_self(void)
{
thread_t thread = current_thread();
@@ -554,11 +556,11 @@ mach_thread_self(void)
* or other errors.
*/
-mach_port_t
+mach_port_name_t
mach_reply_port(void)
{
ipc_port_t port;
- mach_port_t name;
+ mach_port_name_t name;
kern_return_t kr;
kr = ipc_port_alloc(current_task()->itk_space, &name, &port);
@@ -887,7 +889,7 @@ mach_ports_register(
kern_return_t
mach_ports_lookup(
task_t task,
- ipc_port_t **portsp,
+ mach_port_t **portsp,
mach_msg_type_number_t *portsCnt)
{
vm_offset_t memory;
@@ -924,7 +926,7 @@ mach_ports_lookup(
itk_unlock(task);
- *portsp = ports;
+ *portsp = (mach_port_t *)ports;
*portsCnt = TASK_PORT_REGISTER_MAX;
return KERN_SUCCESS;
}
diff --git a/kern/ipc_tt.h b/kern/ipc_tt.h
index 78cb43ad..5c667387 100644
--- a/kern/ipc_tt.h
+++ b/kern/ipc_tt.h
@@ -86,7 +86,7 @@ convert_port_to_space(struct ipc_port *);
extern void
space_deallocate(ipc_space_t);
-mach_port_t
+mach_port_name_t
mach_reply_port (void);
#endif /* _KERN_IPC_TT_H_ */
diff --git a/kern/lock.c b/kern/lock.c
index a4b82522..36b6d209 100644
--- a/kern/lock.c
+++ b/kern/lock.c
@@ -36,6 +36,8 @@
#include <string.h>
+#include <machine/smp.h>
+
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/thread.h>
@@ -88,7 +90,7 @@ void simple_lock_init(simple_lock_t l)
void simple_lock(simple_lock_t l)
{
while (test_and_set((boolean_t *)l))
- continue;
+ cpu_pause();
}
void simple_unlock(simple_lock_t l)
@@ -202,7 +204,7 @@ boolean_t _simple_lock_try(
return TRUE;
}
-void simple_unlock(
+void _simple_unlock(
simple_lock_t l)
{
assert(l->lock_data != 0);
@@ -290,7 +292,7 @@ void lock_write(
if ((i = lock_wait_time) > 0) {
simple_unlock(&l->interlock);
while (--i > 0 && l->want_write)
- continue;
+ cpu_pause();
simple_lock(&l->interlock);
}
@@ -310,7 +312,7 @@ void lock_write(
simple_unlock(&l->interlock);
while (--i > 0 && (l->read_count != 0 ||
l->want_upgrade))
- continue;
+ cpu_pause();
simple_lock(&l->interlock);
}
@@ -338,11 +340,16 @@ void lock_done(
if (l->recursion_depth != 0)
l->recursion_depth--;
else
- if (l->want_upgrade)
+ if (l->want_upgrade) {
l->want_upgrade = FALSE;
- else {
+#if MACH_LDEBUG
+ assert(l->writer == current_thread());
+ l->writer = THREAD_NULL;
+#endif /* MACH_LDEBUG */
+ } else {
l->want_write = FALSE;
#if MACH_LDEBUG
+ assert(l->writer == current_thread());
l->writer = THREAD_NULL;
#endif /* MACH_LDEBUG */
}
@@ -384,7 +391,7 @@ void lock_read(
if ((i = lock_wait_time) > 0) {
simple_unlock(&l->interlock);
while (--i > 0 && (l->want_write || l->want_upgrade))
- continue;
+ cpu_pause();
simple_lock(&l->interlock);
}
@@ -450,7 +457,7 @@ boolean_t lock_read_to_write(
if ((i = lock_wait_time) > 0) {
simple_unlock(&l->interlock);
while (--i > 0 && l->read_count != 0)
- continue;
+ cpu_pause();
simple_lock(&l->interlock);
}
@@ -492,6 +499,7 @@ void lock_write_to_read(
}
#if MACH_LDEBUG
+ assert(l->writer == current_thread());
l->writer = THREAD_NULL;
#endif /* MACH_LDEBUG */
simple_unlock(&l->interlock);
@@ -671,7 +679,11 @@ void db_show_all_slocks(void)
#else /* MACH_SLOCKS && NCPUS == 1 */
void db_show_all_slocks(void)
{
+#if MACH_LOCK_MON
+ lip();
+#else
db_printf("simple lock info not available\n");
+#endif
}
#endif /* MACH_SLOCKS && NCPUS == 1 */
#endif /* MACH_KDB */
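Several hunks in this patch (kern/lock.c above, and kern/ast.c and kern/eventcount.c earlier) replace empty busy-wait bodies with cpu_pause(), newly pulled in from machine/smp.h. A minimal sketch of the pattern; test_and_set() is the primitive simple_lock() already spins on, and cpu_pause() is assumed to expand to the architecture's spin-wait hint (PAUSE on x86):

static void
example_spin_acquire(boolean_t *lock_word)
{
	/* Spin until the word is clear, hinting to the CPU that this is a
	 * busy-wait loop so it can relax speculation and share resources
	 * with an SMT sibling. */
	while (test_and_set(lock_word))
		cpu_pause();
}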
diff --git a/kern/lock.h b/kern/lock.h
index 2781a48a..9d081d39 100644
--- a/kern/lock.h
+++ b/kern/lock.h
@@ -36,12 +36,42 @@
#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
+#include <machine/spl.h>
+
+/*
+ * Note: we cannot blindly use simple locks in interrupt handlers, otherwise one
+ * may try to acquire a lock while already having the lock, thus a deadlock.
+ *
+ * When locks are needed in interrupt handlers, the _irq versions of the calls
+ * should be used, which disable interrupts (by calling splhigh) before acquiring
+ * the lock, thus preventing the deadlock. They need to be used this way:
+ *
+ * spl_t s = simple_lock_irq(&mylock);
+ * [... critical section]
+ * simple_unlock_irq(s, &mylock);
+ *
+ * To catch faulty code, when MACH_LDEBUG is set we check that non-_irq versions
+ * are not called while handling an interrupt.
+ *
+ * In the following, the _nocheck versions don't check anything, the _irq
+ * versions disable interrupts, and the pristine versions add a check when
+ * MACH_LDEBUG is set.
+ */
#if NCPUS > 1
#include <machine/lock.h>/*XXX*/
+#if MACH_LOCK_MON == 0
+#define simple_lock_nocheck _simple_lock
+#define simple_lock_try_nocheck _simple_lock_try
+#define simple_unlock_nocheck _simple_unlock
+#else
+#define simple_lock_nocheck simple_lock
+#define simple_lock_try_nocheck simple_lock_try
+#define simple_unlock_nocheck simple_unlock
+#endif
#endif
-#define MACH_SLOCKS ((NCPUS > 1) || MACH_LDEBUG)
+#define MACH_SLOCKS NCPUS > 1
/*
* A simple spin lock.
@@ -68,9 +98,15 @@ typedef struct slock *simple_lock_t;
#define decl_simple_lock_data(class,name) \
class simple_lock_data_t name;
+#define def_simple_lock_data(class,name) \
+class simple_lock_data_t name = SIMPLE_LOCK_INITIALIZER(&name);
+#define def_simple_lock_irq_data(class,name) \
+class simple_lock_irq_data_t name = { SIMPLE_LOCK_INITIALIZER(&name.lock) };
#define simple_lock_addr(lock) (simple_lock_assert(&(lock)), \
&(lock))
+#define simple_lock_irq_addr(l) (simple_lock_irq_assert(&(l)), \
+ &(l)->lock)
#if (NCPUS > 1)
@@ -92,7 +128,7 @@ class simple_lock_data_t name;
extern void simple_lock_init(simple_lock_t);
extern void _simple_lock(simple_lock_t,
const char *, const char *);
-extern void simple_unlock(simple_lock_t);
+extern void _simple_unlock(simple_lock_t);
extern boolean_t _simple_lock_try(simple_lock_t,
const char *, const char *);
@@ -102,8 +138,9 @@ extern boolean_t _simple_lock_try(simple_lock_t,
#define STR(x) XSTR(x)
#define LOCATION __FILE__ ":" STR(__LINE__)
-#define simple_lock(lock) _simple_lock((lock), #lock, LOCATION)
-#define simple_lock_try(lock) _simple_lock_try((lock), #lock, LOCATION)
+#define simple_lock_nocheck(lock) _simple_lock((lock), #lock, LOCATION)
+#define simple_lock_try_nocheck(lock) _simple_lock_try((lock), #lock, LOCATION)
+#define simple_unlock_nocheck(lock) _simple_unlock((lock))
#define simple_lock_pause()
#define simple_lock_taken(lock) (simple_lock_assert(lock), \
@@ -120,18 +157,25 @@ extern void check_simple_locks_disable(void);
* Do not allocate storage for locks if not needed.
*/
struct simple_lock_data_empty { struct {} is_a_simple_lock; };
+struct simple_lock_irq_data_empty { struct simple_lock_data_empty slock; };
#define decl_simple_lock_data(class,name) \
class struct simple_lock_data_empty name;
+#define def_simple_lock_data(class,name) \
+class struct simple_lock_data_empty name;
+#define def_simple_lock_irq_data(class,name) \
+class struct simple_lock_irq_data_empty name;
#define simple_lock_addr(lock) (simple_lock_assert(&(lock)), \
(simple_lock_t)0)
+#define simple_lock_irq_addr(lock) (simple_lock_irq_assert(&(lock)), \
+ (simple_lock_t)0)
/*
* No multiprocessor locking is necessary.
*/
#define simple_lock_init(l) simple_lock_assert(l)
-#define simple_lock(l) simple_lock_assert(l)
-#define simple_unlock(l) simple_lock_assert(l)
-#define simple_lock_try(l) (simple_lock_assert(l), \
+#define simple_lock_nocheck(l) simple_lock_assert(l)
+#define simple_unlock_nocheck(l) simple_lock_assert(l)
+#define simple_lock_try_nocheck(l) (simple_lock_assert(l), \
TRUE) /* always succeeds */
#define simple_lock_taken(l) (simple_lock_assert(l), \
1) /* always succeeds */
@@ -144,6 +188,7 @@ class struct simple_lock_data_empty name;
#define decl_mutex_data(class,name) decl_simple_lock_data(class,name)
+#define def_mutex_data(class,name) def_simple_lock_data(class,name)
#define mutex_try(l) simple_lock_try(l)
#define mutex_lock(l) simple_lock(l)
#define mutex_unlock(l) simple_unlock(l)
@@ -210,13 +255,62 @@ extern void lock_clear_recursive(lock_t);
#if ! MACH_LDEBUG
#define have_read_lock(l) 1
#define have_write_lock(l) 1
+#define lock_check_no_interrupts()
#else /* MACH_LDEBUG */
/* XXX: We don't keep track of readers, so this is an approximation. */
#define have_read_lock(l) ((l)->read_count > 0)
#define have_write_lock(l) ((l)->writer == current_thread())
+extern unsigned long in_interrupt[NCPUS];
+#define lock_check_no_interrupts() assert(!in_interrupt[cpu_number()])
#endif /* MACH_LDEBUG */
#define have_lock(l) (have_read_lock(l) || have_write_lock(l))
-void db_show_all_slocks(void);
+/* These are defined elsewhere with lock monitoring */
+#if MACH_LOCK_MON == 0
+#define simple_lock(l) do { \
+ lock_check_no_interrupts(); \
+ simple_lock_nocheck(l); \
+} while (0)
+#define simple_lock_try(l) ({ \
+ lock_check_no_interrupts(); \
+ simple_lock_try_nocheck(l); \
+})
+#define simple_unlock(l) do { \
+ lock_check_no_interrupts(); \
+ simple_unlock_nocheck(l); \
+} while (0)
+#endif
+
+/* _irq variants */
+
+struct slock_irq {
+ struct slock slock;
+};
+
+#define simple_lock_irq_assert(l) simple_lock_assert(&(l)->slock)
+
+typedef struct slock_irq simple_lock_irq_data_t;
+typedef struct slock_irq *simple_lock_irq_t;
+
+#define decl_simple_lock_irq_data(class,name) \
+class simple_lock_irq_data_t name;
+
+#define simple_lock_init_irq(l) simple_lock_init(&(l)->slock)
+
+#define simple_lock_irq(l) ({ \
+ spl_t __s = splhigh(); \
+ simple_lock_nocheck(&(l)->slock); \
+ __s; \
+})
+#define simple_unlock_irq(s, l) do { \
+ simple_unlock_nocheck(&(l)->slock); \
+ splx(s); \
+} while (0)
+
+#if MACH_KDB
+extern void db_show_all_slocks(void);
+#endif /* MACH_KDB */
+
+extern void lip(void);
#endif /* _KERN_LOCK_H_ */
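For reference, a compact example of the _irq discipline the new comment block above describes, in the same shape as the kern/debug.c hunks earlier in this patch: the lock is defined with def_simple_lock_irq_data(), acquisition raises the interrupt level and returns the previous one, and release restores it. Only the function and data names here are invented:

def_simple_lock_irq_data(static, example_lock)	/* safe to take from interrupt context */

static void
example_update_shared_state(void)
{
	spl_t s;

	s = simple_lock_irq(&example_lock);	/* splhigh(), then take the lock */
	/* ... modify state also reached from interrupt handlers ... */
	simple_unlock_irq(s, &example_lock);	/* release, then splx(s) */
}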
diff --git a/kern/lock_mon.c b/kern/lock_mon.c
index f6bbd5dd..3ca4592c 100644
--- a/kern/lock_mon.c
+++ b/kern/lock_mon.c
@@ -45,20 +45,22 @@
#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/lock.h>
-#include <kern/time_stamp.h>
+#include <kern/printf.h>
+#include <kern/mach_clock.h>
+#include <machine/ipl.h>
+#include <ddb/db_sym.h>
+#include <ddb/db_output.h>
+static void lis(int arg, int abs, int count);
-decl_simple_lock_data(extern , kdb_lock)
-decl_simple_lock_data(extern , printf_lock)
+def_simple_lock_data(, kdb_lock)
+def_simple_lock_data(, printf_lock)
#if NCPUS > 1 && MACH_LOCK_MON
-
-#if TIME_STAMP
-extern time_stamp_t time_stamp;
-#else /* TIME_STAMP */
+#define TIME_STAMP 1
typedef unsigned int time_stamp_t;
-#define time_stamp 0
-#endif /* TIME_STAMP */
+/* in milliseconds */
+#define time_stamp (elapsed_ticks * 1000 / hz)
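(Editorial note, not part of the patch: a quick check of the new definition with illustrative values — with hz = 100 and elapsed_ticks = 1234, time_stamp evaluates to 1234 * 1000 / 100 = 12340, i.e. 12.34 s expressed in milliseconds.)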
#define LOCK_INFO_MAX (1024*32)
#define LOCK_INFO_HASH_COUNT 1024
@@ -80,6 +82,8 @@ struct lock_info_bucket {
struct lock_info info[LOCK_INFO_PER_BUCKET];
};
+static void print_lock_info(struct lock_info *li);
+
struct lock_info_bucket lock_info[LOCK_INFO_HASH_COUNT];
struct lock_info default_lock_info;
unsigned default_lock_stack = 0;
@@ -94,7 +98,6 @@ decl_simple_lock_data(, **lock)
{
struct lock_info *li = &(lock_info[HASH_LOCK(*lock)].info[0]);
int i;
- my_cpu = cpu_number();
for (i=0; i < LOCK_INFO_PER_BUCKET; i++, li++)
if (li->lock) {
@@ -115,7 +118,7 @@ void simple_lock(lock)
decl_simple_lock_data(, *lock)
{
struct lock_info *li = locate_lock_info(&lock);
- my_cpu = cpu_number();
+ int my_cpu = cpu_number();
if (current_thread())
li->stack = current_thread()->lock_stack++;
@@ -134,7 +137,7 @@ int simple_lock_try(lock)
decl_simple_lock_data(, *lock)
{
struct lock_info *li = locate_lock_info(&lock);
- my_cpu = cpu_number();
+ int my_cpu = cpu_number();
if (curr_ipl[my_cpu])
li->masked++;
@@ -172,7 +175,7 @@ void lip(void) {
#define lock_info_sort lis
-void lock_info_sort(arg, abs, count)
+static void lock_info_sort(int arg, int abs, int count)
{
struct lock_info *li, mean;
int bucket = 0;
@@ -264,19 +267,18 @@ void lock_info_clear(void)
memset(&default_lock_info, 0, sizeof(struct lock_info));
}
-void print_lock_info(li)
-struct lock_info *li;
+static void print_lock_info(struct lock_info *li)
{
- int off;
+ db_addr_t off;
int sum = li->success + li->fail;
db_printf("%d %d/%d %d/%d %d/%d %d/%d ", li->success,
li->fail, (li->fail*100)/sum,
li->masked, (li->masked*100)/sum,
li->stack, li->stack/sum,
li->time, li->time/sum);
- db_free_symbol(db_search_symbol(li->lock, 0, &off));
+ db_free_symbol(db_search_symbol((db_addr_t) li->lock, 0, &off));
if (off < 1024)
- db_printsym(li->lock, 0);
+ db_printsym((db_addr_t) li->lock, 0);
else {
db_printsym(li->caller, 0);
db_printf("(%X)", li->lock);
diff --git a/kern/mach_clock.c b/kern/mach_clock.c
index 3a568647..864704c8 100644
--- a/kern/mach_clock.c
+++ b/kern/mach_clock.c
@@ -47,16 +47,15 @@
#include <kern/host.h>
#include <kern/lock.h>
#include <kern/mach_clock.h>
+#include <kern/mach_host.server.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>
-#include <kern/time_stamp.h>
#include <kern/timer.h>
#include <kern/priority.h>
#include <vm/vm_kern.h>
-#include <sys/time.h>
#include <machine/mach_param.h> /* HZ */
#include <machine/machspl.h>
#include <machine/model_dep.h>
@@ -65,9 +64,11 @@
#include <kern/pc_sample.h>
#endif
+#define MICROSECONDS_IN_ONE_SECOND 1000000
+
int hz = HZ; /* number of ticks per second */
-int tick = (1000000 / HZ); /* number of usec per tick */
-time_value_t time = { 0, 0 }; /* time since bootup (uncorrected) */
+int tick = (MICROSECONDS_IN_ONE_SECOND / HZ); /* number of usec per tick */
+time_value64_t time = { 0, 0 }; /* time since bootup (uncorrected) */
unsigned long elapsed_ticks = 0; /* ticks elapsed since bootup */
int timedelta = 0;
@@ -94,28 +95,31 @@ unsigned bigadj = 1000000; /* adjust 10*tickadj if adjustment
volatile mapped_time_value_t *mtime = 0;
-#define update_mapped_time(time) \
-MACRO_BEGIN \
- if (mtime != 0) { \
- mtime->check_seconds = (time)->seconds; \
- __sync_synchronize(); \
- mtime->microseconds = (time)->microseconds; \
- __sync_synchronize(); \
- mtime->seconds = (time)->seconds; \
- } \
+#define update_mapped_time(time) \
+MACRO_BEGIN \
+ if (mtime != 0) { \
+ mtime->check_seconds = (time)->seconds; \
+ mtime->check_seconds64 = (time)->seconds; \
+ __sync_synchronize(); \
+ mtime->microseconds = (time)->nanoseconds / 1000; \
+ mtime->time_value.nanoseconds = (time)->nanoseconds; \
+ __sync_synchronize(); \
+ mtime->seconds = (time)->seconds; \
+ mtime->time_value.seconds = (time)->seconds; \
+ } \
MACRO_END
-#define read_mapped_time(time) \
-MACRO_BEGIN \
- do { \
- time->seconds = mtime->seconds; \
- __sync_synchronize(); \
- time->microseconds = mtime->microseconds; \
- __sync_synchronize(); \
- } while (time->seconds != mtime->check_seconds); \
+#define read_mapped_time(time) \
+MACRO_BEGIN \
+ do { \
+ (time)->seconds = mtime->time_value.seconds; \
+ __sync_synchronize(); \
+ (time)->nanoseconds = mtime->time_value.nanoseconds; \
+ __sync_synchronize(); \
+ } while ((time)->seconds != mtime->check_seconds64); \
MACRO_END
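(Editorial sketch, not part of the patch: the reader side of the mapped-time protocol above with the reasoning spelled out; it assumes only the mtime layout visible in the macros.)

	time_value64_t tv;

	do {
		tv.seconds = mtime->time_value.seconds;
		__sync_synchronize();		/* order the seconds load before the nanoseconds load */
		tv.nanoseconds = mtime->time_value.nanoseconds;
		__sync_synchronize();
		/* The writer stores check_seconds64 first and seconds last, so if both
		 * still agree here, no seconds update overlapped our reads and the
		 * (seconds, nanoseconds) pair is consistent; otherwise retry. */
	} while (tv.seconds != mtime->check_seconds64);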
-decl_simple_lock_data(, timer_lock) /* lock for ... */
+def_simple_lock_irq_data(static, timer_lock) /* lock for ... */
timer_elt_data_t timer_head; /* ordered list of timeouts */
/* (doubles as end-of-list) */
@@ -206,34 +210,26 @@ void clock_interrupt(
timer_elt_t telt;
boolean_t needsoft = FALSE;
-#if TS_FORMAT == 1
- /*
- * Increment the tick count for the timestamping routine.
- */
- ts_tick_count++;
-#endif /* TS_FORMAT == 1 */
/*
* Update the tick count since bootup, and handle
* timeouts.
*/
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
elapsed_ticks++;
telt = (timer_elt_t)queue_first(&timer_head.chain);
if (telt->ticks <= elapsed_ticks)
needsoft = TRUE;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
/*
* Increment the time-of-day clock.
*/
if (timedelta == 0) {
- time_value_add_usec(&time, usec);
+ time_value64_add_nanos(&time, usec * 1000);
}
else {
int delta;
@@ -254,7 +250,7 @@ void clock_interrupt(
delta = usec + tickdelta;
timedelta -= tickdelta;
}
- time_value_add_usec(&time, delta);
+ time_value64_add_nanos(&time, delta * 1000);
}
update_mapped_time(&time);
@@ -306,12 +302,10 @@ void softclock(void)
void *param;
while (TRUE) {
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
telt = (timer_elt_t) queue_first(&timer_head.chain);
if (telt->ticks > elapsed_ticks) {
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
break;
}
fcn = telt->fcn;
@@ -319,8 +313,7 @@ void softclock(void)
remqueue(&timer_head.chain, (queue_entry_t)telt);
telt->set = TELT_UNSET;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
assert(fcn != 0);
(*fcn)(param);
@@ -341,8 +334,7 @@ void set_timeout(
spl_t s;
timer_elt_t next;
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
interval += elapsed_ticks;
@@ -360,33 +352,29 @@ void set_timeout(
*/
insque((queue_entry_t) telt, ((queue_entry_t)next)->prev);
telt->set = TELT_SET;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
}
boolean_t reset_timeout(timer_elt_t telt)
{
spl_t s;
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
if (telt->set) {
remqueue(&timer_head.chain, (queue_entry_t)telt);
telt->set = TELT_UNSET;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
return TRUE;
}
else {
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
return FALSE;
}
}
void init_timeout(void)
{
- simple_lock_init(&timer_lock);
+ simple_lock_init_irq(&timer_lock);
queue_init(&timer_head.chain);
timer_head.ticks = ~0; /* MAXUINT - sentinel */
@@ -398,19 +386,19 @@ void init_timeout(void)
* the boot-time clock by storing the difference to the real-time
* clock.
*/
-struct time_value clock_boottime_offset;
+struct time_value64 clock_boottime_offset;
/*
* Update the offset of the boot-time clock from the real-time clock.
* This function must be called when the real-time clock is updated.
* This function must be called at SPLHIGH.
*/
-void
-clock_boottime_update(struct time_value *new_time)
+static void
+clock_boottime_update(const struct time_value64 *new_time)
{
- struct time_value delta = time;
- time_value_sub(&delta, new_time);
- time_value_add(&clock_boottime_offset, &delta);
+ struct time_value64 delta = time;
+ time_value64_sub(&delta, new_time);
+ time_value64_add(&clock_boottime_offset, &delta);
}
/*
@@ -418,10 +406,10 @@ clock_boottime_update(struct time_value *new_time)
* frame.
*/
void
-record_time_stamp (time_value_t *stamp)
+record_time_stamp(time_value64_t *stamp)
{
read_mapped_time(stamp);
- time_value_add(stamp, &clock_boottime_offset);
+ time_value64_add(stamp, &clock_boottime_offset);
}
/*
@@ -429,20 +417,33 @@ record_time_stamp (time_value_t *stamp)
* real-time clock frame.
*/
void
-read_time_stamp (time_value_t *stamp, time_value_t *result)
+read_time_stamp (const time_value64_t *stamp, time_value64_t *result)
{
*result = *stamp;
- time_value_sub(result, &clock_boottime_offset);
+ time_value64_sub(result, &clock_boottime_offset);
}
/*
+ * Read the time (deprecated version).
+ */
+kern_return_t
+host_get_time(const host_t host, time_value_t *current_time)
+{
+ if (host == HOST_NULL)
+ return(KERN_INVALID_HOST);
+
+ time_value64_t current_time64;
+ read_mapped_time(&current_time64);
+ TIME_VALUE64_TO_TIME_VALUE(&current_time64, current_time);
+ return (KERN_SUCCESS);
+}
+
+/*
* Read the time.
*/
kern_return_t
-host_get_time(host, current_time)
- const host_t host;
- time_value_t *current_time; /* OUT */
+host_get_time64(const host_t host, time_value64_t *current_time)
{
if (host == HOST_NULL)
return(KERN_INVALID_HOST);
@@ -455,9 +456,15 @@ host_get_time(host, current_time)
* Set the time. Only available to privileged users.
*/
kern_return_t
-host_set_time(host, new_time)
- const host_t host;
- time_value_t new_time;
+host_set_time(const host_t host, time_value_t new_time)
+{
+ time_value64_t new_time64;
+ TIME_VALUE_TO_TIME_VALUE64(&new_time, &new_time64);
+ return host_set_time64(host, new_time64);
+}
+
+kern_return_t
+host_set_time64(const host_t host, time_value64_t new_time)
{
spl_t s;
@@ -470,7 +477,7 @@ host_set_time(host, new_time)
*/
thread_bind(current_thread(), master_processor);
if (current_processor() != master_processor)
- thread_block((void (*)) 0);
+ thread_block(thread_no_continuation);
#endif /* NCPUS > 1 */
s = splhigh();
@@ -487,49 +494,72 @@ host_set_time(host, new_time)
thread_bind(current_thread(), PROCESSOR_NULL);
#endif /* NCPUS > 1 */
- return (KERN_SUCCESS);
+ return(KERN_SUCCESS);
}
/*
* Adjust the time gradually.
*/
kern_return_t
-host_adjust_time(host, new_adjustment, old_adjustment)
- const host_t host;
- time_value_t new_adjustment;
- time_value_t *old_adjustment; /* OUT */
+host_adjust_time(
+ const host_t host,
+ time_value_t new_adjustment,
+ time_value_t *old_adjustment /* OUT */)
{
- time_value_t oadj;
- unsigned int ndelta;
+ time_value64_t old_adjustment64;
+ time_value64_t new_adjustment64;
+ kern_return_t ret;
+
+ TIME_VALUE_TO_TIME_VALUE64(&new_adjustment, &new_adjustment64);
+ ret = host_adjust_time64(host, new_adjustment64, &old_adjustment64);
+ if (ret == KERN_SUCCESS) {
+ TIME_VALUE64_TO_TIME_VALUE(&old_adjustment64, old_adjustment);
+ }
+ return ret;
+}
+
+/*
+ * Adjust the time gradually.
+ */
+kern_return_t
+host_adjust_time64(
+ const host_t host,
+ time_value64_t new_adjustment,
+ time_value64_t *old_adjustment /* OUT */)
+{
+ time_value64_t oadj;
+ uint64_t ndelta_microseconds;
spl_t s;
if (host == HOST_NULL)
return (KERN_INVALID_HOST);
- ndelta = new_adjustment.seconds * 1000000
- + new_adjustment.microseconds;
+ /* Note we only adjust up to microsecond precision */
+ ndelta_microseconds = new_adjustment.seconds * MICROSECONDS_IN_ONE_SECOND
+ + new_adjustment.nanoseconds / 1000;
#if NCPUS > 1
thread_bind(current_thread(), master_processor);
if (current_processor() != master_processor)
- thread_block((void (*)) 0);
+ thread_block(thread_no_continuation);
#endif /* NCPUS > 1 */
s = splclock();
- oadj.seconds = timedelta / 1000000;
- oadj.microseconds = timedelta % 1000000;
+ oadj.seconds = timedelta / MICROSECONDS_IN_ONE_SECOND;
+ oadj.nanoseconds = (timedelta % MICROSECONDS_IN_ONE_SECOND) * 1000;
if (timedelta == 0) {
- if (ndelta > bigadj)
+ if (ndelta_microseconds > bigadj)
tickdelta = 10 * tickadj;
else
tickdelta = tickadj;
}
- if (ndelta % tickdelta)
- ndelta = ndelta / tickdelta * tickdelta;
+ /* Make ndelta_microseconds a multiple of tickdelta */
+ if (ndelta_microseconds % tickdelta)
+ ndelta_microseconds = ndelta_microseconds / tickdelta * tickdelta;
- timedelta = ndelta;
+ timedelta = ndelta_microseconds;
splx(s);
#if NCPUS > 1
@@ -585,8 +615,7 @@ void timeout(
spl_t s;
timer_elt_t elt;
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
for (elt = &timeout_timers[0]; elt < &timeout_timers[NTIMERS]; elt++)
if (elt->set == TELT_UNSET)
break;
@@ -595,8 +624,7 @@ void timeout(
elt->fcn = fcn;
elt->param = param;
elt->set = TELT_ALLOC;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
set_timeout(elt, (unsigned int)interval);
}
@@ -605,15 +633,12 @@ void timeout(
* Returns a boolean indicating whether the timeout element was found
* and removed.
*/
-boolean_t untimeout(fcn, param)
- void (*fcn)( void * param );
- const void * param;
+boolean_t untimeout(void (*fcn)( void * param ), const void *param)
{
spl_t s;
timer_elt_t elt;
- s = splsched();
- simple_lock(&timer_lock);
+ s = simple_lock_irq(&timer_lock);
queue_iterate(&timer_head.chain, elt, timer_elt_t, chain) {
if ((fcn == elt->fcn) && (param == elt->param)) {
@@ -623,12 +648,10 @@ boolean_t untimeout(fcn, param)
remqueue(&timer_head.chain, (queue_entry_t)elt);
elt->set = TELT_UNSET;
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
return (TRUE);
}
}
- simple_unlock(&timer_lock);
- splx(s);
+ simple_unlock_irq(s, &timer_lock);
return (FALSE);
}
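(Editorial note, not part of the patch: a worked example of the host_adjust_time64() arithmetic above, using illustrative values.)

/* Assume tickadj = 5 us, bigadj = 1000000 us, and a requested adjustment of
 * 0 s 123456789 ns:
 *   ndelta_microseconds = 0 * 1000000 + 123456789 / 1000 = 123456 us
 *   123456 <= bigadj, so tickdelta = tickadj = 5 us
 *   123456 % 5 != 0, so it is truncated to 123456 / 5 * 5 = 123455 us
 * timedelta is then set to 123455 and drained tickdelta microseconds per
 * clock tick by clock_interrupt(). */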
diff --git a/kern/mach_clock.h b/kern/mach_clock.h
index 977b43be..66903b8a 100644
--- a/kern/mach_clock.h
+++ b/kern/mach_clock.h
@@ -40,6 +40,7 @@ extern unsigned long elapsed_ticks; /* number of ticks elapsed since bootup */
extern int hz; /* number of ticks per second */
extern int tick; /* number of usec per tick */
+extern time_value64_t time; /* time since bootup (uncorrected) */
typedef void timer_func_t(void *);
@@ -91,26 +92,13 @@ extern void init_timeout (void);
* Record a timestamp in STAMP. Records values in the boot-time clock
* frame.
*/
-extern void record_time_stamp (time_value_t *stamp);
+extern void record_time_stamp (time_value64_t *stamp);
/*
* Read a timestamp in STAMP into RESULT. Returns values in the
* real-time clock frame.
*/
-extern void read_time_stamp (time_value_t *stamp, time_value_t *result);
-
-extern kern_return_t host_get_time(
- host_t host,
- time_value_t *current_time);
-
-extern kern_return_t host_set_time(
- host_t host,
- time_value_t new_time);
-
-extern kern_return_t host_adjust_time(
- host_t host,
- time_value_t new_adjustment,
- time_value_t *old_adjustment);
+extern void read_time_stamp (const time_value64_t *stamp, time_value64_t *result);
extern void mapable_time_init (void);
diff --git a/kern/machine.c b/kern/machine.c
index 2fe30cee..1322b549 100644
--- a/kern/machine.c
+++ b/kern/machine.c
@@ -44,6 +44,8 @@
#include <kern/debug.h>
#include <kern/ipc_host.h>
#include <kern/host.h>
+#include <kern/machine.h>
+#include <kern/mach_host.server.h>
#include <kern/lock.h>
#include <kern/processor.h>
#include <kern/queue.h>
@@ -66,7 +68,7 @@ struct machine_info machine_info;
struct machine_slot machine_slot[NCPUS];
queue_head_t action_queue; /* assign/shutdown queue */
-decl_simple_lock_data(,action_lock);
+def_simple_lock_data(,action_lock);
/*
* cpu_up:
@@ -82,6 +84,9 @@ void cpu_up(int cpu)
processor = cpu_to_processor(cpu);
pset_lock(&default_pset);
+#if MACH_HOST
+ pset_lock(slave_pset);
+#endif
s = splsched();
processor_lock(processor);
#if NCPUS > 1
@@ -90,20 +95,49 @@ void cpu_up(int cpu)
ms = &machine_slot[cpu];
ms->running = TRUE;
machine_info.avail_cpus++;
- pset_add_processor(&default_pset, processor);
+#if MACH_HOST
+ if (cpu != 0)
+ pset_add_processor(slave_pset, processor);
+ else
+#endif
+ pset_add_processor(&default_pset, processor);
processor->state = PROCESSOR_RUNNING;
processor_unlock(processor);
splx(s);
+#if MACH_HOST
+ pset_unlock(slave_pset);
+#endif
pset_unlock(&default_pset);
}
+kern_return_t
+host_reboot(const host_t host, int options)
+{
+ if (host == HOST_NULL)
+ return (KERN_INVALID_HOST);
+
+ if (options & RB_DEBUGGER) {
+ Debugger("Debugger");
+ } else {
+#ifdef parisc
+/* XXX this could be made common */
+ halt_all_cpus(options);
+#else
+ halt_all_cpus(!(options & RB_HALT));
+#endif
+ }
+ return (KERN_SUCCESS);
+}
+
+#if NCPUS > 1
+
/*
* cpu_down:
*
* Flag specified cpu as down. Called when a processor is about to
* go offline.
*/
-void cpu_down(int cpu)
+static void cpu_down(int cpu)
{
struct machine_slot *ms;
processor_t processor;
@@ -124,35 +158,13 @@ void cpu_down(int cpu)
splx(s);
}
-kern_return_t
-host_reboot(host, options)
- const host_t host;
- int options;
-{
- if (host == HOST_NULL)
- return (KERN_INVALID_HOST);
-
- if (options & RB_DEBUGGER) {
- Debugger("Debugger");
- } else {
-#ifdef parisc
-/* XXX this could be made common */
- halt_all_cpus(options);
-#else
- halt_all_cpus(!(options & RB_HALT));
-#endif
- }
- return (KERN_SUCCESS);
-}
-
-#if NCPUS > 1
/*
* processor_request_action - common internals of processor_assign
* and processor_shutdown. If new_pset is null, this is
* a shutdown, else it's an assign and caller must donate
* a reference.
*/
-void
+static void
processor_request_action(
processor_t processor,
processor_set_t new_pset)
@@ -164,14 +176,14 @@ processor_request_action(
* get at processor state.
*/
pset = processor->processor_set;
- simple_lock(&pset->idle_lock);
+ pset_idle_lock();
/*
* If the processor is dispatching, let it finish - it will set its
* state to running very soon.
*/
while (*(volatile int *)&processor->state == PROCESSOR_DISPATCHING)
- continue;
+ cpu_pause();
/*
* Now lock the action queue and do the dirty work.
@@ -216,7 +228,7 @@ processor_request_action(
panic("processor_request_action: bad state");
}
simple_unlock(&action_lock);
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
thread_wakeup((event_t)&action_queue);
}
@@ -358,49 +370,11 @@ processor_shutdown(processor_t processor)
}
/*
- * action_thread() shuts down processors or changes their assignment.
- */
-void __attribute__((noreturn)) action_thread_continue(void)
-{
- processor_t processor;
- spl_t s;
-
- while (TRUE) {
- s = splsched();
- simple_lock(&action_lock);
- while ( !queue_empty(&action_queue)) {
- processor = (processor_t) queue_first(&action_queue);
- queue_remove(&action_queue, processor, processor_t,
- processor_queue);
- simple_unlock(&action_lock);
- (void) splx(s);
-
- processor_doaction(processor);
-
- s = splsched();
- simple_lock(&action_lock);
- }
-
- assert_wait((event_t) &action_queue, FALSE);
- simple_unlock(&action_lock);
- (void) splx(s);
- counter(c_action_thread_block++);
- thread_block(action_thread_continue);
- }
-}
-
-void __attribute__((noreturn)) action_thread(void)
-{
- action_thread_continue();
- /*NOTREACHED*/
-}
-
-/*
* processor_doaction actually does the shutdown. The trick here
* is to schedule ourselves onto a cpu and then save our
* context back into the runqs before taking out the cpu.
*/
-void processor_doaction(processor_t processor)
+static void processor_doaction(processor_t processor)
{
thread_t this_thread;
spl_t s;
@@ -619,12 +593,49 @@ Restart_pset:
}
/*
+ * action_thread() shuts down processors or changes their assignment.
+ */
+void __attribute__((noreturn)) action_thread_continue(void)
+{
+ processor_t processor;
+ spl_t s;
+
+ while (TRUE) {
+ s = splsched();
+ simple_lock(&action_lock);
+ while ( !queue_empty(&action_queue)) {
+ processor = (processor_t) queue_first(&action_queue);
+ queue_remove(&action_queue, processor, processor_t,
+ processor_queue);
+ simple_unlock(&action_lock);
+ (void) splx(s);
+
+ processor_doaction(processor);
+
+ s = splsched();
+ simple_lock(&action_lock);
+ }
+
+ assert_wait((event_t) &action_queue, FALSE);
+ simple_unlock(&action_lock);
+ (void) splx(s);
+ counter(c_action_thread_block++);
+ thread_block(action_thread_continue);
+ }
+}
+
+void __attribute__((noreturn)) action_thread(void)
+{
+ action_thread_continue();
+ /*NOTREACHED*/
+}
+
+/*
* Actually do the processor shutdown. This is called at splsched,
* running on the processor's shutdown stack.
*/
-void processor_doshutdown(processor)
-processor_t processor;
+void processor_doshutdown(processor_t processor)
{
int cpu = processor->slot_num;
@@ -635,7 +646,7 @@ processor_t processor;
*/
PMAP_DEACTIVATE_KERNEL(cpu);
#ifndef MIGRATING_THREADS
- active_threads[cpu] = THREAD_NULL;
+ percpu_array[cpu].active_thread = THREAD_NULL;
#endif
cpu_down(cpu);
thread_wakeup((event_t)processor);
@@ -659,18 +670,3 @@ processor_assign(
}
#endif /* NCPUS > 1 */
-
-kern_return_t
-host_get_boot_info(
- host_t priv_host,
- kernel_boot_info_t boot_info)
-{
- char *src = "";
-
- if (priv_host == HOST_NULL) {
- return KERN_INVALID_HOST;
- }
-
- (void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX);
- return KERN_SUCCESS;
-}
diff --git a/kern/pc_sample.c b/kern/pc_sample.c
index e9f0b16a..497bd894 100644
--- a/kern/pc_sample.c
+++ b/kern/pc_sample.c
@@ -35,6 +35,7 @@
#include <kern/host.h>
#include <kern/thread.h>
#include <kern/pc_sample.h>
+#include <kern/mach4.server.h>
#include <kern/mach_clock.h>
#if MACH_PCSAMPLE
@@ -60,8 +61,8 @@ void take_pc_sample(
cp->seqno++;
sample = &((sampled_pc_t *)cp->buffer)[cp->seqno % MAX_PC_SAMPLES];
- sample->id = (vm_offset_t)t;
- sample->pc = pc;
+ sample->id = (rpc_vm_offset_t)(vm_offset_t)t;
+ sample->pc = (rpc_vm_offset_t)pc;
sample->sampletype = flavor;
}
@@ -158,7 +159,7 @@ get_sampled_pcs(
sample_control_t *cp,
sampled_pc_seqno_t *seqnop,
sampled_pc_array_t sampled_pcs_out,
- int *sampled_pcs_cntp)
+ mach_msg_type_number_t *sampled_pcs_cntp)
{
int nsamples;
sampled_pc_seqno_t seqidx1, seqidx2;
@@ -217,7 +218,7 @@ thread_get_sampled_pcs(
thread_t thread,
sampled_pc_seqno_t *seqnop,
sampled_pc_array_t sampled_pcs_out,
- int *sampled_pcs_cntp)
+ mach_msg_type_number_t *sampled_pcs_cntp)
{
if (thread == THREAD_NULL)
return KERN_INVALID_ARGUMENT;
@@ -234,7 +235,7 @@ task_get_sampled_pcs(
task_t task,
sampled_pc_seqno_t *seqnop,
sampled_pc_array_t sampled_pcs_out,
- int *sampled_pcs_cntp)
+ mach_msg_type_number_t *sampled_pcs_cntp)
{
if (task == TASK_NULL)
return KERN_INVALID_ARGUMENT;
@@ -250,8 +251,8 @@ task_get_sampled_pcs(
kern_return_t
thread_enable_pc_sampling(
- const thread_t thread,
- const int *tickp,
+ thread_t thread,
+ int *tickp,
sampled_pc_flavor_t flavors)
{
return KERN_FAILURE; /* not implemented */
@@ -259,8 +260,8 @@ thread_enable_pc_sampling(
kern_return_t
task_enable_pc_sampling(
- const task_t task,
- const int *tickp,
+ task_t task,
+ int *tickp,
sampled_pc_flavor_t flavors)
{
return KERN_FAILURE; /* not implemented */
@@ -268,36 +269,36 @@ task_enable_pc_sampling(
kern_return_t
thread_disable_pc_sampling(
- const thread_t thread,
- const int *samplecntp)
+ thread_t thread,
+ int *samplecntp)
{
return KERN_FAILURE; /* not implemented */
}
kern_return_t
task_disable_pc_sampling(
- const task_t task,
- const int *samplecntp)
+ task_t task,
+ int *samplecntp)
{
return KERN_FAILURE; /* not implemented */
}
kern_return_t
thread_get_sampled_pcs(
- const thread_t thread,
- const sampled_pc_seqno_t *seqnop,
- const sampled_pc_array_t sampled_pcs_out,
- const int *sampled_pcs_cntp)
+ thread_t thread,
+ sampled_pc_seqno_t *seqnop,
+ sampled_pc_array_t sampled_pcs_out,
+ mach_msg_type_number_t *sampled_pcs_cntp)
{
return KERN_FAILURE; /* not implemented */
}
kern_return_t
task_get_sampled_pcs(
- const task_t task,
- const sampled_pc_seqno_t *seqnop,
- const sampled_pc_array_t sampled_pcs_out,
- const int *sampled_pcs_cntp)
+ task_t task,
+ sampled_pc_seqno_t *seqnop,
+ sampled_pc_array_t sampled_pcs_out,
+ mach_msg_type_number_t *sampled_pcs_cntp)
{
return KERN_FAILURE; /* not implemented */
}
diff --git a/kern/printf.c b/kern/printf.c
index 50f23623..cbc27ae6 100644
--- a/kern/printf.c
+++ b/kern/printf.c
@@ -116,12 +116,12 @@
* (compatibility)
*/
+#include <stdarg.h>
#include <string.h>
#include <device/cons.h>
#include <kern/printf.h>
#include <mach/boolean.h>
-#include <kern/lock.h>
-#include <stdarg.h>
+
#define isdigit(d) ((d) >= '0' && (d) <= '9')
#define Ctod(c) ((c) - '0')
diff --git a/kern/priority.c b/kern/priority.c
index 587ea2f9..7f28b3d7 100644
--- a/kern/priority.c
+++ b/kern/priority.c
@@ -42,9 +42,9 @@
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>
+#include <kern/priority.h>
#include <kern/processor.h>
#include <kern/timer.h>
-#include <kern/time_stamp.h>
#include <machine/machspl.h>
@@ -137,14 +137,14 @@ void thread_quantum_update(
if ((quantum != myprocessor->last_quantum) &&
(pset->processor_count > 1)) {
myprocessor->last_quantum = quantum;
- simple_lock(&pset->quantum_adj_lock);
+ s = simple_lock_irq(&pset->quantum_adj_lock);
quantum = min_quantum + (pset->quantum_adj_index *
(quantum - min_quantum)) /
(pset->processor_count - 1);
if (++(pset->quantum_adj_index) >=
pset->processor_count)
pset->quantum_adj_index = 0;
- simple_unlock(&pset->quantum_adj_lock);
+ simple_unlock_irq(s, &pset->quantum_adj_lock);
}
#endif /* NCPUS > 1 */
if (myprocessor->quantum <= 0) {
diff --git a/kern/processor.c b/kern/processor.c
index ec56952e..0e42fa37 100644
--- a/kern/processor.c
+++ b/kern/processor.c
@@ -51,6 +51,7 @@
#if MACH_HOST
#include <kern/slab.h>
struct kmem_cache pset_cache;
+struct processor_set *slave_pset;
#endif /* MACH_HOST */
@@ -60,14 +61,12 @@ struct kmem_cache pset_cache;
int master_cpu;
struct processor_set default_pset;
-struct processor processor_array[NCPUS];
queue_head_t all_psets;
int all_psets_count;
-decl_simple_lock_data(, all_psets_lock);
+def_simple_lock_data(, all_psets_lock);
processor_t master_processor;
-processor_t processor_ptr[NCPUS];
/*
* Bootstrap the processor/pset system so the scheduler can run.
@@ -77,14 +76,12 @@ void pset_sys_bootstrap(void)
int i;
pset_init(&default_pset);
- default_pset.empty = FALSE;
for (i = 0; i < NCPUS; i++) {
/*
* Initialize processor data structures.
- * Note that cpu_to_processor(i) is processor_ptr[i].
+ * Note that cpu_to_processor is processor_ptr.
*/
- processor_ptr[i] = &processor_array[i];
- processor_init(processor_ptr[i], i);
+ processor_init(processor_ptr(i), i);
}
master_processor = cpu_to_processor(master_cpu);
queue_init(&all_psets);
@@ -92,7 +89,6 @@ void pset_sys_bootstrap(void)
queue_enter(&all_psets, &default_pset, processor_set_t, all_psets);
all_psets_count = 1;
default_pset.active = TRUE;
- default_pset.empty = FALSE;
/*
* Note: the default_pset has a max_priority of BASEPRI_USER.
@@ -127,6 +123,8 @@ void pset_sys_init(void)
ipc_processor_init(processor);
}
}
+
+ processor_set_create(&realhost, &slave_pset, &slave_pset);
}
#endif /* MACH_HOST */
@@ -169,7 +167,7 @@ void pset_init(
pset->set_quantum = min_quantum;
#if NCPUS > 1
pset->quantum_adj_index = 0;
- simple_lock_init(&pset->quantum_adj_lock);
+ simple_lock_init_irq(&pset->quantum_adj_lock);
for (i = 0; i <= NCPUS; i++) {
pset->machine_quantum[i] = min_quantum;
@@ -245,6 +243,7 @@ void pset_add_processor(
queue_enter(&pset->processors, processor, processor_t, processors);
processor->processor_set = pset;
pset->processor_count++;
+ pset->empty = FALSE;
quantum_set(pset);
}
@@ -845,7 +844,7 @@ processor_set_policy_disable(
*
* Common internals for processor_set_{threads,tasks}
*/
-kern_return_t
+static kern_return_t
processor_set_things(
processor_set_t pset,
mach_port_t **thing_list,
diff --git a/kern/processor.h b/kern/processor.h
index b81526c0..c90d0529 100644
--- a/kern/processor.h
+++ b/kern/processor.h
@@ -41,7 +41,6 @@
#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/processor_info.h>
-#include <kern/cpu_number.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/sched.h>
@@ -56,7 +55,7 @@ struct processor_set {
struct run_queue runq; /* runq for this set */
queue_head_t idle_queue; /* idle processors */
int idle_count; /* how many ? */
- decl_simple_lock_data(, idle_lock) /* lock for above */
+ decl_simple_lock_data(, idle_lock) /* lock for above, shall be taken at splsched only */
queue_head_t processors; /* all processors here */
int processor_count; /* how many ? */
boolean_t empty; /* true if no processors */
@@ -78,7 +77,7 @@ struct processor_set {
int set_quantum; /* current default quantum */
#if NCPUS > 1
int quantum_adj_index; /* runtime quantum adj. */
- decl_simple_lock_data(, quantum_adj_lock) /* lock for above */
+ decl_simple_lock_irq_data(, quantum_adj_lock) /* lock for above */
int machine_quantum[NCPUS+1]; /* ditto */
#endif /* NCPUS > 1 */
long mach_factor; /* mach_factor */
@@ -86,6 +85,23 @@ struct processor_set {
long sched_load; /* load avg for scheduler */
};
extern struct processor_set default_pset;
+#if MACH_HOST
+extern struct processor_set *slave_pset;
+#endif
+
+#ifdef MACH_LDEBUG
+#define pset_idle_lock() do { \
+ assert_splsched(); \
+ simple_lock_nocheck(&pset->idle_lock); \
+} while (0)
+#define pset_idle_unlock() do { \
+ assert_splsched(); \
+ simple_unlock_nocheck(&pset->idle_lock); \
+} while (0)
+#else
+#define pset_idle_lock() simple_lock_nocheck(&pset->idle_lock)
+#define pset_idle_unlock() simple_unlock_nocheck(&pset->idle_lock)
+#endif
struct processor {
struct run_queue runq; /* local runq for this processor */
@@ -112,6 +128,9 @@ struct processor {
typedef struct processor Processor;
extern struct processor processor_array[NCPUS];
+#include <kern/cpu_number.h>
+#include <machine/percpu.h>
+
/*
* Chain of all processor sets.
*/
@@ -195,23 +214,15 @@ extern processor_t master_processor;
#define PROCESSOR_ASSIGN 4 /* Assignment is changing */
#define PROCESSOR_SHUTDOWN 5 /* Being shutdown */
-/*
- * Use processor ptr array to find current processor's data structure.
- * This replaces a multiplication (index into processor_array) with
- * an array lookup and a memory reference. It also allows us to save
- * space if processor numbering gets too sparse.
- */
-
-extern processor_t processor_ptr[NCPUS];
-
-#define cpu_to_processor(i) (processor_ptr[i])
+#define processor_ptr(i) (&percpu_array[i].processor)
+#define cpu_to_processor processor_ptr
-#define current_processor() (processor_ptr[cpu_number()])
+#define current_processor() (percpu_ptr(struct processor, processor))
#define current_processor_set() (current_processor()->processor_set)
/* Compatibility -- will go away */
-#define cpu_state(slot_num) (processor_ptr[slot_num]->state)
+#define cpu_state(slot_num) (processor_ptr(slot_num)->state)
#define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE)
/* Useful lock macros */
@@ -221,6 +232,7 @@ extern processor_t processor_ptr[NCPUS];
#define pset_ref_lock(pset) simple_lock(&(pset)->ref_lock)
#define pset_ref_unlock(pset) simple_unlock(&(pset)->ref_lock)
+/* Shall be taken at splsched only */
#define processor_lock(pr) simple_lock(&(pr)->lock)
#define processor_unlock(pr) simple_unlock(&(pr)->lock)
@@ -320,7 +332,6 @@ extern kern_return_t processor_set_threads(
natural_t *count);
#endif
-void processor_doaction(processor_t processor);
void processor_doshutdown(processor_t processor);
void quantum_set(processor_set_t pset);
void pset_init(processor_set_t pset);
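(Editorial sketch, not part of the patch: how per-CPU processor structures are reached after this change; smp_get_numcpus() is the helper from kern/smp.h used later in this series.)

/* Sketch: walking every processor through the percpu-based accessors. */
int i;

for (i = 0; i < smp_get_numcpus(); i++) {
	processor_t p = cpu_to_processor(i);	/* &percpu_array[i].processor */

	if (p == current_processor())		/* percpu_ptr(struct processor, processor) */
		continue;
	/* ... inspect or signal the remote processor ... */
}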
diff --git a/kern/profile.c b/kern/profile.c
index b33d6953..4fcd541f 100644
--- a/kern/profile.c
+++ b/kern/profile.c
@@ -180,8 +180,7 @@ printf("profile_thread: mach_msg failed returned %x\n",(int)mr);
#include <mach/message.h>
void
-send_last_sample_buf(th)
-thread_t th;
+send_last_sample_buf(thread_t th)
{
spl_t s;
buf_to_send_t buf_entry;
@@ -290,10 +289,9 @@ profile(pc) {
MiG, even though it is not used in the function itself. */
kern_return_t
-mach_sample_thread (task, reply, cur_thread)
-ipc_space_t task;
-ipc_object_t reply;
-thread_t cur_thread;
+mach_sample_thread (ipc_space_t task,
+ ipc_object_t reply,
+ thread_t cur_thread)
{
/*
* This routine is called every time that a new thread has made
@@ -349,10 +347,7 @@ printf("ERROR:mach_sample_thread:cannot set pbuf_nb\n");
}
kern_return_t
-mach_sample_task (task, reply, cur_task)
-ipc_space_t task;
-ipc_object_t reply;
-task_t cur_task;
+mach_sample_task (ipc_space_t task, ipc_object_t reply, task_t cur_task)
{
prof_data_t pbuf=cur_task->profil_buffer;
vm_offset_t vmpbuf;
diff --git a/kern/queue.c b/kern/queue.c
index d9396e54..f5326202 100644
--- a/kern/queue.c
+++ b/kern/queue.c
@@ -119,13 +119,3 @@ void insque(
(pred->next)->prev = entry;
pred->next = entry;
}
-
-struct queue_entry
-*remque(
- struct queue_entry *elt)
-{
- (elt->next)->prev = elt->prev;
- (elt->prev)->next = elt->next;
- return(elt);
-}
-
diff --git a/kern/sched.h b/kern/sched.h
index 588e0aa6..d7e74d3a 100644
--- a/kern/sched.h
+++ b/kern/sched.h
@@ -64,7 +64,9 @@
struct run_queue {
queue_head_t runq[NRQS]; /* one for each priority */
- decl_simple_lock_data(, lock) /* one lock for all queues */
+ decl_simple_lock_data(, lock) /* one lock for all queues,
+ shall be taken at splsched
+ only */
int low; /* low queue value */
int count; /* count of threads runable */
};
@@ -72,6 +74,21 @@ struct run_queue {
typedef struct run_queue *run_queue_t;
#define RUN_QUEUE_NULL ((run_queue_t) 0)
+/* Shall be taken at splsched only */
+#ifdef MACH_LDEBUG
+#define runq_lock(rq) do { \
+ assert_splsched(); \
+ simple_lock_nocheck(&(rq)->lock); \
+} while (0)
+#define runq_unlock(rq) do { \
+ assert_splsched(); \
+ simple_unlock_nocheck(&(rq)->lock); \
+} while (0)
+#else
+#define runq_lock(rq) simple_lock_nocheck(&(rq)->lock)
+#define runq_unlock(rq) simple_unlock_nocheck(&(rq)->lock)
+#endif
+
#if MACH_FIXPRI
/*
* NOTE: For fixed priority threads, first_quantum indicates
diff --git a/kern/sched_prim.c b/kern/sched_prim.c
index 63a0437c..47e578e6 100644
--- a/kern/sched_prim.c
+++ b/kern/sched_prim.c
@@ -49,6 +49,7 @@
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
+#include <kern/smp.h>
#include <kern/syscall_subr.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
@@ -127,14 +128,30 @@ timer_elt_data_t recompute_priorities_timer;
#define NUMQUEUES 1031
+/* Shall be taken at splsched only */
+decl_simple_lock_data(static, wait_lock[NUMQUEUES]) /* Lock for... */
queue_head_t wait_queue[NUMQUEUES];
-decl_simple_lock_data(, wait_lock[NUMQUEUES])
+
+#ifdef MACH_LDEBUG
+#define waitq_lock(wl) do { \
+ assert_splsched(); \
+ simple_lock_nocheck(wl); \
+} while (0)
+#define waitq_unlock(wl) do { \
+ assert_splsched(); \
+ simple_unlock_nocheck(wl); \
+} while (0)
+#else
+#define waitq_lock(wl) simple_lock_nocheck(wl)
+#define waitq_unlock(wl) simple_unlock_nocheck(wl)
+#endif
+
/* NOTE: we want a small positive integer out of this */
#define wait_hash(event) \
((((long)(event) < 0) ? ~(long)(event) : (long)(event)) % NUMQUEUES)
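(Editorial note, not part of the patch: wait_hash() folds the event address into one of NUMQUEUES = 1031 buckets; the sign test keeps the result non-negative on machines where kernel addresses are negative as longs. For example, (long)event = 0x1000 gives 4096 % 1031 = 1003, while a negative value such as -5 is first flipped to ~(-5) = 4, giving bucket 4.)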
-void wait_queue_init(void)
+static void wait_queue_init(void)
{
int i;
@@ -162,7 +179,7 @@ void sched_init(void)
* Thread timeout routine, called when timer expires.
* Called at splsoftclock.
*/
-void thread_timeout(
+static void thread_timeout(
void *_thread)
{
thread_t thread = _thread;
@@ -232,7 +249,7 @@ void assert_wait(
index = wait_hash(event);
q = &wait_queue[index];
lock = &wait_lock[index];
- simple_lock(lock);
+ waitq_lock(lock);
thread_lock(thread);
enqueue_tail(q, &(thread->links));
thread->wait_event = event;
@@ -241,7 +258,7 @@ void assert_wait(
else
thread->state |= TH_WAIT | TH_UNINT;
thread_unlock(thread);
- simple_unlock(lock);
+ waitq_unlock(lock);
}
else {
thread_lock(thread);
@@ -294,7 +311,7 @@ void clear_wait(
index = wait_hash(event);
q = &wait_queue[index];
lock = &wait_lock[index];
- simple_lock(lock);
+ waitq_lock(lock);
/*
* If the thread is still waiting on that event,
* then remove it from the list. If it is waiting
@@ -307,7 +324,7 @@ void clear_wait(
thread->wait_event = 0;
event = 0; /* cause to run below */
}
- simple_unlock(lock);
+ waitq_unlock(lock);
}
if (event == 0) {
int state = thread->state;
@@ -386,7 +403,7 @@ boolean_t thread_wakeup_prim(
q = &wait_queue[index];
s = splsched();
lock = &wait_lock[index];
- simple_lock(lock);
+ waitq_lock(lock);
thread = (thread_t) queue_first(q);
while (!queue_end(q, (queue_entry_t)thread)) {
next_th = (thread_t) queue_next((queue_t) thread);
@@ -435,7 +452,7 @@ boolean_t thread_wakeup_prim(
}
thread = next_th;
}
- simple_unlock(lock);
+ waitq_unlock(lock);
splx(s);
return (woke);
}
@@ -490,7 +507,7 @@ void thread_bind(
* Assumes splsched.
*/
-thread_t thread_select(
+static thread_t thread_select(
processor_t myprocessor)
{
thread_t thread;
@@ -789,7 +806,7 @@ boolean_t thread_invoke(
/*
* switch_context is machine-dependent. It does the
* machine-dependent components of a context-switch, like
- * changing address spaces. It updates active_threads.
+ * changing address spaces. It updates active_thread.
* It returns only if a continuation is not supplied.
*/
counter(c_thread_invoke_csw++);
@@ -1173,7 +1190,7 @@ void update_priority(
whichq = NRQS - 1; \
} \
\
- simple_lock(&(rq)->lock); /* lock the run queue */ \
+ runq_lock(rq); /* lock the run queue */ \
checkrq((rq), "thread_setrun: before adding thread"); \
enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \
\
@@ -1184,7 +1201,7 @@ void update_priority(
(th)->runq = (rq); \
thread_check((th), (rq)); \
checkrq((rq), "thread_setrun: after adding thread"); \
- simple_unlock(&(rq)->lock); \
+ runq_unlock(rq); \
MACRO_END
#else /* DEBUG */
#define run_queue_enqueue(rq, th) \
@@ -1197,7 +1214,7 @@ void update_priority(
whichq = NRQS - 1; \
} \
\
- simple_lock(&(rq)->lock); /* lock the run queue */ \
+ runq_lock(rq); /* lock the run queue */ \
enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \
\
if (whichq < (rq)->low || (rq)->count == 0) \
@@ -1205,7 +1222,7 @@ void update_priority(
\
(rq)->count++; \
(th)->runq = (rq); \
- simple_unlock(&(rq)->lock); \
+ runq_unlock(rq); \
MACRO_END
#endif /* DEBUG */
/*
@@ -1251,8 +1268,8 @@ void thread_setrun(
*/
processor = th->last_processor;
if (processor->state == PROCESSOR_IDLE) {
- simple_lock(&processor->lock);
- simple_lock(&pset->idle_lock);
+ processor_lock(processor);
+ pset_idle_lock();
if ((processor->state == PROCESSOR_IDLE)
#if MACH_HOST
&& (processor->processor_set == pset)
@@ -1263,17 +1280,19 @@ void thread_setrun(
pset->idle_count--;
processor->next_thread = th;
processor->state = PROCESSOR_DISPATCHING;
- simple_unlock(&pset->idle_lock);
- simple_unlock(&processor->lock);
+ pset_idle_unlock();
+ processor_unlock(processor);
+ if (processor != current_processor())
+ cause_ast_check(processor);
return;
}
- simple_unlock(&pset->idle_lock);
- simple_unlock(&processor->lock);
+ pset_idle_unlock();
+ processor_unlock(processor);
}
#endif /* HW_FOOTPRINT */
if (pset->idle_count > 0) {
- simple_lock(&pset->idle_lock);
+ pset_idle_lock();
if (pset->idle_count > 0) {
processor = (processor_t) queue_first(&pset->idle_queue);
queue_remove(&(pset->idle_queue), processor, processor_t,
@@ -1281,10 +1300,12 @@ void thread_setrun(
pset->idle_count--;
processor->next_thread = th;
processor->state = PROCESSOR_DISPATCHING;
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
+ if (processor != current_processor())
+ cause_ast_check(processor);
return;
}
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
}
rq = &(pset->runq);
run_queue_enqueue(rq,th);
@@ -1309,21 +1330,23 @@ void thread_setrun(
* processor here because it may not be the current one.
*/
if (processor->state == PROCESSOR_IDLE) {
- simple_lock(&processor->lock);
+ processor_lock(processor);
pset = processor->processor_set;
- simple_lock(&pset->idle_lock);
+ pset_idle_lock();
if (processor->state == PROCESSOR_IDLE) {
queue_remove(&pset->idle_queue, processor,
processor_t, processor_queue);
pset->idle_count--;
processor->next_thread = th;
processor->state = PROCESSOR_DISPATCHING;
- simple_unlock(&pset->idle_lock);
- simple_unlock(&processor->lock);
+ pset_idle_unlock();
+ processor_unlock(processor);
+ if (processor != current_processor())
+ cause_ast_check(processor);
return;
}
- simple_unlock(&pset->idle_lock);
- simple_unlock(&processor->lock);
+ pset_idle_unlock();
+ processor_unlock(processor);
}
rq = &(processor->runq);
run_queue_enqueue(rq,th);
@@ -1421,7 +1444,7 @@ struct run_queue *rem_runq(
* the thread is on a runq, but could leave.
*/
if (rq != RUN_QUEUE_NULL) {
- simple_lock(&rq->lock);
+ runq_lock(rq);
#if DEBUG
checkrq(rq, "rem_runq: at entry");
#endif /* DEBUG */
@@ -1440,7 +1463,7 @@ struct run_queue *rem_runq(
checkrq(rq, "rem_runq: after removing thread");
#endif /* DEBUG */
th->runq = RUN_QUEUE_NULL;
- simple_unlock(&rq->lock);
+ runq_unlock(rq);
}
else {
/*
@@ -1449,7 +1472,7 @@ struct run_queue *rem_runq(
* can't move again because this routine's
* caller locked the thread.
*/
- simple_unlock(&rq->lock);
+ runq_unlock(rq);
rq = RUN_QUEUE_NULL;
}
}
@@ -1570,7 +1593,7 @@ thread_t choose_pset_thread(
* was running. If it was in an assignment or shutdown,
* leave it alone. Return its idle thread.
*/
- simple_lock(&pset->idle_lock);
+ pset_idle_lock();
if (myprocessor->state == PROCESSOR_RUNNING) {
myprocessor->state = PROCESSOR_IDLE;
/*
@@ -1588,7 +1611,7 @@ thread_t choose_pset_thread(
pset->idle_count++;
}
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
return myprocessor->idle_thread;
}
@@ -1604,7 +1627,7 @@ int no_dispatch_count = 0;
* to execute.
*/
-void __attribute__((noreturn)) idle_thread_continue(void)
+static void __attribute__((noreturn)) idle_thread_continue(void)
{
processor_t myprocessor;
volatile thread_t *threadp;
@@ -1713,12 +1736,12 @@ retry:
processor_set_t pset;
pset = myprocessor->processor_set;
- simple_lock(&pset->idle_lock);
+ pset_idle_lock();
if (myprocessor->state != PROCESSOR_IDLE) {
/*
* Something happened, try again.
*/
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
goto retry;
}
/*
@@ -1730,7 +1753,7 @@ retry:
queue_remove(&pset->idle_queue, myprocessor,
processor_t, processor_queue);
myprocessor->state = PROCESSOR_RUNNING;
- simple_unlock(&pset->idle_lock);
+ pset_idle_unlock();
counter(c_idle_thread_block++);
thread_block(idle_thread_continue);
}
@@ -1743,7 +1766,9 @@ retry:
*/
if ((new_thread = (thread_t)*threadp)!= THREAD_NULL) {
*threadp = (volatile thread_t) THREAD_NULL;
+ thread_lock(new_thread);
thread_setrun(new_thread, FALSE);
+ thread_unlock(new_thread);
}
counter(c_idle_thread_block++);
@@ -1795,7 +1820,7 @@ void idle_thread(void)
* we don't want to do at interrupt level. This allows us to
* avoid blocking.
*/
-void sched_thread_continue(void)
+static void sched_thread_continue(void)
{
while (TRUE) {
(void) compute_mach_factor();
@@ -1860,7 +1885,7 @@ int stuck_count = 0;
* it ran out of space.
*/
-boolean_t
+static boolean_t
do_runq_scan(
run_queue_t runq)
{
@@ -1931,6 +1956,7 @@ void do_thread_scan(void)
spl_t s;
boolean_t restart_needed = 0;
thread_t thread;
+ int i;
#if MACH_HOST
processor_set_t pset;
#endif /* MACH_HOST */
@@ -1939,15 +1965,19 @@ void do_thread_scan(void)
#if MACH_HOST
simple_lock(&all_psets_lock);
queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
- if (restart_needed = do_runq_scan(&pset->runq))
+ if ((restart_needed = do_runq_scan(&pset->runq)))
break;
}
simple_unlock(&all_psets_lock);
#else /* MACH_HOST */
restart_needed = do_runq_scan(&default_pset.runq);
#endif /* MACH_HOST */
- if (!restart_needed)
- restart_needed = do_runq_scan(&master_processor->runq);
+ if (!restart_needed) {
+ for (i = 0; i < smp_get_numcpus(); i++) {
+ if ((restart_needed = do_runq_scan(&cpu_to_processor(i)->runq)))
+ break;
+ }
+ }
/*
* Ok, we now have a collection of candidates -- fix them.
diff --git a/kern/sched_prim.h b/kern/sched_prim.h
index 405e5456..c250b220 100644
--- a/kern/sched_prim.h
+++ b/kern/sched_prim.h
@@ -184,5 +184,6 @@ void thread_check(thread_t th, run_queue_t rq);
extern void idle_thread(void) __attribute__((noreturn));
extern void sched_thread(void);
+extern int stuck_count;
#endif /* _KERN_SCHED_PRIM_H_ */
diff --git a/kern/slab.c b/kern/slab.c
index d44da165..dc44e42b 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -79,6 +79,7 @@
#include <kern/slab.h>
#include <kern/kalloc.h>
#include <kern/cpu_number.h>
+#include <kern/mach_debug.server.h>
#include <mach/vm_param.h>
#include <mach/machine/vm_types.h>
#include <vm/vm_kern.h>
@@ -415,6 +416,9 @@ kmem_pagealloc_virtual(vm_size_t size, vm_size_t align)
static void
kmem_pagefree_virtual(vm_offset_t addr, vm_size_t size)
{
+ if (addr < kernel_virtual_start || addr + size > kernel_virtual_end)
+ panic("kmem_pagefree_virtual(%lx-%lx) falls in physical memory area!\n",
+ (unsigned long) addr, (unsigned long) addr + size);
assert(size > PAGE_SIZE);
size = vm_page_round(size);
kmem_free(kernel_map, addr, size);
@@ -813,10 +817,10 @@ void kmem_cache_init(struct kmem_cache *cache, const char *name,
#endif /* SLAB_USE_CPU_POOLS */
size_t buf_size;
-#if SLAB_VERIFY
- cache->flags = KMEM_CF_VERIFY;
-#else /* SLAB_VERIFY */
cache->flags = 0;
+#if SLAB_VERIFY
+ if (obj_size < PAGE_SIZE - sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag))
+ cache->flags |= KMEM_CF_VERIFY;
#endif /* SLAB_VERIFY */
if (flags & KMEM_CACHE_VERIFY)
@@ -1394,6 +1398,8 @@ vm_offset_t kalloc(vm_size_t size)
if ((buf != 0) && (cache->flags & KMEM_CF_VERIFY))
kalloc_verify(cache, buf, size);
+ } else if (size <= PAGE_SIZE) {
+ buf = (void *)kmem_pagealloc_physmem(PAGE_SIZE);
} else {
buf = (void *)kmem_pagealloc_virtual(size, 0);
}
@@ -1436,6 +1442,8 @@ void kfree(vm_offset_t data, vm_size_t size)
kfree_verify(cache, (void *)data, size);
kmem_cache_free(cache, data);
+ } else if (size <= PAGE_SIZE) {
+ kmem_pagefree_physmem(data, PAGE_SIZE);
} else {
kmem_pagefree_virtual(data, size);
}
@@ -1488,11 +1496,99 @@ void slab_info(void)
#if MACH_KDB
#include <ddb/db_output.h>
- void db_show_slab_info(void)
+void db_show_slab_info(void)
{
_slab_info(db_printf);
}
+void db_whatis_slab(vm_offset_t a)
+{
+ struct kmem_cache *cache;
+ int done = 0;
+
+#ifndef SLAB_VERIFY
+ db_printf("enabling SLAB_VERIFY is recommended\n");
+#endif
+
+ simple_lock(&kmem_cache_list_lock);
+
+ list_for_each_entry(&kmem_cache_list, cache, node) {
+ if (a >= (vm_offset_t) cache
+ && a < (vm_offset_t) cache + sizeof(*cache))
+ db_printf("Cache %s\n", cache->name);
+
+ simple_lock(&cache->lock);
+
+ if (cache->flags & KMEM_CF_USE_TREE) {
+ struct rbtree_node *node;
+
+ node = rbtree_lookup_nearest(&cache->active_slabs, (void*) a,
+ kmem_slab_cmp_lookup, RBTREE_LEFT);
+ if (node) {
+ struct kmem_slab *slab;
+ slab = rbtree_entry(node, struct kmem_slab, tree_node);
+ if (a >= (vm_offset_t) slab->addr
+ && a < (vm_offset_t) slab->addr + cache->slab_size) {
+ db_printf("Allocated from cache %s\n", cache->name);
+ done = 1;
+ goto out_cache;
+ }
+ }
+ }
+
+ union kmem_bufctl *free;
+ struct kmem_slab *slab;
+
+ list_for_each_entry(&cache->partial_slabs, slab, list_node) {
+ if (a >= (vm_offset_t) slab->addr
+ && a < (vm_offset_t) slab->addr + cache->slab_size) {
+ db_printf("In cache %s\n", cache->name);
+
+ for (free = slab->first_free; free; free = free->next) {
+ void *buf = kmem_bufctl_to_buf(free, cache);
+
+ if (a >= (vm_offset_t) buf
+ && a < (vm_offset_t) buf + cache->buf_size) {
+ db_printf(" In free list\n");
+ break;
+ }
+ }
+
+ done = 1;
+ goto out_cache;
+ }
+ }
+
+ list_for_each_entry(&cache->free_slabs, slab, list_node) {
+ if (a >= (vm_offset_t) slab->addr
+ && a < (vm_offset_t) slab->addr + cache->slab_size) {
+ db_printf("In cache %s\n", cache->name);
+
+ for (free = slab->first_free; free; free = free->next) {
+ void *buf = kmem_bufctl_to_buf(free, cache);
+
+ if (a >= (vm_offset_t) buf
+ && a < (vm_offset_t) buf + cache->buf_size) {
+ db_printf(" In free list\n");
+ break;
+ }
+ }
+
+ done = 1;
+ goto out_cache;
+ }
+ }
+
+out_cache:
+ simple_unlock(&cache->lock);
+ if (done)
+ goto out;
+ }
+
+out:
+ simple_unlock(&kmem_cache_list_lock);
+}
+
#endif /* MACH_KDB */
#if MACH_DEBUG
diff --git a/kern/slab.h b/kern/slab.h
index 9d8a1156..4d51755a 100644
--- a/kern/slab.h
+++ b/kern/slab.h
@@ -171,9 +171,9 @@ struct kmem_cache {
int flags;
size_t bufctl_dist; /* Distance from buffer to bufctl */
size_t slab_size;
- unsigned long bufs_per_slab;
- unsigned long nr_objs; /* Number of allocated objects */
- unsigned long nr_free_slabs;
+ long_natural_t bufs_per_slab;
+ long_natural_t nr_objs; /* Number of allocated objects */
+ long_natural_t nr_free_slabs;
kmem_cache_ctor_t ctor;
/* All fields below are cold */
size_t obj_size; /* User-provided size */
@@ -182,8 +182,8 @@ struct kmem_cache {
size_t buf_size; /* Aligned object size */
size_t color;
size_t color_max;
- unsigned long nr_bufs; /* Total number of buffers */
- unsigned long nr_slabs;
+ long_natural_t nr_bufs; /* Total number of buffers */
+ long_natural_t nr_slabs;
char name[KMEM_CACHE_NAME_SIZE];
size_t buftag_dist; /* Distance from buffer to buftag */
size_t redzone_pad; /* Bytes from end of object to redzone word */
@@ -237,6 +237,7 @@ void slab_info(void);
#if MACH_KDB
void db_show_slab_info(void);
+void db_whatis_slab(vm_offset_t addr);
#endif /* MACH_KDB */
#endif /* _KERN_SLAB_H */
diff --git a/kern/startup.c b/kern/startup.c
index 1f873192..e72cf6f4 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -49,7 +49,6 @@
#include <kern/timer.h>
#include <kern/xpr.h>
#include <kern/bootstrap.h>
-#include <kern/time_stamp.h>
#include <kern/startup.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
@@ -75,6 +74,7 @@ boolean_t reboot_on_panic = TRUE;
#if NCPUS > 1
#include <machine/mp_desc.h>
+#include <kern/smp.h>
#include <kern/machine.h>
#endif /* NCPUS > 1 */
@@ -131,8 +131,6 @@ void setup_main(void)
xprbootstrap();
#endif /* XPR_DEBUG */
- timestamp_init();
-
machine_init();
mapable_time_init();
@@ -276,13 +274,6 @@ void start_kernel_threads(void)
/*NOTREACHED*/
}
-#if NCPUS > 1
-void slave_main(void)
-{
- cpu_launch_first_thread(THREAD_NULL);
-}
-#endif /* NCPUS > 1 */
-
/*
* Start up the first thread on a CPU.
* First thread is specified for the master CPU.
@@ -309,8 +300,8 @@ void cpu_launch_first_thread(thread_t th)
PMAP_ACTIVATE_KERNEL(mycpu);
- active_threads[mycpu] = th;
- active_stacks[mycpu] = th->kernel_stack;
+ percpu_assign(active_thread, th);
+ percpu_assign(active_stack, th->kernel_stack);
thread_lock(th);
th->state &= ~TH_UNINT;
thread_unlock(th);
diff --git a/kern/strings.c b/kern/strings.c
index 71c99050..7e7fda02 100644
--- a/kern/strings.c
+++ b/kern/strings.c
@@ -175,27 +175,6 @@ strlen(
/*
* Abstract:
- * memset writes value "c" in the "n" bytes starting at address "s".
- * The return value is a pointer to the "s" string.
- */
-
-#if 0
-void *
-memset(
- void *_s, int c, size_t n)
-{
- char *s = _s;
- size_t i;
-
- for (i = 0; i < n ; i++)
- s[i] = c;
-
- return _s;
-}
-#endif
-
-/*
- * Abstract:
* strchr returns a pointer to the first occurrence of the character
* "c" in the string "s". If "c" is not found, return NULL.
*/
diff --git a/kern/syscall_emulation.c b/kern/syscall_emulation.c
index 95e91d55..620c2351 100644
--- a/kern/syscall_emulation.c
+++ b/kern/syscall_emulation.c
@@ -31,6 +31,7 @@
#include <kern/syscall_emulation.h>
#include <kern/task.h>
#include <kern/kalloc.h>
+#include <kern/mach.server.h>
#include <vm/vm_kern.h>
/* XXX */
@@ -94,8 +95,7 @@ void eml_task_reference(
* Cleans up after the emulation code when a process exits.
*/
-void eml_task_deallocate(task)
- const task_t task;
+void eml_task_deallocate(const task_t task)
{
eml_dispatch_t eml;
@@ -116,7 +116,7 @@ void eml_task_deallocate(task)
* task_set_emulation_vector: [Server Entry]
* set a list of emulated system calls for this task.
*/
-kern_return_t
+static kern_return_t
task_set_emulation_vector_internal(
task_t task,
int vector_start,
diff --git a/kern/syscall_subr.c b/kern/syscall_subr.c
index 6d23462c..0030e027 100644
--- a/kern/syscall_subr.c
+++ b/kern/syscall_subr.c
@@ -61,7 +61,7 @@
* returned, the thread should make one more check on the
* lock and then be a good citizen and really suspend.
*/
-void swtch_continue(void)
+static void swtch_continue(void)
{
processor_t myprocessor;
@@ -89,7 +89,7 @@ boolean_t swtch(void)
myprocessor->processor_set->runq.count > 0);
}
-void swtch_pri_continue(void)
+static void swtch_pri_continue(void)
{
thread_t thread = current_thread();
processor_t myprocessor;
@@ -130,7 +130,7 @@ boolean_t swtch_pri(int pri)
myprocessor->processor_set->runq.count > 0);
}
-void thread_switch_continue(void)
+static void thread_switch_continue(void)
{
thread_t cur_thread = current_thread();
@@ -152,8 +152,8 @@ void thread_switch_continue(void)
* even if that violates priority order.
*/
kern_return_t thread_switch(
- mach_port_t thread_name,
- int option,
+ mach_port_name_t thread_name,
+ int option,
mach_msg_timeout_t option_time)
{
thread_t cur_thread = current_thread();
diff --git a/kern/syscall_subr.h b/kern/syscall_subr.h
index b6b61ab2..c9a2777f 100644
--- a/kern/syscall_subr.h
+++ b/kern/syscall_subr.h
@@ -33,7 +33,7 @@
extern int swtch(void);
extern int swtch_pri(int);
-extern int thread_switch(mach_port_t, int, mach_msg_timeout_t);
+extern int thread_switch(mach_port_name_t, int, mach_msg_timeout_t);
extern void thread_depress_timeout(thread_t);
extern kern_return_t thread_depress_abort(thread_t);
extern void mach_print(const char *);
diff --git a/kern/syscall_sw.c b/kern/syscall_sw.c
index a383e467..4249b711 100644
--- a/kern/syscall_sw.c
+++ b/kern/syscall_sw.c
@@ -38,7 +38,7 @@
#include <kern/syscall_subr.h>
#include <kern/ipc_mig.h>
#include <kern/eventcount.h>
-#include <ipc/mach_port.h>
+#include <ipc/mach_port.server.h>
/*
@@ -60,13 +60,13 @@
boolean_t kern_invalid_debug = FALSE;
-mach_port_t null_port(void)
+static mach_port_name_t null_port(void)
{
if (kern_invalid_debug) SoftDebugger("null_port mach trap");
return(MACH_PORT_NULL);
}
-kern_return_t kern_invalid(void)
+static kern_return_t kern_invalid(void)
{
if (kern_invalid_debug) SoftDebugger("kern_invalid mach trap");
return(KERN_INVALID_ARGUMENT);
diff --git a/kern/syscall_sw.h b/kern/syscall_sw.h
index 80b1810b..9e76fc60 100644
--- a/kern/syscall_sw.h
+++ b/kern/syscall_sw.h
@@ -27,6 +27,8 @@
#ifndef _KERN_SYSCALL_SW_H_
#define _KERN_SYSCALL_SW_H_
+#include <mach/boolean.h>
+
/*
* mach_trap_stack indicates the trap may discard
* its kernel stack. Some architectures may need
@@ -35,9 +37,11 @@
* Note: this is indexed manually by locore.S!
*/
+typedef void (*generic_trap_function)(void);
+
typedef struct {
int mach_trap_arg_count;
- int (*mach_trap_function)();
+ generic_trap_function mach_trap_function;
boolean_t mach_trap_stack;
const char *mach_trap_name;
} mach_trap_t;
@@ -46,8 +50,8 @@ extern mach_trap_t mach_trap_table[];
extern int mach_trap_count;
#define MACH_TRAP(name, arg_count) \
- { (arg_count), (int (*)()) (name), FALSE, #name }
+ { (arg_count), (generic_trap_function) (name), FALSE, #name }
#define MACH_TRAP_STACK(name, arg_count) \
- { (arg_count), (int (*)()) (name), TRUE, #name }
+ { (arg_count), (generic_trap_function) (name), TRUE, #name }
#endif /* _KERN_SYSCALL_SW_H_ */
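(Editorial note, not part of the patch: with the new typedef, the trap-table macros expand as follows; thread_switch is just an illustrative entry.)

/* MACH_TRAP(thread_switch, 3) now yields
 *   { 3, (generic_trap_function) thread_switch, FALSE, "thread_switch" }
 * so handlers are stored as a single generic function-pointer type instead
 * of the old int (*)(), while locore.S continues to index the table manually. */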
diff --git a/kern/task.c b/kern/task.c
index e91c192b..dfba04d4 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -44,7 +44,10 @@
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/slab.h>
+#include <kern/gnumach.server.h>
#include <kern/kalloc.h>
+#include <kern/mach.server.h>
+#include <kern/mach_host.server.h>
#include <kern/processor.h>
#include <kern/printf.h>
#include <kern/sched_prim.h> /* for thread_wakeup */
@@ -119,8 +122,8 @@ task_create_kernel(
new_task->map = VM_MAP_NULL;
else {
new_task->map = vm_map_create(new_pmap,
- round_page(VM_MIN_ADDRESS),
- trunc_page(VM_MAX_ADDRESS));
+ round_page(VM_MIN_USER_ADDRESS),
+ trunc_page(VM_MAX_USER_ADDRESS));
if (new_task->map == VM_MAP_NULL)
pmap_destroy(new_pmap);
}
@@ -151,10 +154,8 @@ task_create_kernel(
ipc_task_init(new_task, parent_task);
machine_task_init (new_task);
- new_task->total_user_time.seconds = 0;
- new_task->total_user_time.microseconds = 0;
- new_task->total_system_time.seconds = 0;
- new_task->total_system_time.microseconds = 0;
+ time_value64_init(&new_task->total_user_time);
+ time_value64_init(&new_task->total_system_time);
record_time_stamp (&new_task->creation_time);
@@ -178,6 +179,7 @@ task_create_kernel(
new_task->may_assign = TRUE;
new_task->assign_active = FALSE;
+ new_task->essential = FALSE;
#if MACH_PCSAMPLE
new_task->pc_sample.buffer = 0;
@@ -785,13 +787,13 @@ kern_return_t task_info(
{
task_basic_info_t basic_info;
- /* Allow *task_info_count to be two words smaller than
- the usual amount, because creation_time is a new member
- that some callers might not know about. */
+	/* Allow *task_info_count to be smaller than the usual amount,
+	 * omitting the new time_value64_t fields, since some callers
+	 * might not know about them yet. */
- if (*task_info_count < TASK_BASIC_INFO_COUNT - 2) {
+ if (*task_info_count <
+ TASK_BASIC_INFO_COUNT - 3 * sizeof(time_value64_t)/sizeof(integer_t))
return KERN_INVALID_ARGUMENT;
- }
basic_info = (task_basic_info_t) task_info_out;
@@ -804,16 +806,19 @@ kern_return_t task_info(
task_lock(task);
basic_info->base_priority = task->priority;
basic_info->suspend_count = task->user_stop_count;
- basic_info->user_time.seconds
- = task->total_user_time.seconds;
- basic_info->user_time.microseconds
- = task->total_user_time.microseconds;
- basic_info->system_time.seconds
- = task->total_system_time.seconds;
- basic_info->system_time.microseconds
- = task->total_system_time.microseconds;
- read_time_stamp(&task->creation_time,
- &basic_info->creation_time);
+ TIME_VALUE64_TO_TIME_VALUE(&task->total_user_time,
+ &basic_info->user_time);
+ TIME_VALUE64_TO_TIME_VALUE(&task->total_system_time,
+ &basic_info->system_time);
+ time_value64_t creation_time64;
+ read_time_stamp(&task->creation_time, &creation_time64);
+ TIME_VALUE64_TO_TIME_VALUE(&creation_time64, &basic_info->creation_time);
+ if (*task_info_count == TASK_BASIC_INFO_COUNT) {
+ /* Copy new time_value64_t fields */
+ basic_info->user_time64 = task->total_user_time;
+ basic_info->system_time64 = task->total_system_time;
+ basic_info->creation_time64 = creation_time64;
+ }
task_unlock(task);
if (*task_info_count > TASK_BASIC_INFO_COUNT)
@@ -850,21 +855,22 @@ kern_return_t task_info(
task_thread_times_info_t times_info;
thread_t thread;
- if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
+	/* Callers might not know about the time_value64_t fields yet. */
+ if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT - (2 * sizeof(time_value64_t)) / sizeof(integer_t)) {
return KERN_INVALID_ARGUMENT;
}
times_info = (task_thread_times_info_t) task_info_out;
- times_info->user_time.seconds = 0;
- times_info->user_time.microseconds = 0;
- times_info->system_time.seconds = 0;
- times_info->system_time.microseconds = 0;
+
+ time_value64_t acc_user_time, acc_system_time;
+ time_value64_init(&acc_user_time);
+ time_value64_init(&acc_system_time);
task_lock(task);
queue_iterate(&task->thread_list, thread,
thread_t, thread_list)
{
- time_value_t user_time, system_time;
+ time_value64_t user_time, system_time;
spl_t s;
s = splsched();
@@ -875,12 +881,20 @@ kern_return_t task_info(
thread_unlock(thread);
splx(s);
- time_value_add(&times_info->user_time, &user_time);
- time_value_add(&times_info->system_time, &system_time);
+ time_value64_add(&acc_user_time, &user_time);
+ time_value64_add(&acc_system_time, &system_time);
}
task_unlock(task);
+ TIME_VALUE64_TO_TIME_VALUE(&acc_user_time, &times_info->user_time);
+ TIME_VALUE64_TO_TIME_VALUE(&acc_system_time, &times_info->system_time);
+ if (*task_info_count >= TASK_THREAD_TIMES_INFO_COUNT) {
+ /* Copy new time_value64_t fields */
+ times_info->user_time64 = acc_user_time;
+ times_info->system_time64 = acc_system_time;
+ }
- *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
+ if (*task_info_count > TASK_THREAD_TIMES_INFO_COUNT)
+ *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
break;
}
@@ -1151,18 +1165,39 @@ task_set_name(
task_t task,
const_kernel_debug_name_t name)
{
+ if (task == TASK_NULL)
+ return KERN_INVALID_ARGUMENT;
+
strncpy(task->name, name, sizeof task->name - 1);
task->name[sizeof task->name - 1] = '\0';
return KERN_SUCCESS;
}
/*
+ * task_set_essential
+ *
+ * Set whether TASK is an essential task, i.e. the whole system will crash
+ * if this task crashes.
+ */
+kern_return_t
+task_set_essential(
+ task_t task,
+ boolean_t essential)
+{
+ if (task == TASK_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ task->essential = !!essential;
+ return KERN_SUCCESS;
+}
+
+/*
* task_collect_scan:
*
* Attempt to free resources owned by tasks.
*/
-void task_collect_scan(void)
+static void task_collect_scan(void)
{
task_t task, prev_task;
processor_set_t pset, prev_pset;
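
The relaxed count checks above keep old clients working while new ones opt in to the
64-bit time fields by passing the full count. A caller-side sketch of the compatibility
path, assuming the standard task_info() stub (not part of this patch):

    /* An older client that predates the time_value64_t fields passes the
       shorter count; the kernel then fills in only the 32-bit time fields. */
    task_basic_info_data_t info;
    mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT
        - 3 * sizeof(time_value64_t) / sizeof(integer_t);
    kern_return_t kr = task_info(mach_task_self(), TASK_BASIC_INFO,
                                 (task_info_t) &info, &count);
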
diff --git a/kern/task.h b/kern/task.h
index 52eb8324..9521e953 100644
--- a/kern/task.h
+++ b/kern/task.h
@@ -62,9 +62,10 @@ struct task {
int ref_count; /* Number of references to me */
/* Flags */
- unsigned int active:1, /* Task has not been terminated */
+ unsigned char assign_active; /* waiting for may_assign */
+ unsigned char active:1, /* Task has not been terminated */
/* boolean_t */ may_assign:1, /* can assigned pset be changed? */
- assign_active:1; /* waiting for may_assign */
+ essential:1; /* Is this task essential for the system? */
/* Miscellaneous */
vm_map_t map; /* Address space description */
@@ -81,12 +82,12 @@ struct task {
int priority; /* for new threads */
/* Statistics */
- time_value_t total_user_time;
+ time_value64_t total_user_time;
/* total user time for dead threads */
- time_value_t total_system_time;
+ time_value64_t total_system_time;
/* total system time for dead threads */
- time_value_t creation_time; /* time stamp at creation */
+ time_value64_t creation_time; /* time stamp at creation */
/* IPC structures */
decl_simple_lock_data(, itk_lock_data)
@@ -114,13 +115,13 @@ struct task {
machine_task_t machine;
/* Statistics */
- natural_t faults; /* page faults counter */
- natural_t zero_fills; /* zero fill pages counter */
- natural_t reactivations; /* reactivated pages counter */
- natural_t pageins; /* actual pageins couter */
- natural_t cow_faults; /* copy-on-write faults counter */
- natural_t messages_sent; /* messages sent counter */
- natural_t messages_received; /* messages received counter */
+ long_natural_t faults; /* page faults counter */
+ long_natural_t zero_fills; /* zero fill pages counter */
+ long_natural_t reactivations; /* reactivated pages counter */
+	long_natural_t	pageins;	/* actual pageins counter */
+ long_natural_t cow_faults; /* copy-on-write faults counter */
+ long_natural_t messages_sent; /* messages sent counter */
+ long_natural_t messages_received; /* messages received counter */
char name[TASK_NAME_SIZE];
};
diff --git a/kern/thread.c b/kern/thread.c
index 0e3cc2c9..eb73590c 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -32,19 +32,25 @@
*/
#include <kern/printf.h>
+#include <mach/message.h>
#include <mach/std_types.h>
#include <mach/policy.h>
#include <mach/thread_info.h>
#include <mach/thread_special_ports.h>
#include <mach/thread_status.h>
#include <mach/time_value.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_inherit.h>
#include <machine/vm_param.h>
#include <kern/ast.h>
#include <kern/counters.h>
#include <kern/debug.h>
#include <kern/eventcount.h>
+#include <kern/gnumach.server.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
+#include <kern/mach_debug.server.h>
+#include <kern/mach_host.server.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
@@ -55,25 +61,24 @@
#include <kern/host.h>
#include <kern/kalloc.h>
#include <kern/slab.h>
+#include <kern/smp.h>
#include <kern/mach_clock.h>
+#include <string.h>
#include <vm/vm_kern.h>
#include <vm/vm_user.h>
#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <ipc/mach_msg.h>
-#include <ipc/mach_port.h>
+#include <ipc/mach_port.server.h>
#include <machine/machspl.h> /* for splsched */
#include <machine/pcb.h>
#include <machine/thread.h> /* for MACHINE_STACK */
-thread_t active_threads[NCPUS];
-vm_offset_t active_stacks[NCPUS];
-
struct kmem_cache thread_cache;
struct kmem_cache thread_stack_cache;
queue_head_t reaper_queue;
-decl_simple_lock_data(, reaper_lock)
+def_simple_lock_data(static, reaper_lock)
/* private */
struct thread thread_template;
@@ -81,7 +86,7 @@ struct thread thread_template;
#if MACH_DEBUG
#define STACK_MARKER 0xdeadbeefU
boolean_t stack_check_usage = FALSE;
-decl_simple_lock_data(, stack_usage_lock)
+def_simple_lock_data(static, stack_usage_lock)
vm_size_t stack_max_usage = 0;
#endif /* MACH_DEBUG */
@@ -117,7 +122,7 @@ vm_size_t stack_max_usage = 0;
* because stack_alloc_try/thread_invoke operate at splsched.
*/
-decl_simple_lock_data(, stack_lock_data)/* splsched only */
+def_simple_lock_data(static, stack_lock_data)/* splsched only */
#define stack_lock() simple_lock(&stack_lock_data)
#define stack_unlock() simple_unlock(&stack_lock_data)
@@ -547,6 +552,10 @@ kern_return_t thread_create(
#endif /* MACH_PCSAMPLE */
new_thread->pc_sample.buffer = 0;
+
+ /* Inherit the task name as the thread name. */
+ memcpy (new_thread->name, parent_task->name, THREAD_NAME_SIZE);
+
/*
* Add the thread to the task`s list of threads.
* The new thread holds another reference to the task.
@@ -590,7 +599,7 @@ void thread_deallocate(
task_t task;
processor_set_t pset;
- time_value_t user_time, system_time;
+ time_value64_t user_time, system_time;
if (thread == THREAD_NULL)
return;
@@ -667,8 +676,8 @@ void thread_deallocate(
* Accumulate times for dead threads in task.
*/
thread_read_times(thread, &user_time, &system_time);
- time_value_add(&task->total_user_time, &user_time);
- time_value_add(&task->total_system_time, &system_time);
+ time_value64_add(&task->total_user_time, &user_time);
+ time_value64_add(&task->total_system_time, &system_time);
/*
* Remove thread from task list and processor_set threads list.
@@ -861,8 +870,8 @@ kern_return_t thread_terminate(
kern_return_t thread_terminate_release(
thread_t thread,
task_t task,
- mach_port_t thread_name,
- mach_port_t reply_port,
+ mach_port_name_t thread_name,
+ mach_port_name_t reply_port,
vm_offset_t address,
vm_size_t size)
{
@@ -1136,7 +1145,7 @@ kern_return_t thread_halt(
}
}
-void __attribute__((noreturn)) walking_zombie(void)
+static void __attribute__((noreturn)) walking_zombie(void)
{
panic("the zombie walks!");
}
@@ -1470,6 +1479,9 @@ kern_return_t thread_set_state(
if (flavor == i386_DEBUG_STATE && thread == current_thread())
/* This state can be set directly for the curren thread. */
return thread_setstatus(thread, flavor, new_state, new_state_count);
+ if (flavor == i386_FSGS_BASE_STATE && thread == current_thread())
+	  /* This state can be set directly for the current thread. */
+ return thread_setstatus(thread, flavor, new_state, new_state_count);
#endif
if (thread == THREAD_NULL || thread == current_thread())
@@ -1499,11 +1511,12 @@ kern_return_t thread_info(
if (flavor == THREAD_BASIC_INFO) {
thread_basic_info_t basic_info;
- /* Allow *thread_info_count to be one smaller than the
- usual amount, because creation_time is a new member
- that some callers might not know about. */
+	/* Allow *thread_info_count to be smaller than the full amount,
+	 * omitting the new time_value64_t fields, as some callers
+	 * might not know about them yet. */
- if (*thread_info_count < THREAD_BASIC_INFO_COUNT - 1)
+ if (*thread_info_count <
+ THREAD_BASIC_INFO_COUNT - 3 * sizeof(time_value64_t)/sizeof(natural_t))
return KERN_INVALID_ARGUMENT;
basic_info = (thread_basic_info_t) thread_info_out;
@@ -1520,13 +1533,23 @@ kern_return_t thread_info(
/* fill in info */
- thread_read_times(thread,
- &basic_info->user_time,
- &basic_info->system_time);
+ time_value64_t user_time, system_time;
+ thread_read_times(thread, &user_time, &system_time);
+ TIME_VALUE64_TO_TIME_VALUE(&user_time, &basic_info->user_time);
+ TIME_VALUE64_TO_TIME_VALUE(&system_time, &basic_info->system_time);
+
basic_info->base_priority = thread->priority;
basic_info->cur_priority = thread->sched_pri;
- read_time_stamp(&thread->creation_time,
- &basic_info->creation_time);
+ time_value64_t creation_time;
+ read_time_stamp(&thread->creation_time, &creation_time);
+ TIME_VALUE64_TO_TIME_VALUE(&creation_time, &basic_info->creation_time);
+
+ if (*thread_info_count == THREAD_BASIC_INFO_COUNT) {
+ /* Copy new time_value64_t fields */
+ basic_info->user_time64 = user_time;
+		basic_info->system_time64 = system_time;
+ basic_info->creation_time64 = creation_time;
+ }
/*
* To calculate cpu_usage, first correct for timer rate,
@@ -1722,7 +1745,7 @@ thread_t kernel_thread(
* This kernel thread runs forever looking for threads to destroy
* (when they request that they be destroyed, of course).
*/
-void __attribute__((noreturn)) reaper_thread_continue(void)
+static void __attribute__((noreturn)) reaper_thread_continue(void)
{
for (;;) {
thread_t thread;
@@ -1907,7 +1930,7 @@ Restart:
* Reset policy and priorities if needed.
*/
#if MACH_FIXPRI
- if (thread->policy & new_pset->policies == 0) {
+ if ((thread->policy & new_pset->policies) == 0) {
thread->policy = POLICY_TIMESHARE;
recompute_pri = TRUE;
}
@@ -2261,7 +2284,7 @@ thread_wire(
* pcb_collect doesn't do anything yet.
*/
-void thread_collect_scan(void)
+static void thread_collect_scan(void)
{
thread_t thread, prev_thread;
processor_set_t pset, prev_pset;
@@ -2348,8 +2371,7 @@ void consider_thread_collect(void)
#if MACH_DEBUG
-vm_size_t stack_usage(
- vm_offset_t stack)
+static vm_size_t stack_usage(vm_offset_t stack)
{
unsigned i;
@@ -2402,7 +2424,7 @@ void stack_finalize(
* *maxusagep must be initialized by the caller.
*/
-void stack_statistics(
+static void stack_statistics(
natural_t *totalp,
vm_size_t *maxusagep)
{
@@ -2555,9 +2577,9 @@ kern_return_t processor_set_stack_usage(
stack = thread->kernel_stack;
- for (cpu = 0; cpu < NCPUS; cpu++)
- if (active_threads[cpu] == thread) {
- stack = active_stacks[cpu];
+ for (cpu = 0; cpu < smp_get_numcpus(); cpu++)
+ if (percpu_array[cpu].active_thread == thread) {
+ stack = percpu_array[cpu].active_stack;
break;
}
}
@@ -2607,3 +2629,21 @@ thread_stats(void)
printf("%d using rpc_reply.\n", rpcreply);
}
#endif /* MACH_DEBUG */
+
+/*
+ * thread_set_name
+ *
+ * Set the name of thread THREAD to NAME.
+ */
+kern_return_t
+thread_set_name(
+ thread_t thread,
+ const_kernel_debug_name_t name)
+{
+ if (thread == THREAD_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ strncpy(thread->name, name, sizeof thread->name - 1);
+ thread->name[sizeof thread->name - 1] = '\0';
+ return KERN_SUCCESS;
+}
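
With this change a thread starts out carrying its task's name, and the new RPC can
override it later. A caller-side sketch, assuming the matching routine is exported
through gnumach.defs and that worker_thread is a thread port the caller already holds
(both assumptions, not shown in this patch):

    /* Rename a worker thread so it can be told apart from its siblings
       in the kernel debugger; "net-poller" is a made-up example name. */
    kern_return_t kr = thread_set_name(worker_thread, "net-poller");
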
diff --git a/kern/thread.h b/kern/thread.h
index f0ed71a8..81d32924 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -42,7 +42,6 @@
#include <mach/port.h>
#include <mach/vm_prot.h>
#include <kern/ast.h>
-#include <kern/cpu_number.h>
#include <kern/mach_clock.h>
#include <kern/queue.h>
#include <kern/pc_sample.h>
@@ -55,6 +54,12 @@
#include <machine/thread.h>
#include <ipc/ipc_kmsg_queue.h>
+/*
+ * Thread name buffer size. Use the same size as the task so
+ * the thread can inherit the task's name.
+ */
+#define THREAD_NAME_SIZE TASK_NAME_SIZE
+
struct thread {
/* Run queues */
queue_chain_t links; /* current run queue links */
@@ -172,17 +177,17 @@ struct thread {
struct ipc_port *ith_sself; /* a send right */
struct ipc_port *ith_exception; /* a send right */
- mach_port_t ith_mig_reply; /* reply port for mig */
+ mach_port_name_t ith_mig_reply; /* reply port for mig */
struct ipc_port *ith_rpc_reply; /* reply port for kernel RPCs */
/* State saved when thread's stack is discarded */
union {
struct {
- mach_msg_header_t *msg;
+ mach_msg_user_header_t *msg;
mach_msg_option_t option;
mach_msg_size_t rcv_size;
mach_msg_timeout_t timeout;
- mach_port_t notify;
+ mach_port_name_t notify;
struct ipc_object *object;
struct ipc_mqueue *mqueue;
} receive;
@@ -190,7 +195,7 @@ struct thread {
struct ipc_port *port;
int exc;
int code;
- int subcode;
+ long subcode;
} exception;
void *other; /* catch-all for other state */
} saved;
@@ -204,7 +209,7 @@ struct thread {
unsigned int sched_delta; /* weighted cpu usage since update */
/* Creation time stamp */
- time_value_t creation_time;
+ time_value64_t creation_time;
/* Time-outs */
timer_elt_data_t timer; /* timer for thread */
@@ -229,8 +234,16 @@ struct thread {
#if NCPUS > 1
processor_t last_processor; /* processor this last ran on */
#endif /* NCPUS > 1 */
+
+#if MACH_LOCK_MON
+ unsigned lock_stack;
+#endif
+
+ char name[THREAD_NAME_SIZE];
};
+#include <kern/cpu_number.h>
+
/* typedef of thread_t is in kern/kern_types.h */
typedef struct thread_shuttle *thread_shuttle_t;
#define THREAD_NULL ((thread_t) 0)
@@ -263,10 +276,6 @@ typedef struct thread *thread_t;
typedef mach_port_t *thread_array_t;
#endif /* _KERN_KERN_TYPES_H_ */
-
-extern thread_t active_threads[NCPUS]; /* active threads */
-extern vm_offset_t active_stacks[NCPUS]; /* active kernel stacks */
-
#ifdef KERNEL
/*
* User routines
@@ -280,8 +289,8 @@ extern kern_return_t thread_terminate(
extern kern_return_t thread_terminate_release(
thread_t thread,
task_t task,
- mach_port_t thread_name,
- mach_port_t reply_port,
+ mach_port_name_t thread_name,
+ mach_port_name_t reply_port,
vm_offset_t address,
vm_size_t size);
extern kern_return_t thread_suspend(
@@ -387,8 +396,20 @@ extern void thread_unfreeze(
#define thread_pcb(th) ((th)->pcb)
-#define thread_lock(th) simple_lock(&(th)->lock)
-#define thread_unlock(th) simple_unlock(&(th)->lock)
+/* Shall be taken at splsched only */
+#ifdef MACH_LDEBUG
+#define thread_lock(th) do { \
+ assert_splsched(); \
+ simple_lock_nocheck(&(th)->lock); \
+} while (0)
+#define thread_unlock(th) do { \
+ assert_splsched(); \
+ simple_unlock_nocheck(&(th)->lock); \
+} while (0)
+#else
+#define thread_lock(th) simple_lock_nocheck(&(th)->lock)
+#define thread_unlock(th) simple_unlock_nocheck(&(th)->lock)
+#endif
#define thread_should_halt(thread) \
((thread)->ast & (AST_HALT|AST_TERMINATE))
@@ -398,10 +419,10 @@ extern void thread_unfreeze(
* designate this by defining CURRENT_THREAD.
*/
#ifndef CURRENT_THREAD
-#define current_thread() (active_threads[cpu_number()])
+#define current_thread() (percpu_get(thread_t, active_thread))
#endif /* CURRENT_THREAD */
-#define current_stack() (active_stacks[cpu_number()])
+#define current_stack() (percpu_get(vm_offset_t, active_stack))
#define current_task() (current_thread()->task)
#define current_space() (current_task()->itk_space)
@@ -410,6 +431,7 @@ extern void thread_unfreeze(
#if MACH_DEBUG
void stack_init(vm_offset_t stack);
void stack_finalize(vm_offset_t stack);
+void thread_stats(void);
#endif /* MACH_DEBUG */
#endif /* _KERN_THREAD_H_ */
diff --git a/kern/thread_swap.c b/kern/thread_swap.c
index 20ad0409..a5fc0523 100644
--- a/kern/thread_swap.c
+++ b/kern/thread_swap.c
@@ -60,7 +60,7 @@
queue_head_t swapin_queue;
-decl_simple_lock_data(, swapper_lock_data)
+def_simple_lock_data(static, swapper_lock_data)
#define swapper_lock() simple_lock(&swapper_lock_data)
#define swapper_unlock() simple_unlock(&swapper_lock_data)
@@ -156,7 +156,7 @@ kern_return_t thread_doswapin(thread_t thread)
* This procedure executes as a kernel thread. Threads that need to
* be swapped in are swapped in by this thread.
*/
-void __attribute__((noreturn)) swapin_thread_continue(void)
+static void __attribute__((noreturn)) swapin_thread_continue(void)
{
for (;;) {
thread_t thread;
diff --git a/kern/time_stamp.c b/kern/time_stamp.c
deleted file mode 100644
index b8ac9d82..00000000
--- a/kern/time_stamp.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <mach/std_types.h>
-#include <machine/locore.h>
-#include <sys/time.h>
-#include <kern/time_stamp.h>
-
-unsigned ts_tick_count;
-
-/*
- * ts.c - kern_timestamp system call.
- */
-kern_return_t
-kern_timestamp(struct tsval *tsp)
-{
-/*
- temp.low_val = 0;
- temp.high_val = ts_tick_count;
-*/
- time_value_t temp;
- temp = time;
-
- if (copyout(&temp,
- tsp,
- sizeof(struct tsval)) != KERN_SUCCESS)
- return(KERN_INVALID_ADDRESS);
- return(KERN_SUCCESS);
-}
-
-/*
- * Initialization procedure.
- */
-
-void timestamp_init(void)
-{
- ts_tick_count = 0;
-}
diff --git a/kern/time_stamp.h b/kern/time_stamp.h
deleted file mode 100644
index 2492e522..00000000
--- a/kern/time_stamp.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#ifndef _KERN_TIME_STAMP_H_
-#define _KERN_TIME_STAMP_H_
-
-#include <machine/time_stamp.h>
-/*
- * time_stamp.h -- definitions for low-overhead timestamps.
- */
-
-struct tsval {
- unsigned low_val; /* least significant word */
- unsigned high_val; /* most significant word */
-};
-
-/*
- * Format definitions.
- */
-
-#ifndef TS_FORMAT
-/*
- * Default case - Just return a tick count for machines that
- * don't support or haven't implemented this. Assume 100Hz ticks.
- *
- * low_val - Always 0.
- * high_val - tick count.
- */
-#define TS_FORMAT 1
-
-#if KERNEL
-extern unsigned ts_tick_count;
-#endif /* KERNEL */
-#endif /* TS_FORMAT */
-
-/*
- * List of all format definitions for convert_ts_to_tv.
- */
-
-#define TS_FORMAT_DEFAULT 1
-#define TS_FORMAT_MMAX 2
-
-extern void timestamp_init(void);
-
-#endif /* _KERN_TIME_STAMP_H_ */
diff --git a/kern/timer.c b/kern/timer.c
index 79ada27e..13dfc207 100644
--- a/kern/timer.c
+++ b/kern/timer.c
@@ -132,7 +132,7 @@ time_trap_uentry(unsigned ts)
/*
* Record new timer.
*/
- mytimer = &(active_threads[mycpu]->system_timer);
+ mytimer = &(current_thread()->system_timer);
current_timer[mycpu] = mytimer;
mytimer->tstamp = ts;
}
@@ -170,7 +170,7 @@ time_trap_uexit(int ts)
timer_normalize(mytimer); /* SYSTEMMODE */
}
- mytimer = &(active_threads[mycpu]->user_timer);
+ mytimer = &(current_thread()->user_timer);
/*
* Record new timer.
@@ -334,7 +334,9 @@ void timer_normalize(timer_t timer)
high_increment = timer->low_bits/TIMER_HIGH_UNIT;
timer->high_bits_check += high_increment;
+ __sync_synchronize();
timer->low_bits %= TIMER_HIGH_UNIT;
+ __sync_synchronize();
timer->high_bits += high_increment;
}
@@ -356,7 +358,9 @@ static void timer_grab(
#endif
do {
(save)->high = (timer)->high_bits;
+ __sync_synchronize ();
(save)->low = (timer)->low_bits;
+ __sync_synchronize ();
/*
* If the timer was normalized while we were doing this,
* the high_bits value read above and the high_bits check
@@ -374,26 +378,13 @@ static void timer_grab(
} while ( (save)->high != (timer)->high_bits_check);
}
-/*
- *
- * Db_timer_grab(): used by db_thread_read_times. An nonblocking
- * version of db_thread_get_times. Keep coherent with timer_grab
- * above.
- *
- */
-void db_timer_grab(
- timer_t timer,
- timer_save_t save)
-{
- /* Don't worry about coherency */
-
- (save)->high = (timer)->high_bits;
- (save)->low = (timer)->low_bits;
-}
-
+#define TIMER_TO_TIME_VALUE64(tv, timer) do { \
+ (tv)->seconds = (timer)->high + (timer)->low / 1000000; \
+ (tv)->nanoseconds = (timer)->low % 1000000 * 1000; \
+} while(0);
/*
- * timer_read reads the value of a timer into a time_value_t. If the
+ * timer_read reads the value of a timer into a time_value64_t. If the
* timer was modified during the read, retry. The value returned
* is accurate to the last update; time accumulated by a running
* timer since its last timestamp is not included.
@@ -402,7 +393,7 @@ void db_timer_grab(
void
timer_read(
timer_t timer,
- time_value_t *tv)
+ time_value64_t *tv)
{
timer_save_data_t temp;
@@ -413,9 +404,7 @@ timer_read(
#ifdef TIMER_ADJUST
TIMER_ADJUST(&temp);
#endif /* TIMER_ADJUST */
- tv->seconds = temp.high + temp.low/1000000;
- tv->microseconds = temp.low%1000000;
-
+ TIMER_TO_TIME_VALUE64(tv, &temp);
}
/*
@@ -428,29 +417,47 @@ timer_read(
*/
void thread_read_times(
thread_t thread,
- time_value_t *user_time_p,
- time_value_t *system_time_p)
+ time_value64_t *user_time_p,
+ time_value64_t *system_time_p)
{
- timer_save_data_t temp;
- timer_t timer;
+ timer_read(&thread->user_timer, user_time_p);
+ timer_read(&thread->system_timer, system_time_p);
+}
- timer = &thread->user_timer;
- timer_grab(timer, &temp);
+#if MACH_DEBUG
-#ifdef TIMER_ADJUST
- TIMER_ADJUST(&temp);
-#endif /* TIMER_ADJUST */
- user_time_p->seconds = temp.high + temp.low/1000000;
- user_time_p->microseconds = temp.low % 1000000;
+/*
+ *
+ * Db_timer_grab(): used by db_thread_read_times. A nonblocking
+ * version of db_thread_get_times. Keep coherent with timer_grab
+ * above.
+ *
+ */
+static void db_timer_grab(
+ timer_t timer,
+ timer_save_t save)
+{
+ /* Don't worry about coherency */
+
+ (save)->high = (timer)->high_bits;
+ (save)->low = (timer)->low_bits;
+}
- timer = &thread->system_timer;
- timer_grab(timer, &temp);
+static void
+nonblocking_timer_read(
+ timer_t timer,
+ time_value64_t *tv)
+{
+ timer_save_data_t temp;
+ db_timer_grab(timer, &temp);
+ /*
+ * Normalize the result
+ */
#ifdef TIMER_ADJUST
TIMER_ADJUST(&temp);
#endif /* TIMER_ADJUST */
- system_time_p->seconds = temp.high + temp.low/1000000;
- system_time_p->microseconds = temp.low % 1000000;
+ TIMER_TO_TIME_VALUE64(tv, &temp);
}
/*
@@ -462,30 +469,13 @@ void thread_read_times(
*/
void db_thread_read_times(
thread_t thread,
- time_value_t *user_time_p,
- time_value_t *system_time_p)
+ time_value64_t *user_time_p,
+ time_value64_t *system_time_p)
{
- timer_save_data_t temp;
- timer_t timer;
-
- timer = &thread->user_timer;
- db_timer_grab(timer, &temp);
-
-#ifdef TIMER_ADJUST
- TIMER_ADJUST(&temp);
-#endif /* TIMER_ADJUST */
- user_time_p->seconds = temp.high + temp.low/1000000;
- user_time_p->microseconds = temp.low % 1000000;
-
- timer = &thread->system_timer;
- timer_grab(timer, &temp);
-
-#ifdef TIMER_ADJUST
- TIMER_ADJUST(&temp);
-#endif /* TIMER_ADJUST */
- system_time_p->seconds = temp.high + temp.low/1000000;
- system_time_p->microseconds = temp.low % 1000000;
+ nonblocking_timer_read(&thread->user_timer, user_time_p);
+ nonblocking_timer_read(&thread->system_timer, system_time_p);
}
+#endif /* MACH_DEBUG */
/*
* timer_delta takes the difference of a saved timer value
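
The conversion helper above replaces the open-coded seconds/microseconds math. A worked
sketch of what it produces, assuming STAT_TIME so that the low word counts microseconds
(values are illustrative only):

    timer_save_data_t temp = { .low = 2500000, .high = 10 };
    time_value64_t tv;
    TIMER_TO_TIME_VALUE64(&tv, &temp);
    /* tv.seconds     = 10 + 2500000 / 1000000     = 12
       tv.nanoseconds = (2500000 % 1000000) * 1000 = 500000000 */
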
diff --git a/kern/timer.h b/kern/timer.h
index 2f473cf8..92259a2b 100644
--- a/kern/timer.h
+++ b/kern/timer.h
@@ -32,7 +32,7 @@
#if STAT_TIME
/*
* Statistical timer definitions - use microseconds in timer, seconds
- * in high unit field. No adjustment needed to convert to time_value_t
+ * in high unit field. No adjustment needed to convert to time_value64_t
* as a result. Service timers once an hour.
*/
@@ -56,7 +56,7 @@
/*
* TIMER_ADJUST is used to adjust the value of a timer after it has been
- * copied into a time_value_t. No adjustment is needed if high_bits is in
+ * copied into a time_value64_t. No adjustment is needed if high_bits is in
* seconds.
*/
#undef TIMER_ADJUST
@@ -128,8 +128,8 @@ extern void start_timer(timer_t);
extern void timer_switch(timer_t);
#endif /* STAT_TIME */
-extern void timer_read(timer_t, time_value_t *);
-extern void thread_read_times(thread_t, time_value_t *, time_value_t *);
+extern void timer_read(timer_t, time_value64_t *);
+extern void thread_read_times(thread_t, time_value64_t *, time_value64_t *);
extern unsigned timer_delta(timer_t, timer_save_t);
extern void timer_normalize(timer_t);
extern void timer_init(timer_t);
@@ -184,4 +184,12 @@ extern void init_timers(void);
void timer_init(timer_t this_timer);
+#if MACH_DEBUG
+void db_thread_read_times(
+ thread_t thread,
+ time_value64_t *user_time_p,
+ time_value64_t *system_time_p);
+#endif
+
+
#endif /* _KERN_TIMER_H_ */
diff --git a/kern/xpr.c b/kern/xpr.c
index 46cb2273..1b551eb1 100644
--- a/kern/xpr.c
+++ b/kern/xpr.c
@@ -46,7 +46,7 @@
* Just set xprenable false so the buffer isn't overwritten.
*/
-decl_simple_lock_data(, xprlock)
+def_simple_lock_data(static, xprlock)
boolean_t xprenable = TRUE; /* Enable xpr tracing */
int nxprbufs = 0; /* Number of contiguous xprbufs allocated */
diff --git a/linux/Makefrag.am b/linux/Makefrag.am
index 38718a3f..23384523 100644
--- a/linux/Makefrag.am
+++ b/linux/Makefrag.am
@@ -37,6 +37,10 @@ liblinux_a_CPPFLAGS = $(AM_CPPFLAGS) \
# corresponding text segment definitions, we must always optimize.
liblinux_a_CFLAGS = -O2 $(AM_CFLAGS)
+# Disable warnings that are applied to the core Mach code.
+liblinux_a_CFLAGS += -Wno-missing-prototypes -Wno-strict-prototypes \
+ -Wno-old-style-definition
+
# See <http://lists.gnu.org/archive/html/bug-hurd/2006-01/msg00148.html>.
liblinux_a_CFLAGS += \
-fno-strict-aliasing
diff --git a/linux/dev/arch/i386/kernel/irq.c b/linux/dev/arch/i386/kernel/irq.c
index 656c1470..3b349ccc 100644
--- a/linux/dev/arch/i386/kernel/irq.c
+++ b/linux/dev/arch/i386/kernel/irq.c
@@ -27,6 +27,7 @@
#include <mach/mach_types.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
+#include <kern/cpu_number.h>
#include <i386/spl.h>
#include <i386/irq.h>
@@ -343,7 +344,7 @@ probe_irq_on (void)
unsigned i, irqs = 0;
unsigned long delay;
- assert (curr_ipl == 0);
+ assert (curr_ipl[cpu_number()] == 0);
/*
* Allocate all available IRQs.
@@ -363,7 +364,7 @@ probe_irq_on (void)
for (delay = jiffies + HZ / 10; delay > jiffies;)
;
- return (irqs & ~curr_pic_mask);
+ return (irqs & ~linux_pic_mask);
}
/*
@@ -374,9 +375,9 @@ probe_irq_off (unsigned long irqs)
{
unsigned int i;
- assert (curr_ipl == 0);
+ assert (curr_ipl[cpu_number()] == 0);
- irqs &= curr_pic_mask;
+ irqs &= linux_pic_mask;
/*
* Disable unnecessary IRQs.
@@ -721,13 +722,13 @@ init_IRQ (void)
outb_p (PIT_C0 | PIT_SQUAREMODE | PIT_READMODE, PITCTL_PORT);
outb_p (latch & 0xff, PITCTR0_PORT);
outb (latch >> 8, PITCTR0_PORT);
-#endif
/*
* Install our clock interrupt handler.
*/
old_clock_handler = ivect[0];
ivect[0] = linux_timer_intr;
+#endif
reserve_mach_irqs ();
@@ -764,9 +765,11 @@ restore_IRQ (void)
*/
(void) splhigh ();
+#ifndef APIC
/*
* Restore clock interrupt handler.
*/
ivect[0] = old_clock_handler;
+#endif
}
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c
index 6730c5ec..a8307813 100644
--- a/linux/dev/glue/block.c
+++ b/linux/dev/glue/block.c
@@ -85,7 +85,6 @@
#include <linux/dev/glue/glue.h>
#ifdef PAE
-#warning TODO: make DMA32 between DIRECTMAP and HIGHMEM
#define VM_PAGE_LINUX VM_PAGE_DMA32
#else
#define VM_PAGE_LINUX VM_PAGE_HIGHMEM
@@ -207,7 +206,10 @@ int
blk_dev_init ()
{
#ifdef CONFIG_BLK_DEV_IDE
- ide_init ();
+ extern char *kernel_cmdline;
+ if (strncmp(kernel_cmdline, "noide", 5) &&
+ !strstr(kernel_cmdline, " noide"))
+ ide_init ();
#endif
#ifdef CONFIG_BLK_DEV_FD
floppy_init ();
diff --git a/linux/dev/glue/misc.c b/linux/dev/glue/misc.c
index 77dc31dd..5646e5ea 100644
--- a/linux/dev/glue/misc.c
+++ b/linux/dev/glue/misc.c
@@ -54,6 +54,7 @@
#include <mach/vm_param.h>
#include <kern/thread.h>
#include <kern/printf.h>
+#include <kern/mach_host.server.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <device/device_types.h>
@@ -234,7 +235,10 @@ do_gettimeofday (struct timeval *tv)
* expensive, and the host argument is not used by host_get_time (),
* only checked not to be HOST_NULL.
*/
- host_get_time ((host_t) 1, (time_value_t *) tv);
+ time_value64_t tv64;
+ host_get_time64 ((host_t) 1, &tv64);
+ tv->tv_sec = tv64.seconds;
+ tv->tv_usec = tv64.nanoseconds / 1000;
}
int
diff --git a/linux/dev/include/asm-i386/system.h b/linux/dev/include/asm-i386/system.h
index 41eb65a4..5187c5e5 100644
--- a/linux/dev/include/asm-i386/system.h
+++ b/linux/dev/include/asm-i386/system.h
@@ -1,7 +1,8 @@
#ifndef __ASM_SYSTEM_H
#define __ASM_SYSTEM_H
-#include <i386/ipl.h> /* curr_ipl, splx */
+#include <i386/ipl.h> /* curr_ipl[], splx */
+#include <kern/cpu_number.h>
#include <asm/segment.h>
@@ -225,7 +226,7 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size)
#define mb() __asm__ __volatile__ ("" : : :"memory")
#define __sti() __asm__ __volatile__ ("sti": : :"memory")
#define __cli() __asm__ __volatile__ ("cli": : :"memory")
-#define __save_flags(x) (x = ((curr_ipl > 0) ? 0 : (1 << 9)))
+#define __save_flags(x) (x = ((curr_ipl[cpu_number()] > 0) ? 0 : (1 << 9)))
#define __restore_flags(x) splx((x & (1 << 9)) ? 0 : 7)
#ifdef __SMP__
diff --git a/linux/dev/kernel/sched.c b/linux/dev/kernel/sched.c
index 2a9eeb3f..f87482ef 100644
--- a/linux/dev/kernel/sched.c
+++ b/linux/dev/kernel/sched.c
@@ -616,6 +616,9 @@ int linux_timer_print = 0;
void
linux_timer_intr (void)
{
+ if (cpu_number() != master_cpu)
+ return;
+
(*(unsigned long *) &jiffies)++;
mark_bh (TIMER_BH);
if (tq_timer)
diff --git a/linux/src/arch/i386/kernel/bios32.c b/linux/src/arch/i386/kernel/bios32.c
index c10cc0c0..bb0e89c2 100644
--- a/linux/src/arch/i386/kernel/bios32.c
+++ b/linux/src/arch/i386/kernel/bios32.c
@@ -877,8 +877,8 @@ unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)
*
*/
- for (check = (union bios32 *) 0xe0000;
- check <= (union bios32 *) 0xffff0;
+ for (check = (union bios32 *) phystokv(0xe0000);
+ check <= (union bios32 *) phystokv(0xffff0);
++check) {
if (check->fields.signature != BIOS32_SIGNATURE)
continue;
@@ -891,11 +891,11 @@ unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)
if (sum != 0)
continue;
if (check->fields.revision != 0) {
- printk("pcibios_init : unsupported revision %d at 0x%p, mail drew@colorado.edu\n",
- check->fields.revision, check);
+ printk("pcibios_init : unsupported revision %d at 0x%lx, mail drew@colorado.edu\n",
+ check->fields.revision, _kvtophys(check));
continue;
}
- printk ("pcibios_init : BIOS32 Service Directory structure at 0x%p\n", check);
+ printk ("pcibios_init : BIOS32 Service Directory structure at 0x%lx\n", _kvtophys(check));
if (!bios32_entry) {
if (check->fields.entry >= 0x100000) {
printk("pcibios_init: entry in high memory, trying direct PCI access\n");
diff --git a/linux/src/drivers/block/ide.c b/linux/src/drivers/block/ide.c
index 2d0fc77e..c8dee846 100644
--- a/linux/src/drivers/block/ide.c
+++ b/linux/src/drivers/block/ide.c
@@ -2964,7 +2964,7 @@ static void probe_cmos_for_drives (ide_hwif_t *hwif)
unsigned char head = *(BIOS+2);
unsigned char sect = *(BIOS+14);
unsigned char ctl = *(BIOS+8);
- if (cyl > 0 && head > 0 && sect > 0 && sect < 64) {
+ if (cyl > 0 && head > 0 && sect > 0 && sect < 64 && head < 255) {
drive->cyl = drive->bios_cyl = cyl;
drive->head = drive->bios_head = head;
drive->sect = drive->bios_sect = sect;
@@ -3725,7 +3725,10 @@ static void probe_for_hwifs (void)
#ifdef CONFIG_BLK_DEV_PROMISE
init_dc4030();
#endif
- ahci_probe_pci();
+ extern char *kernel_cmdline;
+ if (strncmp(kernel_cmdline, "noahci", 6) &&
+ !strstr(kernel_cmdline, " noahci"))
+ ahci_probe_pci();
}
static int hwif_init (int h)
diff --git a/tests/Makefrag.am b/tests/Makefrag.am
index 16d9677d..faabdf44 100644
--- a/tests/Makefrag.am
+++ b/tests/Makefrag.am
@@ -20,5 +20,15 @@
# Tests.
#
+if !PLATFORM_xen
+
+include tests/user-qemu.mk
+
TESTS += \
- tests/test-mbchk
+ tests/test-multiboot \
+ $(USER_TESTS)
+
+clean-local: $(USER_TESTS_CLEAN)
+ rm -fr tests/include-mach
+
+endif # !PLATFORM_xen
diff --git a/tests/README b/tests/README
new file mode 100644
index 00000000..3dacc184
--- /dev/null
+++ b/tests/README
@@ -0,0 +1,37 @@
+
+There are some basic tests that can be run with qemu. You can run all the tests with
+
+ $ make check
+
+or selectively with:
+
+ $ make run-hello
+
+Also, you can debug the existing tests, or a new one, by starting in one shell
+
+ $ make debug-hello
+
+and in another shell you can attach with gdb, load the symbols of the
+bootstrap module and break on its _start():
+
+ $ gdb gnumach
+ ...
+ (gdb) target remote :1234
+ ...
+ (gdb) b setup_main
+ Breakpoint 11 at 0xffffffff81019d60: file ../kern/startup.c, line 98.
+ (gdb) c
+ Continuing.
+
+ Breakpoint 11, setup_main () at ../kern/startup.c:98
+ 98 cninit();
+ (gdb) add-symbol-file ../gnumach/build-64/module-task
+ Reading symbols from ../gnumach/build-64/module-task...
+ (gdb) b _start
+ Breakpoint 12 at 0x40324a: _start. (2 locations)
+ (gdb) c
+ Continuing.
+
+ Breakpoint 12, _start () at ../tests/testlib.c:96
+ 96 {
+ (gdb)
diff --git a/tests/configfrag.ac b/tests/configfrag.ac
index 1c00fbb2..de87cbad 100644
--- a/tests/configfrag.ac
+++ b/tests/configfrag.ac
@@ -20,8 +20,24 @@ dnl 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Tests.
#
-AC_CONFIG_FILES([tests/test-mbchk], [chmod +x tests/test-mbchk])
+AC_CONFIG_FILES([tests/test-multiboot], [chmod +x tests/test-multiboot])
+
+[if test x"$enable_user32" = xyes ; then
+ ac_miguser=$user32_cpu-gnu-mig
+else
+ ac_miguser=$host_cpu-gnu-mig
+fi]
+
+AC_CHECK_PROG([USER_MIG], [$ac_miguser], [$ac_miguser])
+AC_ARG_VAR([USER_MIG], [Path to the mig tool for user-space tests])
+AC_CHECK_PROG([USER_CC], [$CC], [$CC], [none])
+AC_ARG_VAR([USER_CC], [C compiler command for user-space tests])
+AC_CHECK_PROG([USER_CPP], [$CPP], [$CPP], [none])
+AC_ARG_VAR([USER_CPP], [C preprocessor for user-space tests])
+AC_ARG_VAR([USER_CFLAGS], [C compiler flags for user-space tests])
+AC_ARG_VAR([USER_CPPFLAGS], [C preprocessor flags for user-space tests])
+
dnl Local Variables:
dnl mode: autoconf
dnl End:
diff --git a/tests/grub.cfg.single.template b/tests/grub.cfg.single.template
new file mode 100644
index 00000000..4432be3e
--- /dev/null
+++ b/tests/grub.cfg.single.template
@@ -0,0 +1,4 @@
+echo TEST_START_MARKER
+multiboot /boot/gnumach GNUMACHARGS
+module /boot/BOOTMODULE BOOTMODULE '${host-port}' '${device-port}' '$(task-create)' '$(task-resume)'
+boot
diff --git a/tests/include/device/cons.h b/tests/include/device/cons.h
new file mode 100644
index 00000000..f4d8fe16
--- /dev/null
+++ b/tests/include/device/cons.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef CONS_H
+#define CONS_H
+
+#include <mach/machine/vm_types.h>
+
+void cnputc(char c, vm_offset_t cookie);
+static inline int cngetc() { return 0; }
+
+#endif /* CONS_H */
diff --git a/tests/include/kern/printf.h b/tests/include/kern/printf.h
new file mode 120000
index 00000000..c61f3e0e
--- /dev/null
+++ b/tests/include/kern/printf.h
@@ -0,0 +1 @@
+../../../kern/printf.h \ No newline at end of file
diff --git a/tests/include/mach/mig_support.h b/tests/include/mach/mig_support.h
new file mode 100644
index 00000000..bf670083
--- /dev/null
+++ b/tests/include/mach/mig_support.h
@@ -0,0 +1,71 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1992 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Abstract:
+ * MIG helpers for gnumach tests, mainly copied from glibc
+ *
+ */
+
+#ifndef _MACH_MIG_SUPPORT_H_
+#define _MACH_MIG_SUPPORT_H_
+
+#include <string.h>
+
+#include <mach/message.h>
+#include <mach/mach_types.h>
+
+#include <syscalls.h>
+
+static inline void mig_init(void *_first)
+{}
+
+static inline void mig_allocate(vm_address_t *addr, vm_size_t size)
+{
+ if (syscall_vm_allocate(mach_task_self(), addr, size, 1) != KERN_SUCCESS)
+ *addr = 0;
+}
+static inline void mig_deallocate(vm_address_t addr, vm_size_t size)
+{
+ syscall_vm_deallocate (mach_task_self(), addr, size);
+}
+static inline void mig_dealloc_reply_port(mach_port_t port)
+{}
+static inline void mig_put_reply_port(mach_port_t port)
+{}
+static inline mach_port_t mig_get_reply_port(void)
+{
+ return mach_reply_port();
+}
+static inline void mig_reply_setup(const mach_msg_header_t *_request,
+ mach_msg_header_t *reply)
+{}
+
+static inline vm_size_t mig_strncpy (char *dst, const char *src, vm_size_t len)
+{
+ return dst - strncpy(dst, src, len);
+}
+
+#endif /* not defined(_MACH_MIG_SUPPORT_H_) */
diff --git a/tests/include/syscalls.h b/tests/include/syscalls.h
new file mode 100644
index 00000000..f958154c
--- /dev/null
+++ b/tests/include/syscalls.h
@@ -0,0 +1,83 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1992 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Abstract:
+ * Syscall functions
+ *
+ */
+
+#ifndef _SYSCALLS_
+#define _SYSCALLS_
+
+#include <device/device_types.h>
+#include <mach/message.h>
+
+// TODO: there is probably a better way to define these
+
+#define MACH_SYSCALL0(syscallid, retval, name) \
+ retval name(void) __attribute__((naked));
+
+#define MACH_SYSCALL1(syscallid, retval, name, arg1) \
+ retval name(arg1 a1) __attribute__((naked));
+
+#define MACH_SYSCALL2(syscallid, retval, name, arg1, arg2) \
+ retval name(arg1 a1, arg2 a2) __attribute__((naked));
+
+#define MACH_SYSCALL3(syscallid, retval, name, arg1, arg2, arg3) \
+ retval name(arg1 a1, arg2 a2, arg3 a3) __attribute__((naked));
+
+#define MACH_SYSCALL4(syscallid, retval, name, arg1, arg2, arg3, arg4) \
+ retval name(arg1 a1, arg2 a2, arg3 a3, arg4 a4) __attribute__((naked));
+
+#define MACH_SYSCALL6(syscallid, retval, name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ retval name(arg1 a1, arg2 a2, arg3 a3, arg4 a4, arg5 a5, arg6 a6) __attribute__((naked));
+
+#define MACH_SYSCALL7(syscallid, retval, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ retval name(arg1 a1, arg2 a2, arg3 a3, arg4 a4, arg5 a5, arg6 a6, arg7 a7) __attribute__((naked));
+
+#define mach_msg mach_msg_trap
+
+MACH_SYSCALL0(26, mach_port_name_t, mach_reply_port)
+MACH_SYSCALL0(27, mach_port_name_t, mach_thread_self)
+MACH_SYSCALL0(28, mach_port_name_t, mach_task_self)
+MACH_SYSCALL0(29, mach_port_name_t, mach_host_self)
+MACH_SYSCALL1(30, void, mach_print, const char*)
+MACH_SYSCALL0(31, kern_return_t, invalid_syscall)
+MACH_SYSCALL4(65, kern_return_t, syscall_vm_allocate, mach_port_t, vm_offset_t*, vm_size_t, boolean_t)
+MACH_SYSCALL3(66, kern_return_t, syscall_vm_deallocate, mach_port_t, vm_offset_t, vm_size_t)
+MACH_SYSCALL3(72, kern_return_t, syscall_mach_port_allocate, mach_port_t, mach_port_right_t, mach_port_t*)
+MACH_SYSCALL2(73, kern_return_t, syscall_mach_port_deallocate, mach_port_t, mach_port_t)
+
+/*
+ todo: swtch_pri swtch ...
+ these seem obsolete: evc_wait
+ evc_wait_clear syscall_device_writev_request
+ syscall_device_write_request ...
+ */
+MACH_SYSCALL6(40, io_return_t, syscall_device_write_request, mach_port_name_t,
+ mach_port_name_t, dev_mode_t, recnum_t, vm_offset_t, vm_size_t)
+
+#endif /* SYSCALLS */
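
The MACH_SYSCALL* macros above only declare naked prototypes, so each trap still needs a
body emitted elsewhere. A purely hypothetical sketch of the pairing, mirroring the
invalid_syscall entry that tests/syscalls.S adds further below (the name and trap number
here are made up):

    /* in the header: declare the user-side entry point */
    MACH_SYSCALL1(99, kern_return_t, syscall_example_nop, int)

    /* in an assembly file, let kernel_trap() from <mach/syscall_sw.h>
       emit the corresponding trap stub:
         kernel_trap(syscall_example_nop,-99,1)                        */
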
diff --git a/tests/include/testlib.h b/tests/include/testlib.h
new file mode 100644
index 00000000..a3f3a6a8
--- /dev/null
+++ b/tests/include/testlib.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef TESTLIB_H
+#define TESTLIB_H
+
+// in freestanding we can only use a few standard headers
+// float.h iso646.h limits.h stdarg.h stdbool.h stddef.h stdint.h
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdbool.h>
+
+#include <string.h> // we shouldn't include this from gcc, but it seems to be ok
+
+#include <mach/mach_types.h>
+#include <kern/printf.h>
+#include <util/atoi.h>
+#include <syscalls.h>
+
+#define ASSERT(cond, msg) do { \
+ if (!(cond)) \
+ { \
+ printf("%s: " #cond " failed: %s\n", \
+ TEST_FAILURE_MARKER, (msg)); \
+ halt(); \
+ } \
+ } while (0)
+
+#define ASSERT_RET(ret, msg) do { \
+ if ((ret) != KERN_SUCCESS) \
+ { \
+ printf("%s %s (0x%x): %s\n", \
+ TEST_FAILURE_MARKER, e2s((ret)), (ret), (msg)); \
+ halt(); \
+ } \
+ } while (0)
+
+#define FAILURE(msg) do { \
+ printf("%s: %s\n", TEST_FAILURE_MARKER, (msg)); \
+ halt(); \
+ } while (0)
+
+
+extern const char* TEST_SUCCESS_MARKER;
+extern const char* TEST_FAILURE_MARKER;
+
+const char* e2s(int err);
+const char* e2s_gnumach(int err);
+void halt();
+int msleep(uint32_t timeout);
+thread_t test_thread_start(task_t task, void(*routine)(void*), void* arg);
+
+mach_port_t host_priv(void);
+mach_port_t device_priv(void);
+
+int main(int argc, char *argv[], int envc, char *envp[]);
+
+#endif /* TESTLIB_H */
diff --git a/tests/include/util/atoi.h b/tests/include/util/atoi.h
new file mode 120000
index 00000000..c32c2582
--- /dev/null
+++ b/tests/include/util/atoi.h
@@ -0,0 +1 @@
+../../../util/atoi.h \ No newline at end of file
diff --git a/tests/run-qemu.sh.template b/tests/run-qemu.sh.template
new file mode 100644
index 00000000..aba8d68a
--- /dev/null
+++ b/tests/run-qemu.sh.template
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Copyright (C) 2024 Free Software Foundation
+#
+# This program is free software ; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation ; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY ; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the program ; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+set -e
+
+cmd="QEMU_BIN QEMU_OPTS -cdrom tests/test-TESTNAME.iso"
+log="tests/test-TESTNAME.raw"
+
+echo "temp log $log"
+if which QEMU_BIN >/dev/null ; then
+ if ! timeout -v --foreground --kill-after=3 15s $cmd \
+ | tee $log | sed -n "/TEST_START_MARKER/"',$p' ; then
+ exit 10 # timeout
+ fi
+ if grep -qi 'TEST_FAILURE_MARKER' $log; then
+    exit 99 # error marker found, test explicitly failed
+ fi
+ if ! grep -q 'TEST_SUCCESS_MARKER' $log; then
+ exit 12 # missing reboot marker, maybe the kernel crashed
+ fi
+else
+ echo "skipping, QEMU_BIN not found"
+ exit 77
+fi
diff --git a/tests/start.S b/tests/start.S
new file mode 100644
index 00000000..b795bfbd
--- /dev/null
+++ b/tests/start.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+ .global _start
+_start:
+#ifdef __i386__
+ pushl %esp
+ call c_start
+#endif /* __i386__ */
+#ifdef __x86_64__
+ movq %rsp,%rdi
+ callq c_start
+#endif /* __x86_64__ */
diff --git a/tests/syscalls.S b/tests/syscalls.S
new file mode 100644
index 00000000..df9c9bc0
--- /dev/null
+++ b/tests/syscalls.S
@@ -0,0 +1,4 @@
+
+ #include <mach/syscall_sw.h>
+
+ kernel_trap(invalid_syscall,-31,0)
diff --git a/tests/test-gsync.c b/tests/test-gsync.c
new file mode 100644
index 00000000..a5160651
--- /dev/null
+++ b/tests/test-gsync.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach/machine/vm_param.h>
+#include <mach/std_types.h>
+#include <mach/mach_types.h>
+#include <mach/vm_wire.h>
+
+#include <mach.user.h>
+#include <gnumach.user.h>
+
+/* Gsync flags. */
+#ifndef GSYNC_SHARED
+# define GSYNC_SHARED 0x01
+# define GSYNC_QUAD 0x02
+# define GSYNC_TIMED 0x04
+# define GSYNC_BROADCAST 0x08
+# define GSYNC_MUTATE 0x10
+#endif
+
+static uint32_t single_shared;
+static struct {
+ uint32_t val1;
+ uint32_t val2;
+} single_shared_quad;
+
+static void test_single()
+{
+ int err;
+ single_shared = 0;
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 0, 0, 100, GSYNC_TIMED);
+ ASSERT(err == KERN_TIMEDOUT, "gsync_wait did not timeout");
+
+ single_shared = 1;
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 0, 0, 100, GSYNC_TIMED);
+ ASSERT(err == KERN_INVALID_ARGUMENT, "gsync_wait on wrong value");
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 1, 0, 100, GSYNC_TIMED);
+ ASSERT(err == KERN_TIMEDOUT, "gsync_wait again on correct value did not timeout");
+
+ single_shared_quad.val1 = 1;
+ single_shared_quad.val2 = 2;
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared_quad, 99, 88,
+ 100, GSYNC_TIMED | GSYNC_QUAD);
+ ASSERT(err == KERN_INVALID_ARGUMENT, "gsync_wait on wrong quad value");
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared_quad, 1, 2,
+ 100, GSYNC_TIMED | GSYNC_QUAD);
+ ASSERT(err == KERN_TIMEDOUT, "gsync_wait again on correct value did not timeout");
+
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, 0, 0);
+ ASSERT_RET(err, "gsync_wake with nobody waiting");
+
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 1, 0, 100, GSYNC_TIMED);
+ ASSERT(err == KERN_TIMEDOUT, "gsync_wait not affected by previous gsync_wake");
+
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, 0, GSYNC_BROADCAST);
+ ASSERT_RET(err, "gsync_wake broadcast with nobody waiting");
+
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 1, 0, 100, GSYNC_TIMED);
+ ASSERT(err == KERN_TIMEDOUT, "gsync_wait not affected by previous gsync_wake");
+
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, 2, GSYNC_MUTATE);
+ ASSERT_RET(err, "gsync_wake nobody + mutate");
+ ASSERT(single_shared == 2, "gsync_wake single_shared did not mutate");
+
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, 0, 0);
+ ASSERT_RET(err, "gsync_wake nobody without mutate");
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, 0, 0);
+ ASSERT_RET(err, "gsync_wake 3a");
+}
+
+static void single_thread_setter(void *arg)
+{
+ int err;
+ int val = (long)arg;
+
+ /* It should be enough to sleep a bit for our creator to call
+ gsync_wait(). To verify that the test is performed with the
+ correct sequence, we also change the value so if the wait is
+ called after our wake it will fail with KERN_INVALID_ARGUMENT */
+ msleep(100);
+
+ err = gsync_wake(mach_task_self(), (vm_offset_t)&single_shared, val, GSYNC_MUTATE);
+ ASSERT_RET(err, "gsync_wake from thread + mutate");
+
+ thread_terminate(mach_thread_self());
+ FAILURE("thread_terminate");
+}
+
+static void test_single_from_thread()
+{
+ int err;
+ single_shared = 10;
+ test_thread_start(mach_task_self(), single_thread_setter, (void*)11);
+ err = gsync_wait(mach_task_self(), (vm_offset_t)&single_shared, 10, 0, 0, 0);
+ ASSERT_RET(err, "gsync_wait without timeout for wake from another thread");
+ ASSERT(single_shared == 11, "wake didn't mutate");
+}
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_single_from_thread();
+ test_single();
+ return 0;
+}
diff --git a/tests/test-hello.c b/tests/test-hello.c
new file mode 100644
index 00000000..0d739c61
--- /dev/null
+++ b/tests/test-hello.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <testlib.h>
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ int ret = printf("hello!!\n");
+ ASSERT_RET(ret, "printf() should return 0 here");
+ return 0;
+}
diff --git a/tests/test-mach_host.c b/tests/test-mach_host.c
new file mode 100644
index 00000000..53f30240
--- /dev/null
+++ b/tests/test-mach_host.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <testlib.h>
+
+#include <mach_host.user.h>
+
+void test_kernel_version()
+{
+ int err;
+ kernel_version_t kver;
+ err = host_get_kernel_version(mach_host_self(), kver);
+ ASSERT_RET(err, "host_kernel_info");
+ printf("kernel version: %s\n", kver);
+}
+
+void test_host_info()
+{
+ int err;
+ mach_msg_type_number_t count;
+ mach_port_t thishost = mach_host_self();
+
+ host_basic_info_data_t binfo;
+ count = HOST_BASIC_INFO_COUNT;
+ err = host_info(thishost, HOST_BASIC_INFO, (host_info_t)&binfo, &count);
+ ASSERT_RET(err, "host_basic_info");
+ ASSERT(count == HOST_BASIC_INFO_COUNT, "");
+ ASSERT(binfo.max_cpus > 0, "no cpu?");
+ ASSERT(binfo.avail_cpus > 0, "no cpu available?");
+ ASSERT(binfo.memory_size > (1024 * 1024), "memory too low");
+
+ const int maxcpus = 255;
+ int proc_slots[maxcpus];
+ count = maxcpus;
+ err = host_info(thishost, HOST_PROCESSOR_SLOTS, (host_info_t)&proc_slots, &count);
+ ASSERT_RET(err, "host_processor_slots");
+ ASSERT((1 <= count) && (count <= maxcpus), "");
+
+ host_sched_info_data_t sinfo;
+ count = HOST_SCHED_INFO_COUNT;
+ err = host_info(thishost, HOST_SCHED_INFO, (host_info_t)&sinfo, &count);
+ ASSERT_RET(err, "host_sched_info");
+ ASSERT(count == HOST_SCHED_INFO_COUNT, "");
+ ASSERT(sinfo.min_timeout < 1000, "timeout unexpectedly high");
+ ASSERT(sinfo.min_quantum < 1000, "quantum unexpectedly high");
+
+ host_load_info_data_t linfo;
+ count = HOST_LOAD_INFO_COUNT;
+ err = host_info(thishost, HOST_LOAD_INFO, (host_info_t)&linfo, &count);
+ ASSERT_RET(err, "host_load_info");
+ ASSERT(count == HOST_LOAD_INFO_COUNT, "");
+ for (int i=0; i<3; i++)
+ {
+ printf("avenrun %d\n", linfo.avenrun[i]);
+ printf("mach_factor %d\n", linfo.mach_factor[i]);
+ }
+}
+
+// TODO processor sets
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_kernel_version();
+ test_host_info();
+ return 0;
+}
diff --git a/tests/test-mach_port.c b/tests/test-mach_port.c
new file mode 100644
index 00000000..81a1072b
--- /dev/null
+++ b/tests/test-mach_port.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/message.h>
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach.user.h>
+#include <mach_port.user.h>
+
+void test_mach_port(void)
+{
+ kern_return_t err;
+
+ mach_port_name_t *namesp;
+ mach_msg_type_number_t namesCnt=0;
+ mach_port_type_t *typesp;
+ mach_msg_type_number_t typesCnt=0;
+ err = mach_port_names(mach_task_self(), &namesp, &namesCnt, &typesp, &typesCnt);
+ ASSERT_RET(err, "mach_port_names");
+ printf("mach_port_names: type/name length: %d %d\n", namesCnt, typesCnt);
+ ASSERT((namesCnt != 0) && (namesCnt == typesCnt),
+ "mach_port_names: wrong type/name length");
+ for (int i=0; i<namesCnt; i++)
+ printf("port name %d type %x\n", namesp[i], typesp[i]);
+
+ /*
+ * Test mach_port_type() using the ports we already received as a
+ * bootstrap module; maybe we could do more checks on the bootstrap
+ * ports and on other modules.
+ */
+ mach_port_type_t pt;
+ err = mach_port_type(mach_task_self(), host_priv(), &pt);
+ ASSERT_RET(err, "mach_port_type host_priv");
+ ASSERT(pt == MACH_PORT_TYPE_SEND, "wrong type for host_priv");
+
+ err = mach_port_type(mach_task_self(), device_priv(), &pt);
+ ASSERT_RET(err, "mach_port_type device_priv");
+ ASSERT(pt == MACH_PORT_TYPE_SEND, "wrong type for device_priv");
+
+ err = mach_port_rename(mach_task_self(), device_priv(), 111);
+ ASSERT_RET(err, "mach_port_rename");
+ // FIXME: it seems we can't restore the old name, so just rename once more
+ err = mach_port_rename(mach_task_self(), 111, 112);
+ ASSERT_RET(err, "mach_port_rename again");
+
+ const mach_port_t nrx = 1000, nset = 1001, ndead = 1002;
+ err = mach_port_allocate_name(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, nrx);
+ ASSERT_RET(err, "mach_port_allocate_name rx");
+ err = mach_port_allocate_name(mach_task_self(), MACH_PORT_RIGHT_PORT_SET, nset);
+ ASSERT_RET(err, "mach_port_allocate_name pset");
+ err = mach_port_allocate_name(mach_task_self(), MACH_PORT_RIGHT_DEAD_NAME, ndead);
+ ASSERT_RET(err, "mach_port_allocate_name dead");
+
+ // set to a valid name to check it's really allocated to a new one
+ mach_port_t newname = nrx, oldname = nrx;
+ err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &newname);
+ ASSERT_RET(err, "mach_port_allocate");
+ ASSERT(newname != nrx, "allocated name didn't change");
+
+ oldname = newname;
+ newname = nrx;
+ err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_PORT_SET, &newname);
+ ASSERT_RET(err, "mach_port_allocate");
+ ASSERT(newname != nrx, "allocated name didn't change");
+ ASSERT(newname != oldname, "allocated name is duplicated");
+
+ oldname = newname;
+ newname = nrx;
+ err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_DEAD_NAME, &newname);
+ ASSERT_RET(err, "mach_port_allocate");
+ ASSERT(newname != nrx, "allocated name didn't change");
+ ASSERT(newname != oldname, "allocated name is duplicated");
+
+ err = mach_port_destroy(mach_task_self(), newname);
+ ASSERT_RET(err, "mach_port_destroy");
+
+ mach_port_urefs_t urefs;
+ err = mach_port_get_refs(mach_task_self(), nrx, MACH_PORT_RIGHT_RECEIVE, &urefs);
+ ASSERT_RET(err, "mach_port_get_refs");
+ ASSERT(urefs == 1, "rx port urefs");
+ err = mach_port_get_refs(mach_task_self(), nset, MACH_PORT_RIGHT_PORT_SET, &urefs);
+ ASSERT_RET(err, "mach_port_get_refs");
+ ASSERT(urefs == 1, "pset port urefs");
+ err = mach_port_get_refs(mach_task_self(), ndead, MACH_PORT_RIGHT_DEAD_NAME, &urefs);
+ ASSERT_RET(err, "mach_port_get_refs");
+ ASSERT(urefs == 1, "dead port urefs");
+
+ err = mach_port_destroy(mach_task_self(), nrx);
+ ASSERT_RET(err, "mach_port_destroy rx");
+ err = mach_port_destroy(mach_task_self(), nset);
+ ASSERT_RET(err, "mach_port_destroy pset");
+ err = mach_port_deallocate(mach_task_self(), ndead);
+ ASSERT_RET(err, "mach_port_deallocate dead");
+
+ // TODO test other rpc
+}
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_mach_port();
+ return 0;
+}
diff --git a/tests/test-machmsg.c b/tests/test-machmsg.c
new file mode 100644
index 00000000..60f3f49f
--- /dev/null
+++ b/tests/test-machmsg.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/message.h>
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach.user.h>
+#include <mach_port.user.h>
+#include <mach_host.user.h>
+
+#define ECHO_MAX_BODY_LEN 256
+
+static uint32_t align(uint32_t val, size_t aln)
+{
+ // we should check aln is a power of 2
+ aln--;
+ return (val + aln) & (~aln);
+}
+
+#define ALIGN_INLINE(val, n) { (val) = align((val), (n)); }
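+/* For example, align(13, 8) yields 16 and align(16, 8) stays 16; the tests
+ below use ALIGN_INLINE() to round message sizes up to the user alignment. */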
+
+struct echo_params
+{
+ mach_port_t rx_port;
+ mach_msg_size_t rx_size;
+ mach_msg_size_t rx_number;
+};
+
+void echo_thread (void *arg)
+{
+ struct echo_params *params = arg;
+ int err;
+ struct message
+ {
+ mach_msg_header_t header;
+ char body[ECHO_MAX_BODY_LEN];
+ } message;
+
+ printf ("thread echo started\n");
+ for (mach_msg_size_t i=0; i<params->rx_number; i++)
+ {
+ message.header.msgh_local_port = params->rx_port;
+ message.header.msgh_size = sizeof (message);
+
+ err = mach_msg (&message.header,
+ MACH_RCV_MSG,
+ 0, sizeof (message),
+ params->rx_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+ ASSERT_RET(err, "mach_msg echo rx");
+ printf("echo rx %d expected 5d\n",
+ message.header.msgh_size, params->rx_size);
+ ASSERT(message.header.msgh_size == params->rx_size,
+ "wrong size in echo rx");
+
+ message.header.msgh_local_port = MACH_PORT_NULL;
+ printf ("echo: msgh_id %d\n", message.header.msgh_id);
+
+ err = mach_msg (&message.header,
+ MACH_SEND_MSG,
+ message.header.msgh_size, 0,
+ MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+ ASSERT_RET(err, "mach_msg echo tx");
+ }
+ printf ("thread echo stopped\n");
+ thread_terminate (mach_thread_self ());
+ FAILURE("thread_terminate");
+}
+
+#define TEST_ITERATIONS 3
+
+// TODO run_test_iterations
+void
+test_iterations (void)
+{
+ mach_port_t port, receive;
+ int err;
+ struct message
+ {
+ mach_msg_header_t header;
+ mach_msg_type_t type;
+ char data[64];
+ } message;
+
+ err = mach_port_allocate (mach_task_self (),
+ MACH_PORT_RIGHT_RECEIVE, &port);
+ ASSERT_RET(err, "mach_port_allocate");
+
+ err = mach_port_allocate (mach_task_self (),
+ MACH_PORT_RIGHT_RECEIVE, &receive);
+ ASSERT_RET(err, "mach_port_allocate 2");
+
+ struct echo_params params;
+ params.rx_port = port;
+ params.rx_size = sizeof(message.header) + sizeof(message.type) + 5;
+ ALIGN_INLINE(params.rx_size, MACH_MSG_USER_ALIGNMENT);
+ params.rx_number = TEST_ITERATIONS;
+ test_thread_start (mach_task_self (), echo_thread, &params);
+
+ time_value_t start_time;
+ err = host_get_time (mach_host_self (), &start_time);
+ ASSERT_RET(err, "host_get_time");
+
+ /* Send a message down the port */
+ for (int i = 0; i < TEST_ITERATIONS; i++)
+ {
+ struct message message;
+
+ memset (&message, 0, sizeof (message));
+ strcpy (message.data, "ciao");
+ size_t datalen = strlen (message.data) + 1;
+
+ message.header.msgh_bits
+ = MACH_MSGH_BITS (MACH_MSG_TYPE_MAKE_SEND,
+ MACH_MSG_TYPE_MAKE_SEND_ONCE);
+ message.header.msgh_remote_port = port; /* Request port */
+ message.header.msgh_local_port = receive; /* Reply port */
+ message.header.msgh_id = 123; /* Message id */
+ message.header.msgh_size = sizeof (message.header) + sizeof (message.type) + datalen; /* Message size */
+ ALIGN_INLINE(message.header.msgh_size, 4);
+ message.type.msgt_name = MACH_MSG_TYPE_STRING; /* Parameter type */
+ message.type.msgt_size = 8 * datalen; /* # Bits */
+ message.type.msgt_number = 1; /* Number of elements */
+ message.type.msgt_inline = TRUE; /* Inlined */
+ message.type.msgt_longform = FALSE; /* Shortform */
+ message.type.msgt_deallocate = FALSE; /* Do not deallocate */
+ message.type.msgt_unused = 0; /* = 0 */
+
+ /* Send the message on its way and wait for a reply. */
+ err = mach_msg (&message.header, /* The header */
+ MACH_SEND_MSG | MACH_RCV_MSG, /* Flags */
+ message.header.msgh_size, /* Send size */
+ sizeof (message), /* Max receive Size */
+ receive, /* Receive port */
+ MACH_MSG_TIMEOUT_NONE, /* No timeout */
+ MACH_PORT_NULL); /* No notification */
+ ASSERT_RET(err, "mach_msg txrx");
+ }
+
+ time_value_t stop_time;
+ err = host_get_time (mach_host_self (), &stop_time);
+ ASSERT_RET(err, "host_get_time");
+
+ printf ("start: %d.%06d\n", start_time.seconds, start_time.microseconds);
+ printf ("stop: %d.%06d\n", stop_time.seconds, stop_time.microseconds);
+}
+
+/*
+ Test a specific message type on the tx, rx and rx-continue paths.
+ We need to be able to create a thread for this, so some RPCs must already work.
+*/
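+/*
+ In short: allocate a request and a reply port, start echo_thread() receiving
+ on the request port, then send the caller-provided message and wait for the
+ echoed copy on the reply port, checking the size on both sides.
+*/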
+void
+run_test_simple(void *msg, mach_msg_size_t msglen, mach_msg_id_t msgid)
+{
+ mach_msg_header_t *head = msg;
+ mach_port_t port, receive;
+ int err;
+
+ err = syscall_mach_port_allocate (mach_task_self (),
+ MACH_PORT_RIGHT_RECEIVE, &port);
+ ASSERT_RET(err, "syscall_mach_port_allocate");
+
+ err = syscall_mach_port_allocate (mach_task_self (),
+ MACH_PORT_RIGHT_RECEIVE, &receive);
+ ASSERT_RET(err, "syscall_mach_port_allocate 2");
+
+ struct echo_params params;
+ params.rx_port = port;
+ params.rx_size = msglen;
+ params.rx_number = 1;
+ test_thread_start (mach_task_self (), echo_thread, &params);
+
+ head->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND,
+ MACH_MSG_TYPE_MAKE_SEND_ONCE);
+ head->msgh_remote_port = port;
+ head->msgh_local_port = receive;
+ head->msgh_id = msgid;
+ head->msgh_size = msglen;
+
+ err = mach_msg (msg,
+ MACH_SEND_MSG | MACH_RCV_MSG,
+ msglen,
+ msglen,
+ receive,
+ MACH_MSG_TIMEOUT_NONE,
+ MACH_PORT_NULL);
+ ASSERT_RET(err, "mach_msg txrx");
+
+ printf("size in final rx: %d expected %d\n", head->msgh_size, msglen);
+ ASSERT(head->msgh_size == msglen, "wrong size in final rx");
+}
+
+void
+run_test_simple_self(void *msg, mach_msg_size_t msglen, mach_msg_id_t msgid)
+{
+ mach_msg_header_t *head = msg;
+ mach_port_t port, receive;
+ int err;
+
+ err = syscall_mach_port_allocate (mach_task_self (),
+ MACH_PORT_RIGHT_RECEIVE, &port);
+ ASSERT_RET(err, "syscall_mach_port_allocate");
+
+ head->msgh_bits
+ = MACH_MSGH_BITS (MACH_MSG_TYPE_MAKE_SEND,
+ MACH_MSG_TYPE_MAKE_SEND_ONCE);
+ /* head->msgh_bits */
+ /* = MACH_MSGH_BITS (MACH_MSG_TYPE_MAKE_SEND_ONCE, */
+ /* MACH_MSG_TYPE_COPY_SEND); */
+
+ head->msgh_bits |= MACH_MSGH_BITS_COMPLEX;
+ head->msgh_remote_port = port;
+ head->msgh_local_port = port;
+ head->msgh_id = msgid;
+ head->msgh_size = msglen;
+
+ err = mach_msg (msg,
+ MACH_SEND_MSG | MACH_RCV_MSG,
+ msglen,
+ msglen,
+ port,
+ MACH_MSG_TIMEOUT_NONE,
+ MACH_PORT_NULL);
+ ASSERT_RET(err, "mach_msg txrx");
+
+ printf("size in final rx: %d expected %d\n", head->msgh_size, msglen);
+ ASSERT(head->msgh_size == msglen, "wrong size in final rx\n");
+}
+
+
+void test_msg_string(void)
+{
+ struct message
+ {
+ mach_msg_header_t header;
+ mach_msg_type_t type;
+ char data[64];
+ } msg;
+ char *test_strings[] = {"123", "12345", "ciaociao"};
+
+ memset (&msg, 0, sizeof (struct message));
+ strcpy (msg.data, "ciao");
+ size_t datalen = strlen (msg.data) + 1;
+
+ int msgid = 123;
+ int msglen = sizeof (msg.header) + sizeof (msg.type) + datalen;
+ ALIGN_INLINE(msglen, MACH_MSG_USER_ALIGNMENT);
+ msg.type.msgt_name = MACH_MSG_TYPE_STRING;
+ msg.type.msgt_size = 8 * datalen;
+ msg.type.msgt_number = 1;
+ msg.type.msgt_inline = TRUE;
+ msg.type.msgt_longform = FALSE;
+ msg.type.msgt_deallocate = FALSE;
+ msg.type.msgt_unused = 0;
+
+ run_test_simple_self(&msg, msglen, msgid);
+ run_test_simple(&msg, msglen, msgid);
+}
+
+void test_msg_string2(void)
+{
+ struct message
+ {
+ mach_msg_header_t header;
+ mach_msg_type_t type;
+ char data[10];
+ mach_msg_type_t type2;
+ char data2[5];
+ } msg;
+ const int len1 = 10;
+ const int len2 = 5;
+
+ memset (&msg, 0, sizeof (struct message));
+ int msgid = 123;
+ int msglen = sizeof (msg.header) + sizeof (msg.type) + len1;
+ ALIGN_INLINE(msglen, MACH_MSG_USER_ALIGNMENT);
+ msglen += sizeof (msg.type2) + len2;
+ ALIGN_INLINE(msglen, MACH_MSG_USER_ALIGNMENT);
+ msg.type.msgt_name = MACH_MSG_TYPE_STRING;
+ msg.type.msgt_size = 8 * len1;
+ msg.type.msgt_number = 1;
+ msg.type.msgt_inline = TRUE;
+ msg.type.msgt_longform = FALSE;
+ msg.type.msgt_deallocate = FALSE;
+ msg.type.msgt_unused = 0;
+ memset (msg.data, 'c', len1);
+ msg.type2.msgt_name = MACH_MSG_TYPE_CHAR;
+ msg.type2.msgt_size = 8;
+ msg.type2.msgt_number = len2;
+ msg.type2.msgt_inline = TRUE;
+ msg.type2.msgt_longform = FALSE;
+ msg.type2.msgt_deallocate = FALSE;
+ msg.type2.msgt_unused = 0;
+ memset (msg.data2, 'x', len2);
+
+ run_test_simple_self(&msg, msglen, msgid);
+ run_test_simple(&msg, msglen, msgid);
+}
+
+
+void test_msg_ports(void)
+{
+ struct message
+ {
+ mach_msg_header_t head;
+ mach_msg_type_t type;
+ mach_port_t *portp;
+ } msg;
+ mach_port_t msgports[3];
+
+ memset (&msg, 0, sizeof (struct message));
+
+ int msgid = 123;
+ int msglen = sizeof (msg.head) + sizeof (msg.type) + sizeof(msg.portp);
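+ /* The three send rights are passed out-of-line: only the pointer is part
+ of the message body, while msgt_inline = FALSE and msgt_number = 3 tell
+ the kernel to translate the rights stored in msgports[]. */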
+ msg.type.msgt_name = MACH_MSG_TYPE_MOVE_SEND;
+ msg.type.msgt_size = 8*sizeof(mach_port_t);
+ msg.type.msgt_number = 3;
+ msg.type.msgt_inline = FALSE;
+ msg.type.msgt_longform = FALSE;
+ msg.type.msgt_deallocate = FALSE;
+ msg.type.msgt_unused = 0;
+ msg.portp = msgports;
+ msgports[0] = mach_host_self();
+ msgports[1] = mach_task_self();
+ msgports[2] = mach_thread_self();
+
+ run_test_simple_self(&msg, msglen, msgid);
+ run_test_simple(&msg, msglen, msgid);
+}
+
+void test_msg_emptydesc(void)
+{
+ struct message
+ {
+ mach_msg_header_t header;
+ mach_msg_type_t type_empty;
+ vm_offset_t addr_empty;
+ mach_msg_type_t type;
+ char data[64];
+ } msg;
+
+ memset (&msg, 0, sizeof (struct message));
+ strcpy (msg.data, "ciao");
+ size_t datalen = strlen (msg.data) + 1;
+
+ int msgid = 123;
+ int msglen = sizeof (msg.header);
+ msglen += sizeof (msg.type_empty)+ sizeof (msg.addr_empty);
+ msglen += sizeof (msg.type) + datalen;
+ ALIGN_INLINE(msglen, MACH_MSG_USER_ALIGNMENT);
+ msg.type_empty.msgt_name = MACH_MSG_TYPE_STRING;
+ msg.type_empty.msgt_size = 8;
+ msg.type_empty.msgt_number = 0;
+ msg.type_empty.msgt_inline = FALSE;
+ msg.type_empty.msgt_longform = FALSE;
+ msg.type_empty.msgt_deallocate = FALSE;
+ msg.type_empty.msgt_unused = 0;
+ msg.addr_empty = 0;
+
+ msg.type.msgt_name = MACH_MSG_TYPE_STRING;
+ msg.type.msgt_size = 8 * datalen;
+ msg.type.msgt_number = 1;
+ msg.type.msgt_inline = TRUE;
+ msg.type.msgt_longform = FALSE;
+ msg.type.msgt_deallocate = FALSE;
+ msg.type.msgt_unused = 0;
+
+ run_test_simple_self(&msg, msglen, msgid);
+ run_test_simple(&msg, msglen, msgid);
+}
+
+
+int
+main (int argc, char *argv[], int envc, char *envp[])
+{
+ printf("test_msg_string()\n");
+ test_msg_string();
+ printf("test_msg_string2()\n");
+ test_msg_string2();
+ printf("test_msg_ports()\n");
+ test_msg_ports();
+ printf("test_msg_emptydesc()\n");
+ test_msg_emptydesc();
+ printf("test_iters()\n");
+ test_iterations();
+ return 0;
+}
diff --git a/tests/test-mbchk.in b/tests/test-multiboot.in
index a04a00eb..20ab3309 100644
--- a/tests/test-mbchk.in
+++ b/tests/test-multiboot.in
@@ -2,7 +2,7 @@
# Test if the kernel image complies with the multiboot specification.
-# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 2023 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
@@ -18,10 +18,10 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-if mbchk --version > /dev/null 2>&1
-then mbchk gnumach
+if grub-file --help > /dev/null 2>&1
+then grub-file --is-x86-multiboot gnumach
else
- # `mbchk' is not available -- ignore this test.
+ # `grub-file' is not available -- ignore this test.
exit 77
fi
diff --git a/tests/test-syscalls.c b/tests/test-syscalls.c
new file mode 100644
index 00000000..be4df8c3
--- /dev/null
+++ b/tests/test-syscalls.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach/exception.h>
+#include <mach/mig_errors.h>
+#include <mach/vm_param.h>
+
+#include <mach.user.h>
+#include <mach_port.user.h>
+#include <exc.server.h>
+
+
+static struct {
+ mach_port_t exception_port;
+ mach_port_t thread;
+ mach_port_t task;
+ integer_t exception;
+ integer_t code;
+ integer_t subcode;
+} last_exc;
+kern_return_t catch_exception_raise(mach_port_t exception_port,
+ mach_port_t thread, mach_port_t task,
+ integer_t exception, integer_t code,
+ long_integer_t subcode)
+{
+ printf("received catch_exception_raise(%u %u %u %d %d %d)\n",
+ exception_port, thread, task, exception, code, subcode);
+ last_exc.exception_port = exception_port;
+ last_exc.thread = thread;
+ last_exc.task = task;
+ last_exc.exception = exception;
+ last_exc.code = code;
+ last_exc.subcode = subcode;
+ return KERN_SUCCESS;
+}
+
+static char simple_request_data[PAGE_SIZE];
+static char simple_reply_data[PAGE_SIZE];
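+/* Minimal receive loop: take num_msgs messages from rcv_port_name and feed
+ each one to the MIG-generated demuxer (exc_server below); replies are not
+ sent back yet, see the TODO in the loop. */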
+int simple_msg_server(boolean_t (*demuxer) (mach_msg_header_t *request,
+ mach_msg_header_t *reply),
+ mach_port_t rcv_port_name,
+ int num_msgs)
+{
+ int midx = 0, mok = 0;
+ int ret;
+ mig_reply_header_t *request = (mig_reply_header_t*)simple_request_data;
+ mig_reply_header_t *reply = (mig_reply_header_t*)simple_reply_data;
+ while (midx < num_msgs)
+ {
+ ret = mach_msg(&request->Head, MACH_RCV_MSG, 0, PAGE_SIZE,
+ rcv_port_name, 0, MACH_PORT_NULL);
+ switch (ret)
+ {
+ case MACH_MSG_SUCCESS:
+ if ((*demuxer)(&request->Head, &reply->Head))
+ mok++; // TODO send reply
+ else
+ FAILURE("demuxer didn't handle the message");
+ break;
+ default:
+ ASSERT_RET(ret, "receiving in msg_server");
+ break;
+ }
+ midx++;
+ }
+ if (mok != midx)
+ FAILURE("wrong number of message received");
+ return mok != midx;
+}
+
+
+void test_syscall_bad_arg_on_stack(void *arg)
+{
+ /* mach_msg() has 7 arguments, so the last one should always be
+ passed on the stack on x86. Here we make ESP/RSP point to the
+ wrong place to test the access check. */
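+ /* Note: -25 should be the mach_msg_trap() slot in the Mach trap table, so
+ the kernel has to copy the trailing arguments in from the bogus stack. */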
+#ifdef __x86_64__
+ asm volatile("movq $0x123,%rsp;" \
+ "movq $-25,%rax;" \
+ "syscall;" \
+ );
+#else
+ asm volatile("mov $0x123,%esp;" \
+ "mov $-25,%eax;" \
+ "lcall $0x7,$0x0;" \
+ );
+#endif
+ FAILURE("we shouldn't be here!");
+}
+
+void test_bad_syscall_num(void *arg)
+{
+#ifdef __x86_64__
+ asm volatile("movq $0x123456,%rax;" \
+ "syscall;" \
+ );
+#else
+ asm volatile("mov $0x123456,%eax;" \
+ "lcall $0x7,$0x0;" \
+ );
+#endif
+ FAILURE("we shouldn't be here!");
+}
+
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ int err;
+ mach_port_t excp;
+
+ err = mach_port_allocate(mach_task_self (), MACH_PORT_RIGHT_RECEIVE, &excp);
+ ASSERT_RET(err, "creating exception port");
+
+ err = mach_port_insert_right(mach_task_self(), excp, excp,
+ MACH_MSG_TYPE_MAKE_SEND);
+ ASSERT_RET(err, "inserting send right into exception port");
+
+ err = task_set_special_port(mach_task_self(), TASK_EXCEPTION_PORT, excp);
+ ASSERT_RET(err, "setting task exception port");
+
+ /* FIXME: receiving an exception with small size causes GP on 64 bit userspace */
+ /* mig_reply_header_t msg; */
+ /* err = mach_msg(&msg.Head, /\* The header *\/ */
+ /* MACH_RCV_MSG, */
+ /* 0, */
+ /* sizeof (msg), /\* Max receive Size *\/ */
+ /* excp, */
+ /* 1000, */
+ /* MACH_PORT_NULL); */
+
+ // FIXME: maybe MIG should provide this prototype?
+ boolean_t exc_server
+ (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP);
+
+ memset(&last_exc, 0, sizeof(last_exc));
+ test_thread_start(mach_task_self(), test_bad_syscall_num, NULL);
+ ASSERT_RET(simple_msg_server(exc_server, excp, 1), "error in exc server");
+ ASSERT((last_exc.exception == EXC_BAD_INSTRUCTION) && (last_exc.code == EXC_I386_INVOP),
+ "bad exception for test_bad_syscall_num()");
+
+ memset(&last_exc, 0, sizeof(last_exc));
+ test_thread_start(mach_task_self(), test_syscall_bad_arg_on_stack, NULL);
+ ASSERT_RET(simple_msg_server(exc_server, excp, 1), "error in exc server");
+ ASSERT((last_exc.exception == EXC_BAD_ACCESS) && (last_exc.code == KERN_INVALID_ADDRESS),
+ "bad exception for test_syscall_bad_arg_on_stack()");
+
+ return 0;
+}
diff --git a/tests/test-task.c b/tests/test-task.c
new file mode 100644
index 00000000..cbc75e23
--- /dev/null
+++ b/tests/test-task.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach/machine/vm_param.h>
+#include <mach/std_types.h>
+#include <mach/mach_types.h>
+#include <mach/vm_wire.h>
+#include <mach/vm_param.h>
+
+#include <gnumach.user.h>
+#include <mach.user.h>
+
+
+void test_task()
+{
+ mach_port_t ourtask = mach_task_self();
+ mach_msg_type_number_t count;
+ int err;
+
+ struct task_basic_info binfo;
+ count = TASK_BASIC_INFO_COUNT;
+ err = task_info(ourtask, TASK_BASIC_INFO, (task_info_t)&binfo, &count);
+ ASSERT_RET(err, "TASK_BASIC_INFO");
+ ASSERT(binfo.virtual_size > binfo.resident_size, "wrong memory counters");
+
+ struct task_events_info einfo;
+ count = TASK_EVENTS_INFO_COUNT;
+ err = task_info(ourtask, TASK_EVENTS_INFO, (task_info_t)&einfo, &count);
+ ASSERT_RET(err, "TASK_EVENTS_INFO");
+ printf("msgs sent %llu received %llu\n",
+ einfo.messages_sent, einfo.messages_received);
+
+ struct task_thread_times_info ttinfo;
+ count = TASK_THREAD_TIMES_INFO_COUNT;
+ err = task_info(ourtask, TASK_THREAD_TIMES_INFO, (task_info_t)&ttinfo, &count);
+ ASSERT_RET(err, "TASK_THREAD_TIMES_INFO");
+ printf("run user %lld system %lld\n",
+ ttinfo.user_time64.seconds, ttinfo.user_time64.nanoseconds);
+}
+
+
+void dummy_thread(void *arg)
+{
+ printf("started dummy thread\n");
+ while (1)
+ ;
+}
+
+void check_threads(thread_t *threads, mach_msg_type_number_t nthreads)
+{
+ for (int tid=0; tid<nthreads; tid++)
+ {
+ struct thread_basic_info tinfo;
+ mach_msg_type_number_t thcount = THREAD_BASIC_INFO_COUNT;
+ int err = thread_info(threads[tid], THREAD_BASIC_INFO, (thread_info_t)&tinfo, &thcount);
+ ASSERT_RET(err, "thread_info");
+ ASSERT(thcount == THREAD_BASIC_INFO_COUNT, "thcount");
+ printf("th%d (port %d):\n", tid, threads[tid]);
+ printf(" user time %d.%06d\n", tinfo.user_time.seconds, tinfo.user_time.microseconds);
+ printf(" system time %d.%06d\n", tinfo.system_time.seconds, tinfo.system_time.microseconds);
+ printf(" cpu usage %d\n", tinfo.cpu_usage);
+ printf(" creation time %d.%06d\n", tinfo.creation_time.seconds, tinfo.creation_time.microseconds);
+ }
+}
+
+static void test_task_threads()
+{
+ thread_t *threads;
+ mach_msg_type_number_t nthreads;
+ int err;
+
+ err = task_threads(mach_task_self(), &threads, &nthreads);
+ ASSERT_RET(err, "task_threads");
+ ASSERT(nthreads == 1, "nthreads");
+ check_threads(threads, nthreads);
+
+ thread_t t1 = test_thread_start(mach_task_self(), dummy_thread, 0);
+
+ thread_t t2 = test_thread_start(mach_task_self(), dummy_thread, 0);
+
+ // let the threads run
+ msleep(100);
+
+ err = task_threads(mach_task_self(), &threads, &nthreads);
+ ASSERT_RET(err, "task_threads");
+ ASSERT(nthreads == 3, "nthreads");
+ check_threads(threads, nthreads);
+
+ err = thread_terminate(t1);
+ ASSERT_RET(err, "thread_terminate");
+ err = thread_terminate(t2);
+ ASSERT_RET(err, "thread_terminate");
+
+ err = task_threads(mach_task_self(), &threads, &nthreads);
+ ASSERT_RET(err, "task_threads");
+ ASSERT(nthreads == 1, "nthreads");
+ check_threads(threads, nthreads);
+}
+
+void test_new_task()
+{
+ int err;
+ task_t newtask;
+ err = task_create(mach_task_self(), 1, &newtask);
+ ASSERT_RET(err, "task_create");
+
+ err = task_suspend(newtask);
+ ASSERT_RET(err, "task_suspend");
+
+ err = task_set_name(newtask, "newtask");
+ ASSERT_RET(err, "task_set_name");
+
+ thread_t *threads;
+ mach_msg_type_number_t nthreads;
+ err = task_threads(newtask, &threads, &nthreads);
+ ASSERT_RET(err, "task_threads");
+ ASSERT(nthreads == 0, "nthreads 0");
+
+ test_thread_start(newtask, dummy_thread, 0);
+
+ err = task_resume(newtask);
+ ASSERT_RET(err, "task_resume");
+
+ msleep(100); // let the thread run a bit
+
+ err = task_threads(newtask, &threads, &nthreads);
+ ASSERT_RET(err, "task_threads");
+ ASSERT(nthreads == 1, "nthreads 1");
+ check_threads(threads, nthreads);
+
+ err = thread_terminate(threads[0]);
+ ASSERT_RET(err, "thread_terminate");
+
+ err = task_terminate(newtask);
+ ASSERT_RET(err, "task_terminate");
+}
+
+void test_errors()
+{
+ int err;
+ err = task_resume(MACH_PORT_NAME_DEAD);
+ ASSERT(err == MACH_SEND_INVALID_DEST, "task_resume on a dead name");
+}
+
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_task();
+ test_task_threads();
+ test_new_task();
+ test_errors();
+ return 0;
+}
diff --git a/tests/test-threads.c b/tests/test-threads.c
new file mode 100644
index 00000000..06630bef
--- /dev/null
+++ b/tests/test-threads.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include <mach/machine/thread_status.h>
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach.user.h>
+
+void sleeping_thread(void* arg)
+{
+ printf("starting thread %d\n", arg);
+ for (int i=0; i<100; i++)
+ msleep(50);
+ printf("stopping thread %d\n", arg);
+ thread_terminate(mach_thread_self());
+ FAILURE("thread_terminate");
+}
+
+void test_many(void)
+{
+ for (long tid=0; tid<10; tid++)
+ {
+ test_thread_start(mach_task_self(), sleeping_thread, (void*)tid);
+ }
+ // TODO: wait for thread end notifications
+ msleep(6000);
+}
+
+#ifdef __x86_64__
+void test_fsgs_base_thread(void* tid)
+{
+ int err;
+#if defined(__SEG_FS) && defined(__SEG_GS)
+ long __seg_fs *fs_ptr;
+ long __seg_gs *gs_ptr;
+ long fs_value;
+ long gs_value;
+
+ struct i386_fsgs_base_state state;
+ state.fs_base = (unsigned long)&fs_value;
+ state.gs_base = (unsigned long)&gs_value;
+ err = thread_set_state(mach_thread_self(), i386_FSGS_BASE_STATE,
+ (thread_state_t) &state, i386_FSGS_BASE_STATE_COUNT);
+ ASSERT_RET(err, "thread_set_state");
+
+ fs_value = 0x100 + (long)tid;
+ gs_value = 0x200 + (long)tid;
+
+ msleep(50); // allow the others to set their segment base
+
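+ /* fs_ptr/gs_ptr are offset 0 relative to the FS/GS bases set above, so
+ dereferencing them should read back this thread's fs_value/gs_value. */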
+ fs_ptr = 0;
+ gs_ptr = 0;
+ long rdvalue = *fs_ptr;
+ printf("FS expected %lx read %lx\n", fs_value, rdvalue);
+ ASSERT(fs_value == rdvalue, "FS base error\n");
+ rdvalue = *gs_ptr;
+ printf("GS expected %lx read %lx\n", gs_value, rdvalue);
+ ASSERT(gs_value == rdvalue, "GS base error\n");
+#else
+#error " missing __SEG_FS and __SEG_GS"
+#endif
+
+ thread_terminate(mach_thread_self());
+ FAILURE("thread_terminate");
+}
+#endif
+
+void test_fsgs_base(void)
+{
+#ifdef __x86_64__
+ for (long tid=0; tid<10; tid++)
+ {
+ test_thread_start(mach_task_self(), test_fsgs_base_thread, (void*)tid);
+ }
+ msleep(1000); // TODO: wait for threads
+#endif
+}
+
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ test_fsgs_base();
+ test_many();
+ return 0;
+}
diff --git a/tests/test-vm.c b/tests/test-vm.c
new file mode 100644
index 00000000..4ece792e
--- /dev/null
+++ b/tests/test-vm.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <syscalls.h>
+#include <testlib.h>
+
+#include <mach/machine/vm_param.h>
+#include <mach/std_types.h>
+#include <mach/mach_types.h>
+#include <mach/vm_wire.h>
+#include <mach/vm_param.h>
+
+#include <device.user.h>
+#include <gnumach.user.h>
+#include <mach.user.h>
+#include <mach_port.user.h>
+
+
+static void test_memobj()
+{
+ // this emulates maptime() mapping and reading
+ struct mapped_time_value *mtime;
+ int64_t secs, usecs;
+ mach_port_t device, memobj;
+ int err;
+
+ err = device_open (device_priv(), 0, "time", &device);
+ ASSERT_RET(err, "device_open");
+ err = device_map (device, VM_PROT_READ, 0, sizeof(*mtime), &memobj, 0);
+ ASSERT_RET(err, "device_map");
+ err = mach_port_deallocate (mach_task_self (), device);
+ ASSERT_RET(err, "mach_port_deallocate");
+ mtime = 0;
+ err = vm_map(mach_task_self (), (vm_address_t *)&mtime, sizeof *mtime, 0, 1,
+ memobj, 0, 0, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
+ ASSERT_RET(err, "vm_map");
+
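+ /* Retry until seconds matches check_seconds: the usual lock-free snapshot
+ protocol for the kernel's mapped time page. */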
+ do
+ {
+ secs = mtime->seconds;
+ __sync_synchronize ();
+ usecs = mtime->microseconds;
+ __sync_synchronize ();
+ }
+ while (secs != mtime->check_seconds);
+ printf("mapped time is %lld.%lld\n",secs, usecs);
+
+ err = mach_port_deallocate (mach_task_self (), memobj);
+ ASSERT_RET(err, "mach_port_deallocate");
+ err = vm_deallocate(mach_task_self(), (vm_address_t)mtime, sizeof(*mtime));
+ ASSERT_RET(err, "vm_deallocate");
+}
+
+static void test_wire()
+{
+ int err = vm_wire_all(host_priv(), mach_task_self(), VM_WIRE_ALL);
+ ASSERT_RET(err, "vm_wire_all-ALL");
+ err = vm_wire_all(host_priv(), mach_task_self(), VM_WIRE_NONE);
+ ASSERT_RET(err, "vm_wire_all-NONE");
+ // TODO check that all memory is actually wired or unwired
+}
+
+int main(int argc, char *argv[], int envc, char *envp[])
+{
+ printf("VM_MIN_ADDRESS=0x%p\n", VM_MIN_ADDRESS);
+ printf("VM_MAX_ADDRESS=0x%p\n", VM_MAX_ADDRESS);
+ test_wire();
+ test_memobj();
+ return 0;
+}
diff --git a/tests/testlib.c b/tests/testlib.c
new file mode 100644
index 00000000..2eaeb591
--- /dev/null
+++ b/tests/testlib.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2024 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <testlib.h>
+
+#include <device/cons.h>
+#include <mach/kern_return.h>
+#include <mach/message.h>
+#include <mach/mig_errors.h>
+#include <mach/vm_param.h>
+
+#include <mach.user.h>
+#include <mach_host.user.h>
+
+
+static int argc = 0;
+static char *argv_unknown[] = {"unknown", "m1", "123", "456"};
+static char **argv = argv_unknown;
+static char **envp = NULL;
+static int envc = 0;
+
+static mach_port_t host_priv_port = 1;
+static mach_port_t device_master_port = 2;
+
+void cnputc(char c, vm_offset_t cookie)
+{
+ char buf[2] = {c, 0};
+ mach_print(buf);
+}
+
+mach_port_t host_priv(void)
+{
+ return host_priv_port;
+}
+
+mach_port_t device_priv(void)
+{
+ return device_master_port;
+}
+
+void halt()
+{
+ int ret = host_reboot(host_priv_port, 0);
+ ASSERT_RET(ret, "host_reboot() failed!");
+ while (1)
+ ;
+}
+
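+/* Sleep by doing a timed receive on a fresh reply port that nobody will ever
+ send to; mach_msg() returns once the timeout (in milliseconds) expires. */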
+int msleep(uint32_t timeout)
+{
+ mach_port_t recv = mach_reply_port();
+ return mach_msg(NULL, MACH_RCV_MSG|MACH_RCV_TIMEOUT|MACH_RCV_INTERRUPT,
+ 0, 0, recv, timeout, MACH_PORT_NULL);
+}
+
+const char* e2s(int err)
+{
+ const char* s = e2s_gnumach(err);
+ if (s != NULL)
+ return s;
+ else
+ switch (err)
+ {
+ default: return "unknown";
+ }
+}
+
+/*
+ * Minimal _start() for test modules: we just take the arguments from the
+ * kernel, call main() and reboot. As in glibc, we expect the argument pointer
+ * as the first argument.
+ */
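+/* The boot command line is expected to pass the host-privileged and
+ device-master port names as argv[1] and argv[2]; mach_atoi() turns them back
+ into numeric port names (the exact syntax lives in the grub.cfg template). */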
+void __attribute__((used, retain))
+c_start(void **argptr)
+{
+ intptr_t* argcptr = (intptr_t*)argptr;
+ argc = argcptr[0];
+ argv = (char **) &argcptr[1];
+ envp = &argv[argc + 1];
+ envc = 0;
+
+ while (envp[envc])
+ ++envc;
+
+ mach_atoi(argv[1], &host_priv_port);
+ mach_atoi(argv[2], &device_master_port);
+
+ printf("started %s", argv[0]);
+ for (int i=1; i<argc; i++)
+ {
+ printf(" %s", argv[i]);
+ }
+ printf("\n");
+
+ int ret = main(argc, argv, envc, envp);
+
+ printf("%s: test %s exit code %x\n", TEST_SUCCESS_MARKER, argv[0], ret);
+ halt();
+}
diff --git a/tests/testlib_thread_start.c b/tests/testlib_thread_start.c
new file mode 100644
index 00000000..fa8af0ea
--- /dev/null
+++ b/tests/testlib_thread_start.c
@@ -0,0 +1,86 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2017 Luc Chabassier
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This small helper was started from
+ * https://github.com/dwarfmaster/mach-ipc/blob/master/minimal_threads/main.c
+ * and then reworked. */
+
+#include <testlib.h>
+#include <mach/vm_param.h>
+#include <mach.user.h>
+
+/* This is just a temporary mapping to set up the stack */
+static long stack_top[PAGE_SIZE/sizeof(long)] __attribute__ ((aligned (PAGE_SIZE)));
+
+thread_t test_thread_start(task_t task, void(*routine)(void*), void* arg) {
+ const vm_size_t stack_size = PAGE_SIZE * 16;
+ kern_return_t ret;
+ vm_address_t stack;
+
+ ret = vm_allocate(task, &stack, stack_size, TRUE);
+ ASSERT_RET(ret, "can't allocate the stack for a new thread");
+
+ ret = vm_protect(task, stack, PAGE_SIZE, FALSE, VM_PROT_NONE);
+ ASSERT_RET(ret, "can't protect the stack from overflows");
+
+ long *top = (long*)((vm_offset_t)stack_top + PAGE_SIZE) - 1;
+#ifdef __i386__
+ *top = (long)arg; /* The argument is passed on the stack on x86_32 */
+ *(top - 1) = 0; /* The return address */
+#elif defined(__x86_64__)
+ *top = 0; /* The return address */
+#endif
+ ret = vm_write(task, stack + stack_size - PAGE_SIZE, (vm_offset_t)stack_top, PAGE_SIZE);
+ ASSERT_RET(ret, "can't initialize the stack for the new thread");
+
+ thread_t thread;
+ ret = thread_create(task, &thread);
+ ASSERT_RET(ret, "thread_create()");
+
+ struct i386_thread_state state;
+ unsigned int count;
+ count = i386_THREAD_STATE_COUNT;
+ ret = thread_get_state(thread, i386_REGS_SEGS_STATE,
+ (thread_state_t) &state, &count);
+ ASSERT_RET(ret, "thread_get_state()");
+
+#ifdef __i386__
+ state.eip = (long) routine;
+ state.uesp = (long) (stack + stack_size - sizeof(long) * 2);
+ state.ebp = 0;
+#elif defined(__x86_64__)
+ state.rip = (long) routine;
+ state.ursp = (long) (stack + stack_size - sizeof(long) * 1);
+ state.rbp = 0;
+ state.rdi = (long)arg;
+#endif
+ ret = thread_set_state(thread, i386_REGS_SEGS_STATE,
+ (thread_state_t) &state, i386_THREAD_STATE_COUNT);
+ ASSERT_RET(ret, "thread_set_state");
+
+ ret = thread_resume(thread);
+ ASSERT_RET(ret, "thread_resume");
+
+ return thread;
+}
diff --git a/tests/user-qemu.mk b/tests/user-qemu.mk
new file mode 100644
index 00000000..fd5ae1ab
--- /dev/null
+++ b/tests/user-qemu.mk
@@ -0,0 +1,221 @@
+# Copyright (C) 2024 Free Software Foundation
+
+# This program is free software ; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation ; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY ; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the program ; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+#
+# MIG stubs generation for user-space tests
+#
+
+MACH_TESTINSTALL = $(builddir)/tests/include-mach
+MACH_TESTINCLUDE = $(MACH_TESTINSTALL)/$(prefix)/include
+
+MIGCOMUSER = $(USER_MIG) -n -cc cat - /dev/null
+MIG_OUTDIR = $(builddir)/tests/mig-out
+MIG_CPPFLAGS = -x c -nostdinc -I$(MACH_TESTINCLUDE)
+
+# FIXME: how can we reliably detect a change in any header and reinstall them?
+$(MACH_TESTINSTALL):
+ mkdir -p $@
+ $(MAKE) install-data DESTDIR=$@
+
+prepare-test: $(MACH_TESTINSTALL)
+
+$(MIG_OUTDIR):
+ mkdir -p $@
+
+define generate_mig_client
+$(MIG_OUTDIR)/$(2).user.c: prepare-test $(MIG_OUTDIR) $(MACH_TESTINCLUDE)/$(1)/$(2).defs
+ $(USER_CPP) $(USER_CPPFLAGS) $(MIG_CPPFLAGS) \
+ -o $(MIG_OUTDIR)/$(2).user.defs \
+ $(MACH_TESTINCLUDE)/$(1)/$(2).defs
+ $(MIGCOMUSER) $(MIGCOMFLAGS) $(MIGCOMUFLAGS) \
+ -user $(MIG_OUTDIR)/$(2).user.c \
+ -header $(MIG_OUTDIR)/$(2).user.h \
+ -list $(MIG_OUTDIR)/$(2).user.msgids \
+ < $(MIG_OUTDIR)/$(2).user.defs
+endef
+
+define generate_mig_server
+$(MIG_OUTDIR)/$(2).server.c: prepare-test $(MIG_OUTDIR) $(srcdir)/include/$(1)/$(2).defs
+ $(USER_CPP) $(USER_CPPFLAGS) $(MIG_CPPFLAGS) \
+ -o $(MIG_OUTDIR)/$(2).server.defs \
+ $(srcdir)/include/$(1)/$(2).defs
+ $(MIGCOMUSER) $(MIGCOMFLAGS) $(MIGCOMUFLAGS) \
+ -server $(MIG_OUTDIR)/$(2).server.c \
+ -header $(MIG_OUTDIR)/$(2).server.h \
+ -list $(MIG_OUTDIR)/$(2).server.msgids \
+ < $(MIG_OUTDIR)/$(2).server.defs
+endef
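+# For example, $(call generate_mig_client,mach,mach_host) expands to a rule
+# that preprocesses mach_host.defs from the installed test headers and runs
+# MIG on it, producing mach_host.user.c, mach_host.user.h and a .msgids list
+# under $(MIG_OUTDIR).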
+
+# These are all the IPC interfaces implemented in the kernel, either as a server or as a client.
+# Files are sorted as in
+# find builddir/tests/include-mach/ -name *.defs | grep -v types | sort
+# Change eval to info to debug the generated rules.
+$(eval $(call generate_mig_client,device,device))
+$(eval $(call generate_mig_client,device,device_reply))
+$(eval $(call generate_mig_client,device,device_request))
+$(eval $(call generate_mig_client,mach_debug,mach_debug))
+# default_pager.defs?
+$(eval $(call generate_mig_server,mach,exc))
+# experimental.defs?
+$(eval $(call generate_mig_client,mach,gnumach))
+$(eval $(call generate_mig_client,mach,mach4))
+$(eval $(call generate_mig_client,mach,mach))
+$(eval $(call generate_mig_client,mach,mach_host))
+$(eval $(call generate_mig_client,mach,mach_port))
+# memory_object{_default}.defs?
+# notify.defs?
+$(eval $(call generate_mig_server,mach,task_notify))
+if HOST_ix86
+$(eval $(call generate_mig_client,mach/i386,mach_i386))
+endif
+if HOST_x86_64
+$(eval $(call generate_mig_client,mach/x86_64,mach_i386))
+endif
+
+# NOTE: keep in sync with the rules above
+MIG_GEN_CC = \
+ $(MIG_OUTDIR)/device.user.c \
+ $(MIG_OUTDIR)/device_reply.user.c \
+ $(MIG_OUTDIR)/device_request.user.c \
+ $(MIG_OUTDIR)/mach_debug.user.c \
+ $(MIG_OUTDIR)/exc.server.c \
+ $(MIG_OUTDIR)/gnumach.user.c \
+ $(MIG_OUTDIR)/mach4.user.c \
+ $(MIG_OUTDIR)/mach.user.c \
+ $(MIG_OUTDIR)/mach_host.user.c \
+ $(MIG_OUTDIR)/mach_port.user.c \
+ $(MIG_OUTDIR)/task_notify.server.c \
+ $(MIG_OUTDIR)/mach_i386.user.c
+
+#
+# compilation of user space tests and utilities
+#
+
+TEST_START_MARKER = booting-start-of-test
+TEST_SUCCESS_MARKER = gnumach-test-success-and-reboot
+TEST_FAILURE_MARKER = gnumach-test-failure
+
+TESTCFLAGS = -static -nostartfiles -nolibc \
+ -ffreestanding \
+ -ftrivial-auto-var-init=pattern \
+ -I$(srcdir)/tests/include \
+ -I$(MACH_TESTINCLUDE) \
+ -I$(MIG_OUTDIR) \
+ -ggdb3 \
+ -DMIG_EOPNOTSUPP
+
+SRC_TESTLIB= \
+ $(srcdir)/i386/i386/strings.c \
+ $(srcdir)/kern/printf.c \
+ $(srcdir)/kern/strings.c \
+ $(srcdir)/util/atoi.c \
+ $(srcdir)/tests/syscalls.S \
+ $(srcdir)/tests/start.S \
+ $(srcdir)/tests/testlib.c \
+ $(srcdir)/tests/testlib_thread_start.c \
+ $(builddir)/tests/errlist.c \
+ $(MIG_GEN_CC)
+
+tests/errlist.c: $(addprefix $(srcdir)/include/mach/,message.h kern_return.h mig_errors.h)
+ echo "/* autogenerated file */" >$@
+ echo "#include <mach/message.h>" >>$@
+ echo "#include <mach/kern_return.h>" >>$@
+ echo "#include <mach/mig_errors.h>" >>$@
+ echo "#include <testlib.h>" >>$@
+ echo "#include <stddef.h>" >>$@
+ echo "const char* TEST_SUCCESS_MARKER = \"$(TEST_SUCCESS_MARKER)\";" >>$@
+ echo "const char* TEST_FAILURE_MARKER = \"$(TEST_FAILURE_MARKER)\";" >>$@
+ echo "const char* e2s_gnumach(int err) { switch (err) {" >>$@
+ grep "define[[:space:]]MIG" $(srcdir)/include/mach/mig_errors.h | \
+ awk '{printf " case %s: return \"%s\";\n", $$2, $$2}' >>$@
+ grep "define[[:space:]]KERN" $(srcdir)/include/mach/kern_return.h | \
+ awk '{printf " case %s: return \"%s\";\n", $$2, $$2}' >>$@
+ awk 'f;/MACH_MSG_SUCCESS/{f=1}' $(srcdir)/include/mach/message.h | \
+ grep "define[[:space:]]MACH" | \
+ awk '{printf " case %s: return \"%s\";\n", $$2, $$2}' >>$@
+ echo " default: return NULL;" >>$@
+ echo "}}" >>$@
+
+tests/module-%: $(srcdir)/tests/test-%.c $(SRC_TESTLIB) $(MACH_TESTINSTALL)
+ $(USER_CC) $(USER_CFLAGS) $(TESTCFLAGS) $< $(SRC_TESTLIB) -o $@
+
+#
+# packaging of qemu bootable image and test runner
+#
+
+GNUMACH_ARGS = console=com0
+QEMU_OPTS = -m 2048 -nographic -no-reboot -boot d
+QEMU_GDB_PORT ?= 1234
+
+if HOST_ix86
+QEMU_BIN = qemu-system-i386
+QEMU_OPTS += -cpu pentium3-v1
+endif
+if HOST_x86_64
+QEMU_BIN = qemu-system-x86_64
+QEMU_OPTS += -cpu core2duo-v1
+endif
+
+tests/test-%.iso: tests/module-% gnumach $(srcdir)/tests/grub.cfg.single.template
+ rm -rf $(builddir)/tests/isofiles
+ mkdir -p $(builddir)/tests/isofiles/boot/grub/
+ < $(srcdir)/tests/grub.cfg.single.template \
+ sed -e "s|BOOTMODULE|$(notdir $<)|g" \
+ -e "s/GNUMACHARGS/$(GNUMACH_ARGS)/g" \
+ -e "s/TEST_START_MARKER/$(TEST_START_MARKER)/g" \
+ >$(builddir)/tests/isofiles/boot/grub/grub.cfg
+ cp gnumach $< $(builddir)/tests/isofiles/boot/
+ grub-mkrescue -o $@ $(builddir)/tests/isofiles
+
+tests/test-%: tests/test-%.iso $(srcdir)/tests/run-qemu.sh.template
+ < $(srcdir)/tests/run-qemu.sh.template \
+ sed -e "s|TESTNAME|$(subst tests/test-,,$@)|g" \
+ -e "s/QEMU_OPTS/$(QEMU_OPTS)/g" \
+ -e "s/QEMU_BIN/$(QEMU_BIN)/g" \
+ -e "s/TEST_START_MARKER/$(TEST_START_MARKER)/g" \
+ -e "s/TEST_SUCCESS_MARKER/$(TEST_SUCCESS_MARKER)/g" \
+ -e "s/TEST_FAILURE_MARKER/$(TEST_FAILURE_MARKER)/g" \
+ >$@
+ chmod +x $@
+
+clean-test-%:
+ rm -f tests/test-$*.iso tests/module-$* tests/test-$**
+
+
+USER_TESTS := \
+ tests/test-hello \
+ tests/test-mach_host \
+ tests/test-gsync \
+ tests/test-mach_port \
+ tests/test-vm \
+ tests/test-syscalls \
+ tests/test-machmsg \
+ tests/test-task \
+ tests/test-threads
+
+USER_TESTS_CLEAN = $(subst tests/,clean-,$(USER_TESTS))
+
+#
+# helpers for interactive test run and debug
+#
+
+run-%: tests/test-%
+ $^
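+# e.g. `make run-hello` rebuilds tests/test-hello if needed and boots it in qemu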
+
+# don't reuse the launcher script as the timeout would kill the debug session
+debug-%: tests/test-%.iso
+ $(QEMU_BIN) $(QEMU_OPTS) -cdrom $< -gdb tcp::$(QEMU_GDB_PORT) -S \
+ | sed -n "/$(TEST_START_MARKER)/"',$$p'
diff --git a/util/byteorder.c b/util/byteorder.c
new file mode 100644
index 00000000..0629b31e
--- /dev/null
+++ b/util/byteorder.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Mach.
+ *
+ * GNU Mach is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "util/byteorder.h"
+
+uint16_t ntohs(uint16_t netshort) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return __builtin_bswap16(netshort);
+#else
+ return netshort;
+#endif
+}
+
+uint32_t ntohl(uint32_t netlong) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return __builtin_bswap32(netlong);
+#else
+ return netlong;
+#endif
+}
+
+uint16_t htons(uint16_t hostshort) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return __builtin_bswap16(hostshort);
+#else
+ return hostshort;
+#endif
+}
+
+uint32_t htonl(uint32_t hostlong) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return __builtin_bswap32(hostlong);
+#else
+ return hostlong;
+#endif
+}
diff --git a/include/stddef.h b/util/byteorder.h
index dd677348..fbec39e3 100644
--- a/include/stddef.h
+++ b/util/byteorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2022 Free Software Foundation, Inc.
*
* This file is part of GNU Mach.
*
@@ -7,23 +7,26 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any later
* version.
- *
+ *
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
- *
+ *
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-
-#ifndef _STDDEF_H_
-#define _STDDEF_H_
-/* From GCC's `/lib/gcc/X/X/include/stddef.h'. */
+#ifndef _UTIL_BYTEORDER_H_
+#define _UTIL_BYTEORDER_H_
+
+#include <stdint.h>
+
+uint16_t ntohs(uint16_t netshort);
+uint32_t ntohl(uint32_t netlong);
-/* Offset of member MEMBER in a struct of type TYPE. */
-#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+uint16_t htons(uint16_t hostshort);
+uint32_t htonl(uint32_t hostlong);
-#endif /* _STDDEF_H_ */
+#endif /* _UTIL_BYTEORDER_H_ */
diff --git a/util/putchar.c b/util/putchar.c
deleted file mode 100644
index 6f8e18ea..00000000
--- a/util/putchar.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 1995 The University of Utah and
- * the Computer Systems Laboratory at the University of Utah (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the
- * Computer Systems Laboratory at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Author: Bryan Ford, University of Utah CSL
- */
-
-#include <device/cons.h>
-
-int putchar(int c)
-{
- cnputc(c);
- return c;
-}
-
diff --git a/util/putchar.h b/util/putchar.h
deleted file mode 100644
index 2e65bd56..00000000
--- a/util/putchar.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2008 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Author: Barry deFreese.
- */
-/*
- * Simple putchar implementation header.
- *
- */
-
-#ifndef _PUTCHAR_H_
-#define _PUTCHAR_H_
-
-#include <mach/std_types.h>
-
-extern int putchar(int c);
-
-#endif /* _PUTCHAR_H_ */
diff --git a/util/puts.c b/util/puts.c
deleted file mode 100644
index 0bd72e98..00000000
--- a/util/puts.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 1995 The University of Utah and
- * the Computer Systems Laboratory at the University of Utah (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the
- * Computer Systems Laboratory at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Author: Bryan Ford, University of Utah CSL
- */
-
-#include <device/cons.h>
-#include <util/putchar.h>
-
-/* Simple puts() implementation that just uses putchar().
- Note that our libc's printf() is implemented
- in terms of only puts() and putchar(), so that's all we need. */
-int puts(const char *s)
-{
- while (*s)
- {
- putchar(*s);
- s++;
- }
- putchar('\n');
- return 0;
-}
-
diff --git a/vm/memory_object.c b/vm/memory_object.c
index ad93f87c..1ea59563 100644
--- a/vm/memory_object.c
+++ b/vm/memory_object.c
@@ -61,6 +61,7 @@
#include <kern/debug.h> /* For panic() */
#include <kern/thread.h> /* For current_thread() */
#include <kern/host.h>
+#include <kern/mach.server.h> /* For rpc prototypes */
#include <vm/vm_kern.h> /* For kernel_map, vm_move */
#include <vm/vm_map.h> /* For vm_map_pageable */
#include <ipc/ipc_port.h>
@@ -73,7 +74,7 @@ typedef int memory_object_lock_result_t; /* moved from below */
ipc_port_t memory_manager_default = IP_NULL;
-decl_simple_lock_data(,memory_manager_default_lock)
+def_simple_lock_data(static,memory_manager_default_lock)
/*
* Important note:
@@ -85,7 +86,7 @@ decl_simple_lock_data(,memory_manager_default_lock)
kern_return_t memory_object_data_supply(
vm_object_t object,
vm_offset_t offset,
- vm_map_copy_t data_copy,
+ vm_offset_t vm_data_copy,
unsigned int data_cnt,
vm_prot_t lock_value,
boolean_t precious,
@@ -100,6 +101,7 @@ kern_return_t memory_object_data_supply(
vm_offset_t original_offset;
vm_page_t *page_list;
boolean_t was_absent;
+ vm_map_copy_t data_copy = (vm_map_copy_t)vm_data_copy;
vm_map_copy_t orig_copy = data_copy;
/*
@@ -425,7 +427,7 @@ kern_return_t memory_object_data_unavailable(
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
-memory_object_lock_result_t memory_object_lock_page(
+static memory_object_lock_result_t memory_object_lock_page(
vm_page_t m,
memory_object_return_t should_return,
boolean_t should_flush,
@@ -976,9 +978,9 @@ kern_return_t memory_object_get_attributes(
/*
* If successful, consumes the supplied naked send right.
*/
-kern_return_t vm_set_default_memory_manager(host, default_manager)
- const host_t host;
- ipc_port_t *default_manager;
+kern_return_t vm_set_default_memory_manager(
+ const host_t host,
+ ipc_port_t *default_manager)
{
ipc_port_t current_manager;
ipc_port_t new_manager;
@@ -1058,8 +1060,7 @@ ipc_port_t memory_manager_default_reference(void)
* know when it should keep memory wired.
*/
-boolean_t memory_manager_default_port(port)
- const ipc_port_t port;
+boolean_t memory_manager_default_port(const ipc_port_t port)
{
ipc_port_t current;
boolean_t result;
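The data_supply change above shows the usual pattern when a routine is matched against its MIG server prototype: the out-of-line payload arrives as a plain vm_offset_t and is cast back to vm_map_copy_t inside the function. A minimal illustrative sketch (hypothetical function name, not part of the patch):

	/* Sketch only: recover the typed copy object from the RPC-visible argument. */
	kern_return_t example_data_supply(vm_object_t object, vm_offset_t vm_data_copy)
	{
		vm_map_copy_t data_copy = (vm_map_copy_t) vm_data_copy;

		(void) object;
		/* ... use data_copy exactly as the old vm_map_copy_t parameter ... */
		return KERN_SUCCESS;
	}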
diff --git a/vm/memory_object_proxy.c b/vm/memory_object_proxy.c
index 4d50bab4..57243497 100644
--- a/vm/memory_object_proxy.c
+++ b/vm/memory_object_proxy.c
@@ -133,9 +133,9 @@ memory_object_proxy_notify (mach_msg_header_t *msg)
kern_return_t
memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection,
ipc_port_t *object, natural_t object_count,
- vm_offset_t *offset, natural_t offset_count,
- vm_offset_t *start, natural_t start_count,
- vm_size_t *len, natural_t len_count,
+ rpc_vm_offset_t *offset, natural_t offset_count,
+ rpc_vm_offset_t *start, natural_t start_count,
+ rpc_vm_size_t *len, natural_t len_count,
ipc_port_t *port)
{
memory_object_proxy_t proxy;
diff --git a/vm/memory_object_proxy.h b/vm/memory_object_proxy.h
index 97f20b36..8b3f2025 100644
--- a/vm/memory_object_proxy.h
+++ b/vm/memory_object_proxy.h
@@ -36,12 +36,4 @@ extern kern_return_t memory_object_proxy_lookup (ipc_port_t port,
vm_offset_t *start,
vm_offset_t *len);
-extern kern_return_t
-memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection,
- ipc_port_t *object, natural_t object_count,
- vm_offset_t *offset, natural_t offset_count,
- vm_offset_t *start, natural_t start_count,
- vm_size_t *len, natural_t len_count,
- ipc_port_t *port);
-
#endif /* _VM_MEMORY_OBJECT_PROXY_H_ */
diff --git a/vm/pmap.h b/vm/pmap.h
index 2201b444..aca9ada8 100644
--- a/vm/pmap.h
+++ b/vm/pmap.h
@@ -157,6 +157,10 @@ boolean_t pmap_is_modified(phys_addr_t pa);
extern phys_addr_t pmap_extract(pmap_t, vm_offset_t);
/* Perform garbage collection, if any. */
extern void pmap_collect(pmap_t);
+
+/* Lookup an address. */
+int pmap_whatis(pmap_t, vm_offset_t);
+
/* Specify pageability. */
extern void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t);
@@ -194,7 +198,6 @@ extern void pmap_pageable(
* Back-door routine for mapping kernel VM at initialization.
* Useful for mapping memory outside the range of direct mapped
* physical memory (i.e., devices).
- * Otherwise like pmap_map.
*/
extern vm_offset_t pmap_map_bd(
vm_offset_t virt,
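pmap_whatis is a debugging helper; a hedged usage sketch follows (the meaning of the return value is assumed to be pmap-specific and is not defined by this header):

	/* Sketch only: ask the pmap layer what a virtual address corresponds to. */
	void example_whatis(pmap_t pmap, vm_offset_t va)
	{
		int what = pmap_whatis(pmap, va);
		printf("pmap_whatis(%p, 0x%lx) = %d\n", pmap, (unsigned long) va, what);
	}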
diff --git a/vm/vm_debug.c b/vm/vm_debug.c
index 2dff2296..b0dace80 100644
--- a/vm/vm_debug.c
+++ b/vm/vm_debug.c
@@ -46,8 +46,10 @@
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
+#include <kern/mach_debug.server.h>
#include <kern/task.h>
#include <kern/host.h>
+#include <kern/printf.h>
#include <ipc/ipc_port.h>
@@ -64,7 +66,7 @@
* or IP_NULL if the object or its name port is null.
*/
-ipc_port_t
+static ipc_port_t
vm_object_real_name(vm_object_t object)
{
ipc_port_t port = IP_NULL;
@@ -263,7 +265,7 @@ mach_vm_object_info(
VPI_STATE_PRIVATE|VPI_STATE_ABSENT)
/*
- * Routine: mach_vm_object_pages [kernel call]
+ * Routine: mach_vm_object_pages/mach_vm_object_pages_phys/ [kernel call]
* Purpose:
* Retrieve information about the pages in a VM object.
* Conditions:
@@ -274,15 +276,16 @@ mach_vm_object_info(
* KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
*/
-kern_return_t
-mach_vm_object_pages(
+static kern_return_t
+_mach_vm_object_pages(
vm_object_t object,
- vm_page_info_array_t *pagesp,
- natural_t *countp)
+ void* *pagesp,
+ natural_t *countp,
+ int phys)
{
vm_size_t size;
vm_offset_t addr;
- vm_page_info_t *pages;
+ void *pages;
unsigned int potential, actual, count;
vm_page_t p;
kern_return_t kr;
@@ -305,28 +308,52 @@ mach_vm_object_pages(
if (pages != *pagesp)
kmem_free(ipc_kernel_map, addr, size);
- size = round_page(actual * sizeof *pages);
+ if (phys)
+ size = round_page(actual * sizeof(vm_page_phys_info_t));
+ else
+ size = round_page(actual * sizeof(vm_page_info_t));
kr = kmem_alloc(ipc_kernel_map, &addr, size);
if (kr != KERN_SUCCESS)
return kr;
- pages = (vm_page_info_t *) addr;
- potential = size/sizeof *pages;
+ pages = (void *) addr;
+ if (phys)
+ potential = size / sizeof(vm_page_phys_info_t);
+ else
+ potential = size / sizeof(vm_page_info_t);
}
/* object is locked, we have enough wired memory */
count = 0;
queue_iterate(&object->memq, p, vm_page_t, listq) {
- vm_page_info_t *info = &pages[count++];
+ vm_page_info_t *info = NULL;
+ vm_page_phys_info_t *info_phys = NULL;
+
+ if (phys)
+ info_phys = pages + count * sizeof(*info_phys);
+ else
+ info = pages + count * sizeof(*info);
+ count++;
+
vm_page_info_state_t state = 0;
- info->vpi_offset = p->offset;
- if (p->phys_addr != (typeof(info->vpi_phys_addr)) p->phys_addr)
- printf("warning: physical address overflow in mach_vm_object_pages!!");
- info->vpi_phys_addr = p->phys_addr;
- info->vpi_wire_count = p->wire_count;
- info->vpi_page_lock = p->page_lock;
- info->vpi_unlock_request = p->unlock_request;
+ if (phys) {
+ info_phys->vpi_offset = p->offset;
+ if (p->phys_addr != (typeof(info_phys->vpi_phys_addr)) p->phys_addr)
+ printf("warning: physical address overflow in mach_vm_object_pages!!\n");
+ info_phys->vpi_phys_addr = p->phys_addr;
+ info_phys->vpi_wire_count = p->wire_count;
+ info_phys->vpi_page_lock = p->page_lock;
+ info_phys->vpi_unlock_request = p->unlock_request;
+ } else {
+ info->vpi_offset = p->offset;
+ if (p->phys_addr != (typeof(info->vpi_phys_addr)) p->phys_addr)
+ printf("warning: physical address overflow in mach_vm_object_pages!!\n");
+ info->vpi_phys_addr = p->phys_addr;
+ info->vpi_wire_count = p->wire_count;
+ info->vpi_page_lock = p->page_lock;
+ info->vpi_unlock_request = p->unlock_request;
+ }
if (p->busy)
state |= VPI_STATE_BUSY;
@@ -374,7 +401,10 @@ mach_vm_object_pages(
}
vm_page_unlock_queues();
- info->vpi_state = state;
+ if (phys)
+ info_phys->vpi_state = state;
+ else
+ info->vpi_state = state;
}
if (object->resident_page_count != count)
@@ -395,7 +425,10 @@ mach_vm_object_pages(
/* kmem_alloc doesn't zero memory */
- size_used = actual * sizeof *pages;
+ if (phys)
+ size_used = actual * sizeof(vm_page_phys_info_t);
+ else
+ size_used = actual * sizeof(vm_page_info_t);
rsize_used = round_page(size_used);
if (rsize_used != size)
@@ -410,13 +443,31 @@ mach_vm_object_pages(
TRUE, &copy);
assert(kr == KERN_SUCCESS);
- *pagesp = (vm_page_info_t *) copy;
+ *pagesp = (void *) copy;
*countp = actual;
}
return KERN_SUCCESS;
}
+kern_return_t
+mach_vm_object_pages(
+ vm_object_t object,
+ vm_page_info_array_t *pagesp,
+ natural_t *countp)
+{
+ return _mach_vm_object_pages(object, (void**) pagesp, countp, 0);
+}
+
+kern_return_t
+mach_vm_object_pages_phys(
+ vm_object_t object,
+ vm_page_phys_info_array_t *pagesp,
+ natural_t *countp)
+{
+ return _mach_vm_object_pages(object, (void**) pagesp, countp, 1);
+}
+
#endif /* MACH_VM_DEBUG */
/*
@@ -432,10 +483,8 @@ mach_vm_object_pages(
*/
kern_return_t
-host_virtual_physical_table_info(host, infop, countp)
- const host_t host;
- hash_info_bucket_array_t *infop;
- natural_t *countp;
+host_virtual_physical_table_info(const host_t host,
+ hash_info_bucket_array_t *infop, natural_t *countp)
{
vm_offset_t addr;
vm_size_t size = 0;/* '=0' to quiet gcc warnings */
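The worker above keeps a single code path for both record formats by picking the element size from the phys flag and indexing untyped storage; a condensed view of that pattern (illustrative only, using the function's own variables):

	/* Sketch only: one buffer, two possible record sizes. */
	size_t elt_size = phys ? sizeof(vm_page_phys_info_t) : sizeof(vm_page_info_t);
	void *record = (char *) pages + count * elt_size;	/* count-th record */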
diff --git a/vm/vm_external.c b/vm/vm_external.c
index 3b1a2879..99f4b9c3 100644
--- a/vm/vm_external.c
+++ b/vm/vm_external.c
@@ -96,9 +96,8 @@ void vm_external_destroy(vm_external_t e)
kmem_cache_free(&vm_external_cache, (vm_offset_t) e);
}
-vm_external_state_t _vm_external_state_get(e, offset)
- const vm_external_t e;
- vm_offset_t offset;
+vm_external_state_t _vm_external_state_get(const vm_external_t e,
+ vm_offset_t offset)
{
unsigned
int bit, byte;
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index df7b2c41..d99425a3 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -70,7 +70,7 @@ typedef struct vm_fault_state {
vm_offset_t vmf_vaddr;
vm_prot_t vmf_fault_type;
boolean_t vmf_change_wiring;
- void (*vmf_continuation)();
+ vm_fault_continuation_t vmf_continuation;
vm_map_version_t vmf_version;
boolean_t vmf_wired;
struct vm_object *vmf_object;
@@ -218,7 +218,7 @@ vm_fault_return_t vm_fault_page(
*/
/* More arguments: */
boolean_t resume, /* We are restarting. */
- void (*continuation)()) /* Continuation for blocking. */
+ continuation_t continuation) /* Continuation for blocking. */
{
vm_page_t m;
vm_object_t object;
@@ -347,7 +347,7 @@ vm_fault_return_t vm_fault_page(
PAGE_ASSERT_WAIT(m, interruptible);
vm_object_unlock(object);
- if (continuation != (void (*)()) 0) {
+ if (continuation != thread_no_continuation) {
vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
@@ -652,7 +652,7 @@ vm_fault_return_t vm_fault_page(
m->offset + object->paging_offset,
PAGE_SIZE, access_required)) != KERN_SUCCESS) {
if (object->pager && rc != MACH_SEND_INTERRUPTED)
- printf("%s(0x%p, 0x%p, 0x%lx, 0x%x, 0x%x) failed, %x\n",
+ printf("%s(0x%p, 0x%p, 0x%zx, 0x%x, 0x%x) failed, %x\n",
"memory_object_data_request",
object->pager,
object->pager_request,
@@ -1082,7 +1082,7 @@ vm_fault_return_t vm_fault_page(
block_and_backoff:
vm_fault_cleanup(object, first_m);
- if (continuation != (void (*)()) 0) {
+ if (continuation != thread_no_continuation) {
vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
@@ -1129,7 +1129,7 @@ vm_fault_return_t vm_fault_page(
* and deallocated when leaving vm_fault.
*/
-void
+static void
vm_fault_continue(void)
{
vm_fault_state_t *state =
@@ -1149,7 +1149,7 @@ kern_return_t vm_fault(
vm_prot_t fault_type,
boolean_t change_wiring,
boolean_t resume,
- void (*continuation)())
+ vm_fault_continuation_t continuation)
{
vm_map_version_t version; /* Map version for verification */
boolean_t wired; /* Should mapping be wired down? */
@@ -1187,7 +1187,7 @@ kern_return_t vm_fault(
goto after_vm_fault_page;
}
- if (continuation != (void (*)()) 0) {
+ if (continuation != vm_fault_no_continuation) {
/*
* We will probably need to save state.
*/
@@ -1213,7 +1213,7 @@ kern_return_t vm_fault(
* it to begin the search.
*/
- if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ if ((kr = vm_map_lookup(&map, vaddr, fault_type, FALSE, &version,
&object, &offset,
&prot, &wired)) != KERN_SUCCESS) {
goto done;
@@ -1239,7 +1239,7 @@ kern_return_t vm_fault(
object->ref_count++;
vm_object_paging_begin(object);
- if (continuation != (void (*)()) 0) {
+ if (continuation != vm_fault_no_continuation) {
vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
@@ -1293,7 +1293,7 @@ kern_return_t vm_fault(
kr = KERN_SUCCESS;
goto done;
case VM_FAULT_MEMORY_SHORTAGE:
- if (continuation != (void (*)()) 0) {
+ if (continuation != vm_fault_no_continuation) {
vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
@@ -1375,7 +1375,7 @@ kern_return_t vm_fault(
* take another fault.
*/
kr = vm_map_lookup(&map, vaddr,
- fault_type & ~VM_PROT_WRITE, &version,
+ fault_type & ~VM_PROT_WRITE, FALSE, &version,
&retry_object, &retry_offset, &retry_prot,
&wired);
@@ -1476,7 +1476,7 @@ kern_return_t vm_fault(
#undef RELEASE_PAGE
done:
- if (continuation != (void (*)()) 0) {
+ if (continuation != vm_fault_no_continuation) {
vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
@@ -1767,7 +1767,7 @@ kern_return_t vm_fault_wire_fast(
* Release a page used by vm_fault_copy.
*/
-void vm_fault_copy_cleanup(
+static void vm_fault_copy_cleanup(
vm_page_t page,
vm_page_t top_page)
{
diff --git a/vm/vm_fault.h b/vm/vm_fault.h
index 7fdbc417..ae692b11 100644
--- a/vm/vm_fault.h
+++ b/vm/vm_fault.h
@@ -49,11 +49,14 @@ typedef kern_return_t vm_fault_return_t;
#define VM_FAULT_FICTITIOUS_SHORTAGE 4
#define VM_FAULT_MEMORY_ERROR 5
+typedef void (*vm_fault_continuation_t)(kern_return_t);
+#define vm_fault_no_continuation ((vm_fault_continuation_t)0)
+
extern void vm_fault_init(void);
extern vm_fault_return_t vm_fault_page(vm_object_t, vm_offset_t, vm_prot_t,
boolean_t, boolean_t, vm_prot_t *,
vm_page_t *, vm_page_t *, boolean_t,
- void (*)());
+ continuation_t);
extern void vm_fault_cleanup(vm_object_t, vm_page_t);
/*
@@ -61,7 +64,7 @@ extern void vm_fault_cleanup(vm_object_t, vm_page_t);
*/
extern kern_return_t vm_fault(vm_map_t, vm_offset_t, vm_prot_t, boolean_t,
- boolean_t, void (*)());
+ boolean_t, vm_fault_continuation_t);
extern void vm_fault_wire(vm_map_t, vm_map_entry_t);
extern void vm_fault_unwire(vm_map_t, vm_map_entry_t);
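With the typed continuation, a synchronous caller passes vm_fault_no_continuation instead of a casted null function pointer. A minimal illustrative call (the wrapper name is hypothetical):

	/* Sketch only: fault a page in without a blocking continuation. */
	kern_return_t example_touch(vm_map_t map, vm_offset_t addr)
	{
		return vm_fault(map, trunc_page(addr), VM_PROT_READ,
				FALSE,		/* change_wiring */
				FALSE,		/* resume */
				vm_fault_no_continuation);
	}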
diff --git a/vm/vm_init.c b/vm/vm_init.c
index 23d5d46e..593af11b 100644
--- a/vm/vm_init.c
+++ b/vm/vm_init.c
@@ -38,6 +38,7 @@
#include <kern/slab.h>
#include <kern/kalloc.h>
#include <vm/vm_fault.h>
+#include <vm/vm_init.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index c624a875..51223d98 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -1014,7 +1014,7 @@ kmem_io_map_copyout(
return(ret);
}
copy->cpy_cont = vm_map_copy_discard_cont;
- copy->cpy_cont_args = (char *) new_copy;
+ copy->cpy_cont_args = (vm_map_copyin_args_t)new_copy;
copy = new_copy;
page_list = &copy->cpy_page_list[0];
}
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 7fe3e141..7db76b7b 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -43,6 +43,7 @@
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/kalloc.h>
+#include <kern/mach.server.h>
#include <kern/list.h>
#include <kern/rbtree.h>
#include <kern/slab.h>
@@ -112,8 +113,7 @@ MACRO_END
* start or end value.] Note that these clippings may not
* always be necessary (as the two resulting entries are then
* not changed); however, the clipping is done for convenience.
- * No attempt is currently made to "glue back together" two
- * abutting entries.
+ * The entries can later be "glued back together" (coalesced).
*
* The symmetric (shadow) copy strategy implements virtual copy
* by copying VM object references from one map to
@@ -280,8 +280,8 @@ void vm_map_unlock(struct vm_map *map)
#define vm_map_copy_entry_create(copy) \
_vm_map_entry_create(&(copy)->cpy_hdr)
-vm_map_entry_t _vm_map_entry_create(map_header)
- const struct vm_map_header *map_header;
+static vm_map_entry_t
+_vm_map_entry_create(const struct vm_map_header *map_header)
{
vm_map_entry_t entry;
@@ -303,9 +303,9 @@ vm_map_entry_t _vm_map_entry_create(map_header)
#define vm_map_copy_entry_dispose(map, entry) \
_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
-void _vm_map_entry_dispose(map_header, entry)
- const struct vm_map_header *map_header;
- vm_map_entry_t entry;
+static void
+_vm_map_entry_dispose(const struct vm_map_header *map_header,
+ vm_map_entry_t entry)
{
(void)map_header;
@@ -551,10 +551,12 @@ void vm_map_deallocate(vm_map_t map)
c = --map->ref_count;
simple_unlock(&map->ref_lock);
+ /* Check the refcount */
if (c > 0) {
return;
}
+ /* If no more references, call vm_map_delete without locking the map */
projected_buffer_collect(map);
(void) vm_map_delete(map, map->min_offset, map->max_offset);
@@ -635,27 +637,6 @@ boolean_t vm_map_lookup_entry(
}
/*
- * Routine: invalid_user_access
- *
- * Verifies whether user access is valid.
- */
-
-boolean_t
-invalid_user_access(
- vm_map_t map,
- vm_offset_t start,
- vm_offset_t end,
- vm_prot_t prot)
-{
- vm_map_entry_t entry;
-
- return (map == VM_MAP_NULL || map == kernel_map ||
- !vm_map_lookup_entry(map, start, &entry) ||
- entry->vme_end < end ||
- (prot & ~(entry->protection)));
-}
-
-/*
* Find a range of available space from the specified map.
*
* If successful, this function returns the map entry immediately preceding
@@ -685,16 +666,16 @@ vm_map_find_entry_anywhere(struct vm_map *map,
if (((mask + 1) & mask) != 0) {
/* We have high bits in addition to the low bits */
- int first0 = ffs(~mask); /* First zero after low bits */
+ int first0 = __builtin_ffs(~mask); /* First zero after low bits */
vm_offset_t lowmask = (1UL << (first0-1)) - 1; /* low bits */
vm_offset_t himask = mask - lowmask; /* high bits */
- int second1 = ffs(himask); /* First one after low bits */
+ int second1 = __builtin_ffs(himask); /* First one after low bits */
max = 1UL << (second1-1);
if (himask + max != 0) {
/* high bits do not continue up to the end */
- printf("invalid mask %lx\n", mask);
+ printf("invalid mask %zx\n", mask);
return NULL;
}
@@ -737,7 +718,7 @@ restart:
max_size = size + mask;
if (max_size < size) {
- printf("max_size %x got smaller than size %x with mask %lx\n",
+ printf("max_size %zd got smaller than size %zd with mask %zd\n",
max_size, size, mask);
goto error;
}
@@ -773,7 +754,7 @@ restart:
assert(end <= (entry->vme_end + entry->gap_size));
if (end > max) {
/* Does not respect the allowed maximum */
- printf("%lx does not respect %lx\n", end, max);
+ printf("%zx does not respect %zx\n", end, max);
return NULL;
}
*startp = start;
@@ -915,7 +896,7 @@ boolean_t vm_map_pmap_enter_enable = FALSE;
* In/out conditions:
* The source map should not be locked on entry.
*/
-void
+static void
vm_map_pmap_enter(
vm_map_t map,
vm_offset_t addr,
@@ -939,7 +920,7 @@ vm_map_pmap_enter(
if (vm_map_pmap_enter_print) {
printf("vm_map_pmap_enter:");
- printf("map: %p, addr: %lx, object: %p, offset: %lx\n",
+ printf("map: %p, addr: %zx, object: %p, offset: %zx\n",
map, addr, object, offset);
}
@@ -987,6 +968,7 @@ kern_return_t vm_map_enter(
vm_inherit_t inheritance)
{
vm_map_entry_t entry;
+ vm_map_entry_t next_entry;
vm_offset_t start;
vm_offset_t end;
kern_return_t result = KERN_SUCCESS;
@@ -1007,6 +989,7 @@ kern_return_t vm_map_enter(
end = start + size;
*address = start;
+ next_entry = entry->vme_next;
} else {
vm_map_entry_t temp_entry;
@@ -1041,14 +1024,15 @@ kern_return_t vm_map_enter(
RETURN(KERN_NO_SPACE);
entry = temp_entry;
+ next_entry = entry->vme_next;
/*
* ... the next region doesn't overlap the
* end point.
*/
- if ((entry->vme_next != vm_map_to_entry(map)) &&
- (entry->vme_next->vme_start < end))
+ if ((next_entry != vm_map_to_entry(map)) &&
+ (next_entry->vme_start < end))
RETURN(KERN_NO_SPACE);
}
@@ -1064,12 +1048,10 @@ kern_return_t vm_map_enter(
/*
* See whether we can avoid creating a new entry (and object) by
- * extending one of our neighbors. [So far, we only attempt to
- * extend from below.]
+ * extending one of our neighbors.
*/
- if ((object == VM_OBJECT_NULL) &&
- (entry != vm_map_to_entry(map)) &&
+ if ((entry != vm_map_to_entry(map)) &&
(entry->vme_end == start) &&
(!entry->is_shared) &&
(!entry->is_sub_map) &&
@@ -1079,20 +1061,63 @@ kern_return_t vm_map_enter(
(entry->wired_count == 0) &&
(entry->projected_on == 0)) {
if (vm_object_coalesce(entry->object.vm_object,
- VM_OBJECT_NULL,
+ object,
entry->offset,
- (vm_offset_t) 0,
+ offset,
(vm_size_t)(entry->vme_end - entry->vme_start),
- (vm_size_t)(end - entry->vme_end))) {
+ size,
+ &entry->object.vm_object,
+ &entry->offset)) {
/*
* Coalesced the two objects - can extend
* the previous map entry to include the
* new range.
*/
- map->size += (end - entry->vme_end);
+ map->size += size;
entry->vme_end = end;
vm_map_gap_update(&map->hdr, entry);
+ /*
+ * Now that we did, perhaps we could simplify
+ * things even further by coalescing the next
+ * entry into the one we just extended.
+ */
+ vm_map_coalesce_entry(map, next_entry);
+ RETURN(KERN_SUCCESS);
+ }
+ }
+ if ((next_entry != vm_map_to_entry(map)) &&
+ (next_entry->vme_start == end) &&
+ (!next_entry->is_shared) &&
+ (!next_entry->is_sub_map) &&
+ (next_entry->inheritance == inheritance) &&
+ (next_entry->protection == cur_protection) &&
+ (next_entry->max_protection == max_protection) &&
+ (next_entry->wired_count == 0) &&
+ (next_entry->projected_on == 0)) {
+ if (vm_object_coalesce(object,
+ next_entry->object.vm_object,
+ offset,
+ next_entry->offset,
+ size,
+ (vm_size_t)(next_entry->vme_end - next_entry->vme_start),
+ &next_entry->object.vm_object,
+ &next_entry->offset)) {
+
+ /*
+ * Coalesced the two objects - can extend
+ * the next map entry to include the
+ * new range.
+ */
+ map->size += size;
+ next_entry->vme_start = start;
+ vm_map_gap_update(&map->hdr, entry);
+ /*
+ * Now that we did, perhaps we could simplify
+ * things even further by coalescing the
+ * entry into the previous one.
+ */
+ vm_map_coalesce_entry(map, next_entry);
RETURN(KERN_SUCCESS);
}
}
@@ -1571,6 +1596,7 @@ kern_return_t vm_map_protect(
{
vm_map_entry_t current;
vm_map_entry_t entry;
+ vm_map_entry_t next;
vm_map_lock(map);
@@ -1632,7 +1658,8 @@ kern_return_t vm_map_protect(
*/
if ((current->protection != VM_PROT_NONE) &&
- (current->wired_access != VM_PROT_NONE)) {
+ (current->wired_access != VM_PROT_NONE ||
+ map->wiring_required)) {
current->wired_access = current->protection;
}
@@ -1645,9 +1672,16 @@ kern_return_t vm_map_protect(
current->vme_end,
current->protection);
}
- current = current->vme_next;
+
+ next = current->vme_next;
+ vm_map_coalesce_entry(map, current);
+ current = next;
}
+ next = current->vme_next;
+ if (vm_map_coalesce_entry(map, current))
+ current = next;
+
/* Returns with the map read-locked if successful */
vm_map_pageable_scan(map, entry, current);
@@ -1671,6 +1705,7 @@ kern_return_t vm_map_inherit(
{
vm_map_entry_t entry;
vm_map_entry_t temp_entry;
+ vm_map_entry_t next;
vm_map_lock(map);
@@ -1688,9 +1723,13 @@ kern_return_t vm_map_inherit(
entry->inheritance = new_inheritance;
- entry = entry->vme_next;
+ next = entry->vme_next;
+ vm_map_coalesce_entry(map, entry);
+ entry = next;
}
+ vm_map_coalesce_entry(map, entry);
+
vm_map_unlock(map);
return(KERN_SUCCESS);
}
@@ -1792,6 +1831,30 @@ kern_return_t vm_map_pageable(
return(KERN_SUCCESS);
}
+/* Update pageability of all the memory currently in the map.
+ * The map must be locked, and protection mismatch will not be checked, see
+ * vm_map_pageable().
+ */
+static kern_return_t
+vm_map_pageable_current(vm_map_t map, vm_prot_t access_type)
+{
+ struct rbtree_node *node;
+ vm_offset_t min_address, max_address;
+
+ node = rbtree_first(&map->hdr.tree);
+ min_address = rbtree_entry(node, struct vm_map_entry,
+ tree_node)->vme_start;
+
+ node = rbtree_last(&map->hdr.tree);
+ max_address = rbtree_entry(node, struct vm_map_entry,
+ tree_node)->vme_end;
+
+ /* Returns with the map read-locked if successful */
+ return vm_map_pageable(map, min_address, max_address, access_type,
+ FALSE, FALSE);
+}
+
+
/*
* vm_map_pageable_all:
*
@@ -1822,8 +1885,7 @@ vm_map_pageable_all(struct vm_map *map, vm_wire_t flags)
map->wiring_required = FALSE;
/* Returns with the map read-locked if successful */
- kr = vm_map_pageable(map, map->min_offset, map->max_offset,
- VM_PROT_NONE, FALSE, FALSE);
+ kr = vm_map_pageable_current(map, VM_PROT_NONE);
vm_map_unlock(map);
return kr;
}
@@ -1836,9 +1898,7 @@ vm_map_pageable_all(struct vm_map *map, vm_wire_t flags)
if (flags & VM_WIRE_CURRENT) {
/* Returns with the map read-locked if successful */
- kr = vm_map_pageable(map, map->min_offset, map->max_offset,
- VM_PROT_READ | VM_PROT_WRITE,
- FALSE, FALSE);
+ kr = vm_map_pageable_current(map, VM_PROT_READ | VM_PROT_WRITE);
if (kr != KERN_SUCCESS) {
if (flags & VM_WIRE_FUTURE) {
@@ -1865,11 +1925,13 @@ void vm_map_entry_delete(
vm_map_entry_t entry)
{
vm_offset_t s, e;
+ vm_size_t size;
vm_object_t object;
extern vm_object_t kernel_object;
s = entry->vme_start;
e = entry->vme_end;
+ size = e - s;
/*Check if projected buffer*/
if (map != kernel_map && entry->projected_on != 0) {
@@ -1908,15 +1970,29 @@ void vm_map_entry_delete(
if (object == kernel_object) {
vm_object_lock(object);
vm_object_page_remove(object, entry->offset,
- entry->offset + (e - s));
+ entry->offset + size);
vm_object_unlock(object);
} else if (entry->is_shared) {
vm_object_pmap_remove(object,
entry->offset,
- entry->offset + (e - s));
- }
- else {
+ entry->offset + size);
+ } else {
pmap_remove(map->pmap, s, e);
+ /*
+ * If this object has no pager and our
+ * reference to the object is the only
+ * one, we can release the deleted pages
+ * now.
+ */
+ vm_object_lock(object);
+ if ((!object->pager_created) &&
+ (object->ref_count == 1) &&
+ (object->paging_in_progress == 0)) {
+ vm_object_page_remove(object,
+ entry->offset,
+ entry->offset + size);
+ }
+ vm_object_unlock(object);
}
}
@@ -1931,7 +2007,7 @@ void vm_map_entry_delete(
vm_object_deallocate(entry->object.vm_object);
vm_map_entry_unlink(map, entry);
- map->size -= e - s;
+ map->size -= size;
vm_map_entry_dispose(map, entry);
}
@@ -1951,6 +2027,14 @@ kern_return_t vm_map_delete(
vm_map_entry_t entry;
vm_map_entry_t first_entry;
+ if (map->pmap == kernel_pmap && (start < kernel_virtual_start || end > kernel_virtual_end))
+ panic("vm_map_delete(%lx-%lx) falls in physical memory area!\n", (unsigned long) start, (unsigned long) end);
+
+ /*
+ * Must be called with map lock taken unless refcount is zero
+ */
+ assert((map->ref_count > 0 && have_lock(map->lock)) || (map->ref_count == 0));
+
/*
* Find the start of the region, and clip it
*/
@@ -2048,7 +2132,7 @@ kern_return_t vm_map_remove(
* Steal all the pages from a vm_map_copy page_list by copying ones
* that have not already been stolen.
*/
-void
+static void
vm_map_copy_steal_pages(vm_map_copy_t copy)
{
vm_page_t m, new_m;
@@ -3674,7 +3758,7 @@ kern_return_t vm_map_copyin_object(
* the scheduler.
*/
-kern_return_t vm_map_copyin_page_list_cont(
+static kern_return_t vm_map_copyin_page_list_cont(
vm_map_copyin_args_t cont_args,
vm_map_copy_t *copy_result) /* OUT */
{
@@ -3809,7 +3893,7 @@ kern_return_t vm_map_copyin_page_list(
copy->offset = src_addr;
copy->size = len;
copy->cpy_cont = ((kern_return_t (*)()) 0);
- copy->cpy_cont_args = (char *) VM_MAP_COPYIN_ARGS_NULL;
+ copy->cpy_cont_args = VM_MAP_COPYIN_ARGS_NULL;
/*
* Find the beginning of the region.
@@ -3899,7 +3983,7 @@ make_continuation:
}
cont_args->steal_pages = steal_pages;
- copy->cpy_cont_args = (char *) cont_args;
+ copy->cpy_cont_args = cont_args;
copy->cpy_cont = vm_map_copyin_page_list_cont;
src_end = src_start;
@@ -4238,7 +4322,7 @@ retry:
cont_args->destroy_len = src_end - src_start;
cont_args->steal_pages = FALSE;
- copy->cpy_cont_args = (char *) cont_args;
+ copy->cpy_cont_args = cont_args;
copy->cpy_cont = vm_map_copyin_page_list_cont;
}
@@ -4530,8 +4614,9 @@ vm_map_t vm_map_fork(vm_map_t old_map)
* In order to later verify this lookup, a "version"
* is returned.
*
- * The map should not be locked; it will not be
- * locked on exit. In order to guarantee the
+ * The map should not be locked; it will be
+ * unlocked on exit unless keep_map_locked is set and
+ * the lookup succeeds. In order to guarantee the
* existence of the returned object, it is returned
* locked.
*
@@ -4544,6 +4629,7 @@ kern_return_t vm_map_lookup(
vm_map_t *var_map, /* IN/OUT */
vm_offset_t vaddr,
vm_prot_t fault_type,
+ boolean_t keep_map_locked,
vm_map_version_t *out_version, /* OUT */
vm_object_t *object, /* OUT */
@@ -4565,7 +4651,8 @@ kern_return_t vm_map_lookup(
#define RETURN(why) \
{ \
- vm_map_unlock_read(map); \
+ if (!(keep_map_locked && (why == KERN_SUCCESS))) \
+ vm_map_unlock_read(map); \
return(why); \
}
@@ -4823,7 +4910,8 @@ vm_region_create_proxy (task_t task, vm_address_t address,
kern_return_t ret;
vm_map_entry_t entry, tmp_entry;
vm_object_t object;
- vm_offset_t offset, start;
+ rpc_vm_offset_t rpc_offset, rpc_start;
+ rpc_vm_size_t rpc_len = (rpc_vm_size_t) len;
ipc_port_t pager;
if (task == TASK_NULL)
@@ -4859,16 +4947,16 @@ vm_region_create_proxy (task_t task, vm_address_t address,
pager = ipc_port_copy_send(object->pager);
vm_object_unlock(object);
- start = (address - entry->vme_start) + entry->offset;
- offset = 0;
+ rpc_start = (address - entry->vme_start) + entry->offset;
+ rpc_offset = 0;
vm_map_unlock_read(task->map);
ret = memory_object_create_proxy(task->itk_space, max_protection,
&pager, 1,
- &offset, 1,
- &start, 1,
- &len, 1, port);
+ &rpc_offset, 1,
+ &rpc_start, 1,
+ &rpc_len, 1, port);
if (ret)
ipc_port_release_send(pager);
@@ -4876,64 +4964,80 @@ vm_region_create_proxy (task_t task, vm_address_t address,
}
/*
- * Routine: vm_map_simplify
- *
- * Description:
- * Attempt to simplify the map representation in
- * the vicinity of the given starting address.
- * Note:
- * This routine is intended primarily to keep the
- * kernel maps more compact -- they generally don't
- * benefit from the "expand a map entry" technology
- * at allocation time because the adjacent entry
- * is often wired down.
+ * Routine: vm_map_coalesce_entry
+ * Purpose:
+ * Try to coalesce an entry with the preceding entry in the map.
+ * Conditions:
+ * The map is locked. If coalesced, the entry is destroyed
+ * by the call.
+ * Returns:
+ * Whether the entry was coalesced.
*/
-void vm_map_simplify(
+boolean_t
+vm_map_coalesce_entry(
vm_map_t map,
- vm_offset_t start)
+ vm_map_entry_t entry)
{
- vm_map_entry_t this_entry;
- vm_map_entry_t prev_entry;
+ vm_map_entry_t prev = entry->vme_prev;
+ vm_size_t prev_size;
+ vm_size_t entry_size;
- vm_map_lock(map);
- if (
- (vm_map_lookup_entry(map, start, &this_entry)) &&
- ((prev_entry = this_entry->vme_prev) != vm_map_to_entry(map)) &&
-
- (prev_entry->vme_end == start) &&
-
- (prev_entry->is_shared == FALSE) &&
- (prev_entry->is_sub_map == FALSE) &&
-
- (this_entry->is_shared == FALSE) &&
- (this_entry->is_sub_map == FALSE) &&
-
- (prev_entry->inheritance == this_entry->inheritance) &&
- (prev_entry->protection == this_entry->protection) &&
- (prev_entry->max_protection == this_entry->max_protection) &&
- (prev_entry->wired_count == this_entry->wired_count) &&
-
- (prev_entry->needs_copy == this_entry->needs_copy) &&
-
- (prev_entry->object.vm_object == this_entry->object.vm_object) &&
- ((prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start))
- == this_entry->offset) &&
- (prev_entry->projected_on == 0) &&
- (this_entry->projected_on == 0)
- ) {
- if (map->first_free == this_entry)
- map->first_free = prev_entry;
-
- SAVE_HINT(map, prev_entry);
- prev_entry->vme_end = this_entry->vme_end;
- vm_map_entry_unlink(map, this_entry);
- vm_object_deallocate(this_entry->object.vm_object);
- vm_map_entry_dispose(map, this_entry);
- }
- vm_map_unlock(map);
+ /*
+ * Check the basic conditions for coalescing the two entries.
+ */
+ if ((entry == vm_map_to_entry(map)) ||
+ (prev == vm_map_to_entry(map)) ||
+ (prev->vme_end != entry->vme_start) ||
+ (prev->is_shared || entry->is_shared) ||
+ (prev->is_sub_map || entry->is_sub_map) ||
+ (prev->inheritance != entry->inheritance) ||
+ (prev->protection != entry->protection) ||
+ (prev->max_protection != entry->max_protection) ||
+ (prev->needs_copy != entry->needs_copy) ||
+ (prev->in_transition || entry->in_transition) ||
+ (prev->wired_count != entry->wired_count) ||
+ (prev->projected_on != 0) ||
+ (entry->projected_on != 0))
+ return FALSE;
+
+ prev_size = prev->vme_end - prev->vme_start;
+ entry_size = entry->vme_end - entry->vme_start;
+ assert(prev->gap_size == 0);
+
+ /*
+ * See if we can coalesce the two objects.
+ */
+ if (!vm_object_coalesce(prev->object.vm_object,
+ entry->object.vm_object,
+ prev->offset,
+ entry->offset,
+ prev_size,
+ entry_size,
+ &prev->object.vm_object,
+ &prev->offset))
+ return FALSE;
+
+ /*
+ * Update the hints.
+ */
+ if (map->hint == entry)
+ SAVE_HINT(map, prev);
+ if (map->first_free == entry)
+ map->first_free = prev;
+
+ /*
+ * Get rid of the entry without changing any wirings or the pmap,
+ * and without altering map->size.
+ */
+ prev->vme_end = entry->vme_end;
+ vm_map_entry_unlink(map, entry);
+ vm_map_entry_dispose(map, entry);
+
+ return TRUE;
}
+
/*
* Routine: vm_map_machine_attribute
* Purpose:
@@ -5078,8 +5182,7 @@ void vm_map_print(db_expr_t addr, boolean_t have_addr, db_expr_t count, const ch
* Pretty-print a copy object for ddb.
*/
-void vm_map_copy_print(copy)
- const vm_map_copy_t copy;
+void vm_map_copy_print(const vm_map_copy_t copy)
{
int i, npages;
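vm_map_coalesce_entry replaces vm_map_simplify and is intended to run right after an entry's attributes change, as in vm_map_protect and vm_map_inherit above. A condensed sketch of the calling pattern (locking and clipping elided):

	/* Sketch only: after updating entries in [start, end), try to merge neighbours.
	   The map is assumed to be locked; the call may free `entry`, so step via `next`. */
	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_entry_t next = entry->vme_next;

		entry->inheritance = new_inheritance;	/* or any other attribute update */
		vm_map_coalesce_entry(map, entry);
		entry = next;
	}
	vm_map_coalesce_entry(map, entry);		/* also try the entry just past the range */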
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 57bdf651..7e25d9f4 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -255,6 +255,10 @@ typedef struct vm_map_version {
#define VM_MAP_COPY_PAGE_LIST_MAX 64
+struct vm_map_copy;
+struct vm_map_copyin_args_data;
+typedef kern_return_t (*vm_map_copy_cont_fn)(struct vm_map_copyin_args_data*, struct vm_map_copy**);
+
typedef struct vm_map_copy {
int type;
#define VM_MAP_COPY_ENTRY_LIST 1
@@ -270,8 +274,8 @@ typedef struct vm_map_copy {
struct { /* PAGE_LIST */
vm_page_t page_list[VM_MAP_COPY_PAGE_LIST_MAX];
int npages;
- kern_return_t (*cont)();
- char *cont_args;
+ vm_map_copy_cont_fn cont;
+ struct vm_map_copyin_args_data* cont_args;
} c_p;
} c_u;
} *vm_map_copy_t;
@@ -323,7 +327,7 @@ MACRO_BEGIN \
(*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
(vm_map_copy_t *) 0); \
(old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
- (old_copy)->cpy_cont_args = (char *) 0; \
+ (old_copy)->cpy_cont_args = VM_MAP_COPYIN_ARGS_NULL; \
MACRO_END
#define vm_map_copy_has_cont(copy) \
@@ -333,14 +337,14 @@ MACRO_END
* Continuation structures for vm_map_copyin_page_list.
*/
-typedef struct {
+typedef struct vm_map_copyin_args_data {
vm_map_t map;
vm_offset_t src_addr;
vm_size_t src_len;
vm_offset_t destroy_addr;
vm_size_t destroy_len;
boolean_t steal_pages;
-} vm_map_copyin_args_data_t, *vm_map_copyin_args_t;
+} vm_map_copyin_args_data_t, *vm_map_copyin_args_t;
#define VM_MAP_COPYIN_ARGS_NULL ((vm_map_copyin_args_t) 0)
@@ -408,7 +412,7 @@ extern kern_return_t vm_map_inherit(vm_map_t, vm_offset_t, vm_offset_t,
vm_inherit_t);
/* Look up an address */
-extern kern_return_t vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t,
+extern kern_return_t vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t, boolean_t,
vm_map_version_t *, vm_object_t *,
vm_offset_t *, vm_prot_t *, boolean_t *);
/* Find a map entry */
@@ -438,6 +442,8 @@ extern vm_map_copy_t vm_map_copy_copy(vm_map_copy_t);
extern kern_return_t vm_map_copy_discard_cont(vm_map_copyin_args_t,
vm_map_copy_t *);
+extern boolean_t vm_map_coalesce_entry(vm_map_t, vm_map_entry_t);
+
/* Add or remove machine- dependent attributes from map regions */
extern kern_return_t vm_map_machine_attribute(vm_map_t, vm_offset_t,
vm_size_t,
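A continuation stored in cpy_cont must now match vm_map_copy_cont_fn exactly rather than an untyped kern_return_t (*)(). A sketch of a conforming continuation (hypothetical name):

	/* Sketch only: a continuation with the prototype required by vm_map_copy_cont_fn. */
	static kern_return_t
	example_copy_cont(struct vm_map_copyin_args_data *args, struct vm_map_copy **copy_result)
	{
		(void) args;
		*copy_result = VM_MAP_COPY_NULL;	/* produce the next chunk here */
		return KERN_SUCCESS;
	}

	/* later:  copy->cpy_cont = example_copy_cont;  copy->cpy_cont_args = cont_args; */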
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 0dc3d540..c238cce4 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -44,6 +44,7 @@
#include <ipc/ipc_space.h>
#include <kern/assert.h>
#include <kern/debug.h>
+#include <kern/mach.server.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/xpr.h>
@@ -182,7 +183,7 @@ vm_object_t kernel_object = &kernel_object_store;
*/
queue_head_t vm_object_cached_list;
-decl_simple_lock_data(,vm_object_cached_lock_data)
+def_simple_lock_data(static,vm_object_cached_lock_data)
#define vm_object_cache_lock() \
simple_lock(&vm_object_cached_lock_data)
@@ -198,7 +199,7 @@ decl_simple_lock_data(,vm_object_cached_lock_data)
*/
int vm_object_cached_pages;
-decl_simple_lock_data(,vm_object_cached_pages_lock_data)
+def_simple_lock_data(static,vm_object_cached_pages_lock_data)
/*
* Virtual memory objects are initialized from
@@ -226,7 +227,7 @@ static void _vm_object_setup(
object->size = size;
}
-vm_object_t _vm_object_allocate(
+static vm_object_t _vm_object_allocate(
vm_size_t size)
{
vm_object_t object;
@@ -725,7 +726,7 @@ void memory_object_release(
* In/out conditions:
* The object is locked on entry and exit.
*/
-void vm_object_abort_activity(
+static void vm_object_abort_activity(
vm_object_t object)
{
vm_page_t p;
@@ -1288,7 +1289,7 @@ boolean_t vm_object_copy_temporary(
* If the return value indicates an error, this parameter
* is not valid.
*/
-kern_return_t vm_object_copy_call(
+static kern_return_t vm_object_copy_call(
vm_object_t src_object,
vm_offset_t src_offset,
vm_size_t size,
@@ -2686,14 +2687,16 @@ void vm_object_page_remove(
/*
* Routine: vm_object_coalesce
- * Function: Coalesces two objects backing up adjoining
- * regions of memory into a single object.
- *
- * returns TRUE if objects were combined.
- *
- * NOTE: Only works at the moment if the second object is NULL -
- * if it's not, which object do we lock first?
- *
+ * Purpose:
+ * Tries to coalesce two objects backing up adjoining
+ * regions of memory into a single object.
+ *
+ * NOTE: Only works at the moment if one of the objects
+ * is NULL or if the objects are the same - otherwise,
+ * which object do we lock first?
+ * Returns:
+ * TRUE if objects have been coalesced.
+ * FALSE if the objects could not be coalesced.
* Parameters:
* prev_object First object to coalesce
* prev_offset Offset into prev_object
@@ -2703,8 +2706,14 @@ void vm_object_page_remove(
* prev_size Size of reference to prev_object
* next_size Size of reference to next_object
*
+ * new_object Resulting coalesced object
+ * new_offset Offset into the resulting object
* Conditions:
- * The object must *not* be locked.
+ * The objects must *not* be locked.
+ *
+ * If the objects are coalesced successfully, the caller's
+ * references for both objects are consumed, and the caller
+ * gains a reference for the new object.
*/
boolean_t vm_object_coalesce(
@@ -2713,28 +2722,60 @@ boolean_t vm_object_coalesce(
vm_offset_t prev_offset,
vm_offset_t next_offset,
vm_size_t prev_size,
- vm_size_t next_size)
+ vm_size_t next_size,
+ vm_object_t *new_object, /* OUT */
+ vm_offset_t *new_offset) /* OUT */
{
+ vm_object_t object;
vm_size_t newsize;
- if (next_object != VM_OBJECT_NULL) {
+ if (prev_object == next_object) {
+ /*
+ * If neither object actually exists,
+ * the offsets don't matter.
+ */
+ if (prev_object == VM_OBJECT_NULL) {
+ *new_object = VM_OBJECT_NULL;
+ *new_offset = 0;
+ return TRUE;
+ }
+
+ if (prev_offset + prev_size == next_offset) {
+ *new_object = prev_object;
+ *new_offset = prev_offset;
+ /*
+ * Deallocate one of the two references.
+ */
+ vm_object_deallocate(prev_object);
+ return TRUE;
+ }
+
return FALSE;
}
- if (prev_object == VM_OBJECT_NULL) {
- return TRUE;
+ if (next_object != VM_OBJECT_NULL) {
+ /*
+ * Don't know how to merge two different
+ * objects yet.
+ */
+ if (prev_object != VM_OBJECT_NULL)
+ return FALSE;
+
+ object = next_object;
+ } else {
+ object = prev_object;
}
- vm_object_lock(prev_object);
+ vm_object_lock(object);
/*
* Try to collapse the object first
*/
- vm_object_collapse(prev_object);
+ vm_object_collapse(object);
/*
* Can't coalesce if pages not mapped to
- * prev_entry may be in use anyway:
+ * the object may be in use anyway:
* . more than one reference
* . paged out
* . shadows another object
@@ -2742,33 +2783,55 @@ boolean_t vm_object_coalesce(
* . paging references (pages might be in page-list)
*/
- if ((prev_object->ref_count > 1) ||
- prev_object->pager_created ||
- prev_object->used_for_pageout ||
- (prev_object->shadow != VM_OBJECT_NULL) ||
- (prev_object->copy != VM_OBJECT_NULL) ||
- (prev_object->paging_in_progress != 0)) {
- vm_object_unlock(prev_object);
+ if ((object->ref_count > 1) ||
+ object->pager_created ||
+ object->used_for_pageout ||
+ (object->shadow != VM_OBJECT_NULL) ||
+ (object->copy != VM_OBJECT_NULL) ||
+ (object->paging_in_progress != 0)) {
+ vm_object_unlock(object);
return FALSE;
}
- /*
- * Remove any pages that may still be in the object from
- * a previous deallocation.
- */
-
- vm_object_page_remove(prev_object,
+ if (object == prev_object) {
+ /*
+ * Remove any pages that may still be in
+ * the object from a previous deallocation.
+ */
+ vm_object_page_remove(object,
prev_offset + prev_size,
prev_offset + prev_size + next_size);
+ /*
+ * Extend the object if necessary.
+ */
+ newsize = prev_offset + prev_size + next_size;
+ if (newsize > object->size)
+ object->size = newsize;
- /*
- * Extend the object if necessary.
- */
- newsize = prev_offset + prev_size + next_size;
- if (newsize > prev_object->size)
- prev_object->size = newsize;
+ *new_offset = prev_offset;
+ } else {
+ /*
+ * Check if we have enough space in the object
+ * offset space to insert the new mapping before
+ * the existing one.
+ */
+ if (next_offset < prev_size) {
+ vm_object_unlock(object);
+ return FALSE;
+ }
+ /*
+ * Remove any pages that may still be in
+ * the object from a previous deallocation.
+ */
+ vm_object_page_remove(object,
+ next_offset - prev_size,
+ next_offset);
- vm_object_unlock(prev_object);
+ *new_offset = next_offset - prev_size;
+ }
+
+ vm_object_unlock(object);
+ *new_object = object;
return TRUE;
}
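Callers of vm_object_coalesce now receive the resulting object and offset through the two OUT parameters, as the map-entry extension in vm_map_enter does. A minimal illustrative call (entry and size names are placeholders):

	/* Sketch only: try to merge the object behind `prev` with a new anonymous range. */
	vm_object_t new_object;
	vm_offset_t new_offset;

	if (vm_object_coalesce(prev->object.vm_object, VM_OBJECT_NULL,
			       prev->offset, (vm_offset_t) 0,
			       prev->vme_end - prev->vme_start, size,
			       &new_object, &new_offset)) {
		prev->object.vm_object = new_object;
		prev->offset = new_offset;
		prev->vme_end += size;		/* extend the entry over the new range */
	}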
diff --git a/vm/vm_object.h b/vm/vm_object.h
index 46328a38..9c17541f 100644
--- a/vm/vm_object.h
+++ b/vm/vm_object.h
@@ -247,7 +247,9 @@ extern boolean_t vm_object_coalesce(
vm_offset_t prev_offset,
vm_offset_t next_offset,
vm_size_t prev_size,
- vm_size_t next_size);
+ vm_size_t next_size,
+ vm_object_t *new_object, /* OUT */
+ vm_offset_t *new_offset); /* OUT */
extern void vm_object_pager_wakeup(ipc_port_t pager);
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 06d62c97..04decbbd 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -94,7 +94,7 @@ struct vm_page_cpu_pool {
* Special order value for pages that aren't in a free list. Such pages are
* either allocated, or part of a free block of pages but not the head page.
*/
-#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1)
+#define VM_PAGE_ORDER_UNLISTED (VM_PAGE_NR_FREE_LISTS + 1)
/*
* Doubly-linked list of free blocks.
@@ -245,10 +245,12 @@ static int vm_page_is_ready __read_mostly;
* - HIGHMEM: must be mapped before it can be accessed
*
* Segments are ordered by priority, 0 being the lowest priority. Their
- * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some segments
- * may actually be aliases for others, e.g. if DMA is always possible from
- * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
- * in which case the segment table contains DIRECTMAP and HIGHMEM only.
+ * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM or
+ * DMA < DIRECTMAP < DMA32 < HIGHMEM.
+ * Some segments may actually be aliases for others, e.g. if DMA is always
+ * possible from the direct physical mapping, DMA and DMA32 are aliases for
+ * DIRECTMAP, in which case the segment table contains DIRECTMAP and HIGHMEM
+ * only.
*/
static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
@@ -425,7 +427,7 @@ vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
pa = page->phys_addr;
while (order < (VM_PAGE_NR_FREE_LISTS - 1)) {
- buddy_pa = pa ^ vm_page_ptoa(1 << order);
+ buddy_pa = pa ^ vm_page_ptoa(1ULL << order);
if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
break;
@@ -438,7 +440,7 @@ vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
vm_page_free_list_remove(&seg->free_lists[order], buddy);
buddy->order = VM_PAGE_ORDER_UNLISTED;
order++;
- pa &= -vm_page_ptoa(1 << order);
+ pa &= -vm_page_ptoa(1ULL << order);
page = &seg->pages[vm_page_atop(pa - seg->start)];
}
@@ -1016,7 +1018,7 @@ vm_page_seg_balance_page(struct vm_page_seg *seg,
vm_page_set_type(dest, 0, src->type);
memcpy(&dest->vm_page_header, &src->vm_page_header,
- sizeof(*dest) - VM_PAGE_HEADER_SIZE);
+ VM_PAGE_BODY_SIZE);
vm_page_copy(src, dest);
if (!src->dirty) {
@@ -1034,6 +1036,7 @@ vm_page_seg_balance_page(struct vm_page_seg *seg,
simple_unlock(&seg->lock);
simple_unlock(&vm_page_queue_free_lock);
+ vm_object_lock(object);
vm_page_insert(dest, object, offset);
vm_object_unlock(object);
@@ -1375,7 +1378,7 @@ vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
return seg->avail_end - seg->avail_start;
}
-unsigned long __init
+phys_addr_t __init
vm_page_bootalloc(size_t size)
{
struct vm_page_boot_seg *seg;
@@ -2040,7 +2043,15 @@ again:
* TODO Find out what could cause this and how to deal with it.
* This will likely require an out-of-memory killer.
*/
- panic("vm_page: unable to recycle any page");
+
+ {
+ static boolean_t warned = FALSE;
+
+ if (!warned) {
+ printf("vm_page warning: unable to recycle any page\n");
+ warned = 1;
+ }
+ }
}
simple_unlock(&vm_page_queue_free_lock);
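One fix above is purely about lock ordering: vm_page_insert expects the owning object to be locked, so the balance path now brackets the insertion explicitly. The resulting pattern, in isolation:

	/* Sketch only: insert a page into an object with the required lock held. */
	vm_object_lock(object);
	vm_page_insert(dest, object, offset);
	vm_object_unlock(object);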
diff --git a/vm/vm_page.h b/vm/vm_page.h
index d457f9a2..3be75f18 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -79,9 +79,6 @@
struct vm_page {
struct list node; /* page queues or free list (P) */
- unsigned short type;
- unsigned short seg_index;
- unsigned short order;
void *priv;
/*
@@ -95,7 +92,6 @@ struct vm_page {
/* We use an empty struct as the delimiter. */
struct {} vm_page_header;
-#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header)
vm_object_t object; /* which object am I in (O,P) */
vm_offset_t offset; /* offset into that object (O,P) */
@@ -126,10 +122,20 @@ struct vm_page {
* without having data. (O)
* [See vm_object_overwrite] */
- vm_prot_t page_lock; /* Uses prohibited by data manager (O) */
- vm_prot_t unlock_request; /* Outstanding unlock request (O) */
+ vm_prot_t page_lock:3; /* Uses prohibited by data manager (O) */
+ vm_prot_t unlock_request:3; /* Outstanding unlock request (O) */
+
+ struct {} vm_page_footer;
+
+ unsigned short type:2;
+ unsigned short seg_index:2;
+ unsigned short order:4;
};
+#define VM_PAGE_BODY_SIZE \
+ (offsetof(struct vm_page, vm_page_footer) \
+ - offsetof(struct vm_page, vm_page_header))
+
/*
* For debugging, this macro can be defined to perform
* some useful check on a page structure.
@@ -156,8 +162,13 @@ void vm_page_check(const struct vm_page *page);
*/
#define VM_PAGE_DMA 0x01
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+#define VM_PAGE_DIRECTMAP 0x02
+#define VM_PAGE_DMA32 0x04
+#else
#define VM_PAGE_DMA32 0x02
#define VM_PAGE_DIRECTMAP 0x04
+#endif
#define VM_PAGE_HIGHMEM 0x08
extern
@@ -321,13 +332,24 @@ extern unsigned int vm_page_info(
*
* Selector-to-segment-list translation table :
* DMA DMA
+ * if 32bit PAE
+ * DIRECTMAP DMA32 DMA
+ * DMA32 DMA32 DIRECTMAP DMA
+ * HIGHMEM HIGHMEM DMA32 DIRECTMAP DMA
+ * else
* DMA32 DMA32 DMA
* DIRECTMAP DIRECTMAP DMA32 DMA
* HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA
+ * endif
*/
#define VM_PAGE_SEL_DMA 0
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+#define VM_PAGE_SEL_DIRECTMAP 1
+#define VM_PAGE_SEL_DMA32 2
+#else
#define VM_PAGE_SEL_DMA32 1
#define VM_PAGE_SEL_DIRECTMAP 2
+#endif
#define VM_PAGE_SEL_HIGHMEM 3
/*
@@ -404,7 +426,7 @@ int vm_page_ready(void);
* pmap_steal_memory. It can be used after physical segments have been loaded
* and before the vm_page module is initialized.
*/
-unsigned long vm_page_bootalloc(size_t size);
+phys_addr_t vm_page_bootalloc(size_t size);
/*
* Set up the vm_page module.
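The empty vm_page_header and vm_page_footer members bracket the fields that must travel when one page structure takes over for another, and VM_PAGE_BODY_SIZE is the distance between them. The intended copy, matching the memcpy calls in vm_page.c and vm_resident.c:

	/* Sketch only: copy the body of a struct vm_page (header marker up to the
	   footer marker), leaving dest's list linkage and allocator bitfields alone. */
	memcpy(&dest->vm_page_header, &src->vm_page_header, VM_PAGE_BODY_SIZE);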
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index 575a9f5d..e2f4cf2b 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -412,7 +412,7 @@ vm_pageout_page(
* It returns with vm_page_queue_free_lock held.
*/
-boolean_t vm_pageout_scan(boolean_t *should_wait)
+static boolean_t vm_pageout_scan(boolean_t *should_wait)
{
boolean_t done;
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index 4af103d4..3f0cc909 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -52,6 +52,7 @@
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
+#include <vm/vm_resident.h>
#if MACH_VM_DEBUG
#include <mach/kern_return.h>
@@ -97,7 +98,7 @@ unsigned long vm_page_bucket_count = 0; /* How big is array? */
unsigned long vm_page_hash_mask; /* Mask for hash function */
static struct list vm_page_queue_fictitious;
-decl_simple_lock_data(,vm_page_queue_free_lock)
+def_simple_lock_data(,vm_page_queue_free_lock)
int vm_page_fictitious_count;
int vm_object_external_count;
int vm_object_external_pages;
@@ -128,7 +129,7 @@ phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
* defined here, but are shared by the pageout
* module.
*/
-decl_simple_lock_data(,vm_page_queue_lock)
+def_simple_lock_data(,vm_page_queue_lock)
int vm_page_active_count;
int vm_page_inactive_count;
int vm_page_wire_count;
@@ -232,7 +233,8 @@ void vm_page_bootstrap(
vm_offset_t pmap_steal_memory(
vm_size_t size)
{
- vm_offset_t addr, vaddr, paddr;
+ vm_offset_t addr, vaddr;
+ phys_addr_t paddr;
size = round_page(size);
@@ -745,7 +747,7 @@ boolean_t vm_page_convert(struct vm_page **mp)
memcpy(&real_m->vm_page_header,
&fict_m->vm_page_header,
- sizeof(*fict_m) - VM_PAGE_HEADER_SIZE);
+ VM_PAGE_BODY_SIZE);
real_m->fictitious = FALSE;
vm_page_insert(real_m, object, offset);
@@ -776,10 +778,16 @@ vm_page_t vm_page_grab(unsigned flags)
if (flags & VM_PAGE_HIGHMEM)
selector = VM_PAGE_SEL_HIGHMEM;
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+ else if (flags & VM_PAGE_DMA32)
+ selector = VM_PAGE_SEL_DMA32;
+#endif
else if (flags & VM_PAGE_DIRECTMAP)
selector = VM_PAGE_SEL_DIRECTMAP;
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT <= VM_PAGE_DIRECTMAP_LIMIT
else if (flags & VM_PAGE_DMA32)
selector = VM_PAGE_SEL_DMA32;
+#endif
else
selector = VM_PAGE_SEL_DMA;
@@ -1073,8 +1081,7 @@ vm_page_info(
/*
* Routine: vm_page_print [exported]
*/
-void vm_page_print(p)
- const vm_page_t p;
+void vm_page_print(const vm_page_t p)
{
iprintf("Page 0x%X: object 0x%X,", (vm_offset_t) p, (vm_offset_t) p->object);
printf(" offset 0x%X", p->offset);
diff --git a/vm/vm_user.c b/vm/vm_user.c
index ad1fa75d..62aedad3 100644
--- a/vm/vm_user.c
+++ b/vm/vm_user.c
@@ -40,9 +40,11 @@
#include <mach/vm_statistics.h>
#include <mach/vm_cache_statistics.h>
#include <mach/vm_sync.h>
+#include <kern/gnumach.server.h>
#include <kern/host.h>
-#include <kern/task.h>
#include <kern/mach.server.h>
+#include <kern/mach_host.server.h>
+#include <kern/task.h>
#include <vm/vm_fault.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
@@ -338,6 +340,11 @@ kern_return_t vm_map(
if (size == 0)
return KERN_INVALID_ARGUMENT;
+#ifdef USER32
+ if (mask & 0x80000000)
+ mask |= 0xffffffff00000000;
+#endif
+
*address = trunc_page(*address);
size = round_page(size);
@@ -425,12 +432,11 @@ kern_return_t vm_map(
*
* [ To unwire the pages, specify VM_PROT_NONE. ]
*/
-kern_return_t vm_wire(port, map, start, size, access)
- const ipc_port_t port;
- vm_map_t map;
- vm_offset_t start;
- vm_size_t size;
- vm_prot_t access;
+kern_return_t vm_wire(const ipc_port_t port,
+ vm_map_t map,
+ vm_offset_t start,
+ vm_size_t size,
+ vm_prot_t access)
{
boolean_t priv;
@@ -590,6 +596,10 @@ kern_return_t vm_allocate_contiguous(
if (palign == 0)
palign = PAGE_SIZE;
+ /* FIXME: Allows some small alignments less than page size */
+ if ((palign < PAGE_SIZE) && (PAGE_SIZE % palign == 0))
+ palign = PAGE_SIZE;
+
/* FIXME */
if (palign != PAGE_SIZE)
return KERN_INVALID_ARGUMENT;
@@ -597,12 +607,24 @@ kern_return_t vm_allocate_contiguous(
selector = VM_PAGE_SEL_DMA;
if (pmax > VM_PAGE_DMA_LIMIT)
#ifdef VM_PAGE_DMA32_LIMIT
- selector = VM_PAGE_SEL_DMA32;
+#if VM_PAGE_DMA32_LIMIT < VM_PAGE_DIRECTMAP_LIMIT
+ if (pmax <= VM_PAGE_DMA32_LIMIT)
+ selector = VM_PAGE_SEL_DMA32;
if (pmax > VM_PAGE_DMA32_LIMIT)
#endif
- selector = VM_PAGE_SEL_DIRECTMAP;
+#endif
+ if (pmax <= VM_PAGE_DIRECTMAP_LIMIT)
+ selector = VM_PAGE_SEL_DIRECTMAP;
if (pmax > VM_PAGE_DIRECTMAP_LIMIT)
- selector = VM_PAGE_SEL_HIGHMEM;
+#ifdef VM_PAGE_DMA32_LIMIT
+#if VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+ if (pmax <= VM_PAGE_DMA32_LIMIT)
+ selector = VM_PAGE_SEL_DMA32;
+ if (pmax > VM_PAGE_DMA32_LIMIT)
+#endif
+#endif
+ if (pmax <= VM_PAGE_HIGHMEM_LIMIT)
+ selector = VM_PAGE_SEL_HIGHMEM;
size = vm_page_round(size);
@@ -670,11 +692,115 @@ kern_return_t vm_allocate_contiguous(
return kr;
}
+ for (i = 0; i < vm_page_atop(size); i++)
+ vm_page_unwire(&pages[i]);
+
*result_vaddr = vaddr;
*result_paddr = pages->phys_addr;
assert(*result_paddr >= pmin);
- assert(*result_paddr + size < pmax);
+ assert(*result_paddr + size <= pmax);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * vm_pages_phys returns information about a region of memory
+ */
+kern_return_t vm_pages_phys(
+ host_t host,
+ vm_map_t map,
+ vm_address_t address,
+ vm_size_t size,
+ rpc_phys_addr_array_t *pagespp,
+ mach_msg_type_number_t *countp)
+{
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ if (!page_aligned(address))
+ return KERN_INVALID_ARGUMENT;
+ if (!page_aligned(size))
+ return KERN_INVALID_ARGUMENT;
+
+ mach_msg_type_number_t count = atop(size), cur;
+ rpc_phys_addr_array_t pagesp = *pagespp;
+ kern_return_t kr;
+
+ if (*countp < count) {
+ vm_offset_t allocated;
+ /* Avoid faults while we keep vm locks */
+ kr = kmem_alloc(ipc_kernel_map, &allocated,
+ count * sizeof(pagesp[0]));
+ if (kr != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+ pagesp = (rpc_phys_addr_array_t) allocated;
+ }
+
+ for (cur = 0; cur < count; cur++) {
+ vm_map_t cmap; /* current map in traversal */
+ rpc_phys_addr_t paddr;
+ vm_map_entry_t entry; /* entry in current map */
+
+ /* find the entry containing (or following) the address */
+ vm_map_lock_read(map);
+ for (cmap = map;;) {
+ /* cmap is read-locked */
+
+ if (!vm_map_lookup_entry(cmap, address, &entry)) {
+ entry = VM_MAP_ENTRY_NULL;
+ break;
+ }
+
+ if (entry->is_sub_map) {
+ /* move down to the sub map */
+
+ vm_map_t nmap = entry->object.sub_map;
+ vm_map_lock_read(nmap);
+ vm_map_unlock_read(cmap);
+ cmap = nmap;
+ continue;
+ } else {
+ /* Found it */
+ break;
+ }
+ /*NOTREACHED*/
+ }
+
+ paddr = 0;
+ if (entry) {
+ vm_offset_t offset = address - entry->vme_start + entry->offset;
+ vm_object_t object = entry->object.vm_object;
+
+ if (object) {
+ vm_object_lock(object);
+ vm_page_t page = vm_page_lookup(object, offset);
+ if (page) {
+ if (page->phys_addr != (typeof(pagesp[cur])) page->phys_addr)
+ printf("warning: physical address overflow in vm_pages_phys!!\n");
+ else
+ paddr = page->phys_addr;
+ }
+ vm_object_unlock(object);
+ }
+ }
+ vm_map_unlock_read(cmap);
+ pagesp[cur] = paddr;
+
+ address += PAGE_SIZE;
+ }
+
+ if (pagesp != *pagespp) {
+ vm_map_copy_t copy;
+ kr = vm_map_copyin(ipc_kernel_map, (vm_offset_t) pagesp,
+ count * sizeof(pagesp[0]), TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+ *pagespp = (rpc_phys_addr_array_t) copy;
+ }
+
+ *countp = count;
return KERN_SUCCESS;
}
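The USER32 hunk in vm_map() above widens the caller's alignment mask: a 32-bit task can only express the low word, so a mask with bit 31 set must be extended before it is applied to 64-bit addresses. A short illustration with a hypothetical mask value:

	/* Sketch only: widen a 32-bit alignment mask for a 64-bit address space. */
	vm_offset_t mask = 0xfff00000UL;	/* as supplied by a 32-bit caller */
	if (mask & 0x80000000)
		mask |= 0xffffffff00000000;	/* becomes 0xfffffffffff00000 */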
diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 6b6bb2cb..b0bc45c2 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -28,6 +28,8 @@ if HOST_x86_64
#
libkernel_a_SOURCES += \
+ i386/i386at/acpi_parse_apic.h \
+ i386/i386at/acpi_parse_apic.c \
i386/i386at/autoconf.c \
i386/i386at/autoconf.h \
i386/i386at/biosmem.c \
@@ -65,7 +67,6 @@ libkernel_a_SOURCES += \
i386/i386at/kdsoft.h \
i386/i386at/mem.c \
i386/i386at/mem.h \
- i386/i386at/pic_isa.c \
i386/i386at/rtc.c \
i386/i386at/rtc.h
endif
@@ -86,89 +87,36 @@ endif
#
libkernel_a_SOURCES += \
- i386/i386/ast.h \
- i386/i386/ast_check.c \
- i386/i386/ast_types.h \
- i386/i386/cpu.h \
- i386/i386/cpu_number.h \
+ i386/i386/percpu.h \
+ i386/i386/percpu.c \
x86_64/cswitch.S \
- i386/i386/db_disasm.c \
- i386/i386/db_interface.c \
- i386/i386/db_interface.h \
- i386/i386/db_machdep.h \
- i386/i386/db_trace.c \
- i386/i386/db_trace.h \
- i386/i386/debug.h \
- i386/i386/debug_i386.c \
+ x86_64/copy_user.c \
x86_64/debug_trace.S \
- i386/i386/eflags.h \
- i386/i386/fpu.c \
- i386/i386/fpu.h \
- i386/i386/gdt.c \
- i386/i386/gdt.h \
- i386/i386/idt-gen.h \
- i386/i386/idt.c \
x86_64/idt_inittab.S \
- i386/i386/io_perm.c \
- i386/i386/io_perm.h \
- i386/i386/ipl.h \
- i386/i386/ktss.c \
- i386/i386/ktss.h \
- i386/i386/kttd_interface.c \
- i386/i386/kttd_machdep.h \
- i386/i386/ldt.c \
- i386/i386/ldt.h \
- i386/i386/lock.h \
x86_64/locore.S \
- i386/i386/locore.h \
- i386/i386/loose_ends.c \
- i386/i386/loose_ends.h \
- i386/i386/mach_param.h \
- i386/i386/machine_routines.h \
- i386/i386/machine_task.c \
- i386/i386/machspl.h \
- i386/i386/model_dep.h \
- i386/i386/mp_desc.c \
- i386/i386/mp_desc.h \
- i386/i386/pcb.c \
- i386/i386/pcb.h \
- i386/i386/phys.c \
- i386/i386/pio.h \
- i386/i386/pmap.h \
- i386/i386/proc_reg.h \
- i386/i386/sched_param.h \
- i386/i386/seg.c \
- i386/i386/seg.h \
- i386/i386/setjmp.h \
- x86_64/spl.S \
- i386/i386/spl.h \
- i386/i386/strings.c \
- i386/i386/task.h \
- i386/i386/thread.h \
- i386/i386/time_stamp.h \
- i386/i386/trap.c \
- i386/i386/trap.h \
- i386/i386/tss.h \
- i386/i386/user_ldt.c \
- i386/i386/user_ldt.h \
- i386/i386/vm_param.h \
- i386/i386/xpr.h \
- i386/intel/pmap.c \
- i386/intel/pmap.h \
- i386/intel/read_fault.c \
- i386/intel/read_fault.h
+ x86_64/spl.S
if PLATFORM_at
libkernel_a_SOURCES += \
+ i386/i386/apic.h \
+ i386/i386/apic.c \
i386/i386/hardclock.c \
i386/i386/hardclock.h \
- i386/i386/io_map.c \
i386/i386/irq.c \
i386/i386/irq.h \
- i386/i386/pic.c \
- i386/i386/pic.h \
+ i386/i386/msr.h \
i386/i386/pit.c \
i386/i386/pit.h
+
+if enable_apic
+libkernel_a_SOURCES += \
+ i386/i386at/ioapic.c
+else
+libkernel_a_SOURCES += \
+ i386/i386/pic.c \
+ i386/i386/pic.h \
+ i386/i386at/pic_isa.c
+endif
endif
#
@@ -211,13 +159,6 @@ nodist_libkernel_a_SOURCES += \
# Architecture specialities.
#
-if PLATFORM_at
-gnumach_LINKFLAGS += \
- --defsym _START_MAP=$(_START_MAP) \
- --defsym _START=_START_MAP+0x40000000 \
- -T '$(srcdir)'/x86_64/ldscript
-endif
-
AM_CPPFLAGS += \
-I$(top_srcdir)/i386 \
-I$(top_srcdir)/i386/i386 \
@@ -248,16 +189,40 @@ include_mach_x86_64_HEADERS = \
i386/include/mach/i386/mach_i386_types.h \
i386/include/mach/i386/machine_types.defs \
i386/include/mach/i386/multiboot.h \
- i386/include/mach/i386/syscall_sw.h \
i386/include/mach/i386/thread_status.h \
i386/include/mach/i386/trap.h \
i386/include/mach/i386/vm_param.h \
i386/include/mach/i386/vm_types.h
+
+if enable_user32
+include_mach_x86_64_HEADERS += i386/include/mach/i386/syscall_sw.h
+else
+include_mach_x86_64_HEADERS += x86_64/include/syscall_sw.h
+endif
#
# Platform specific parts.
#
+KERNEL_MAP_BASE=0xffffffff80000000
+
+if PLATFORM_at
+# For now simply keep all the kernel virtual space in the last 2G.
+# We could use a more elaborate scheme if needed (e.g. reserving a
+# larger area for the directmap or the kernel heap); I think only the
+# text/bss/data sections need to be placed here because of
+# -mcmodel=kernel
+gnumach_LINKFLAGS += \
+ --defsym _START_MAP=$(_START_MAP) \
+ --defsym _START=$(_START_MAP) \
+ --defsym KERNEL_MAP_SHIFT=$(KERNEL_MAP_BASE) \
+ -z max-page-size=0x1000 \
+ -T '$(srcdir)'/x86_64/ldscript
+
+AM_CCASFLAGS += \
+ -Ii386
+endif
+
if PLATFORM_xen
libkernel_a_SOURCES += \
x86_64/xen_locore.S \
@@ -266,9 +231,15 @@ libkernel_a_SOURCES += \
i386/i386/xen.h
gnumach_LINKFLAGS += \
- --defsym _START=0x40000000 \
- --defsym _START_MAP=0x40000000 \
+ --defsym _START_MAP=$(KERNEL_MAP_BASE) \
+ --defsym _START=$(KERNEL_MAP_BASE) \
+ --defsym KERNEL_MAP_SHIFT=0 \
-T '$(srcdir)'/x86_64/ldscript
endif
+
+AM_CFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
endif # HOST_x86_64
diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S
new file mode 100644
index 00000000..0ab9bd55
--- /dev/null
+++ b/x86_64/boothdr.S
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386asm.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/seg.h>
+ /*
+ * This section will be put first into .boot. See also x86_64/ldscript.
+ */
+ .section .boot.text,"ax"
+ /* We should never be entered this way. */
+ .globl boot_start
+boot_start:
+
+ .code32
+ jmp boot_entry
+
+ /* MultiBoot header - see multiboot.h. */
+#define MULTIBOOT_MAGIC 0x1BADB002
+#define MULTIBOOT_FLAGS 0x00000003
+ P2ALIGN(2)
+boot_hdr:
+ .long MULTIBOOT_MAGIC
+ .long MULTIBOOT_FLAGS
+ /*
+ * The next item here is the checksum.
+ * XX this works OK until we need at least the 30th bit.
+ */
+ .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS)
+
+ .global _start
+_start:
+boot_entry:
+ /*
+ * Prepare a minimal page mapping to jump to 64-bit mode and to C code.
+ * The first 4GB is identity mapped, and the first 2GB are re-mapped
+ * to high addresses at KERNEL_MAP_BASE.
+ */
+
+ movl $p3table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table)
+ /*
+ * Fill 4 entries in the L3 table to cover the whole 32-bit 4GB address
+ * space. Part of it might be remapped later if the kernel is mapped
+ * below 4G.
+ */
+ movl $p2table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table)
+ movl $p2table1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 8)
+ movl $p2table2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 16)
+ movl $p2table3,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 24)
+ /* point each page table level two entry to a page */
+ mov $0,%ecx
+.map_p2_table:
+ mov $0x200000,%eax // 2MiB page, should always be available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k
+ mov %eax,p2table(,%ecx,8)
+ inc %ecx
+ cmp $2048,%ecx // 512 entries per table, map 4 L2 tables
+ jne .map_p2_table
+
+ /*
+ * KERNEL_MAP_BASE must be aligned to 2GB.
+ * Depending on the kernel starting address, we might need to add another
+ * entry in the L4 table (controlling 512 GB chunks). In any case, we
+ * add two entries in the L3 table to make sure we map 2GB for the kernel.
+ * Note that this may overwrite part of the mapping created above.
+ */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)
+ movl $p3ktable,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#else
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#endif
+
+ mov $0,%ecx
+.map_p2k_table:
+ mov $0x200000,%eax // 2MiB page, should always be available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K
+ mov %eax,p2ktable1(,%ecx,8)
+ inc %ecx
+ cmp $1024,%ecx // 512 entries per table, map 2 L2 tables
+ jne .map_p2k_table
+
+switch64:
+ /*
+ * To jump to 64-bit mode, we have to:
+ * - enable PAE
+ * - enable long mode
+ * - enable paging and load the tables filled above in CR3
+ * - jump to a 64-bit code segment
+ */
+ mov %cr4,%eax
+ or $CR4_PAE,%eax
+ mov %eax,%cr4
+ mov $0xC0000080,%ecx // select EFER register
+ rdmsr
+ or $(1 << 8),%eax // long mode enable bit
+ wrmsr
+ mov $p4table,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $CR0_PG,%eax
+ or $CR0_WP,%eax
+ mov %eax,%cr0
+
+ lgdt gdt64pointer
+ movw $0,%ax
+ movw %ax,%fs
+ movw %ax,%gs
+ movw $16,%ax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ ljmp $8,$boot_entry64
+
+ .code64
+
+boot_entry64:
+ /* Switch to our own interrupt stack. */
+ movq $solid_intstack+INTSTACK_SIZE-16, %rax
+ andq $(~15),%rax
+ movq %rax,%rsp
+
+ /* Reset EFLAGS to a known state. */
+ pushq $0
+ popf
+ /* save multiboot info for later */
+ movq %rbx,%r8
+
+ /* Fix ifunc entries */
+ movq $__rela_iplt_start,%rsi
+ movq $__rela_iplt_end,%rdi
+iplt_cont:
+ cmpq %rdi,%rsi
+ jae iplt_done
+ movq (%rsi),%rbx /* r_offset */
+ movb 4(%rsi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+ call *(%ebx) /* call ifunc */
+ movq %rax,(%rbx) /* fixed address */
+iplt_next:
+ addq $8,%rsi
+ jmp iplt_cont
+iplt_done:
+
+ /* restore multiboot info */
+ movq %r8,%rdi
+ /* Jump into C code. */
+ call EXT(c_boot_entry)
+ /* not reached */
+ nop
+
+ .code32
+ .section .boot.data
+ .align 4096
+#define SEG_ACCESS_OFS 40
+#define SEG_GRANULARITY_OFS 52
+gdt64:
+ .quad 0
+gdt64code:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS)
+gdt64data:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << SEG_ACCESS_OFS)
+gdt64end:
+ .skip (4096 - (gdt64end - gdt64))
+gdt64pointer:
+ .word gdt64end - gdt64 - 1
+ .quad gdt64
+
+ .section .boot.data
+ .align 4096
+p4table: .space 4096
+p3table: .space 4096
+p2table: .space 4096
+p2table1: .space 4096
+p2table2: .space 4096
+p2table3: .space 4096
+p3ktable: .space 4096
+p2ktable1: .space 4096
+p2ktable2: .space 4096
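
As a quick check of the index arithmetic used in .kernel_map above, the sketch
below (not part of the patch) reproduces the shifts for the default
KERNEL_MAP_BASE from x86_64/Makefrag.am:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t base = 0xffffffff80000000ULL;  /* KERNEL_MAP_BASE */
        unsigned l4 = (base >> 39) & 0x1FF;     /* PML4 (p4table) entry */
        unsigned l3 = (base >> 30) & 0x1FF;     /* first PDPT (p3ktable) entry */

        /* Prints 511 and 510: the kernel sits in the last two 1GB slots of the
           last 512GB region, i.e. the top 2GB of the address space, so one L4
           entry and two L3 entries are enough. */
        printf("L4 index %u, L3 indices %u and %u\n", l4, l3, l3 + 1);
        return 0;
    }
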
diff --git a/x86_64/configfrag.ac b/x86_64/configfrag.ac
index e455d31b..f119a9a3 100644
--- a/x86_64/configfrag.ac
+++ b/x86_64/configfrag.ac
@@ -27,6 +27,10 @@ dnl USE OF THIS SOFTWARE.
# Determines the size of the CPU cache line.
AC_DEFINE([CPU_L1_SHIFT], [6], [CPU_L1_SHIFT])
+ [if test x"$enable_user32" = xyes ; then
+ user32_cpu=i686
+ fi]
+
[# Does the architecture provide machine-specific interfaces?
mach_machine_routines=1
diff --git a/x86_64/copy_user.c b/x86_64/copy_user.c
new file mode 100644
index 00000000..c6e125d9
--- /dev/null
+++ b/x86_64/copy_user.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright (C) 2023 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include <kern/debug.h>
+#include <mach/boolean.h>
+
+#include <copy_user.h>
+
+
+/* Mach field descriptors measure size in bits */
+#define descsize_to_bytes(n) (n / 8)
+#define bytes_to_descsize(n) (n * 8)
+
+#ifdef USER32
+/* Versions of mach_msg_type_t and mach_msg_type_long_t that are expected from 32-bit userland. */
+typedef struct {
+ unsigned int msgt_name : 8,
+ msgt_size : 8,
+ msgt_number : 12,
+ msgt_inline : 1,
+ msgt_longform : 1,
+ msgt_deallocate : 1,
+ msgt_unused : 1;
+} mach_msg_user_type_t;
+_Static_assert(sizeof(mach_msg_user_type_t) == 4);
+
+typedef struct {
+ mach_msg_user_type_t msgtl_header;
+ unsigned short msgtl_name;
+ unsigned short msgtl_size;
+ natural_t msgtl_number;
+} mach_msg_user_type_long_t;
+_Static_assert(sizeof(mach_msg_user_type_long_t) == 12);
+#else
+typedef mach_msg_type_t mach_msg_user_type_t;
+typedef mach_msg_type_long_t mach_msg_user_type_long_t;
+#endif /* USER32 */
+
+/*
+* Helper to unpack the relevant fields of a msg type; the fields are different
+* depending on whether it is in long form or not.
+*/
+static inline void unpack_msg_type(vm_offset_t addr,
+ mach_msg_type_name_t *name,
+ mach_msg_type_size_t *size,
+ mach_msg_type_number_t *number,
+ boolean_t *is_inline,
+ vm_size_t *user_amount,
+ vm_size_t *kernel_amount)
+{
+ mach_msg_type_t* kmt = (mach_msg_type_t*)addr;
+ *is_inline = kmt->msgt_inline;
+ if (kmt->msgt_longform)
+ {
+ mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)addr;
+ *name = kmtl->msgtl_name;
+ *size = kmtl->msgtl_size;
+ *number = kmtl->msgtl_number;
+ *kernel_amount = sizeof(mach_msg_type_long_t);
+ *user_amount = sizeof(mach_msg_user_type_long_t);
+ }
+ else
+ {
+ *name = kmt->msgt_name;
+ *size = kmt->msgt_size;
+ *number = kmt->msgt_number;
+ *kernel_amount = sizeof(mach_msg_type_t);
+ *user_amount = sizeof(mach_msg_user_type_t);
+ }
+}
+
+#ifdef USER32
+static inline void mach_msg_user_type_to_kernel(const mach_msg_user_type_t *u,
+ mach_msg_type_t* k) {
+ k->msgt_name = u->msgt_name;
+ k->msgt_size = u->msgt_size;
+ k->msgt_number = u->msgt_number;
+ k->msgt_inline = u->msgt_inline;
+ k->msgt_longform = u->msgt_longform;
+ k->msgt_deallocate = u->msgt_deallocate;
+ k->msgt_unused = 0;
+}
+
+static inline void mach_msg_user_type_to_kernel_long(const mach_msg_user_type_long_t *u,
+ mach_msg_type_long_t* k) {
+ const mach_msg_type_long_t kernel = {
+ .msgtl_header = {
+ .msgt_name = u->msgtl_name,
+ .msgt_size = u->msgtl_size,
+ .msgt_number = u->msgtl_number,
+ .msgt_inline = u->msgtl_header.msgt_inline,
+ .msgt_longform = u->msgtl_header.msgt_longform,
+ .msgt_deallocate = u->msgtl_header.msgt_deallocate,
+ .msgt_unused = 0
+ }
+ };
+ *k = kernel;
+}
+
+static inline void mach_msg_kernel_type_to_user(const mach_msg_type_t *k,
+ mach_msg_user_type_t *u) {
+ u->msgt_name = k->msgt_name;
+ u->msgt_size = k->msgt_size;
+ u->msgt_number = k->msgt_number;
+ u->msgt_inline = k->msgt_inline;
+ u->msgt_longform = k->msgt_longform;
+ u->msgt_deallocate = k->msgt_deallocate;
+ u->msgt_unused = 0;
+}
+
+static inline void mach_msg_kernel_type_to_user_long(const mach_msg_type_long_t *k,
+ mach_msg_user_type_long_t *u) {
+ const mach_msg_user_type_long_t user = {
+ .msgtl_header = {
+ .msgt_name = 0,
+ .msgt_size = 0,
+ .msgt_number = 0,
+ .msgt_inline = k->msgtl_header.msgt_inline,
+ .msgt_longform = k->msgtl_header.msgt_longform,
+ .msgt_deallocate = k->msgtl_header.msgt_deallocate,
+ .msgt_unused = 0
+ },
+ .msgtl_name = k->msgtl_header.msgt_name,
+ .msgtl_size = k->msgtl_header.msgt_size,
+ .msgtl_number = k->msgtl_header.msgt_number
+ };
+ *u = user;
+}
+#endif
+
+static inline int copyin_mach_msg_type(const rpc_vm_offset_t *uaddr, mach_msg_type_t *kaddr) {
+#ifdef USER32
+ mach_msg_user_type_t user;
+ int ret = copyin(uaddr, &user, sizeof(mach_msg_user_type_t));
+ if (ret) {
+ return ret;
+ }
+ mach_msg_user_type_to_kernel(&user, kaddr);
+ return 0;
+#else
+ return copyin(uaddr, kaddr, sizeof(mach_msg_type_t));
+#endif
+}
+
+static inline int copyout_mach_msg_type(const mach_msg_type_t *kaddr, rpc_vm_offset_t *uaddr) {
+#ifdef USER32
+ mach_msg_user_type_t user;
+ mach_msg_kernel_type_to_user(kaddr, &user);
+ return copyout(&user, uaddr, sizeof(mach_msg_user_type_t));
+#else
+ return copyout(kaddr, uaddr, sizeof(mach_msg_type_t));
+#endif
+}
+
+static inline int copyin_mach_msg_type_long(const rpc_vm_offset_t *uaddr, mach_msg_type_long_t *kaddr) {
+#ifdef USER32
+ mach_msg_user_type_long_t user;
+ int ret = copyin(uaddr, &user, sizeof(mach_msg_user_type_long_t));
+ if (ret)
+ return ret;
+ mach_msg_user_type_to_kernel_long(&user, kaddr);
+ return 0;
+#else
+ return copyin(uaddr, kaddr, sizeof(mach_msg_type_long_t));
+#endif
+}
+
+static inline int copyout_mach_msg_type_long(const mach_msg_type_long_t *kaddr, rpc_vm_offset_t *uaddr) {
+#ifdef USER32
+ mach_msg_user_type_long_t user;
+ mach_msg_kernel_type_to_user_long(kaddr, &user);
+ return copyout(&user, uaddr, sizeof(mach_msg_user_type_long_t));
+#else
+ return copyout(kaddr, uaddr, sizeof(mach_msg_type_long_t));
+#endif
+}
+
+/* Optimized version of unpack_msg_type(), including proper copyin() */
+static inline int copyin_unpack_msg_type(vm_offset_t uaddr,
+ vm_offset_t kaddr,
+ mach_msg_type_name_t *name,
+ mach_msg_type_size_t *size,
+ mach_msg_type_number_t *number,
+ boolean_t *is_inline,
+ vm_size_t *user_amount,
+ vm_size_t *kernel_amount)
+{
+ mach_msg_type_t *kmt = (mach_msg_type_t*)kaddr;
+ if (copyin_mach_msg_type((void *)uaddr, kmt))
+ return 1;
+ *is_inline = kmt->msgt_inline;
+ if (kmt->msgt_longform)
+ {
+ mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)kaddr;
+ if (copyin_mach_msg_type_long((void *)uaddr, kmtl))
+ return 1;
+ *name = kmtl->msgtl_name;
+ *size = kmtl->msgtl_size;
+ *number = kmtl->msgtl_number;
+ *user_amount = sizeof(mach_msg_user_type_long_t);
+ *kernel_amount = sizeof(mach_msg_type_long_t);
+ }
+ else
+ {
+ *name = kmt->msgt_name;
+ *size = kmt->msgt_size;
+ *number = kmt->msgt_number;
+ *user_amount = sizeof(mach_msg_user_type_t);
+ *kernel_amount = sizeof(mach_msg_type_t);
+ }
+ return 0;
+}
+
+/*
+ * The msg type has a different size field depending on whether it is in long
+ * form or not, and we also need to convert from bytes to bits.
+ */
+static inline void adjust_msg_type_size(vm_offset_t addr, int amount)
+{
+ mach_msg_type_t* kmt = (mach_msg_type_t*)addr;
+ if (kmt->msgt_longform)
+ {
+ mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)addr;
+ kmtl->msgtl_size += bytes_to_descsize(amount);
+ }
+ else
+ {
+ kmt->msgt_size += bytes_to_descsize(amount);
+ }
+}
+
+/* Optimized version of unpack_msg_type(), including proper copyout() */
+static inline int copyout_unpack_msg_type(vm_offset_t kaddr,
+ vm_offset_t uaddr,
+ mach_msg_type_name_t *name,
+ mach_msg_type_size_t *size,
+ mach_msg_type_number_t *number,
+ boolean_t *is_inline,
+ vm_size_t *user_amount,
+ vm_size_t *kernel_amount)
+{
+ mach_msg_type_t *kmt = (mach_msg_type_t*)kaddr;
+ *is_inline = kmt->msgt_inline;
+ if (kmt->msgt_longform)
+ {
+ mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)kaddr;
+ mach_msg_type_size_t orig_size = kmtl->msgtl_size;
+ int ret;
+
+ if (MACH_MSG_TYPE_PORT_ANY(kmtl->msgtl_name)) {
+#ifdef USER32
+ kmtl->msgtl_size = bytes_to_descsize(sizeof(mach_port_name_t));
+#else
+ /* 64 bit ABI uses mach_port_name_inlined_t for inlined ports. */
+ if (!kmt->msgt_inline)
+ kmtl->msgtl_size = bytes_to_descsize(sizeof(mach_port_name_t));
+#endif
+ }
+ ret = copyout_mach_msg_type_long(kmtl, (void*)uaddr);
+ kmtl->msgtl_size = orig_size;
+ if (ret)
+ return 1;
+
+ *name = kmtl->msgtl_name;
+ *size = kmtl->msgtl_size;
+ *number = kmtl->msgtl_number;
+ *user_amount = sizeof(mach_msg_user_type_long_t);
+ *kernel_amount = sizeof(mach_msg_type_long_t);
+ }
+ else
+ {
+ mach_msg_type_size_t orig_size = kmt->msgt_size;
+ int ret;
+
+ if (MACH_MSG_TYPE_PORT_ANY(kmt->msgt_name)) {
+#ifdef USER32
+ kmt->msgt_size = bytes_to_descsize(sizeof(mach_port_name_t));
+#else
+ /* 64 bit ABI uses mach_port_name_inlined_t for inlined ports. */
+ if (!kmt->msgt_inline)
+ kmt->msgt_size = bytes_to_descsize(sizeof(mach_port_name_t));
+#endif
+ }
+ ret = copyout_mach_msg_type(kmt, (void *)uaddr);
+ kmt->msgt_size = orig_size;
+ if (ret)
+ return 1;
+
+ *name = kmt->msgt_name;
+ *size = kmt->msgt_size;
+ *number = kmt->msgt_number;
+ *user_amount = sizeof(mach_msg_user_type_t);
+ *kernel_amount = sizeof(mach_msg_type_t);
+ }
+ return 0;
+}
+
+#ifdef USER32
+/*
+ * Compute the user-space size of a message still in the kernel when processing
+ * messages from 32-bit userland.
+ * The message may originate from userspace (in which case we could
+ * optimize this by keeping the usize around) or from kernel space (we could
+ * optimize if the message structure is fixed and known in advance).
+ * For now just handle the most general case, iterating over the msg body.
+ */
+size_t msg_usize(const mach_msg_header_t *kmsg)
+{
+ size_t ksize = kmsg->msgh_size;
+ size_t usize = sizeof(mach_msg_user_header_t);
+ if (ksize > sizeof(mach_msg_header_t))
+ {
+ // iterate over the body to compute the user-space message size
+ vm_offset_t saddr, eaddr;
+ saddr = (vm_offset_t)(kmsg + 1);
+ eaddr = saddr + ksize - sizeof(mach_msg_header_t);
+ while (saddr < (eaddr - sizeof(mach_msg_type_t)))
+ {
+ vm_size_t user_amount, kernel_amount;
+ mach_msg_type_name_t name;
+ mach_msg_type_size_t size;
+ mach_msg_type_number_t number;
+ boolean_t is_inline;
+ unpack_msg_type(saddr, &name, &size, &number, &is_inline, &user_amount, &kernel_amount);
+ saddr += kernel_amount;
+ saddr = mach_msg_kernel_align(saddr);
+ usize += user_amount;
+ usize = mach_msg_user_align(usize);
+
+ if (is_inline)
+ {
+ if (MACH_MSG_TYPE_PORT_ANY(name))
+ {
+ const vm_size_t length = sizeof(mach_port_t) * number;
+ saddr += length;
+ usize += sizeof(mach_port_name_t) * number;
+ }
+ else
+ {
+ size_t n = descsize_to_bytes(size);
+ saddr += n*number;
+ usize += n*number;
+ }
+ }
+ else
+ {
+ // advance one pointer
+ saddr += sizeof(vm_offset_t);
+ usize += sizeof(rpc_vm_offset_t);
+ }
+ saddr = mach_msg_kernel_align(saddr);
+ usize = mach_msg_user_align(usize);
+ }
+ }
+ return usize;
+}
+#endif /* USER32 */
+
+/*
+ * Expand the msg header and, if required, the msg body (ports, pointers)
+ *
+ * To keep the code from getting too complicated, we use the fact that some
+ * fields of mach_msg_header_t have the same size in the kernel and user
+ * variants (basically all fields except ports and addresses).
+*/
+int copyinmsg (const void *userbuf, void *kernelbuf, const size_t usize, const size_t ksize)
+{
+ const mach_msg_user_header_t *umsg = userbuf;
+ mach_msg_header_t *kmsg = kernelbuf;
+
+#ifdef USER32
+ if (copyin(&umsg->msgh_bits, &kmsg->msgh_bits, sizeof(kmsg->msgh_bits)))
+ return 1;
+ /* kmsg->msgh_size is filled in later */
+ if (copyin_port(&umsg->msgh_remote_port, &kmsg->msgh_remote_port))
+ return 1;
+ if (copyin_port(&umsg->msgh_local_port, &kmsg->msgh_local_port))
+ return 1;
+ if (copyin(&umsg->msgh_seqno, &kmsg->msgh_seqno,
+ sizeof(kmsg->msgh_seqno) + sizeof(kmsg->msgh_id)))
+ return 1;
+#else
+ /* The 64 bit interface ensures the header is the same size, so it does not need any resizing. */
+ _Static_assert(sizeof(mach_msg_header_t) == sizeof(mach_msg_user_header_t),
+ "mach_msg_header_t and mach_msg_user_header_t expected to be of the same size");
+ if (copyin(umsg, kmsg, sizeof(mach_msg_header_t)))
+ return 1;
+ kmsg->msgh_remote_port &= 0xFFFFFFFF; // FIXME: still have port names here
+ kmsg->msgh_local_port &= 0xFFFFFFFF; // also, this assumes little-endian
+#endif
+
+ vm_offset_t usaddr, ueaddr, ksaddr;
+ ksaddr = (vm_offset_t)(kmsg + 1);
+ usaddr = (vm_offset_t)(umsg + 1);
+ ueaddr = (vm_offset_t)umsg + usize;
+
+ _Static_assert(!mach_msg_user_is_misaligned(sizeof(mach_msg_user_header_t)),
+ "mach_msg_user_header_t needs to be MACH_MSG_USER_ALIGNMENT aligned.");
+
+ if (usize > sizeof(mach_msg_user_header_t))
+ {
+ /* check that we have at least space for an empty descriptor */
+ while (usaddr <= (ueaddr - sizeof(mach_msg_user_type_t)))
+ {
+ vm_size_t user_amount, kernel_amount;
+ mach_msg_type_name_t name;
+ mach_msg_type_size_t size;
+ mach_msg_type_number_t number;
+ boolean_t is_inline;
+ if (copyin_unpack_msg_type(usaddr, ksaddr, &name, &size, &number,
+ &is_inline, &user_amount, &kernel_amount))
+ return 1;
+
+ // keep a reference to the current field descriptor, we
+ // might need to adjust it later depending on the type
+ vm_offset_t ktaddr = ksaddr;
+ usaddr += user_amount;
+ usaddr = mach_msg_user_align(usaddr);
+ ksaddr += kernel_amount;
+ ksaddr = mach_msg_kernel_align(ksaddr);
+
+ if (is_inline)
+ {
+ if (MACH_MSG_TYPE_PORT_ANY(name))
+ {
+#ifdef USER32
+ if (size != bytes_to_descsize(sizeof(mach_port_name_t)))
+ return 1;
+ if ((usaddr + sizeof(mach_port_name_t)*number) > ueaddr)
+ return 1;
+ adjust_msg_type_size(ktaddr, sizeof(mach_port_t) - sizeof(mach_port_name_t));
+ for (int i=0; i<number; i++)
+ {
+ if (copyin_port((mach_port_name_t*)usaddr, (mach_port_t*)ksaddr))
+ return 1;
+ ksaddr += sizeof(mach_port_t);
+ usaddr += sizeof(mach_port_name_t);
+ }
+#else
+ if (size != bytes_to_descsize(sizeof(mach_port_name_inlined_t)))
+ return 1;
+ const vm_size_t length = number * sizeof(mach_port_name_inlined_t);
+ if ((usaddr + length) > ueaddr)
+ return 1;
+ if (copyin((void*)usaddr, (void*)ksaddr, length))
+ return 1;
+ usaddr += length;
+ ksaddr += length;
+#endif
+ }
+ else
+ {
+ // type that doesn't need change
+ size_t n = descsize_to_bytes(size);
+ if ((usaddr + n*number) > ueaddr)
+ return 1;
+ if (copyin((void*)usaddr, (void*)ksaddr, n*number))
+ return 1;
+ usaddr += n*number;
+ ksaddr += n*number;
+ }
+ }
+ else
+ {
+ if ((usaddr + sizeof(rpc_vm_offset_t)) > ueaddr)
+ return 1;
+
+ /* out-of-line port arrays are always arrays of mach_port_name_t (4 bytes)
+ * and are expanded in ipc_kmsg_copyin_body() */
+ if (MACH_MSG_TYPE_PORT_ANY(name)) {
+ if (size != bytes_to_descsize(sizeof(mach_port_name_t)))
+ return 1;
+ adjust_msg_type_size(ktaddr, sizeof(mach_port_t) - sizeof(mach_port_name_t));
+ }
+
+ if (copyin_address((rpc_vm_offset_t*)usaddr, (vm_offset_t*)ksaddr))
+ return 1;
+ // Advance one pointer.
+ ksaddr += sizeof(vm_offset_t);
+ usaddr += sizeof(rpc_vm_offset_t);
+ }
+ // Note that we have to align because mach_port_name_t might not align
+ // with the required user alignment.
+ usaddr = mach_msg_user_align(usaddr);
+ ksaddr = mach_msg_kernel_align(ksaddr);
+ }
+ }
+
+ kmsg->msgh_size = sizeof(mach_msg_header_t) + ksaddr - (vm_offset_t)(kmsg + 1);
+ assert(kmsg->msgh_size <= ksize);
+#ifndef USER32
+ if (kmsg->msgh_size != usize)
+ return 1;
+#endif
+ return 0;
+}
+
+int copyoutmsg (const void *kernelbuf, void *userbuf, const size_t ksize)
+{
+ const mach_msg_header_t *kmsg = kernelbuf;
+ mach_msg_user_header_t *umsg = userbuf;
+#ifdef USER32
+ if (copyout(&kmsg->msgh_bits, &umsg->msgh_bits, sizeof(kmsg->msgh_bits)))
+ return 1;
+ /* umsg->msgh_size is filled in later */
+ if (copyout_port(&kmsg->msgh_remote_port, &umsg->msgh_remote_port))
+ return 1;
+ if (copyout_port(&kmsg->msgh_local_port, &umsg->msgh_local_port))
+ return 1;
+ if (copyout(&kmsg->msgh_seqno, &umsg->msgh_seqno,
+ sizeof(kmsg->msgh_seqno) + sizeof(kmsg->msgh_id)))
+ return 1;
+#else
+ if (copyout(kmsg, umsg, sizeof(mach_msg_header_t)))
+ return 1;
+#endif /* USER32 */
+
+ vm_offset_t ksaddr, keaddr, usaddr;
+ ksaddr = (vm_offset_t)(kmsg + 1);
+ usaddr = (vm_offset_t)(umsg + 1);
+ keaddr = ksaddr + ksize - sizeof(mach_msg_header_t);
+
+ if (ksize > sizeof(mach_msg_header_t))
+ {
+ while (ksaddr < keaddr)
+ {
+ vm_size_t user_amount, kernel_amount;
+ mach_msg_type_name_t name;
+ mach_msg_type_size_t size;
+ mach_msg_type_number_t number;
+ boolean_t is_inline;
+ if (copyout_unpack_msg_type(ksaddr, usaddr, &name, &size, &number,
+ &is_inline, &user_amount, &kernel_amount))
+ return 1;
+ usaddr += user_amount;
+ usaddr = mach_msg_user_align(usaddr);
+ ksaddr += kernel_amount;
+ ksaddr = mach_msg_kernel_align(ksaddr);
+
+ if (is_inline)
+ {
+ if (MACH_MSG_TYPE_PORT_ANY(name))
+ {
+#ifdef USER32
+ for (int i=0; i<number; i++)
+ {
+ if (copyout_port((mach_port_t*)ksaddr, (mach_port_name_t*)usaddr))
+ return 1;
+ ksaddr += sizeof(mach_port_t);
+ usaddr += sizeof(mach_port_name_t);
+ }
+#else
+ if (size != bytes_to_descsize(sizeof(mach_port_name_inlined_t)))
+ return 1;
+ const vm_size_t length = number * sizeof(mach_port_name_inlined_t);
+ if (copyout((void*)ksaddr, (void*)usaddr, length))
+ return 1;
+ ksaddr += length;
+ usaddr += length;
+#endif
+ }
+ else
+ {
+ size_t n = descsize_to_bytes(size);
+ if (copyout((void*)ksaddr, (void*)usaddr, n*number))
+ return 1;
+ usaddr += n*number;
+ ksaddr += n*number;
+ }
+ }
+ else
+ {
+ if (copyout_address((vm_offset_t*)ksaddr, (rpc_vm_offset_t*)usaddr))
+ return 1;
+ // advance one pointer
+ ksaddr += sizeof(vm_offset_t);
+ usaddr += sizeof(rpc_vm_offset_t);
+ }
+ usaddr = mach_msg_user_align(usaddr);
+ ksaddr = mach_msg_kernel_align(ksaddr);
+ }
+ }
+
+ mach_msg_size_t usize;
+ usize = sizeof(mach_msg_user_header_t) + usaddr - (vm_offset_t)(umsg + 1);
+ if (copyout(&usize, &umsg->msgh_size, sizeof(umsg->msgh_size)))
+ return 1;
+#ifndef USER32
+ if (usize != ksize)
+ return 1;
+#endif
+
+ return 0;
+
+}
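
To make the descriptor bookkeeping in copyinmsg()/copyoutmsg() concrete, the
sketch below (not part of the patch) redoes the arithmetic for an inline port
descriptor in the USER32 case, assuming the sizes the code above relies on:
a 4-byte mach_port_name_t on the user side and an 8-byte port slot in the
kernel message:

    #include <stdio.h>

    /* Same conversions as in x86_64/copy_user.c. */
    #define descsize_to_bytes(n) ((n) / 8)
    #define bytes_to_descsize(n) ((n) * 8)

    int main(void)
    {
        unsigned msgt_size = 32, msgt_number = 3;   /* 3 inline port names from a 32-bit task */
        unsigned user_bytes = descsize_to_bytes(msgt_size) * msgt_number;   /* 12 */

        /* copyinmsg() widens each 4-byte port name to an 8-byte kernel slot and
           bumps the descriptor size the same way adjust_msg_type_size() does;
           copyoutmsg() undoes this on the way back to userland. */
        msgt_size += bytes_to_descsize(8 - 4);                              /* 64 */
        unsigned kernel_bytes = descsize_to_bytes(msgt_size) * msgt_number; /* 24 */

        printf("user payload %u bytes, kernel payload %u bytes\n",
               user_bytes, kernel_bytes);
        return 0;
    }
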
diff --git a/x86_64/cswitch.S b/x86_64/cswitch.S
index 1a7471c3..9c4640fd 100644
--- a/x86_64/cswitch.S
+++ b/x86_64/cswitch.S
@@ -40,8 +40,8 @@ ENTRY(Load_context)
lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%rcx),%rdx
/* point to stack top */
CPU_NUMBER(%eax)
- movq %rcx,CX(EXT(active_stacks),%eax) /* store stack address */
- movq %rdx,CX(EXT(kernel_stack),%eax) /* store stack top */
+ movq %rcx,MY(ACTIVE_STACK) /* store stack address */
+ movq %rdx,CX(EXT(kernel_stack),%rax) /* store stack top */
/* XXX complete */
@@ -61,8 +61,7 @@ ENTRY(Load_context)
*/
ENTRY(Switch_context)
- CPU_NUMBER(%eax)
- movq CX(EXT(active_stacks),%eax),%rcx /* get old kernel stack */
+ movq MY(ACTIVE_STACK),%rcx /* get old kernel stack */
movq %r12,KSS_R12(%rcx) /* save registers */
movq %r13,KSS_R13(%rcx)
@@ -85,9 +84,9 @@ ENTRY(Switch_context)
/* point to stack top */
CPU_NUMBER(%eax)
- movq %rsi,CX(EXT(active_threads),%eax) /* new thread is active */
- movq %rcx,CX(EXT(active_stacks),%eax) /* set current stack */
- movq %rbx,CX(EXT(kernel_stack),%eax) /* set stack top */
+ movq %rsi,MY(ACTIVE_THREAD) /* new thread is active */
+ movq %rcx,MY(ACTIVE_STACK) /* set current stack */
+ movq %rbx,CX(EXT(kernel_stack),%rax) /* set stack top */
movq KSS_ESP(%rcx),%rsp /* switch stacks */
movq KSS_EBP(%rcx),%rbp /* restore registers */
@@ -119,8 +118,7 @@ ENTRY(Thread_continue)
*/
ENTRY(switch_to_shutdown_context)
ud2
- CPU_NUMBER(%eax)
- movq EXT(active_stacks)(,%eax,8),%rcx /* get old kernel stack */
+ movq MY(ACTIVE_STACK),%rcx /* get old kernel stack */
movq %r12,KSS_R12(%rcx) /* save registers */
movq %r13,KSS_R13(%rcx)
movq %r14,KSS_R14(%rcx)
@@ -136,8 +134,8 @@ ud2
movq S_ARG1,%rbx /* get routine to run next */
movq S_ARG2,%rsi /* get its argument */
- CPU_NUMBER(%eax)
- movq EXT(interrupt_stack)(,%eax,8),%rcx /* point to its interrupt stack */
+ CPU_NUMBER(%ecx)
+ movq CX(EXT(int_stack_base),%rcx),%rcx /* point to its interrupt stack */
lea INTSTACK_SIZE(%rcx),%rsp /* switch to it (top) */
movq %rax,%rdi /* push thread */
diff --git a/x86_64/idt_inittab.S b/x86_64/idt_inittab.S
index f021b56d..3a205ae4 100644
--- a/x86_64/idt_inittab.S
+++ b/x86_64/idt_inittab.S
@@ -41,21 +41,23 @@ ENTRY(idt_inittab)
* Interrupt descriptor table and code vectors for it.
*/
#ifdef MACH_PV_DESCRIPTORS
-#define IDT_ENTRY(n,entry,type) \
+#define IDT_ENTRY(n,entry,type,ist) \
.data 2 ;\
.byte n ;\
.byte (((type)&ACC_PL)>>5)|((((type)&(ACC_TYPE|ACC_A))==ACC_INTR_GATE)<<2) ;\
.word FLAT_KERNEL_CS ;\
- .long 0 /*pad*/ ;\
+ .word ist ;\
+ .word 0 /*pad*/ ;\
.quad entry ;\
.text
#else /* MACH_PV_DESCRIPTORS */
-#define IDT_ENTRY(n,entry,type) \
+#define IDT_ENTRY(n,entry,type,ist) \
.data 2 ;\
.quad entry ;\
.word n ;\
.word type ;\
- .long 0 /*pad*/ ;\
+ .word ist ;\
+ .word 0 /*pad*/ ;\
.text
#endif /* MACH_PV_DESCRIPTORS */
@@ -63,7 +65,7 @@ ENTRY(idt_inittab)
* No error code. Clear error code and push trap number.
*/
#define EXCEPTION(n,name) \
- IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE);\
+ IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE, 0);\
ENTRY(name) ;\
INT_FIX ;\
pushq $(0) ;\
@@ -74,7 +76,7 @@ ENTRY(name) ;\
* User-accessible exception. Otherwise, same as above.
*/
#define EXCEP_USR(n,name) \
- IDT_ENTRY(n,EXT(name),ACC_PL_U|ACC_TRAP_GATE);\
+ IDT_ENTRY(n,EXT(name),ACC_PL_U|ACC_TRAP_GATE, 0);\
ENTRY(name) ;\
INT_FIX ;\
pushq $(0) ;\
@@ -85,7 +87,7 @@ ENTRY(name) ;\
* Error code has been pushed. Just push trap number.
*/
#define EXCEP_ERR(n,name) \
- IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_INTR_GATE);\
+ IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_INTR_GATE, 0);\
ENTRY(name) ;\
INT_FIX ;\
pushq $(n) ;\
@@ -95,25 +97,25 @@ ENTRY(name) ;\
* Special interrupt code: dispatches to a unique entrypoint,
* not defined automatically here.
*/
-#define EXCEP_SPC(n,name) \
- IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE)
+#define EXCEP_SPC(n,name, ist) \
+ IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE, ist)
EXCEPTION(0x00,t_zero_div)
-EXCEP_SPC(0x01,t_debug)
+EXCEP_SPC(0x01,t_debug, 0)
/* skip NMI interrupt - let more specific code figure that out. */
EXCEP_USR(0x03,t_int3)
EXCEP_USR(0x04,t_into)
EXCEP_USR(0x05,t_bounds)
EXCEPTION(0x06,t_invop)
EXCEPTION(0x07,t_nofpu)
-EXCEPTION(0x08,a_dbl_fault)
+EXCEP_SPC(0x08,t_dbl_fault, 1)
EXCEPTION(0x09,a_fpu_over)
EXCEPTION(0x0a,a_inv_tss)
-EXCEP_SPC(0x0b,t_segnp)
+EXCEP_SPC(0x0b,t_segnp, 0)
EXCEP_ERR(0x0c,t_stack_fault)
-EXCEP_SPC(0x0d,t_gen_prot)
-EXCEP_SPC(0x0e,t_page_fault)
+EXCEP_SPC(0x0d,t_gen_prot, 0)
+EXCEP_SPC(0x0e,t_page_fault, 0)
#ifdef MACH_PV_DESCRIPTORS
EXCEP_ERR(0x0f,t_trap_0f)
#else
diff --git a/include/mach_debug/pc_info.h b/x86_64/include/syscall_sw.h
index 912da9fd..4e03f28c 100644
--- a/include/mach_debug/pc_info.h
+++ b/x86_64/include/syscall_sw.h
@@ -1,4 +1,8 @@
/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
@@ -19,25 +23,18 @@
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
-/*
- * File: mach_debug/pc_info.h
- * Author: Brian Bershad
- * Date: January 1992
- *
- * Definitions for the PC sampling interface.
- */
-
-#ifndef _MACH_DEBUG_PC_INFO_H_
-#define _MACH_DEBUG_PC_INFO_H_
+#ifndef _MACH_X86_64_SYSCALL_SW_H_
+#define _MACH_X86_64_SYSCALL_SW_H_
-typedef struct sampled_pc {
- task_t task;
- thread_t thread;
- vm_offset_t pc;
-} sampled_pc_t;
+#include <mach/machine/asm.h>
-typedef sampled_pc_t *sampled_pc_array_t;
-typedef unsigned int sampled_pc_seqno_t;
+#define kernel_trap(trap_name,trap_number,number_args) \
+ENTRY(trap_name) \
+ movq $ trap_number,%rax; \
+ movq %rcx,%r10; \
+ syscall; \
+ ret; \
+END(trap_name)
-#endif /* _MACH_DEBUG_PC_INFO_H_ */
+#endif /* _MACH_X86_64_SYSCALL_SW_H_ */
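
For reference, each user-side trap stub is produced by expanding kernel_trap()
above. Taking mach_msg_trap as an example (the trap number -25 comes from the
existing 32-bit syscall_sw.h and is shown purely as an illustration), the
expansion is roughly:

    ENTRY(mach_msg_trap)
        movq    $-25,%rax       /* Mach trap number selects the kernel entry */
        movq    %rcx,%r10       /* syscall clobbers %rcx, so the 4th argument moves to %r10 */
        syscall
        ret
    END(mach_msg_trap)
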
diff --git a/x86_64/interrupt.S b/x86_64/interrupt.S
index fccf6e28..6fb77727 100644
--- a/x86_64/interrupt.S
+++ b/x86_64/interrupt.S
@@ -28,30 +28,38 @@
/*
* Generic interrupt handler.
*
- * On entry, %rax contains the irq number.
+ * On entry, %eax contains the irq number.
+ *
+ * Note: kdb_kintr needs to know our stack usage
*/
+
+#define S_REGS 24(%rsp)
+#define S_RET 16(%rsp)
+#define S_IRQ 8(%rsp)
+#define S_IPL 0(%rsp)
+
ENTRY(interrupt)
#ifdef APIC
cmpl $255,%eax /* was this a spurious intr? */
- je _no_eoi /* if so, just return */
+ jne 1f
+ ret /* if so, just return */
+1:
#endif
- pushq %rax /* save irq number */
+ subq $16,%rsp /* Two local variables */
+ movl %eax,S_IRQ /* save irq number */
+
call spl7 /* set ipl */
- pushq %rax /* save previous ipl */
- movl 8(%esp),%edx /* set irq number as 3rd arg */
- movl %edx,%ebx /* copy irq number */
- shll $2,%ebx /* irq * 4 */
- movl EXT(iunit)(%ebx),%edi /* get device unit number as 1st arg */
- movl %eax, %esi /* previous ipl as 2nd arg */
- movq 16(%esp), %rcx /* return address as 4th arg */
- movq 24(%esp), %r8 /* address of interrupted registers as 5th arg */
- shll $1,%ebx /* irq * 8 */
- call *EXT(ivect)(%ebx) /* call interrupt handler */
- popq %rdi /* restore previous ipl */
- call splx_cli /* restore previous ipl */
+ movl %eax,S_IPL /* save previous ipl */
- cli /* XXX no more nested interrupts */
- popq %rcx /* restore irq number */
+ movl S_IRQ,%ecx /* restore irq number */
+
+#if NCPUS > 1
+ cmpl $CALL_PMAP_UPDATE,%ecx /* was this an SMP pmap_update request? */
+ je _call_single
+
+ cmpl $CALL_AST_CHECK,%ecx /* was this an SMP remote -> local ast request? */
+ je _call_local_ast
+#endif
#ifndef APIC
movl $1,%eax
@@ -89,14 +97,44 @@ ENTRY(interrupt)
movl EXT(curr_pic_mask),%eax /* restore original mask */
outb %al,$(PIC_MASTER_OCW) /* unmask master */
2:
- ret
#else
- cmpl $16,%ecx /* was this a low ISA intr? */
- jge _no_eoi /* no, must be PCI (let irq_ack handle EOI) */
-_isa_eoi:
movl %ecx,%edi /* load irq number as 1st arg */
call EXT(ioapic_irq_eoi) /* ioapic irq specific EOI */
-_no_eoi:
+#endif
+
+ ;
+ movq S_IPL,S_ARG1 /* previous ipl as 2nd arg */
+
+ ;
+ movq S_RET,S_ARG2 /* return address as 3rd arg */
+
+ ;
+ movq S_REGS,S_ARG3 /* address of interrupted registers as 4th arg */
+
+ movl S_IRQ,%eax /* copy irq number */
+ shll $2,%eax /* irq * 4 */
+ movl EXT(iunit)(%rax),%edi /* get device unit number as 1st arg */
+
+ shll $1,%eax /* irq * 8 */
+ call *EXT(ivect)(%rax) /* call interrupt handler */
+
+_completed:
+ movl S_IPL,%edi /* restore previous ipl */
+ call splx_cli /* restore previous ipl */
+
+ addq $16,%rsp /* pop local variables */
ret
+
+#if NCPUS > 1
+_call_single:
+ call EXT(lapic_eoi) /* lapic EOI before the handler to allow extra update */
+ call EXT(pmap_update_interrupt)
+ jmp _completed
+
+_call_local_ast:
+ call EXT(lapic_eoi) /* lapic EOI */
+ call EXT(ast_check) /* AST check on this cpu */
+ jmp _completed
+
#endif
END(interrupt)
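
A note on the S_* offsets defined at the top of this file: inside EXT(interrupt),
after the subq $16,%rsp, the stack layout that kdb_kintr has to know about is:

     0(%rsp)  S_IPL   previous ipl, stored after the call to spl7
     8(%rsp)  S_IRQ   irq number, stored from %eax on entry
    16(%rsp)  S_RET   return address pushed by the caller's call into interrupt()
    24(%rsp)  S_REGS  slot holding the address of the interrupted register state
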
diff --git a/x86_64/ldscript b/x86_64/ldscript
index 375e8104..67703b4d 100644
--- a/x86_64/ldscript
+++ b/x86_64/ldscript
@@ -2,7 +2,7 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
-ENTRY(_start)
+ENTRY(boot_start)
SECTIONS
{
/*
@@ -11,22 +11,30 @@ SECTIONS
* be first in there. See also `i386/i386at/boothdr.S' and
* `gnumach_LINKFLAGS' in `i386/Makefrag.am'.
*/
+
. = _START;
- .text :
- AT (_START_MAP)
+ .boot : AT(_START_MAP)
+ {
+ *(.boot.text)
+ *(.boot.data)
+ } =0x90909090
+
+ . += KERNEL_MAP_SHIFT;
+ _start = .;
+ .text : AT(((ADDR(.text)) - KERNEL_MAP_SHIFT))
{
- *(.text.start)
+ *(.text*)
*(.text .stub .text.* .gnu.linkonce.t.*)
*(.text.unlikely .text.*_unlikely)
KEEP (*(.text.*personality*))
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} =0x90909090
- .init :
+ .init : AT(((ADDR(.init)) - KERNEL_MAP_SHIFT))
{
KEEP (*(.init))
} =0x90909090
- .fini :
+ .fini : AT(((ADDR(.fini)) - KERNEL_MAP_SHIFT))
{
KEEP (*(.fini))
} =0x90909090
@@ -69,7 +77,7 @@ SECTIONS
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.plt : { *(.plt) *(.iplt) }
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_SHIFT)) { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
@@ -139,7 +147,7 @@ SECTIONS
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (24, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
- .data :
+ .data : AT(((ADDR(.data)) - KERNEL_MAP_SHIFT))
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -147,7 +155,7 @@ SECTIONS
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
__bss_start = .;
- .bss :
+ .bss : AT(((ADDR(.bss)) - KERNEL_MAP_SHIFT))
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
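
To see what the AT() arithmetic above produces, take the PLATFORM_at values from
x86_64/Makefrag.am (KERNEL_MAP_SHIFT = KERNEL_MAP_BASE = 0xffffffff80000000) and,
purely as an illustration, assume _START_MAP is 0x1000000 (the real value is set
by configure); the sketch ignores the size of .boot for simplicity:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t start_map = 0x1000000ULL;              /* assumed _START_MAP */
        uint64_t shift     = 0xffffffff80000000ULL;     /* KERNEL_MAP_SHIFT */

        uint64_t text_vma = start_map + shift;          /* . = _START; . += KERNEL_MAP_SHIFT */
        uint64_t text_lma = text_vma - shift;           /* AT(ADDR(.text) - KERNEL_MAP_SHIFT) */

        /* .boot stays at the physical load address; .text is linked in the top
           2GB but still loaded right after .boot, matching the mapping set up
           in x86_64/boothdr.S. */
        printf(".text VMA 0x%llx, LMA 0x%llx\n",
               (unsigned long long) text_vma, (unsigned long long) text_lma);
        return 0;
    }
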
diff --git a/x86_64/locore.S b/x86_64/locore.S
index 612fc493..806762bb 100644
--- a/x86_64/locore.S
+++ b/x86_64/locore.S
@@ -33,13 +33,147 @@
#include <i386/i386/proc_reg.h>
#include <i386/i386/trap.h>
#include <i386/i386/seg.h>
+#include <i386/i386/gdt.h>
#include <i386/i386/ldt.h>
+#include <i386/i386/msr.h>
#include <i386/i386/i386asm.h>
#include <i386/i386/cpu_number.h>
#include <i386/i386/xen.h>
-#define pusha pushq %rax ; pushq %rcx ; pushq %rdx ; pushq %rbx ; subq $8,%rsp ; pushq %rbp ; pushq %rsi ; pushq %rdi ; pushq %r8 ; pushq %r9 ; pushq %r10 ; pushq %r11 ; pushq %r12 ; pushq %r13 ; pushq %r14 ; pushq %r15
-#define popa popq %r15 ; popq %r14 ; popq %r13 ; popq %r12 ; popq %r11 ; popq %r10 ; popq %r9 ; popq %r8 ; popq %rdi ; popq %rsi ; popq %rbp ; addq $8,%rsp ; popq %rbx ; popq %rdx ; popq %rcx ; popq %rax
+
+/*
+ * Helpers for the thread state saved in the PCB area during trap or IRQ handling
+ */
+#define pusha \
+ pushq %rax ;\
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rbx ;\
+ subq $8,%rsp ;\
+ pushq %rbp ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11 ;\
+ pushq %r12 ;\
+ pushq %r13 ;\
+ pushq %r14 ;\
+ pushq %r15
+
+#define popa \
+ popq %r15 ;\
+ popq %r14 ;\
+ popq %r13 ;\
+ popq %r12 ;\
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rbp ;\
+ addq $8,%rsp ;\
+ popq %rbx ;\
+ popq %rdx ;\
+ popq %rcx ;\
+ popq %rax
+
+#define PUSH_REGS_ISR \
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11
+
+#define PUSH_AREGS_ISR \
+ pushq %rax ;\
+ PUSH_REGS_ISR
+
+
+#define POP_REGS_ISR \
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rdx ;\
+ popq %rcx
+
+#define POP_AREGS_ISR \
+ POP_REGS_ISR ;\
+ popq %rax
+
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
+#ifdef USER32
+#define PUSH_SEGMENTS(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define PUSH_SEGMENTS_ISR(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS_ISR(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define SET_KERNEL_SEGMENTS(reg) \
+ mov %ss,reg /* switch to kernel segments */ ;\
+ mov reg,%ds /* (same as kernel stack segment) */ ;\
+ mov reg,%es ;\
+ mov reg,%fs ;\
+ mov $(PERCPU_DS),reg ;\
+ mov reg,%gs
+#else
+#define SET_KERNEL_SEGMENTS(reg)
+#endif
/*
* Fault recovery.
@@ -122,19 +256,20 @@ LEXT(retry_table_end) ;\
* Uses %eax, %ebx, %ecx.
*/
#define TIME_TRAP_UENTRY \
+ pushf /* Save flags */ ;\
cli /* block interrupts */ ;\
movl VA_ETC,%ebx /* get timer value */ ;\
- movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\
- movl %ebx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
subl %ecx,%ebx /* elapsed = new-old */ ;\
- movl CX(EXT(current_timer),%edx),%ecx /* get current timer */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
jns 0f /* if overflow, */ ;\
call timer_normalize /* normalize timer */ ;\
0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
/* switch to sys timer */;\
- movl %ecx,CX(EXT(current_timer),%edx) /* make it current */ ;\
- sti /* allow interrupts */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
/*
* Update time on system call entry.
@@ -144,12 +279,13 @@ LEXT(retry_table_end) ;\
* Same as TIME_TRAP_UENTRY, but preserves %eax.
*/
#define TIME_TRAP_SENTRY \
+ pushf /* Save flags */ ;\
cli /* block interrupts */ ;\
movl VA_ETC,%ebx /* get timer value */ ;\
- movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\
- movl %ebx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
subl %ecx,%ebx /* elapsed = new-old */ ;\
- movl CX(EXT(current_timer),%edx),%ecx /* get current timer */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
jns 0f /* if overflow, */ ;\
pushq %rax /* save %rax */ ;\
@@ -157,8 +293,8 @@ LEXT(retry_table_end) ;\
popq %rax /* restore %rax */ ;\
0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
/* switch to sys timer */;\
- movl %ecx,CX(EXT(current_timer),%edx) /* make it current */ ;\
- sti /* allow interrupts */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
/*
* update time on user trap exit.
@@ -169,16 +305,16 @@ LEXT(retry_table_end) ;\
#define TIME_TRAP_UEXIT \
cli /* block interrupts */ ;\
movl VA_ETC,%ebx /* get timer */ ;\
- movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\
- movl %ebx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
subl %ecx,%ebx /* elapsed = new-old */ ;\
- movl CX(EXT(current_timer),%edx),%ecx /* get current timer */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
jns 0f /* if overflow, */ ;\
call timer_normalize /* normalize timer */ ;\
0: addl $(TH_USER_TIMER-TH_SYSTEM_TIMER),%ecx ;\
/* switch to user timer */;\
- movl %ecx,CX(EXT(current_timer),%edx) /* make it current */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */
/*
* update time on interrupt entry.
@@ -189,14 +325,14 @@ LEXT(retry_table_end) ;\
*/
#define TIME_INT_ENTRY \
movl VA_ETC,%ecx /* get timer */ ;\
- movl CX(EXT(current_tstamp),%edx),%ebx /* get old time stamp */;\
- movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\
+ movl CX(EXT(current_tstamp),%rdx),%ebx /* get old time stamp */;\
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
subl %ebx,%ecx /* elapsed = new-old */ ;\
- movl CX(EXT(current_timer),%edx),%ebx /* get current timer */ ;\
+ movl CX(EXT(current_timer),%rdx),%ebx /* get current timer */ ;\
addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\
- leal CX(0,%edx),%ecx /* timer is 16 bytes */ ;\
- lea CX(EXT(kernel_timer),%edx),%ecx /* get interrupt timer*/;\
- movl %ecx,CX(EXT(current_timer),%edx) /* set timer */
+ leal CX(0,%rdx),%ecx /* timer is 16 bytes */ ;\
+ lea CX(EXT(kernel_timer),%rdx),%ecx /* get interrupt timer*/;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
/*
* update time on interrupt exit.
@@ -206,10 +342,10 @@ LEXT(retry_table_end) ;\
*/
#define TIME_INT_EXIT \
movl VA_ETC,%eax /* get timer */ ;\
- movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\
- movl %eax,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %eax,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
subl %ecx,%eax /* elapsed = new-old */ ;\
- movl CX(EXT(current_timer),%edx),%ecx /* get current timer */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
addl %eax,LOW_BITS(%ecx) /* add to low bits */ ;\
jns 0f /* if overflow, */ ;\
call timer_normalize /* normalize timer */ ;\
@@ -217,7 +353,7 @@ LEXT(retry_table_end) ;\
jz 0f /* if overflow, */ ;\
movl %ebx,%ecx /* get old timer */ ;\
call timer_normalize /* normalize timer */ ;\
-0: movl %ebx,CX(EXT(current_timer),%edx) /* set timer */
+0: movl %ebx,CX(EXT(current_timer),%rdx) /* set timer */
/*
@@ -246,16 +382,16 @@ timer_normalize:
ENTRY(timer_switch)
CPU_NUMBER(%edx) /* get this CPU */
movl VA_ETC,%ecx /* get timer */
- movl CX(EXT(current_tstamp),%edx),%eax /* get old time stamp */
- movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */
+ movl CX(EXT(current_tstamp),%rdx),%eax /* get old time stamp */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */
subl %ecx,%eax /* elapsed = new - old */
- movl CX(EXT(current_timer),%edx),%ecx /* get current timer */
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */
addl %eax,LOW_BITS(%ecx) /* add to low bits */
jns 0f /* if overflow, */
call timer_normalize /* normalize timer */
0:
movl S_ARG0,%ecx /* get new timer */
- movl %ecx,CX(EXT(current_timer),%edx) /* set timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
ret
/*
@@ -264,9 +400,9 @@ ENTRY(timer_switch)
ENTRY(start_timer)
CPU_NUMBER(%edx) /* get this CPU */
movl VA_ETC,%ecx /* get timer */
- movl %ecx,CX(EXT(current_tstamp),%edx) /* set initial time stamp */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set initial time stamp */
movl S_ARG0,%ecx /* get timer */
- movl %ecx,CX(EXT(current_timer),%edx) /* set initial timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set initial timer */
ret
#endif /* accurate timing */
@@ -276,34 +412,34 @@ ENTRY(start_timer)
/*
* Trap/interrupt entry points.
*
- * All traps must create the following save area on the kernel stack:
- *
- * gs
- * fs
- * es
- * ds
- * edi
- * esi
- * ebp
- * cr2 if page fault - otherwise unused
- * ebx
- * edx
- * ecx
- * eax
- * trap number
- * error code
- * eip
- * cs
- * eflags
- * user rsp - if from user
- * user ss - if from user
- * es - if from V86 thread
- * ds - if from V86 thread
- * fs - if from V86 thread
- * gs - if from V86 thread
+ * All traps must create the i386_saved_state struct on the stack on
+ * entry. Note that:
+ * - CR2 is only used if the trap is a page fault
+ * - user_rsp/user_ss are only used if entering from user space
+ * - v86_regs are used only from V86 threads
+ * (TODO check if V86 is still used with USER32)
*
+ * Depending on the CPL before entry, the stack might be switched or not;
+ * if entering from user space the CPU loads TSS->RSP0 into RSP,
+ * otherwise RSP is unchanged. After this, the CPU pushes
+ * SS/RSP/RFLAGS/CS/RIP and optionally an error code, and executes the handler.
*/
+/* Try to save/show some information when a double fault happens.
+ * We can't recover to a working state, so if we have a debugger, wait for it;
+ * otherwise reset. */
+ENTRY(t_dbl_fault)
+ INT_FIX
+ cli /* disable interrupts that might corrupt the state*/
+ pusha
+ movq %cr2,%rax
+ movq %rax,R_CR2-R_R15(%rsp) /* CR2 might contain the faulting address */
+ subq $48,%rsp // FIXME remove when segments are cleaned up
+ movq %rsp,%rdi /* pass the saved state */
+ call handle_double_fault
+ jmp cpu_shutdown /* reset */
+END(t_dbl_fault)
+
/*
* General protection or segment-not-present fault.
* Check for a GP/NP fault in the kernel_return
@@ -327,24 +463,26 @@ ENTRY(t_segnp)
/* indicate fault type */
trap_check_kernel_exit:
+#ifdef USER32
testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */
jnz EXT(alltraps) /* isn`t kernel trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testq $2,24(%rsp) /* is trap from kernel mode? */
jnz EXT(alltraps) /* if so: */
/* check for the kernel exit sequence */
cmpq $_kret_iret,16(%rsp) /* on IRET? */
je fault_iret
-#if 0
+#ifdef USER32
cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */
je fault_popl_ds
cmpq $_kret_popl_es,16(%rsp) /* popping ES? */
je fault_popl_es
-#endif
cmpq $_kret_popl_fs,16(%rsp) /* popping FS? */
je fault_popl_fs
cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */
je fault_popl_gs
+#endif
take_fault: /* if none of the above: */
jmp EXT(alltraps) /* treat as normal trap. */
@@ -373,6 +511,7 @@ fault_iret:
popq %rax /* restore eax */
jmp EXT(alltraps) /* take fault */
+#ifdef USER32
/*
* Fault restoring a segment register. The user's registers are still
* saved on the stack. The offending segment register has not been
@@ -400,11 +539,16 @@ fault_popl_gs:
jmp push_segregs /* (GS on top of stack) */
push_es:
- //pushq %es /* restore es, */
+ movq %es,%rcx
+ pushq %rcx /* restore es, */
push_fs:
pushq %fs /* restore fs, */
push_gs:
pushq %gs /* restore gs. */
+push_gsbase:
+ pushq $0
+ pushq $0
+#endif
push_segregs:
movq %rax,R_TRAPNO(%rsp) /* set trap number */
movq %rdx,R_ERR(%rsp) /* set error code */
@@ -418,18 +562,24 @@ push_segregs:
*/
ENTRY(t_debug)
INT_FIX
+#ifdef USER32
testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */
jnz 0f /* isn`t kernel trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testq $2,8(%rsp) /* is trap from kernel mode? */
jnz 0f /* if so: */
+#ifdef USER32
cmpq $syscall_entry,(%rsp) /* system call entry? */
jne 0f /* if so: */
/* flags are sitting where syscall */
/* wants them */
addq $32,%rsp /* remove eip/cs */
jmp syscall_entry_2 /* continue system call entry */
-
+#else
+ // TODO: implement the 64-bit case
+ ud2
+#endif
0: pushq $0 /* otherwise: */
pushq $(T_DEBUG) /* handle as normal */
jmp EXT(alltraps) /* debug fault */
@@ -462,27 +612,14 @@ ENTRY(t_page_fault)
ENTRY(alltraps)
pusha /* save the general registers */
trap_push_segs:
- movq %ds,%rax /* and the segment registers */
- pushq %rax
- movq %es,%rax /* and the segment registers */
- pushq %rax
- pushq %fs
- pushq %gs
-
- /* Note that we have to load the segment registers
- even if this is a trap from the kernel,
- because the kernel uses user segment registers for copyin/copyout.
- (XXX Would it be smarter just to use fs or gs for that?) */
- mov %ss,%ax /* switch to kernel data segment */
- mov %ax,%ds /* (same as kernel stack segment) */
- mov %ax,%es
- mov %ax,%fs
- mov %ax,%gs
-
+ PUSH_SEGMENTS(%rax) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rax) /* switch to kernel data segment */
trap_set_segs:
cld /* clear direction flag */
+#ifdef USER32
testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
jnz trap_from_user /* user mode trap if so */
+#endif
/* Note: handling KERNEL_RING value by hand */
testb $2,R_CS(%rsp) /* user mode trap? */
jz trap_from_kernel /* kernel trap if not */
@@ -491,18 +628,18 @@ trap_from_user:
CPU_NUMBER(%edx)
TIME_TRAP_UENTRY
- movq CX(EXT(kernel_stack),%edx),%rbx
+ movq CX(EXT(kernel_stack),%rdx),%rbx
xchgq %rbx,%rsp /* switch to kernel stack */
/* user regs pointer already set */
_take_trap:
movq %rbx,%rdi /* pass register save area to trap */
call EXT(user_trap) /* call user trap routine */
-
+#ifdef USER32
orq %rax,%rax /* emulated syscall? */
jz 1f /* no, just return */
movq R_EAX(%rbx),%rax /* yes, get syscall number */
jmp syscall_entry_3 /* and emulate it */
-
+#endif
1:
movq (%rsp),%rsp /* switch back to PCB stack */
@@ -513,10 +650,10 @@ _take_trap:
_return_from_trap:
CPU_NUMBER(%edx)
- cmpl $0,CX(EXT(need_ast),%edx)
+ cmpl $0,CX(EXT(need_ast),%rdx)
jz _return_to_user /* if we need an AST: */
- movq CX(EXT(kernel_stack),%edx),%rsp
+ movq CX(EXT(kernel_stack),%rdx),%rsp
/* switch to kernel stack */
call EXT(i386_astintr) /* take the AST */
popq %rsp /* switch back to PCB stack */
@@ -532,6 +669,7 @@ _return_to_user:
*/
_return_from_kernel:
+#ifdef USER32
_kret_popl_gs:
popq %gs /* restore segment registers */
_kret_popl_fs:
@@ -542,6 +680,7 @@ _kret_popl_es:
_kret_popl_ds:
popq %rax
movq %rax,%ds
+#endif
popa /* restore general registers */
addq $16,%rsp /* discard trap number and error code */
_kret_iret:
@@ -554,29 +693,32 @@ _kret_iret:
trap_from_kernel:
#if MACH_KDB || MACH_TTD
movq %rsp,%rbx /* save current stack */
-
movq %rsp,%rdx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%rdx
- cmpq EXT(int_stack_base),%rdx
+
+ CPU_NUMBER(%ecx)
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq CX(EXT(int_stack_base),%rcx),%rdx
je 1f /* OK if so */
- CPU_NUMBER(%edx) /* get CPU number */
- cmpq CX(EXT(kernel_stack),%edx),%rsp
+ movl %ecx,%edx
+ cmpq CX(EXT(kernel_stack),%rdx),%rsp
/* already on kernel stack? */
ja 0f
- cmpq CX(EXT(active_stacks),%edx),%rsp
+ cmpq MY(ACTIVE_STACK),%rsp
ja 1f /* switch if not */
0:
- movq CX(EXT(kernel_stack),%edx),%rsp
+ movq CX(EXT(kernel_stack),%rdx),%rsp
1:
pushq %rbx /* save old stack */
movq %rbx,%rdi /* pass as parameter */
call EXT(kernel_trap) /* to kernel trap routine */
+
popq %rsp /* return to old stack */
#else /* MACH_KDB || MACH_TTD */
movq %rsp,%rdi /* pass parameter */
call EXT(kernel_trap) /* to kernel trap routine */
+
#endif /* MACH_KDB || MACH_TTD */
jmp _return_from_kernel
@@ -590,7 +732,7 @@ trap_from_kernel:
ENTRY(thread_exception_return)
ENTRY(thread_bootstrap_return)
movq %rsp,%rcx /* get kernel stack */
- or $(KERNEL_STACK_SIZE-1),%ecx
+ or $(KERNEL_STACK_SIZE-1),%rcx
movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
jmp _return_from_trap
@@ -603,7 +745,7 @@ ENTRY(thread_bootstrap_return)
ENTRY(thread_syscall_return)
movq S_ARG0,%rax /* get return value */
movq %rsp,%rcx /* get kernel stack */
- or $(KERNEL_STACK_SIZE-1),%ecx
+ or $(KERNEL_STACK_SIZE-1),%rcx
movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
movq %rax,R_EAX(%rsp) /* save return value */
jmp _return_from_trap
@@ -618,6 +760,7 @@ ENTRY(call_continuation)
pushq $0 /* Dummy return address */
jmp *%rax /* goto continuation */
+/* IOAPIC has 24 interrupts, put spurious in the same array */
#define INTERRUPT(n) \
.data 2 ;\
@@ -633,6 +776,7 @@ ENTRY(call_continuation)
.data 2
DATA(int_entry_table)
.text
+/* Legacy APIC interrupts or PIC interrupts */
INTERRUPT(0)
INTERRUPT(1)
INTERRUPT(2)
@@ -649,44 +793,52 @@ INTERRUPT(12)
INTERRUPT(13)
INTERRUPT(14)
INTERRUPT(15)
+#ifdef APIC
+/* APIC PCI interrupts PIRQ A-H */
+INTERRUPT(16)
+INTERRUPT(17)
+INTERRUPT(18)
+INTERRUPT(19)
+INTERRUPT(20)
+INTERRUPT(21)
+INTERRUPT(22)
+INTERRUPT(23)
+#endif
+#if NCPUS > 1
+INTERRUPT(CALL_AST_CHECK)
+INTERRUPT(CALL_PMAP_UPDATE)
+#endif
+#ifdef APIC
+/* Spurious interrupt: set the IRQ number to the vector number */
+INTERRUPT(255)
+#endif
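Taken together, the INTERRUPT() invocations above populate int_entry_table with the following index ranges; the enum below is only an illustrative summary (assuming APIC and NCPUS > 1; CALL_AST_CHECK and CALL_PMAP_UPDATE are the IPI vectors defined elsewhere in the tree), not code from this patch.

/* Illustrative map of the n values passed to INTERRUPT() above. */
enum int_entry_map {
    LEGACY_IRQ_FIRST = 0,    /* PIC or legacy APIC IRQs 0..15 */
    LEGACY_IRQ_LAST  = 15,
    PCI_PIRQ_FIRST   = 16,   /* APIC-only PCI PIRQs A..H */
    PCI_PIRQ_LAST    = 23,
    /* then the CALL_AST_CHECK / CALL_PMAP_UPDATE IPIs when NCPUS > 1 */
    SPURIOUS_IRQ     = 255,  /* spurious vector kept in the same array */
};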
/* XXX handle NMI - at least print a warning like Linux does. */
/*
- * All interrupts enter here.
- * old %eax on stack; interrupt number in %eax.
+ * All interrupts enter here. The cpu might have loaded a new RSP,
+ * depending on the previous CPL, as in alltraps.
+ * Old %eax on stack, interrupt number in %eax; we need to fill the remaining
+ * fields of struct i386_interrupt_state, which might be in the pcb or in the
+ * interrupt stack.
*/
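A rough C rendering of the stack test performed a few instructions below (a sketch only, assuming INTSTACK_SIZE is a power of two and each per-CPU interrupt stack is aligned to it; only int_stack_base comes from this patch):

#include <stdbool.h>
#include <stdint.h>

/* Masking the stack pointer down to its INTSTACK_SIZE-aligned base and
 * comparing with this CPU's int_stack_base tells us whether we were
 * already running on the interrupt stack. */
static bool on_interrupt_stack(uintptr_t rsp, uintptr_t int_stack_base,
                               uintptr_t intstack_size)
{
    return (rsp & ~(intstack_size - 1)) == int_stack_base;
}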
ENTRY(all_intrs)
- pushq %rcx /* save registers */
- pushq %rdx
- pushq %rsi
- pushq %rdi
- pushq %r8
- pushq %r9
- pushq %r10
- pushq %r11
+ PUSH_REGS_ISR /* save registers */
cld /* clear direction flag */
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+
+ CPU_NUMBER_NO_GS(%ecx)
movq %rsp,%rdx /* on an interrupt stack? */
- and $(~(KERNEL_STACK_SIZE-1)),%rdx
- cmpq %ss:EXT(int_stack_base),%rdx
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq %ss:CX(EXT(int_stack_base),%rcx),%rdx
je int_from_intstack /* if not: */
- movq %ds,%rdx /* save segment registers */
- pushq %rdx
- movq %es,%rdx
- pushq %rdx
- pushq %fs
- pushq %gs
- mov %ss,%dx /* switch to kernel segments */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
CPU_NUMBER(%edx)
- movq CX(EXT(int_stack_top),%edx),%rcx
+ movq CX(EXT(int_stack_top),%rdx),%rcx
xchgq %rcx,%rsp /* switch to interrupt stack */
@@ -699,12 +851,19 @@ ENTRY(all_intrs)
TIME_INT_ENTRY /* do timing */
#endif
- call EXT(interrupt) /* call generic interrupt routine */
+#ifdef MACH_LDEBUG
+ incl CX(EXT(in_interrupt),%rdx)
+#endif
- .globl EXT(return_to_iret)
-LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */
+ call EXT(interrupt) /* call generic interrupt routine */
+ .globl EXT(return_to_iret) /* ( label for kdb_kintr and hardclock */
+LEXT(return_to_iret) /* to find the return from calling interrupt) */
CPU_NUMBER(%edx)
+#ifdef MACH_LDEBUG
+ decl CX(EXT(in_interrupt),%rdx)
+#endif
+
#if STAT_TIME
#else
TIME_INT_EXIT /* do timing */
@@ -713,47 +872,31 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */
popq %rsp /* switch back to old stack */
+#ifdef USER32
testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */
jnz 0f /* or */
+#endif
/* Note: handling KERNEL_RING value by hand */
testb $2,I_CS(%rsp) /* user mode, */
jz 1f /* check for ASTs */
0:
- cmpq $0,CX(EXT(need_ast),%edx)
+ cmpq $0,CX(EXT(need_ast),%rdx)
jnz ast_from_interrupt /* take it if so */
1:
- pop %gs /* restore segment regs */
- pop %fs
- pop %rdx
- mov %rdx,%es
- pop %rdx
- mov %rdx,%ds
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdi
- pop %rsi
- pop %rdx
- pop %rcx
- pop %rax
+ POP_SEGMENTS_ISR(%rdx) /* restore segment regs */
+ POP_AREGS_ISR /* restore registers */
iretq /* return to caller */
int_from_intstack:
- cmpq EXT(int_stack_base),%rsp /* seemingly looping? */
+ CPU_NUMBER_NO_GS(%edx)
+ cmpq CX(EXT(int_stack_base),%rdx),%rsp /* seemingly looping? */
jb stack_overflowed /* if not: */
call EXT(interrupt) /* call interrupt routine */
_return_to_iret_i: /* ( label for kdb_kintr) */
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdi
- pop %rsi
- pop %rdx /* must have been on kernel segs */
- pop %rcx
- pop %rax /* no ASTs */
+ POP_SEGMENTS_ISR(%rdx)
+ POP_AREGS_ISR /* restore registers */
+ /* no ASTs */
iretq
@@ -777,40 +920,17 @@ stack_overflowed:
* ss
*/
ast_from_interrupt:
- pop %gs /* restore all registers ... */
- pop %fs
- pop %rdx
- mov %rdx,%es
- pop %rdx
- mov %rdx,%ds
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rdi
- popq %rsi
- popq %rdx
- popq %rcx
- popq %rax
+ POP_SEGMENTS_ISR(%rdx) /* restore all registers ... */
+ POP_AREGS_ISR
pushq $0 /* zero code */
pushq $0 /* zero trap number */
pusha /* save general registers */
- mov %ds,%rdx /* save segment registers */
- push %rdx
- mov %es,%rdx
- push %rdx
- push %fs
- push %gs
- mov %ss,%dx /* switch to kernel segments */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
-
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
CPU_NUMBER(%edx)
TIME_TRAP_UENTRY
- movq CX(EXT(kernel_stack),%edx),%rsp
+ movq CX(EXT(kernel_stack),%rdx),%rsp
/* switch to kernel stack */
call EXT(i386_astintr) /* take the AST */
popq %rsp /* back to PCB stack */
@@ -824,6 +944,8 @@ ast_from_interrupt:
*
* frame-> saved %rbp
* return address in interrupt handler
+ * saved SPL
+ * saved IRQ
* return address == return_to_iret_i
* saved %r11
* saved %r10
@@ -863,7 +985,7 @@ ast_from_interrupt:
* Call kdb, passing it that register save area.
*/
-#define RET_OFFSET 16
+#define RET_OFFSET 32
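The new value follows from the frame layout listed above with 8-byte slots; as an illustrative sketch (not code from the tree), the two newly saved words push the return_to_iret_i slot from offset 16 out to 32:

/* Offsets from the saved frame pointer, assuming 8-byte stack slots. */
enum {
    FRAME_SAVED_RBP   = 0,
    FRAME_HANDLER_RET = 8,    /* return address in interrupt handler */
    FRAME_SAVED_SPL   = 16,   /* newly saved */
    FRAME_SAVED_IRQ   = 24,   /* newly saved */
    FRAME_IRET_RET    = 32,   /* return_to_iret_i, i.e. RET_OFFSET */
};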
ENTRY(kdb_kintr)
@@ -877,7 +999,9 @@ ENTRY(kdb_kintr)
cmpq RET_OFFSET(%rax),%rdx /* interrupt handler (2)? */
je 2f /* if not: */
movq (%rax),%rax /* try next frame */
- jmp 0b
+ testq %rax,%rax
+ jnz 0b
+ ud2 /* oops, didn't find frame, fix me :/ */
1: movq $kdb_from_iret,RET_OFFSET(%rax)
ret /* returns to kernel/user stack */
@@ -920,22 +1044,12 @@ kdb_from_iret_i: /* on interrupt stack */
pushq $0 /* zero error code */
pushq $0 /* zero trap number */
pusha /* save general registers */
- mov %ds,%rdx /* save segment registers */
- push %rdx
- mov %es,%rdx
- push %rdx
- push %fs
- push %gs
+ PUSH_SEGMENTS(%rdx) /* save segment registers */
movq %rsp,%rdx /* pass regs, */
movq $0,%rsi /* code, */
movq $-1,%rdi /* type to kdb */
call EXT(kdb_trap)
- pop %gs /* restore segment registers */
- pop %fs
- pop %rdx
- mov %rdx,%es
- pop %rdx
- mov %rdx,%ds
+ POP_SEGMENTS(%rdx) /* restore segment registers */
popa /* restore general registers */
addq $16,%rsp
@@ -1010,22 +1124,13 @@ ttd_from_iret_i: /* on interrupt stack */
pushq $0 /* zero error code */
pushq $0 /* zero trap number */
pusha /* save general registers */
- mov %ds,%rdx /* save segment registers */
- push %rdx
- mov %es,%rdx
- push %rdx
- push %fs
- push %gs
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+	ud2	/* TODO: test this path, then remove the ud2 */
movq %rsp,%rdx /* pass regs, */
movq $0,%rsi /* code, */
movq $-1,%rdi /* type to kdb */
call _kttd_trap
- pop %gs /* restore segment registers */
- pop %fs
- pop %rdx
- mov %rdx,%es
- pop %rdx
- mov %rdx,%ds
+ POP_SEGMENTS_ISR(%rdx) /* restore segment registers */
popa /* restore general registers */
addq $16,%rsp
@@ -1036,6 +1141,7 @@ ud2
#endif /* MACH_TTD */
+#ifdef USER32
/*
* System call enters through a call gate. Flags are not saved -
* we must shuffle stack to look like trap save area.
@@ -1056,22 +1162,9 @@ syscall_entry_2:
pushq %rax /* save system call number */
pushq $0 /* clear trap number slot */
-// TODO: test it before dropping ud2
- ud2
-
pusha /* save the general registers */
- movq %ds,%rdx /* and the segment registers */
- pushq %rdx
- movq %es,%rdx
- pushq %rdx
- pushq %fs
- pushq %gs
-
- mov %ss,%dx /* switch to kernel data segment */
- mov %dx,%ds
- mov %dx,%es
- mov %dx,%fs
- mov %dx,%gs
+ PUSH_SEGMENTS(%rdx) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel data segment */
/*
* Shuffle eflags,eip,cs into proper places
@@ -1084,10 +1177,10 @@ syscall_entry_2:
movq %rdx,R_CS(%rsp) /* fix cs */
movq %rbx,R_EFLAGS(%rsp) /* fix eflags */
- CPU_NUMBER(%edx)
+ CPU_NUMBER_NO_STACK(%edx)
TIME_TRAP_SENTRY
- movq CX(EXT(kernel_stack),%edx),%rbx
+ movq CX(EXT(kernel_stack),%rdx),%rbx
/* get current kernel stack */
xchgq %rbx,%rsp /* switch stacks - %ebx points to */
/* user registers. */
@@ -1097,7 +1190,7 @@ syscall_entry_2:
* Check for MACH or emulated system call
*/
syscall_entry_3:
- movq CX(EXT(active_threads),%edx),%rdx
+ movq MY(ACTIVE_THREAD),%rdx
/* point to current thread */
movq TH_TASK(%rdx),%rdx /* point to task */
movq TASK_EMUL(%rdx),%rdx /* get emulation vector */
@@ -1136,23 +1229,27 @@ syscall_native:
#endif
shll $5,%eax /* manual indexing of mach_trap_t */
xorq %r10,%r10
- movl EXT(mach_trap_table)(%eax),%r10d
+ mov EXT(mach_trap_table)(%rax),%r10
/* get number of arguments */
andq %r10,%r10
jz mach_call_call /* skip argument copy if none */
- movq R_UESP(%rbx),%rbx /* get user stack pointer */
- addq $4,%rbx /* Skip user return address */
-
movq $USER_DS,%rdx /* use user data segment for accesses */
mov %dx,%fs
movq %rsp,%r11 /* save kernel ESP for error recovery */
+ movq R_UESP(%rbx),%rbp /* get user stack pointer */
+ addq $4,%rbp /* Skip user return address */
+
+ movq $VM_MAX_ADDRESS, %rcx
+ cmpq %rcx,%rbp /* Check segment limit by hand */
+ jae mach_call_addr_push
+
#define PARAM(reg,ereg) \
- RECOVER(mach_call_addr_push) \
xorq %reg,%reg ;\
- movl %fs:(%rbx),%ereg /* 1st parameter */ ;\
- addq $4,%rbx ;\
+ RECOVER(mach_call_addr_push) \
+ movl %fs:(%rbp),%ereg /* 1st parameter */ ;\
+ addq $4,%rbp ;\
dec %r10 ;\
jz mach_call_call
@@ -1163,12 +1260,12 @@ syscall_native:
PARAM(r8,r8d) /* 5th parameter */
PARAM(r9,r9d) /* 6th parameter */
- lea (%rbx,%r10,4),%rbx /* point past last argument */
+ lea (%rbp,%r10,4),%rbp /* point past last argument */
xorq %r12,%r12
-0: subq $4,%rbx
+0: subq $4,%rbp
RECOVER(mach_call_addr_push)
- movl %fs:(%rbx),%r12d
+ movl %fs:(%rbp),%r12d
pushq %r12 /* push argument on stack */
dec %r10
jnz 0b /* loop for all arguments */
@@ -1183,9 +1280,7 @@ mach_call_call:
/* will return with syscallofs still (or again) in eax */
0:
#endif /* DEBUG */
-
- call *EXT(mach_trap_table)+8(%eax)
- /* call procedure */
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
movq %rsp,%rcx /* get kernel stack */
or $(KERNEL_STACK_SIZE-1),%rcx
movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
@@ -1194,12 +1289,12 @@ mach_call_call:
/*
* Address out of range. Change to page fault.
- * %esi holds failing address.
+ * %rbp holds failing address.
*/
mach_call_addr_push:
movq %r11,%rsp /* clean parameters from stack */
mach_call_addr:
- movq %rsi,R_CR2(%rbx) /* set fault address */
+ movq %rbp,R_CR2(%rbx) /* set fault address */
movq $(T_PAGE_FAULT),R_TRAPNO(%rbx)
/* set page-fault trap */
movq $(T_PF_USER),R_ERR(%rbx)
@@ -1232,6 +1327,9 @@ syscall_emul:
/* XXX what about write-protected pages? */
movq R_UESP(%rbx),%rdi /* get user stack pointer */
subq $16,%rdi /* push space for new arguments */
+ movq $VM_MAX_ADDRESS, %rax
+ cmpq %rax,%rdi /* Check segment limit by hand */
+ jae syscall_addr
movq R_EFLAGS(%rbx),%rax /* move flags */
RECOVER(syscall_addr)
movl %eax,%fs:0(%rdi) /* to user stack */
@@ -1255,7 +1353,179 @@ syscall_addr:
movq $(T_PF_USER),R_ERR(%rbx)
/* set error code - read user space */
jmp _take_trap /* treat as a trap */
+END(syscall)
+#else /* USER32 */
+
+/* Entry point for 64-bit syscalls.
+ * On entry we're still on the user stack, so we'd better not use it. Instead we
+ * save the thread state immediately in thread->pcb->iss, then try to invoke
+ * the syscall.
+ * Note: emulated syscalls no longer appear to be used in GNU/Hurd, so they
+ * are not handled here.
+ * TODO:
+ - for now we assume the return address is canonical, but apparently there
+ can be cases where it's not (see how Linux handles this). Does it apply
+ here?
+ - check that the case where a task is suspended, and later returns via
+ iretq from return_from_trap, works fine in all combinations
+ */
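For context, a hedged sketch of how a 64-bit user program would reach this entry point (not the actual glibc wrapper): the trap number goes in %eax (Mach trap numbers are negative), up to six arguments in %rdi, %rsi, %rdx, %r10, %r8 and %r9, and %rcx/%r11 are clobbered because the syscall instruction uses them for the return RIP and RFLAGS.

#include <stdint.h>

/* Two-argument example; trap_no would be a negative Mach trap number,
 * the arguments whatever that trap expects.  Illustrative only. */
static inline long mach_trap2(long trap_no, long arg0, long arg1)
{
    long ret;
    __asm__ volatile ("syscall"
                      : "=a" (ret)
                      : "0" (trap_no), "D" (arg0), "S" (arg1)
                      : "rcx", "r11", "memory");
    return ret;
}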
+ENTRY(syscall64)
+ /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
+ * eflags in RAX to allow using r11 as temporary register
+	 * eflags in RAX to allow using r11 as a temporary register
+ shlq $32,%r11
+ shlq $32,%rax /* make sure bits 32:63 of %rax are zero */
+ shrq $32,%rax
+ or %r11,%rax
+
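In C terms, the shifts and the OR above pack the 32 meaningful RFLAGS bits next to the 32-bit trap number so that %r11 becomes free; a small sketch with illustrative names:

#include <stdint.h>

static uint64_t pack(uint32_t trap_num, uint64_t rflags)
{
    return (rflags << 32) | trap_num;     /* RFLAGS[32:63] are reserved */
}

static void unpack(uint64_t packed, uint32_t *trap_num, uint64_t *rflags)
{
    *trap_num = (uint32_t)packed;
    *rflags   = packed >> 32;
}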
+ /* Save thread state in pcb->iss, as on exception entry.
+ * Since this is triggered synchronously from userspace, we could
+	 * save only the callee-saved registers required by the C ABI,
+	 * plus RIP and EFLAGS for sysret.
+ */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ mov %rsp,R_UESP(%r11) /* callee-preserved register */
+ mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */
+ mov %rbx,R_EBX(%r11) /* callee-preserved register */
+ mov %rax,%rbx /* Now we can unpack eflags again */
+ shr $32,%rbx
+ mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */
+ mov %rbp,R_EBP(%r11) /* callee-preserved register */
+ mov %r12,R_R12(%r11) /* callee-preserved register */
+ mov %r13,R_R13(%r11) /* callee-preserved register */
+ mov %r14,R_R14(%r11) /* callee-preserved register */
+ mov %r15,R_R15(%r11) /* callee-preserved register */
+
+ /* Save syscall number and args for SYSCALL_EXAMINE/MSG_EXAMINE in glibc.
+	 * Note: the syscall number is only 32 bits, in EAX, so we sign-extend it
+	 * into RAX, which also discards the EFLAGS bits packed in the upper half.
+ */
+ cdqe /* sign-extend EAX in RAX */
+ mov %rax,R_EAX(%r11) /* syscall number */
+ mov %rdi,R_EDI(%r11) /* syscall arg0 */
+ mov %rsi,R_ESI(%r11) /* syscall arg1 */
+ mov %rdx,R_EDX(%r11) /* syscall arg2 */
+ mov %r10,R_R10(%r11) /* syscall arg3 */
+ mov %r8,R_R8(%r11) /* syscall arg4 */
+ mov %r9,R_R9(%r11) /* syscall arg5 */
+
+ mov %r11,%rbx /* prepare for error handling */
+ mov %r10,%rcx /* fix arg3 location according to C ABI */
+
+ /* switch to kernel stack, then we can enable interrupts */
+ CPU_NUMBER_NO_STACK(%r11d)
+ movq CX(EXT(kernel_stack),%r11),%rsp
+ sti
+
+ /* Now we have saved state and args 1-6 are in place.
+ * Before invoking the syscall we do some bound checking and,
+	 * Before invoking the syscall we do some bounds checking and,
+	 * if we have more than 6 arguments, we need to copy the
+ * accessing the user stack.
+ */
+ negl %eax /* get system call number */
+ jl _syscall64_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ jg _syscall64_range /* error if out of range */
+ shll $5,%eax /* manual indexing of mach_trap_t */
+
+ /* check if we need to place some arguments on the stack */
+_syscall64_args_stack:
+ mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */
+ subq $6,%r10 /* the first 6 args are already in place */
+ jle _syscall64_call /* skip argument copy if num args <= 6 */
+
+ movq R_UESP(%rbx),%r11 /* get user stack pointer */
+ addq $8,%r11 /* Skip user return address */
+
+ lea (%r11,%r10,8),%r11 /* point past last argument */
+
+ movq $VM_MAX_ADDRESS, %r12
+ cmpq %r12,%r11 /* Check segment limit by hand */
+ jae _syscall64_addr_push
+
+0: subq $8,%r11
+ RECOVER(_syscall64_addr_push)
+ mov (%r11),%r12
+ pushq %r12 /* push argument on stack */
+ dec %r10
+ jnz 0b /* loop for all remaining arguments */
+
+_syscall64_call:
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+
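To make the indexing above explicit, here is a hedged C rendering of the dispatch (struct and field names are illustrative, not the tree's mach_trap_t declaration): each table entry is 32 bytes, with the argument count at offset 0 and the handler at offset 8, matching the shll $5 scaling and the +8 in the indirect call.

struct trap_entry {                 /* illustrative; 32 bytes per entry */
    long arg_count;                 /* offset 0: number of arguments */
    long (*function)(long, long, long, long, long, long);  /* offset 8 */
    char pad[16];                   /* padding up to the 32-byte stride */
};

extern struct trap_entry mach_trap_table[];
extern int mach_trap_count;

static long dispatch(int trap_no, const long a[6])
{
    int index = -trap_no;                      /* Mach trap numbers are negative */
    if (index < 0 || index >= mach_trap_count)
        return -1;                             /* would take _syscall64_range */
    /* arguments beyond the sixth were already copied onto the kernel stack */
    return mach_trap_table[index].function(a[0], a[1], a[2], a[3], a[4], a[5]);
}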
+_syscall64_check_for_ast:
+ /* Check for ast. */
+ CPU_NUMBER_NO_GS(%r11d)
+ cmpl $0,CX(EXT(need_ast),%r11)
+ jz _syscall64_restore_state
+
+ /* Save the syscall return value, both on our stack, for the case
+ * i386_astintr returns normally, and in the PCB stack, in case it
+ * instead calls thread_block(thread_exception_return).
+ */
+ pushq %rax /* save the return value on our stack */
+ pushq $0 /* dummy value to keep the stack aligned */
+
+ /* Find the PCB stack. */
+ movq %rsp,%rcx
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rcx
+
+ movq %rax,R_EAX(%rcx) /* save the return value in the PCB stack */
+ call EXT(i386_astintr)
+ popq %rax
+ popq %rax /* restore the return value */
+ jmp _syscall64_check_for_ast /* check again */
+
+_syscall64_restore_state:
+ /* Restore thread state and return to user using sysret. */
+ cli /* block interrupts when using the user stack in kernel space */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ /* Restore syscall args. Note: we can't restore the syscall number in
+	 * RAX because it needs to hold the return value. */
+ mov R_EDI(%r11),%rdi /* syscall arg0 */
+ mov R_ESI(%r11),%rsi /* syscall arg1 */
+ mov R_EDX(%r11),%rdx /* syscall arg2 */
+ mov R_R10(%r11),%r10 /* syscall arg3 */
+ mov R_R8(%r11),%r8 /* syscall arg4 */
+ mov R_R9(%r11),%r9 /* syscall arg5 */
+
+ mov R_UESP(%r11),%rsp /* callee-preserved register,
+ * also switch back to user stack */
+ mov R_EIP(%r11),%rcx /* sysret convention */
+ mov R_EBX(%r11),%rbx /* callee-preserved register */
+ mov R_EBP(%r11),%rbp /* callee-preserved register */
+ mov R_R12(%r11),%r12 /* callee-preserved register */
+ mov R_R13(%r11),%r13 /* callee-preserved register */
+ mov R_R14(%r11),%r14 /* callee-preserved register */
+ mov R_R15(%r11),%r15 /* callee-preserved register */
+ mov R_EFLAGS(%r11),%r11 /* sysret convention */
+
+ sysretq /* fast return to user-space, the thread didn't block */
+
+/* Error handling fragments; from here we jump directly to the trap handler */
+_syscall64_addr_push:
+ movq %r11,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+_syscall64_range:
+ movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+ /* set invalid-operation trap */
+ movq $0,R_ERR(%rbx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
+
+END(syscall64)
+#endif /* USER32 */
.data
DATA(cpu_features)
@@ -1265,8 +1535,6 @@ DATA(cpu_features_ecx)
.long 0
.text
-END(syscall)
-
/* Discover what kind of cpu we have; return the family number
(3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
ENTRY(discover_x86_cpu_type)
@@ -1288,6 +1556,9 @@ ENTRY(discover_x86_cpu_type)
ENTRY(copyin)
xchgq %rsi,%rdi /* Get user source and kernel destination */
+ movq $VM_MAX_ADDRESS, %rcx
+ cmpq %rcx,%rsi /* Check segment limit by hand */
+ jae copyin_fail
copyin_remainder:
/*cld*/ /* count up: default mode in all GCC code */
@@ -1310,51 +1581,14 @@ copyin_fail:
movq $1,%rax /* return 1 for failure */
jmp copyin_ret /* pop frame and return */
-/*
- * Copy from user address space - version for copying messages.
- * arg0: user address
- * arg1: kernel address
- * arg2: byte count
- */
-ENTRY(copyinmsg)
- xchgq %rsi,%rdi /* Get user source and kernel destination */
-
-/* 32 on 64 conversion */
- subq $32,%rdx
- js bogus
-
- /* Copy msgh_bits */
- RECOVER(copyin_fail)
- movsl
-
- /* Copy msgh_size */
- RECOVER(copyin_fail)
- lodsl
- addl $8,%eax
- stosl
-
- xorq %rax,%rax
- /* Copy msgh_remote_port */
- RECOVER(copyin_fail)
- lodsl
- stosq
-
- /* Copy msgh_local_port */
- RECOVER(copyin_fail)
- lodsl
- stosq
-
- /* Copy msgh_seqno and msgh_id */
- RECOVER(copyin_fail)
- movsq
-
- jmp copyin_remainder
-
bogus:
ud2
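The same bound check now guards every user-access entry point in this file (copyin above, copyout and inst_fetch below); in C terms it amounts to the following sketch, where the concrete limit is whatever VM_MAX_ADDRESS the tree defines (the value below is a placeholder):

#include <stdint.h>

#define VM_MAX_ADDRESS_PLACEHOLDER ((uintptr_t)1 << 47)   /* placeholder value */

/* Reject any user pointer at or above the limit before touching it,
 * since 64-bit mode has no segment limits to catch this for us. */
static int user_address_ok(uintptr_t uaddr)
{
    return uaddr < VM_MAX_ADDRESS_PLACEHOLDER;   /* otherwise: jae copyin_fail */
}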
ENTRY(copyout)
xchgq %rsi,%rdi /* Get user source and kernel destination */
+ movq $VM_MAX_ADDRESS, %rcx
+ cmpq %rcx,%rdi /* Check segment limit by hand */
+ jae copyin_fail
copyout_remainder:
movq %rdx,%rax /* use count */
@@ -1379,45 +1613,6 @@ copyout_fail:
jmp copyout_ret /* pop frame and return */
/*
- * Copy to user address space.
- * arg0: kernel address
- * arg1: user address
- * arg2: byte count
- */
-ENTRY(copyoutmsg)
- xchgq %rsi,%rdi /* Get user source and kernel destination */
-
-/* 32 on 64 conversion */
- subq $32,%rdx
- js bogus
-
- /* Copy msgh_bits */
- RECOVER(copyout_fail)
- movsl
-
- /* Copy msgh_size */
- lodsl
- subl $8,%eax
- RECOVER(copyout_fail)
- stosl
-
- /* Copy msgh_remote_port */
- lodsq
- RECOVER(copyout_fail)
- stosl
-
- /* Copy msgh_local_port */
- lodsq
- RECOVER(copyout_fail)
- stosl
-
- /* Copy msgh_seqno and msgh_id */
- RECOVER(copyout_fail)
- movsq
-
- jmp copyin_remainder
-
-/*
* int inst_fetch(int eip, int cs);
*
* Fetch instruction byte. Return -1 if invalid address.
@@ -1426,6 +1621,9 @@ ENTRY(inst_fetch)
movq S_ARG1, %rax /* get segment */
movw %ax,%fs /* into FS */
movq S_ARG0, %rax /* get offset */
+ movq $VM_MAX_ADDRESS, %rcx
+ cmpq %rcx,%rax /* Check segment limit by hand */
+ jae _inst_fetch_fault
RETRY(EXT(inst_fetch)) /* re-load FS on retry */
RECOVER(_inst_fetch_fault)
movzbq %fs:(%rax),%rax /* load instruction byte */
diff --git a/x86_64/spl.S b/x86_64/spl.S
index 0c2c50cb..80c65c1e 100644
--- a/x86_64/spl.S
+++ b/x86_64/spl.S
@@ -21,6 +21,7 @@
#include <i386/i386/ipl.h>
#include <i386/i386/i386asm.h>
#include <i386/i386/xen.h>
+#include <i386/cpu_number.h>
#if NCPUS > 1
#define mb lock; addl $0,(%esp)
@@ -46,7 +47,8 @@ lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \
ENTRY(spl0)
mb;
- movl EXT(curr_ipl),%eax /* save current ipl */
+ CPU_NUMBER(%edx)
+ movl CX(EXT(curr_ipl),%rdx),%eax /* save current ipl */
pushq %rax
cli /* disable interrupts */
#ifdef LINUX_DEV
@@ -74,9 +76,10 @@ ENTRY(spl0)
#endif
cli /* disable interrupts */
1:
- cmpl $(SPL0),EXT(curr_ipl) /* are we at spl0? */
- je 1f /* yes, all done */
- movl $(SPL0),EXT(curr_ipl) /* set ipl */
+ CPU_NUMBER(%edx)
+ cmpl $(SPL0),CX(EXT(curr_ipl),%rdx) /* are we at spl0? */
+ je 1f /* yes, all done */
+ movl $(SPL0),CX(EXT(curr_ipl),%rdx) /* set ipl */
#ifdef MACH_XEN
movl EXT(int_mask)+SPL0*4,%eax
/* get xen mask */
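curr_ipl is now a per-CPU array (declared as spl_t curr_ipl[NCPUS] in the xen/evt.c hunk further down), and the CX() addressing above is the assembly counterpart of indexing it by CPU number; a minimal sketch, assuming spl_t, NCPUS and cpu_number() as declared elsewhere in the tree:

extern spl_t curr_ipl[NCPUS];     /* one interrupt priority level per CPU */

static spl_t current_ipl(void)
{
    return curr_ipl[cpu_number()];   /* what CPU_NUMBER(%edx) computes above */
}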
@@ -119,16 +122,17 @@ ENTRY(spl7)
mb;
/* just clear IF */
cli
+ CPU_NUMBER(%edx)
movl $SPL7,%eax
- xchgl EXT(curr_ipl),%eax
+ xchgl CX(EXT(curr_ipl),%rdx),%eax
ret
ENTRY(splx)
movq S_ARG0,%rdx /* get ipl */
-
+ CPU_NUMBER(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
	/* First make sure that if we're exiting from ipl7, IF is still cleared */
- cmpl $SPL7,EXT(curr_ipl) /* from ipl7? */
+ cmpl $SPL7,CX(EXT(curr_ipl),%rax) /* from ipl7? */
jne 0f
pushfq
popq %rax
@@ -140,7 +144,8 @@ ENTRY(splx)
#endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */
testl %edx,%edx /* spl0? */
jz EXT(spl0) /* yes, handle specially */
- cmpl EXT(curr_ipl),%edx /* same ipl as current? */
+ CPU_NUMBER(%eax)
+ cmpl CX(EXT(curr_ipl),%rax),%edx /* same ipl as current? */
jne spl /* no */
cmpl $SPL7,%edx /* spl7? */
je 1f /* to ipl7, don't enable interrupts */
@@ -188,11 +193,13 @@ splx_cli:
1:
xorl %edx,%edx /* edx = ipl 0 */
2:
- cmpl EXT(curr_ipl),%edx /* same ipl as current? */
- je 1f /* yes, all done */
- movl %edx,EXT(curr_ipl) /* set ipl */
+ CPU_NUMBER(%eax)
+ cmpl CX(EXT(curr_ipl),%rax),%edx /* same ipl as current? */
+ je 1f /* yes, all done */
+ movl %edx,CX(EXT(curr_ipl),%rax) /* set ipl */
#ifdef MACH_XEN
- movl EXT(int_mask)(,%edx,4),%eax
+ movl EXT(int_mask),%eax
+ movl (%eax,%edx,4),%eax
/* get int mask */
XEN_SETMASK() /* program xen evts with new mask */
#endif
@@ -206,9 +213,10 @@ splx_cli:
.align TEXT_ALIGN
.globl spl
spl:
+ CPU_NUMBER(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
	/* First make sure that if we're exiting from ipl7, IF is still cleared */
- cmpl $SPL7,EXT(curr_ipl) /* from ipl7? */
+ cmpl $SPL7,CX(EXT(curr_ipl),%rax) /* from ipl7? */
jne 0f
pushfq
popq %rax
@@ -221,11 +229,13 @@ spl:
cmpl $SPL7,%edx /* spl7? */
je EXT(spl7) /* yes, handle specially */
#ifdef MACH_XEN
- movl EXT(int_mask)(,%edx,4),%eax
+ movl EXT(int_mask),%eax
+ movl (%eax,%edx,4),%eax
/* get int mask */
#endif
cli /* disable interrupts */
- xchgl EXT(curr_ipl),%edx /* set ipl */
+ CPU_NUMBER(%eax)
+ xchgl CX(EXT(curr_ipl),%rax),%edx /* set ipl */
#ifdef MACH_XEN
XEN_SETMASK() /* program PICs with new mask */
#endif
diff --git a/xen/block.c b/xen/block.c
index 84fe4449..7e9db26f 100644
--- a/xen/block.c
+++ b/xen/block.c
@@ -349,7 +349,7 @@ device_close(void *devp)
static io_return_t
device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
- dev_mode_t mode, char *name, device_t *devp /* out */)
+ dev_mode_t mode, const char *name, device_t *devp /* out */)
{
int i;
ipc_port_t port, notify;
@@ -666,7 +666,7 @@ device_write(void *d, ipc_port_t reply_port,
hyp_grant_takeback(gref[j]);
if (err) {
- printf("error writing %u bytes at sector %d\n", count, bn);
+ printf("error writing %u bytes at sector %ld\n", count, bn);
break;
}
}
diff --git a/xen/console.c b/xen/console.c
index 4907903e..9ceb6ddf 100644
--- a/xen/console.c
+++ b/xen/console.c
@@ -29,27 +29,21 @@
/* Hypervisor part */
-decl_simple_lock_data(static, outlock);
-decl_simple_lock_data(static, inlock);
+def_simple_lock_irq_data(static, outlock);
+def_simple_lock_irq_data(static, inlock);
static struct xencons_interface *console;
static int kd_pollc;
int kb_mode; /* XXX: actually don't care. */
-#undef hyp_console_write
-void hyp_console_write(const char *str, int len)
-{
- hyp_console_io (CONSOLEIO_write, len, kvtolin(str));
-}
-
-int hypputc(int c)
+static int hypputc(int c)
{
if (!console) {
char d = c;
hyp_console_io(CONSOLEIO_write, 1, kvtolin(&d));
} else {
- spl_t spl = splhigh();
+ spl_t spl;
static int complain;
- simple_lock(&outlock);
+ spl = simple_lock_irq(&outlock);
while (hyp_ring_smash(console->out, console->out_prod, console->out_cons)) {
if (!complain) {
complain = 1;
@@ -62,8 +56,7 @@ int hypputc(int c)
wmb();
console->out_prod++;
hyp_event_channel_send(boot_info.console_evtchn);
- simple_unlock(&outlock);
- splx(spl);
+ simple_unlock_irq(spl, &outlock);
}
return 0;
}
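The locking pattern change applied throughout this file, in rough C terms (helper names as introduced by this diff; the critical-section body is a placeholder):

/* Old discipline: raise the interrupt level, then take the lock, and
 * undo both separately on the way out. */
static void putc_locked_old(void)
{
    spl_t spl = splhigh();
    simple_lock(&outlock);
    /* ... push the character onto the shared console ring ... */
    simple_unlock(&outlock);
    splx(spl);
}

/* New discipline: simple_lock_irq() raises the level and takes the lock
 * in one step, returning the previous level for simple_unlock_irq(). */
static void putc_locked_new(void)
{
    spl_t spl = simple_lock_irq(&outlock);
    /* ... push the character onto the shared console ring ... */
    simple_unlock_irq(spl, &outlock);
}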
@@ -111,7 +104,7 @@ static void hypcnintr(int unit, spl_t spl, void *ret_addr, void *regs) {
struct tty *tp = &hypcn_tty;
if (kd_pollc)
return;
- simple_lock(&inlock);
+ simple_lock_nocheck(&inlock.slock);
while (console->in_prod != console->in_cons) {
int c = hyp_ring_cell(console->in, console->in_cons);
mb();
@@ -127,7 +120,7 @@ static void hypcnintr(int unit, spl_t spl, void *ret_addr, void *regs) {
(*linesw[tp->t_line].l_rint)(c, tp);
}
hyp_event_channel_send(boot_info.console_evtchn);
- simple_unlock(&inlock);
+ simple_unlock_nocheck(&inlock.slock);
}
int hypcnread(dev_t dev, io_req_t ior)
@@ -142,7 +135,7 @@ int hypcnwrite(dev_t dev, io_req_t ior)
return char_write(&hypcn_tty, ior);
}
-void hypcnstart(struct tty *tp)
+static void hypcnstart(struct tty *tp)
{
spl_t o_pri;
int ch;
@@ -166,7 +159,7 @@ void hypcnstart(struct tty *tp)
}
}
-void hypcnstop()
+static void hypcnstop(struct tty *t, int n)
{
}
@@ -190,32 +183,29 @@ int hypcnopen(dev_t dev, int flag, io_req_t ior)
struct tty *tp = &hypcn_tty;
spl_t o_pri;
- o_pri = spltty();
- simple_lock(&tp->t_lock);
+ o_pri = simple_lock_irq(&tp->t_lock);
if (!(tp->t_state & (TS_ISOPEN|TS_WOPEN))) {
/* XXX ttychars allocates memory */
- simple_unlock(&tp->t_lock);
+ simple_unlock_nocheck(&tp->t_lock.slock);
ttychars(tp);
- simple_lock(&tp->t_lock);
+ simple_lock_nocheck(&tp->t_lock.slock);
tp->t_oproc = hypcnstart;
tp->t_stop = hypcnstop;
- tp->t_ospeed = tp->t_ispeed = B9600;
- tp->t_flags = ODDP|EVENP|ECHO|CRMOD|XTABS;
+ tp->t_ospeed = tp->t_ispeed = B115200;
+ tp->t_flags = ODDP|EVENP|ECHO|CRMOD|XTABS|LITOUT;
}
tp->t_state |= TS_CARR_ON;
- simple_unlock(&tp->t_lock);
- splx(o_pri);
+ simple_unlock_irq(o_pri, &tp->t_lock);
return (char_open(dev, tp, flag, ior));
}
void hypcnclose(dev_t dev, int flag)
{
struct tty *tp = &hypcn_tty;
- spl_t s = spltty();
- simple_lock(&tp->t_lock);
+ spl_t s;
+ s = simple_lock_irq(&tp->t_lock);
ttyclose(tp);
- simple_unlock(&tp->t_lock);
- splx(s);
+ simple_unlock_irq(s, &tp->t_lock);
}
int hypcnprobe(struct consdev *cp)
@@ -229,12 +219,12 @@ int hypcninit(struct consdev *cp)
{
if (console)
return 0;
- simple_lock_init(&outlock);
- simple_lock_init(&inlock);
+ simple_lock_init_irq(&outlock);
+ simple_lock_init_irq(&inlock);
console = (void*) mfn_to_kv(boot_info.console_mfn);
#ifdef MACH_PV_PAGETABLES
pmap_set_page_readwrite(console);
#endif /* MACH_PV_PAGETABLES */
- hyp_evt_handler(boot_info.console_evtchn, hypcnintr, 0, SPL6);
+ hyp_evt_handler(boot_info.console_evtchn, (interrupt_handler_fn)hypcnintr, 0, SPL6);
return 0;
}
diff --git a/xen/console.h b/xen/console.h
index 527f5fbd..4a3c541d 100644
--- a/xen/console.h
+++ b/xen/console.h
@@ -25,7 +25,10 @@
#include <device/cons.h>
#include <device/io_req.h>
-#define hyp_console_write(str, len) hyp_console_io (CONSOLEIO_write, (len), kvtolin(str))
+static inline void hyp_console_write(const char *str, int len)
+{
+ hyp_console_io (CONSOLEIO_write, len, kvtolin(str));
+}
#define hyp_console_put(str) ({ \
const char *__str = (void*) (str); \
diff --git a/xen/evt.c b/xen/evt.c
index 296101aa..7296ae43 100644
--- a/xen/evt.c
+++ b/xen/evt.c
@@ -28,9 +28,10 @@
#define NEVNT (sizeof(unsigned long) * sizeof(unsigned long) * 8)
int int_mask[NSPL];
-spl_t curr_ipl;
+spl_t curr_ipl[NCPUS];
+int spl_init = 0;
-void (*ivect[NEVNT])();
+interrupt_handler_fn ivect[NEVNT];
int intpri[NEVNT];
int iunit[NEVNT];
@@ -63,7 +64,7 @@ void hyp_c_callback(void *ret_addr, void *regs)
if (ivect[n]) {
spl_t spl = splx(intpri[n]);
asm ("lock; and %1,%0":"=m"(hyp_shared_info.evtchn_pending[i]):"r"(~(1UL<<j)));
- ivect[n](iunit[n], spl, ret_addr, regs);
+ ((void(*)(int, int, const char*, struct i386_interrupt_state*))(ivect[n]))(iunit[n], spl, ret_addr, regs);
splx_cli(spl);
} else {
printf("warning: lost unbound event %d\n", n);
@@ -91,9 +92,12 @@ void form_int_mask(void)
extern void hyp_callback(void);
extern void hyp_failsafe_callback(void);
-void hyp_intrinit() {
+void hyp_intrinit(void) {
+ int i;
+
form_int_mask();
- curr_ipl = SPLHI;
+ for (i = 0; i < NCPUS; i++)
+ curr_ipl[i] = SPLHI;
hyp_shared_info.evtchn_mask[0] = int_mask[SPLHI];
#ifdef __i386__
hyp_set_callbacks(KERNEL_CS, hyp_callback,
@@ -104,7 +108,7 @@ void hyp_intrinit() {
#endif
}
-void hyp_evt_handler(evtchn_port_t port, void (*handler)(), int unit, spl_t spl) {
+void hyp_evt_handler(evtchn_port_t port, interrupt_handler_fn handler, int unit, spl_t spl) {
if (port > NEVNT)
panic("event channel port %d > %d not supported\n", port, (int) NEVNT);
intpri[port] = spl;
diff --git a/xen/evt.h b/xen/evt.h
index e4dbad1b..a73733e0 100644
--- a/xen/evt.h
+++ b/xen/evt.h
@@ -23,7 +23,7 @@
void hyp_intrinit(void);
void form_int_mask(void);
-void hyp_evt_handler(evtchn_port_t port, void (*handler)(), int unit, spl_t spl);
+void hyp_evt_handler(evtchn_port_t port, interrupt_handler_fn handler, int unit, spl_t spl);
void hyp_c_callback(void *ret_addr, void *regs);
#endif /* XEN_EVT_H */
diff --git a/xen/grant.c b/xen/grant.c
index 1d6e607b..84758cfc 100644
--- a/xen/grant.c
+++ b/xen/grant.c
@@ -28,7 +28,7 @@
#define NR_RESERVED_ENTRIES 8
#define NR_GRANT_PAGES 8
-decl_simple_lock_data(static,lock);
+def_simple_lock_data(static,lock);
static struct grant_entry *grants;
static vm_map_entry_t grants_map_entry;
static int last_grant = NR_RESERVED_ENTRIES;
diff --git a/xen/net.c b/xen/net.c
index 51b031cd..b72844d5 100644
--- a/xen/net.c
+++ b/xen/net.c
@@ -36,6 +36,7 @@
#include <xen/public/memory.h>
#include <string.h>
#include <util/atoi.h>
+#include <util/byteorder.h>
#include "evt.h"
#include "store.h"
#include "net.h"
@@ -75,7 +76,7 @@ static struct net_data *vif_data;
struct device_emulation_ops hyp_net_emulation_ops;
-int hextoi(char *cp, int *nump)
+static int hextoi(char *cp, int *nump)
{
int number;
char *original;
@@ -476,7 +477,7 @@ void hyp_net_init(void) {
nd->rx_buf_pfn[i] = atop(addr);
if (!nd->rx_copy) {
if (hyp_do_update_va_mapping(kvtolin(nd->rx_buf[i]), 0, UVMF_INVLPG|UVMF_ALL))
- panic("eth: couldn't clear rx kv buf %d at %lx", i, addr);
+ panic("eth: couldn't clear rx kv buf %d at %llx", i, addr);
}
/* and enqueue it to backend. */
enqueue_rx_buf(nd, i);
@@ -535,7 +536,7 @@ device_close(void *devp)
static io_return_t
device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
- dev_mode_t mode, char *name, device_t *devp /* out */)
+ dev_mode_t mode, const char *name, device_t *devp /* out */)
{
int i, n;
ipc_port_t port, notify;
diff --git a/xen/public/elfstructs.h b/xen/public/elfstructs.h
index 65d53457..dc71e2d8 100644
--- a/xen/public/elfstructs.h
+++ b/xen/public/elfstructs.h
@@ -44,8 +44,7 @@ typedef uint32_t Elf64_Word;
typedef int64_t Elf64_Sxword;
typedef uint64_t Elf64_Xword;
-typedef uint32_t Elf64_Half;
-typedef uint16_t Elf64_Quarter;
+typedef uint16_t Elf64_Half;
/*
* e_ident[] identification indexes
@@ -127,19 +126,19 @@ typedef struct elfhdr {
typedef struct {
unsigned char e_ident[EI_NIDENT]; /* Id bytes */
- Elf64_Quarter e_type; /* file type */
- Elf64_Quarter e_machine; /* machine type */
- Elf64_Half e_version; /* version number */
+ Elf64_Half e_type; /* file type */
+ Elf64_Half e_machine; /* machine type */
+ Elf64_Word e_version; /* version number */
Elf64_Addr e_entry; /* entry point */
Elf64_Off e_phoff; /* Program hdr offset */
Elf64_Off e_shoff; /* Section hdr offset */
- Elf64_Half e_flags; /* Processor flags */
- Elf64_Quarter e_ehsize; /* sizeof ehdr */
- Elf64_Quarter e_phentsize; /* Program header entry size */
- Elf64_Quarter e_phnum; /* Number of program headers */
- Elf64_Quarter e_shentsize; /* Section header entry size */
- Elf64_Quarter e_shnum; /* Number of section headers */
- Elf64_Quarter e_shstrndx; /* String table index */
+ Elf64_Word e_flags; /* Processor flags */
+ Elf64_Half e_ehsize; /* sizeof ehdr */
+ Elf64_Half e_phentsize; /* Program header entry size */
+ Elf64_Half e_phnum; /* Number of program headers */
+ Elf64_Half e_shentsize; /* Section header entry size */
+ Elf64_Half e_shnum; /* Number of section headers */
+ Elf64_Half e_shstrndx; /* String table index */
} Elf64_Ehdr;
/* e_type */
@@ -202,14 +201,14 @@ typedef struct {
} Elf32_Shdr;
typedef struct {
- Elf64_Half sh_name; /* section name */
- Elf64_Half sh_type; /* section type */
+ Elf64_Word sh_name; /* section name */
+ Elf64_Word sh_type; /* section type */
Elf64_Xword sh_flags; /* section flags */
Elf64_Addr sh_addr; /* virtual address */
Elf64_Off sh_offset; /* file offset */
Elf64_Xword sh_size; /* section size */
- Elf64_Half sh_link; /* link to another */
- Elf64_Half sh_info; /* misc info */
+ Elf64_Word sh_link; /* link to another */
+ Elf64_Word sh_info; /* misc info */
Elf64_Xword sh_addralign; /* memory alignment */
Elf64_Xword sh_entsize; /* table entry size */
} Elf64_Shdr;
@@ -284,10 +283,10 @@ typedef struct elf32_sym {
} Elf32_Sym;
typedef struct {
- Elf64_Half st_name; /* Symbol name index in str table */
+ Elf64_Word st_name; /* Symbol name index in str table */
Elf_Byte st_info; /* type / binding attrs */
Elf_Byte st_other; /* unused */
- Elf64_Quarter st_shndx; /* section index of symbol */
+ Elf64_Half st_shndx; /* section index of symbol */
Elf64_Xword st_value; /* value of symbol */
Elf64_Xword st_size; /* size of symbol */
} Elf64_Sym;
@@ -368,8 +367,8 @@ typedef struct {
} Elf32_Phdr;
typedef struct {
- Elf64_Half p_type; /* entry type */
- Elf64_Half p_flags; /* flags */
+ Elf64_Word p_type; /* entry type */
+ Elf64_Word p_flags; /* flags */
Elf64_Off p_offset; /* offset */
Elf64_Addr p_vaddr; /* virtual address */
Elf64_Addr p_paddr; /* physical address */
@@ -458,9 +457,9 @@ typedef struct {
} Elf32_Note;
typedef struct {
- Elf64_Half namesz;
- Elf64_Half descsz;
- Elf64_Half type;
+ Elf64_Word namesz;
+ Elf64_Word descsz;
+ Elf64_Word type;
} Elf64_Note;
diff --git a/xen/store.c b/xen/store.c
index 23cbc223..5f5a902a 100644
--- a/xen/store.c
+++ b/xen/store.c
@@ -36,7 +36,7 @@
/* Hypervisor part */
-decl_simple_lock_data(static, lock);
+def_simple_lock_data(static, lock);
static struct xenstore_domain_interface *store;
diff --git a/xen/time.c b/xen/time.c
index e8abd56b..21791a59 100644
--- a/xen/time.c
+++ b/xen/time.c
@@ -123,17 +123,17 @@ readtodc(uint64_t *tp)
}
int
-writetodc()
+writetodc(void)
{
/* Not allowed in Xen */
return(-1);
}
void
-clkstart()
+clkstart(void)
{
evtchn_port_t port = hyp_event_channel_bind_virq(VIRQ_TIMER, 0);
- hyp_evt_handler(port, hypclock_intr, 0, SPLHI);
+ hyp_evt_handler(port, (interrupt_handler_fn)hypclock_intr, 0, SPLHI);
/* first clock tick */
clock_interrupt(0, 0, 0, 0);
diff --git a/xen/xen.c b/xen/xen.c
index 28953512..6d424474 100644
--- a/xen/xen.c
+++ b/xen/xen.c
@@ -30,7 +30,7 @@
#include "xen.h"
#include "evt.h"
-void hyp_debug()
+static void hyp_debug(void)
{
panic("debug");
}
@@ -40,7 +40,7 @@ void hyp_init(void)
hyp_grant_init();
hyp_store_init();
evtchn_port_t port = hyp_event_channel_bind_virq(VIRQ_DEBUG, 0);
- hyp_evt_handler(port, hyp_debug, 0, SPL7);
+ hyp_evt_handler(port, (interrupt_handler_fn)hyp_debug, 0, SPL7);
}
void hyp_dev_init(void)
@@ -50,17 +50,6 @@ void hyp_dev_init(void)
hyp_net_init();
}
-void _hyp_halt(void)
-{
- hyp_halt();
-}
-
-void _hyp_todo(unsigned long from)
-{
- printf("TODO: at %lx\n",from);
- hyp_halt();
-}
-
extern int int_mask[];
void hyp_idle(void)
{
diff --git a/xen/xen.h b/xen/xen.h
index cbb793e2..3fd4028a 100644
--- a/xen/xen.h
+++ b/xen/xen.h
@@ -26,4 +26,16 @@ void hyp_p2m_init(void);
void hypclock_machine_intr(int old_ipl, void *ret_addr, struct i386_interrupt_state *regs, uint64_t delta);
+struct failsafe_callback_regs {
+ unsigned int ds;
+ unsigned int es;
+ unsigned int fs;
+ unsigned int gs;
+ unsigned int ip;
+ unsigned int cs_and_mask;
+ unsigned int flags;
+};
+
+void hyp_failsafe_c_callback(struct failsafe_callback_regs *regs);
+
#endif /* XEN_XEN_H */