author     Samuel Thibault <samuel.thibault@ens-lyon.org>    2020-11-28 16:30:29 +0100
committer  Samuel Thibault <samuel.thibault@ens-lyon.org>    2020-11-28 17:46:32 +0100
commit     b885c5ea26fb3c2f2d91b6e9a1495070da429ea4
tree       787f167688bf7518b62241fa55c539da3fa53ed5 /i386
parent     44e3fb20a17dac403ec022a9c4d41dc237f3c79d
x86: Add XSAVE support
* i386/i386/fpu.h (CPU_XCR0_X87, CPU_XCR0_SSE, CPU_XCR0_AVX, CPU_XCR0_MPX,
CPU_XCR0_AVX512): New macros.
(xsave): Pass fp_xsave_support to xsave.
(fpu_save_context): When fp_kind is FP_387X, use xsave.
(fp_xsave_support): New variable declaration.
* i386/i386/proc_reg.h (cpuid): New macro.
* linux/src/include/asm-i386/processor.h (cpuid): Disable macro.
* i386/include/mach/i386/fp_reg.h: Include <stdint.h>
(i386_xfp_xstate_header): New structure.
(i386_xfp_save): Add xsave fields.
* i386/i386/fpu.c (fp_xsave_support): New variable.
(init_fpu): Look for XSAVE feature; if available, get the supported parts
and set fp_kind to FP_387X.
(fpu_module_init): Set ifps_cache alignment to alignof(struct i386_fpsave_state).
(fpu_set_state): Make sure to clear all the ifps structure. Reuse the FP_387FX
for the FP_387X case.
(fpu_get_state, fpexterrflt, fpastintr, fp_state_alloc): Reuse the FP_387FX
for the FP_387X case.
(fp_save): When fp_kind is FP_387X, use xsave.
(fp_load): When fp_kind is FP_387X, use xrstor. Reuse the FP_387FX for the
FP_387X case.
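
As background for the hunks below: init_fpu() detects XSAVE by checking the CPUID feature bit, then reads CPUID leaf 0xd sub-leaf 0, whose EDX:EAX pair reports which state components XCR0 may enable and whose EBX/ECX report save-area sizes. The following stand-alone user-space sketch (not part of the commit) performs the same enumeration with GCC's <cpuid.h> helpers instead of the kernel's new cpuid macro:

	#include <stdio.h>
	#include <stdint.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID.1:ECX bit 26 is the XSAVE feature flag. */
		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & bit_XSAVE)) {
			puts("XSAVE not supported");
			return 0;
		}

		/* Leaf 0xd, sub-leaf 0: EDX:EAX = state components XCR0 may enable,
		 * EBX = save-area size for the components currently enabled,
		 * ECX = save-area size if every supported component were enabled. */
		__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);

		uint64_t supported = ((uint64_t) edx << 32) | eax;
		printf("XCR0-supported components: %#llx\n",
		       (unsigned long long) supported);
		printf("XSAVE area: %u bytes now, %u bytes if all enabled\n", ebx, ecx);
		return 0;
	}

On an AVX-capable machine the reported mask includes bits 0x7 (x87 | SSE | AVX), which is exactly the set CPU_XCR0_SUPPORTED keeps below.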
Diffstat (limited to 'i386')
-rw-r--r--  i386/i386/fpu.c                   81
-rw-r--r--  i386/i386/fpu.h                   19
-rw-r--r--  i386/i386/proc_reg.h              22
-rw-r--r--  i386/include/mach/i386/fp_reg.h   27
4 files changed, 122 insertions, 27 deletions
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index 5cbfcb2f..517764d6 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -70,6 +70,7 @@
#endif
int fp_kind = FP_387; /* 80387 present */
+uint64_t fp_xsave_support; /* Bitmap of supported XSAVE save areas */
struct kmem_cache ifps_cache; /* cache for FPU save area */
static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */
@@ -148,21 +149,58 @@ init_fpu(void)
/*
* We have a 387.
*/
- if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
- static /* because we _need_ alignment */
- struct i386_xfp_save save;
- unsigned long mask;
- fp_kind = FP_387FX;
+ fp_kind = FP_387;
+
+ if (CPU_HAS_FEATURE(CPU_FEATURE_XSAVE)) {
+ unsigned eax, ebx, ecx, edx;
+
+ eax = 0xd;
+ ecx = 0x0;
+ cpuid(eax, ebx, ecx, edx);
+
+ fp_xsave_support = eax + (((uint64_t) edx) << 32);
+ fp_xsave_support &= CPU_XCR0_SUPPORTED;
+#ifndef MACH_RING1
+ set_cr4(get_cr4() | CR4_OSFXSR | CR4_OSXSAVE);
+ set_xcr0(fp_xsave_support);
+#endif /* MACH_RING1 */
+
+ eax = 0xd;
+ ecx = 0x0;
+ cpuid(eax, ebx, ecx, edx);
+
+ if (ebx > sizeof(struct i386_xfp_save)) {
+ /* TODO: rather make struct unbound and set size
+ * in fpu_module_init */
+ printf("XSAVE area size %u larger than provisioned "
+ "%u, not enabling XSAVE\n",
+ ebx, sizeof(struct i386_xfp_save));
+#ifndef MACH_RING1
+ set_cr4(get_cr4() & ~(CR4_OSFXSR | CR4_OSXSAVE));
+#endif /* MACH_RING1 */
+ } else {
+ fp_kind = FP_387X;
+ }
+ }
+
+ if (fp_kind == FP_387 && CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
#ifndef MACH_RING1
set_cr4(get_cr4() | CR4_OSFXSR);
#endif /* MACH_RING1 */
+ fp_kind = FP_387FX;
+ }
+
+ if (fp_kind == FP_387X || fp_kind == FP_387FX) {
+ /* Compute mxcsr_feature_mask. */
+ static /* because we _need_ alignment */
+ struct i386_xfp_save save;
+ unsigned long mask;
fxsave(&save);
mask = save.fp_mxcsr_mask;
if (!mask)
mask = 0x0000ffbf;
mxcsr_feature_mask &= mask;
- } else
- fp_kind = FP_387;
+ }
}
#ifdef MACH_RING1
set_ts();
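
The leaf 0xd sub-leaf 0 query appears twice above because EBX reflects the XCR0 value currently in effect: only after set_xcr0() has enabled the components the kernel intends to save does EBX give the size the save area must have, which is then checked against sizeof(struct i386_xfp_save). ECX of the same sub-leaf instead gives the size needed if every supported component were enabled. A hedged fragment, assuming the cpuid macro this patch adds to proc_reg.h is in scope:

	unsigned eax, ebx, ecx, edx;

	eax = 0xd;	/* processor extended state enumeration */
	ecx = 0;	/* sub-leaf 0 */
	cpuid(eax, ebx, ecx, edx);
	/* ebx: XSAVE area size for the components currently enabled in XCR0 */
	/* ecx: XSAVE area size if all supported components were enabled */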
@@ -188,7 +226,8 @@ void
fpu_module_init(void)
{
kmem_cache_init(&ifps_cache, "i386_fpsave_state",
- sizeof(struct i386_fpsave_state), 16,
+ sizeof(struct i386_fpsave_state),
+ alignof(struct i386_fpsave_state),
NULL, 0);
}
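
The alignment change matters because FXSAVE only requires 16-byte alignment while XSAVE faults unless the save area is 64-byte aligned; with fp_reg.h now declaring struct i386_xfp_save as aligned(64), struct i386_fpsave_state (which embeds it as xfp_save_state) inherits that requirement, and alignof passes it through to the kmem cache. A compile-time check along these lines (a sketch, not part of the patch; assumes C11 _Alignof) would catch a regression:

	_Static_assert(_Alignof(struct i386_xfp_save) == 64,
		       "XSAVE requires a 64-byte aligned save area");
	_Static_assert(_Alignof(struct i386_fpsave_state) >= _Alignof(struct i386_xfp_save),
		       "ifps_cache alignment must cover the embedded XSAVE area");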
@@ -359,9 +398,9 @@ ASSERT_IPL(SPL0);
/*
* Ensure that reserved parts of the environment are 0.
*/
- memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save));
+ memset(ifps, 0, sizeof(*ifps));
- if (fp_kind == FP_387FX) {
+ if (fp_kind == FP_387X || fp_kind == FP_387FX) {
int i;
ifps->xfp_save_state.fp_control = user_fp_state->fp_control;
@@ -454,7 +493,7 @@ ASSERT_IPL(SPL0);
*/
memset(user_fp_state, 0, sizeof(struct i386_fp_save));
- if (fp_kind == FP_387FX) {
+ if (fp_kind == FP_387X || fp_kind == FP_387FX) {
int i;
user_fp_state->fp_control = ifps->xfp_save_state.fp_control;
@@ -697,7 +736,7 @@ fpexterrflt(void)
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- fp_kind == FP_387FX ?
+ fp_kind == FP_387X || fp_kind == FP_387FX ?
thread->pcb->ims.ifps->xfp_save_state.fp_status :
thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
@@ -755,7 +794,7 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- fp_kind == FP_387FX ?
+ fp_kind == FP_387X || fp_kind == FP_387FX ?
thread->pcb->ims.ifps->xfp_save_state.fp_status :
thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
@@ -779,10 +818,12 @@ fp_save(thread_t thread)
if (ifps != 0 && !ifps->fp_valid) {
/* registers are in FPU */
ifps->fp_valid = TRUE;
- if (fp_kind == FP_387FX)
- fxsave(&ifps->xfp_save_state);
+ if (fp_kind == FP_387X)
+ xsave(&ifps->xfp_save_state);
+ else if (fp_kind == FP_387FX)
+ fxsave(&ifps->xfp_save_state);
else
- fnsave(&ifps->fp_save_state);
+ fnsave(&ifps->fp_save_state);
}
}
@@ -822,7 +863,7 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- fp_kind == FP_387FX ?
+ fp_kind == FP_387X || fp_kind == FP_387FX ?
thread->pcb->ims.ifps->xfp_save_state.fp_status :
thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
@@ -831,7 +872,9 @@ ASSERT_IPL(SPL0);
printf("fp_load: invalid FPU state!\n");
fninit ();
} else {
- if (fp_kind == FP_387FX)
+ if (fp_kind == FP_387X)
+ xrstor(ifps->xfp_save_state);
+ else if (fp_kind == FP_387FX)
fxrstor(ifps->xfp_save_state);
else
frstor(ifps->fp_save_state);
@@ -857,7 +900,7 @@ fp_state_alloc(void)
ifps->fp_valid = TRUE;
- if (fp_kind == FP_387FX) {
+ if (fp_kind == FP_387X || fp_kind == FP_387FX) {
ifps->xfp_save_state.fp_control = (0x037f
& ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
| (FPC_PC_64|FPC_IC_AFF);
diff --git a/i386/i386/fpu.h b/i386/i386/fpu.h
index 3c5b73c5..dc503606 100644
--- a/i386/i386/fpu.h
+++ b/i386/i386/fpu.h
@@ -96,8 +96,20 @@ static inline void set_xcr0(uint64_t value) {
xsetbv(0, value);
}
+#define CPU_XCR0_X87 (1 << 0)
+#define CPU_XCR0_SSE (1 << 1)
+#define CPU_XCR0_AVX (1 << 2)
+#define CPU_XCR0_MPX (3 << 3)
+#define CPU_XCR0_AVX512 (7 << 5)
+
+/* This is the set we support for now in our struct i386_xfp_save */
+#define CPU_XCR0_SUPPORTED (CPU_XCR0_X87 | CPU_XCR0_SSE | CPU_XCR0_AVX)
+
#define xsave(state) \
- asm volatile("xsave %0" : "=m" (*state))
+ asm volatile("xsave %0" \
+ : "=m" (*state) \
+ : "a" ((unsigned) fp_xsave_support) \
+ , "d" ((unsigned) (fp_xsave_support >> 32))) \
#define xrstor(state) \
asm volatile("xrstor %0" : : "m" (state))
@@ -121,7 +133,9 @@ static inline void set_xcr0(uint64_t value) {
if (ifps != 0 && !ifps->fp_valid) { \
/* registers are in FPU - save to memory */ \
ifps->fp_valid = TRUE; \
- if (fp_kind == FP_387FX) \
+ if (fp_kind == FP_387X) \
+ xsave(&ifps->xfp_save_state); \
+ else if (fp_kind == FP_387FX) \
fxsave(&ifps->xfp_save_state); \
else \
fnsave(&ifps->fp_save_state); \
@@ -138,6 +152,7 @@ static inline void set_xcr0(uint64_t value) {
#endif /* NCPUS == 1 */
extern int fp_kind;
+extern uint64_t fp_xsave_support;
extern void fp_save(thread_t thread);
extern void fp_load(thread_t thread);
extern void fp_free(struct i386_fpsave_state *fps);
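
For reference, the EDX:EAX pair the xsave macro above loads from fp_xsave_support is the requested-feature bitmap; the CPU ANDs it with XCR0 to decide which state components to save, and XRSTOR consumes the same mask on restore. A function-style rendering of the two operations (a sketch with invented names, not the kernel's macros):

	#include <stdint.h>

	/* 'area' must be 64-byte aligned and large enough for the enabled
	 * components (sizeof(struct i386_xfp_save), 832 bytes in this patch). */
	static inline void xsave_area(void *area, uint64_t mask)
	{
		asm volatile("xsave %0"
			     : "=m" (*(char (*)[832]) area)
			     : "a" ((uint32_t) mask),
			       "d" ((uint32_t) (mask >> 32)));
	}

	static inline void xrstor_area(const void *area, uint64_t mask)
	{
		/* XRSTOR takes the same EDX:EAX mask as XSAVE. */
		asm volatile("xrstor %0"
			     : /* no outputs */
			     : "m" (*(const char (*)[832]) area),
			       "a" ((uint32_t) mask),
			       "d" ((uint32_t) (mask >> 32)));
	}

Seen this way, the patch's xsave(state) is xsave_area(state, fp_xsave_support), with fp_xsave_support already masked to CPU_XCR0_SUPPORTED in init_fpu().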
diff --git a/i386/i386/proc_reg.h b/i386/i386/proc_reg.h
index 624819c6..a83ca0d8 100644
--- a/i386/i386/proc_reg.h
+++ b/i386/i386/proc_reg.h
@@ -379,6 +379,28 @@ extern unsigned long cr3;
})
#endif
+/* Note: gcc might want to use bx or the stack for %1 addressing, so we can't
+ * use them :/ */
+#ifdef __x86_64__
+#define cpuid(eax, ebx, ecx, edx) \
+{ \
+ uint64_t sav_rbx; \
+ asm( "mov %%rbx,%2\n\t" \
+ "cpuid\n\t" \
+ "xchg %2,%%rbx\n\t" \
+ "movl %k2,%1\n\t" \
+ : "+a" (eax), "=m" (ebx), "=&r" (sav_rbx), "+c" (ecx), "=&d" (edx)); \
+}
+#else
+#define cpuid(eax, ebx, ecx, edx) \
+{ \
+ asm ( "mov %%ebx,%1\n\t" \
+ "cpuid\n\t" \
+ "xchg %%ebx,%1\n\t" \
+ : "+a" (eax), "=&SD" (ebx), "+c" (ecx), "=&d" (edx)); \
+}
+#endif
+
#endif /* __GNUC__ */
#endif /* __ASSEMBLER__ */
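
Since cpuid is a statement macro, its four arguments must be modifiable lvalues; eax and ecx select the leaf and sub-leaf on input and, like ebx and edx, carry results on output. A hypothetical call site (the EBX/EDX/ECX vendor-string order is standard CPUID behaviour, not something this patch defines):

	unsigned eax, ebx, ecx, edx;
	char vendor[13];

	eax = 0;	/* leaf 0: highest basic leaf and vendor string */
	ecx = 0;
	cpuid(eax, ebx, ecx, edx);
	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';	/* e.g. "GenuineIntel" or "AuthenticAMD" */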
diff --git a/i386/include/mach/i386/fp_reg.h b/i386/include/mach/i386/fp_reg.h
index 648511ad..5992ea78 100644
--- a/i386/include/mach/i386/fp_reg.h
+++ b/i386/include/mach/i386/fp_reg.h
@@ -26,6 +26,9 @@
#ifndef _MACH_I386_FP_REG_H_
#define _MACH_I386_FP_REG_H_
+
+#include <stdint.h>
+
/*
* Floating point registers and status, as saved
* and restored by FP save/restore instructions.
@@ -50,17 +53,24 @@ struct i386_fp_regs {
/* space for 8 80-bit FP registers */
};
+struct i386_xfp_xstate_header {
+ uint64_t xfp_features;
+ uint64_t xcomp_bv;
+ uint64_t reserved[6];
+} __attribute__((packed, aligned(64)));
+_Static_assert(sizeof(struct i386_xfp_xstate_header) == 8*8);
+
struct i386_xfp_save {
unsigned short fp_control; /* control */
unsigned short fp_status; /* status */
unsigned short fp_tag; /* register tags */
unsigned short fp_opcode; /* opcode of failed instruction */
unsigned int fp_eip; /* eip at failed instruction */
- unsigned short fp_cs; /* cs at failed instruction */
- unsigned short fp_unused_1;
+ unsigned short fp_cs; /* cs at failed instruction / eip high */
+ unsigned short fp_eip3; /* eip higher */
unsigned int fp_dp; /* data address */
- unsigned short fp_ds; /* data segment */
- unsigned short fp_unused_2;
+ unsigned short fp_ds; /* data segment / dp high */
+ unsigned short fp_dp3; /* dp higher */
unsigned int fp_mxcsr; /* MXCSR */
unsigned int fp_mxcsr_mask; /* MXCSR_MASK */
unsigned char fp_reg_word[8][16];
@@ -68,8 +78,13 @@ struct i386_xfp_save {
unsigned char fp_xreg_word[16][16];
/* space for 16 128-bit XMM registers */
unsigned int padding[24];
-} __attribute__((aligned(16)));
-_Static_assert(sizeof(struct i386_xfp_save) == 512);
+ struct i386_xfp_xstate_header header;
+
+ unsigned char fp_yreg_word[16][16];
+ /* space for the high part of the
+ * 16 256-bit YMM registers */
+} __attribute__((packed, aligned(64)));
+_Static_assert(sizeof(struct i386_xfp_save) == 512 + 8*8 + 16*16);
/*
* Control register
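
The grown structure follows the XSAVE standard format: the 512-byte legacy FXSAVE image, the 64-byte xstate header, then the extended region, whose first component (the YMM upper halves saved in fp_yreg_word) the SDM places at offset 576. Compile-time checks along these lines (a sketch, not part of the patch; offsetof is from <stddef.h>) pin those offsets down:

	_Static_assert(offsetof(struct i386_xfp_save, header) == 512,
		       "xstate header starts right after the legacy FXSAVE region");
	_Static_assert(offsetof(struct i386_xfp_save, fp_yreg_word) == 512 + 64,
		       "AVX state (YMM upper halves) follows the 64-byte header");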