D43306.id144039.diff

diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c
--- a/sys/arm64/arm64/exec_machdep.c
+++ b/sys/arm64/arm64/exec_machdep.c
@@ -606,6 +606,8 @@
if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
return (EFAULT);
+ /* Stop an interrupt from causing the SVE state to be dropped */
+ td->td_sa.code = -1;
error = set_mcontext(td, &uc.uc_mcontext);
if (error != 0)
return (error);
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
--- a/sys/arm64/arm64/trap.c
+++ b/sys/arm64/arm64/trap.c
@@ -734,7 +734,8 @@
break;
}
- KASSERT((td->td_pcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
+ KASSERT(
+ (td->td_pcb->pcb_fpflags & ~(PCB_FP_USERMASK|PCB_FP_SVEVALID)) == 0,
("Kernel VFP flags set while entering userspace"));
KASSERT(
td->td_pcb->pcb_fpusaved == &td->td_pcb->pcb_fpustate,
diff --git a/sys/arm64/arm64/vfp.c b/sys/arm64/arm64/vfp.c
--- a/sys/arm64/arm64/vfp.c
+++ b/sys/arm64/arm64/vfp.c
@@ -30,11 +30,13 @@
#ifdef VFP
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/eventhandler.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
+#include <sys/smp.h>
#include <vm/uma.h>
@@ -60,6 +62,63 @@
static uma_zone_t fpu_save_area_zone;
static struct vfpstate *fpu_initialstate;
+static u_int sve_max_vector_len;
+
+static size_t
+_sve_buf_size(u_int sve_len)
+{
+ size_t len;
+
+ /* 32 vector registers */
+ len = (size_t)sve_len * 32;
+ /*
+ * 16 predicate registers and the First Fault Register (FFR), each
+ * 1/8th the size of a vector register.
+ */
+ len += ((size_t)sve_len * 17) / 8;
+ /*
+ * FPSR and FPCR
+ */
+ len += sizeof(uint64_t) * 2;
+
+ return (len);
+}
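
A worked example of the buffer arithmetic, assuming a hypothetical
512-bit SVE implementation (so sve_len == 64 bytes):

	len  = 64 * 32;        /* 32 Z registers:   2048 bytes */
	len += (64 * 17) / 8;  /* 16 P regs + FFR:   136 bytes */
	len += 8 * 2;          /* FPSR and FPCR:      16 bytes */
	                       /* total:            2200 bytes */
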
+
+size_t
+sve_max_buf_size(void)
+{
+ MPASS(sve_max_vector_len > 0);
+ return (_sve_buf_size(sve_max_vector_len));
+}
+
+size_t
+sve_buf_size(struct thread *td)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ MPASS(pcb->pcb_svesaved != NULL);
+ MPASS(pcb->pcb_sve_len > 0);
+
+ return (_sve_buf_size(pcb->pcb_sve_len));
+}
+
+static void *
+sve_alloc(void)
+{
+ void *buf;
+
+ buf = malloc(sve_max_buf_size(), M_FPUKERN_CTX, M_WAITOK | M_ZERO);
+
+ return (buf);
+}
+
+static void
+sve_free(void *buf)
+{
+ free(buf, M_FPUKERN_CTX);
+}
+
void
vfp_enable(void)
{
@@ -71,13 +130,30 @@
isb();
}
+static void
+sve_enable(void)
+{
+ uint32_t cpacr;
+
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ /* Enable FP */
+ cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
+ /* Enable SVE */
+ cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_NONE;
+ WRITE_SPECIALREG(cpacr_el1, cpacr);
+ isb();
+}
+
void
vfp_disable(void)
{
uint32_t cpacr;
cpacr = READ_SPECIALREG(cpacr_el1);
+ /* Disable FP */
cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
+ /* Disable SVE */
+ cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_ALL1;
WRITE_SPECIALREG(cpacr_el1, cpacr);
isb();
}
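
The CPACR_ZEN field gates SVE independently of CPACR_FPEN. A minimal
sketch of testing it, using the same constants as the save path below
(hypothetical helper, not part of this patch):

	static inline bool
	sve_is_enabled(void)
	{
		/* SVE instructions trap unless the ZEN field allows them */
		return ((READ_SPECIALREG(cpacr_el1) & CPACR_ZEN_MASK) ==
		    CPACR_ZEN_TRAP_NONE);
	}
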
@@ -171,9 +247,266 @@
}
static void
-vfp_save_state_common(struct thread *td, struct pcb *pcb)
+sve_store(void *state, u_int sve_len)
+{
+ vm_offset_t f_start, p_start, z_start;
+ uint64_t fpcr, fpsr;
+
+ /*
+ * Calculate the start of each register group. There are three
+ * groups depending on size, with the First Fault Register (FFR)
+ * stored with the predicate registers as we use one of them to
+ * temporarily hold it.
+ *
+ * +-------------------------+-------------------+
+ * | Contents | Register size |
+ * z_start -> +-------------------------+-------------------+
+ * | | |
+ * | 32 Z regs | sve_len |
+ * | | |
+ * p_start -> +-------------------------+-------------------+
+ * | | |
+ * | 16 Predicate registers | 1/8 size of Z reg |
+ * | 1 First Fault register | |
+ * | | |
+ * f_start -> +-------------------------+-------------------+
+ * | | |
+ * | FPSR/FPCR | 32 bit |
+ * | | |
+ * +-------------------------+-------------------+
+ */
+ z_start = (vm_offset_t)state;
+ p_start = z_start + sve_len * 32;
+ f_start = p_start + (sve_len / 8) * 17;
+
+ __asm __volatile(
+ ".arch_extension sve \n"
+ "str z0, [%0, #0, MUL VL] \n"
+ "str z1, [%0, #1, MUL VL] \n"
+ "str z2, [%0, #2, MUL VL] \n"
+ "str z3, [%0, #3, MUL VL] \n"
+ "str z4, [%0, #4, MUL VL] \n"
+ "str z5, [%0, #5, MUL VL] \n"
+ "str z6, [%0, #6, MUL VL] \n"
+ "str z7, [%0, #7, MUL VL] \n"
+ "str z8, [%0, #8, MUL VL] \n"
+ "str z9, [%0, #9, MUL VL] \n"
+ "str z10, [%0, #10, MUL VL] \n"
+ "str z11, [%0, #11, MUL VL] \n"
+ "str z12, [%0, #12, MUL VL] \n"
+ "str z13, [%0, #13, MUL VL] \n"
+ "str z14, [%0, #14, MUL VL] \n"
+ "str z15, [%0, #15, MUL VL] \n"
+ "str z16, [%0, #16, MUL VL] \n"
+ "str z17, [%0, #17, MUL VL] \n"
+ "str z18, [%0, #18, MUL VL] \n"
+ "str z19, [%0, #19, MUL VL] \n"
+ "str z20, [%0, #20, MUL VL] \n"
+ "str z21, [%0, #21, MUL VL] \n"
+ "str z22, [%0, #22, MUL VL] \n"
+ "str z23, [%0, #23, MUL VL] \n"
+ "str z24, [%0, #24, MUL VL] \n"
+ "str z25, [%0, #25, MUL VL] \n"
+ "str z26, [%0, #26, MUL VL] \n"
+ "str z27, [%0, #27, MUL VL] \n"
+ "str z28, [%0, #28, MUL VL] \n"
+ "str z29, [%0, #29, MUL VL] \n"
+ "str z30, [%0, #30, MUL VL] \n"
+ "str z31, [%0, #31, MUL VL] \n"
+ /* Store the predicate registers */
+ "str p0, [%1, #0, MUL VL] \n"
+ "str p1, [%1, #1, MUL VL] \n"
+ "str p2, [%1, #2, MUL VL] \n"
+ "str p3, [%1, #3, MUL VL] \n"
+ "str p4, [%1, #4, MUL VL] \n"
+ "str p5, [%1, #5, MUL VL] \n"
+ "str p6, [%1, #6, MUL VL] \n"
+ "str p7, [%1, #7, MUL VL] \n"
+ "str p8, [%1, #8, MUL VL] \n"
+ "str p9, [%1, #9, MUL VL] \n"
+ "str p10, [%1, #10, MUL VL] \n"
+ "str p11, [%1, #11, MUL VL] \n"
+ "str p12, [%1, #12, MUL VL] \n"
+ "str p13, [%1, #13, MUL VL] \n"
+ "str p14, [%1, #14, MUL VL] \n"
+ "str p15, [%1, #15, MUL VL] \n"
+ ".arch_extension nosve \n"
+ : : "r"(z_start), "r"(p_start));
+
+ /* Save the FFR if needed */
+ /* TODO: Skip if in SME streaming mode (when supported) */
+ __asm __volatile(
+ ".arch_extension sve \n"
+ "rdffr p0.b \n"
+ "str p0, [%0, #16, MUL VL] \n"
+ /*
+ * Reload the old p0 value so the register state stays consistent
+ * if SVE is re-enabled without calling sve_restore, e.g. after
+ * switching to a kernel thread and back.
+ */
+ "ldr p0, [%0, #0, MUL VL] \n"
+ ".arch_extension nosve \n"
+ : : "r"(p_start));
+
+ __asm __volatile(
+ ".arch_extension fp \n"
+ "mrs %0, fpsr \n"
+ "mrs %1, fpcr \n"
+ "stp %w0, %w1, [%2] \n"
+ ".arch_extension nofp \n"
+ : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
+}
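
Note on the addressing mode above: in the "MUL VL" form the immediate
is scaled by the current vector length, so "str z1, [%0, #1, MUL VL]"
stores at z_start + sve_len, and predicate stores are scaled by an
eighth of the vector length, matching the layout diagram.
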
+
+static void
+sve_restore(void *state, u_int sve_len)
+{
+ vm_offset_t f_start, p_start, z_start;
+ uint64_t fpcr, fpsr;
+
+ /* See sve_store for the layout of the state buffer */
+ z_start = (vm_offset_t)state;
+ p_start = z_start + sve_len * 32;
+ f_start = p_start + (sve_len / 8) * 17;
+
+ __asm __volatile(
+ ".arch_extension sve \n"
+ "ldr p0, [%0, #16, MUL VL] \n"
+ "wrffr p0.b \n"
+ ".arch_extension nosve \n"
+ : : "r"(p_start));
+
+ __asm __volatile(
+ ".arch_extension sve \n"
+ "ldr z0, [%0, #0, MUL VL] \n"
+ "ldr z1, [%0, #1, MUL VL] \n"
+ "ldr z2, [%0, #2, MUL VL] \n"
+ "ldr z3, [%0, #3, MUL VL] \n"
+ "ldr z4, [%0, #4, MUL VL] \n"
+ "ldr z5, [%0, #5, MUL VL] \n"
+ "ldr z6, [%0, #6, MUL VL] \n"
+ "ldr z7, [%0, #7, MUL VL] \n"
+ "ldr z8, [%0, #8, MUL VL] \n"
+ "ldr z9, [%0, #9, MUL VL] \n"
+ "ldr z10, [%0, #10, MUL VL] \n"
+ "ldr z11, [%0, #11, MUL VL] \n"
+ "ldr z12, [%0, #12, MUL VL] \n"
+ "ldr z13, [%0, #13, MUL VL] \n"
+ "ldr z14, [%0, #14, MUL VL] \n"
+ "ldr z15, [%0, #15, MUL VL] \n"
+ "ldr z16, [%0, #16, MUL VL] \n"
+ "ldr z17, [%0, #17, MUL VL] \n"
+ "ldr z18, [%0, #18, MUL VL] \n"
+ "ldr z19, [%0, #19, MUL VL] \n"
+ "ldr z20, [%0, #20, MUL VL] \n"
+ "ldr z21, [%0, #21, MUL VL] \n"
+ "ldr z22, [%0, #22, MUL VL] \n"
+ "ldr z23, [%0, #23, MUL VL] \n"
+ "ldr z24, [%0, #24, MUL VL] \n"
+ "ldr z25, [%0, #25, MUL VL] \n"
+ "ldr z26, [%0, #26, MUL VL] \n"
+ "ldr z27, [%0, #27, MUL VL] \n"
+ "ldr z28, [%0, #28, MUL VL] \n"
+ "ldr z29, [%0, #29, MUL VL] \n"
+ "ldr z30, [%0, #30, MUL VL] \n"
+ "ldr z31, [%0, #31, MUL VL] \n"
+ /* Load the predicate registers */
+ "ldr p0, [%1, #0, MUL VL] \n"
+ "ldr p1, [%1, #1, MUL VL] \n"
+ "ldr p2, [%1, #2, MUL VL] \n"
+ "ldr p3, [%1, #3, MUL VL] \n"
+ "ldr p4, [%1, #4, MUL VL] \n"
+ "ldr p5, [%1, #5, MUL VL] \n"
+ "ldr p6, [%1, #6, MUL VL] \n"
+ "ldr p7, [%1, #7, MUL VL] \n"
+ "ldr p8, [%1, #8, MUL VL] \n"
+ "ldr p9, [%1, #9, MUL VL] \n"
+ "ldr p10, [%1, #10, MUL VL] \n"
+ "ldr p11, [%1, #11, MUL VL] \n"
+ "ldr p12, [%1, #12, MUL VL] \n"
+ "ldr p13, [%1, #13, MUL VL] \n"
+ "ldr p14, [%1, #14, MUL VL] \n"
+ "ldr p15, [%1, #15, MUL VL] \n"
+ ".arch_extension nosve \n"
+ : : "r"(z_start), "r"(p_start));
+
+ __asm __volatile(
+ ".arch_extension fp \n"
+ "ldp %w0, %w1, [%2] \n"
+ "msr fpsr, %0 \n"
+ "msr fpcr, %1 \n"
+ ".arch_extension nofp \n"
+ : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
+}
+
+/*
+ * Sync the VFP registers to the SVE register state, e.g. on signal
+ * return, when userspace may have changed the VFP register values and
+ * expects them to be used when the signal handler returns.
+ */
+void
+vfp_to_sve_sync(struct thread *td)
+{
+ struct pcb *pcb;
+ uint32_t *fpxr;
+
+ pcb = td->td_pcb;
+ if (pcb->pcb_svesaved == NULL)
+ return;
+
+ MPASS(pcb->pcb_fpusaved != NULL);
+
+ /* Copy the VFP registers to the SVE region */
+ for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
+ __uint128_t *sve_reg;
+
+ sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
+ i * pcb->pcb_sve_len);
+ *sve_reg = pcb->pcb_fpusaved->vfp_regs[i];
+ }
+
+ fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
+ (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
+ fpxr[0] = pcb->pcb_fpusaved->vfp_fpsr;
+ fpxr[1] = pcb->pcb_fpusaved->vfp_fpcr;
+}
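
Both sync routines rely on the same layout arithmetic; a sketch as
hypothetical macros (not part of the patch):

	/* Byte offset of the low 128 bits of Z register "i" */
	#define	SVE_Z_OFF(i, sve_len)	((size_t)(i) * (sve_len))
	/* Byte offset of the FPSR/FPCR pair, after the Z, P and FFR slots */
	#define	SVE_FPXR_OFF(sve_len)	\
	    ((size_t)(sve_len) * 32 + ((size_t)(sve_len) * 17) / 8)
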
+
+/*
+ * Sync the SVE registers to the VFP register state.
+ */
+void
+sve_to_vfp_sync(struct thread *td)
+{
+ struct pcb *pcb;
+ uint32_t *fpxr;
+
+ pcb = td->td_pcb;
+ if (pcb->pcb_svesaved == NULL)
+ return;
+
+ MPASS(pcb->pcb_fpusaved == &pcb->pcb_fpustate);
+
+ /* Copy the SVE registers to the VFP saved state */
+ for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
+ __uint128_t *sve_reg;
+
+ sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
+ i * pcb->pcb_sve_len);
+ pcb->pcb_fpusaved->vfp_regs[i] = *sve_reg;
+ }
+
+ fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
+ (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
+ pcb->pcb_fpusaved->vfp_fpsr = fpxr[0];
+ pcb->pcb_fpusaved->vfp_fpcr = fpxr[1];
+}
+
+static void
+vfp_save_state_common(struct thread *td, struct pcb *pcb, bool full_save)
{
uint32_t cpacr;
+ bool save_sve;
+
+ save_sve = false;
critical_enter();
/*
@@ -181,14 +514,49 @@
* i.e. return if we are trapping on FP access.
*/
cpacr = READ_SPECIALREG(cpacr_el1);
- if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) {
- KASSERT(PCPU_GET(fpcurthread) == td,
- ("Storing an invalid VFP state"));
+ if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE)
+ goto done;
+ KASSERT(PCPU_GET(fpcurthread) == td,
+ ("Storing an invalid VFP state"));
+
+ /*
+ * Also save the SVE state. As SVE depends on the VFP being
+ * enabled, we only need to check this when the VFP unit has
+ * been enabled.
+ */
+ if ((cpacr & CPACR_ZEN_MASK) == CPACR_ZEN_TRAP_NONE) {
+ /* If SVE is enabled it should be valid */
+ MPASS((pcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);
+
+ /*
+ * If we are switching while in a system call, skip saving the
+ * SVE registers. The ABI allows us to drop them across any
+ * system call; however, always doing so is expensive in
+ * SVE-heavy userspace code, as it would mean disabling SVE
+ * for all system calls and trapping the next use. As an
+ * optimisation, only disable SVE on context switch.
+ */
+ if (td->td_frame == NULL ||
+ (ESR_ELx_EXCEPTION(td->td_frame->tf_esr) != EXCP_SVC64 &&
+ td->td_sa.code != (u_int)-1))
+ save_sve = true;
+ }
+
+ if (save_sve) {
+ KASSERT(pcb->pcb_svesaved != NULL,
+ ("Storing to a NULL SVE state"));
+ sve_store(pcb->pcb_svesaved, pcb->pcb_sve_len);
+ if (full_save)
+ sve_to_vfp_sync(td);
+ } else {
+ pcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
vfp_store(pcb->pcb_fpusaved);
- dsb(ish);
- vfp_disable();
}
+ dsb(ish);
+ vfp_disable();
+
+done:
critical_exit();
}
@@ -199,7 +567,7 @@
KASSERT(pcb != NULL, ("NULL vfp pcb"));
KASSERT(td->td_pcb == pcb, ("Invalid vfp pcb"));
- vfp_save_state_common(td, pcb);
+ vfp_save_state_common(td, pcb, true);
}
void
@@ -213,7 +581,7 @@
MPASS(pcb->pcb_fpusaved == NULL);
pcb->pcb_fpusaved = &pcb->pcb_fpustate;
- vfp_save_state_common(curthread, pcb);
+ vfp_save_state_common(curthread, pcb, true);
}
void
@@ -221,7 +589,7 @@
{
KASSERT(td != NULL, ("NULL vfp thread"));
- vfp_save_state_common(td, td->td_pcb);
+ vfp_save_state_common(td, td->td_pcb, false);
}
/*
@@ -231,21 +599,40 @@
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
- struct pcb *newpcb;
+ struct pcb *newpcb, *oldpcb;
newpcb = newtd->td_pcb;
+ oldpcb = oldtd->td_pcb;
/* Kernel threads start with clean VFP */
if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
newpcb->pcb_fpflags &=
- ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
+ ~(PCB_FP_STARTED | PCB_FP_SVEVALID | PCB_FP_KERN |
+ PCB_FP_NOSAVE);
} else {
MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
+
+ /*
+ * The only SVE register state guaranteed to be preserved across
+ * a system call is the lower bits of the Z registers, as these
+ * are aliased with the existing FP registers. Because we can
+ * only create a new thread or fork through a system call, it is
+ * safe to drop the SVE state in the new thread.
+ */
+ newpcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
if (!fork) {
newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
}
}
+ newpcb->pcb_svesaved = NULL;
+ if (oldpcb->pcb_svesaved == NULL)
+ newpcb->pcb_sve_len = sve_max_vector_len;
+ else
+ KASSERT(newpcb->pcb_sve_len == oldpcb->pcb_sve_len,
+ ("%s: pcb sve vector length differs: %x != %x", __func__,
+ newpcb->pcb_sve_len, oldpcb->pcb_sve_len));
+
newpcb->pcb_fpusaved = &newpcb->pcb_fpustate;
newpcb->pcb_vfpcpu = UINT_MAX;
}
@@ -272,23 +659,48 @@
("pcb_fpusaved should point to pcb_fpustate."));
pcb->pcb_fpustate.vfp_fpcr = VFPCR_INIT;
pcb->pcb_fpustate.vfp_fpsr = 0;
+ /* XXX: Memory leak when using SVE between fork & exec? */
+ pcb->pcb_svesaved = NULL;
pcb->pcb_vfpcpu = UINT_MAX;
pcb->pcb_fpflags = 0;
}
-void
-vfp_restore_state(void)
+static void
+vfp_restore_state_common(struct thread *td, int flags)
{
struct pcb *curpcb;
u_int cpu;
+ bool restore_sve;
+
+ KASSERT(td == curthread, ("%s: Called with non-current thread",
+ __func__));
critical_enter();
cpu = PCPU_GET(cpuid);
- curpcb = curthread->td_pcb;
- curpcb->pcb_fpflags |= PCB_FP_STARTED;
+ curpcb = td->td_pcb;
- vfp_enable();
+ /*
+ * If SVE has been used and the base VFP state is in use then
+ * restore the SVE registers. A non-base VFP state should only
+ * be used by the kernel and SVE should only be used by userspace.
+ */
+ restore_sve = false;
+ if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0 &&
+ curpcb->pcb_fpusaved == &curpcb->pcb_fpustate) {
+ MPASS(curpcb->pcb_svesaved != NULL);
+ /* SVE shouldn't be enabled in the kernel */
+ MPASS((flags & PCB_FP_KERN) == 0);
+ restore_sve = true;
+ }
+
+ if (restore_sve) {
+ MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);
+ sve_enable();
+ } else {
+ curpcb->pcb_fpflags |= PCB_FP_STARTED;
+ vfp_enable();
+ }
/*
* If the previous thread on this cpu to use the VFP was not the
@@ -296,14 +708,104 @@
* cpu we need to restore the old state.
*/
if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
- vfp_restore(curthread->td_pcb->pcb_fpusaved);
- PCPU_SET(fpcurthread, curthread);
+ /*
+ * The VFP registers are the lower 128 bits of the SVE
+ * registers. Restore from the SVE saved state if SVE was
+ * previously enabled.
+ */
+ if (restore_sve) {
+ MPASS(td->td_pcb->pcb_svesaved != NULL);
+ sve_restore(td->td_pcb->pcb_svesaved,
+ td->td_pcb->pcb_sve_len);
+ } else {
+ vfp_restore(td->td_pcb->pcb_fpusaved);
+ }
+ PCPU_SET(fpcurthread, td);
curpcb->pcb_vfpcpu = cpu;
}
critical_exit();
}
+void
+vfp_restore_state(void)
+{
+ struct thread *td;
+
+ td = curthread;
+ vfp_restore_state_common(td, td->td_pcb->pcb_fpflags);
+}
+
+bool
+sve_restore_state(struct thread *td)
+{
+ struct pcb *curpcb;
+ void *svesaved;
+ uint64_t cpacr;
+
+ KASSERT(td == curthread, ("%s: Called with non-current thread",
+ __func__));
+
+ curpcb = td->td_pcb;
+
+ /* The SVE state should alias the base VFP state */
+ MPASS(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate);
+
+ /* SVE not enabled, tell the caller to raise a fault */
+ if (curpcb->pcb_sve_len == 0) {
+ /*
+ * The init pcb is created before we read the vector length.
+ * Set it to the default length.
+ */
+ if (sve_max_vector_len == 0)
+ return (false);
+
+ MPASS(curpcb->pcb_svesaved == NULL);
+ curpcb->pcb_sve_len = sve_max_vector_len;
+ }
+
+ if (curpcb->pcb_svesaved == NULL) {
+ /* SVE should be disabled, so the saved state will be invalid */
+ MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0);
+
+ /*
+ * Allocate the SVE buffer for this thread before entering the
+ * critical section so the M_WAITOK allocation can sleep.
+ */
+ svesaved = sve_alloc();
+
+ critical_enter();
+
+ /* Restore the VFP state if needed */
+ cpacr = READ_SPECIALREG(cpacr_el1);
+ if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE) {
+ vfp_restore_state_common(td, curpcb->pcb_fpflags);
+ }
+
+ /*
+ * Set the flags after restoring the VFP state. If they were set
+ * earlier, vfp_restore_state_common would have tried to restore
+ * SVE from the still-invalid buffer.
+ */
+ curpcb->pcb_svesaved = svesaved;
+ curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
+ sve_enable();
+
+ critical_exit();
+ } else {
+ vfp_restore_state_common(td, curpcb->pcb_fpflags);
+
+ /* Enable SVE if it wasn't previously enabled */
+ if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
+ critical_enter();
+ sve_enable();
+ curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
+ critical_exit();
+ }
+ }
+
+ return (true);
+}
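
The caller of sve_restore_state() is not shown in this revision;
presumably the SVE exception handler in trap.c uses the return value
to decide between resuming userspace and raising a fault. A sketch
under that assumption (the handler shape and signal details are
illustrative, not taken from the patch):

	case EXCP_SVE:
		/* Returns false when SVE is unsupported */
		if (!sve_restore_state(td))
			call_trapsignal(td, SIGILL, ILL_ILLTRP,
			    (void *)frame->tf_elr, exception);
		break;
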
+
void
vfp_init_secondary(void)
{
@@ -348,6 +850,74 @@
SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
+static void
+sve_thread_dtor(void *arg __unused, struct thread *td)
+{
+ sve_free(td->td_pcb->pcb_svesaved);
+}
+
+static void
+sve_pcpu_read(void *arg)
+{
+ u_int *len;
+ uint64_t vl;
+
+ len = arg;
+
+ /* Enable SVE (and the VFP) so we can access zcr_el1 and run rdvl */
+ sve_enable();
+
+ /* Set the longest vector length */
+ WRITE_SPECIALREG(ZCR_EL1_REG, ZCR_LEN_MASK);
+ isb();
+
+ /* Read the real vector length */
+ __asm __volatile(
+ ".arch_extension sve \n"
+ "rdvl %0, #1 \n"
+ ".arch_extension nosve \n"
+ : "=&r"(vl));
+
+ vfp_disable();
+
+ len[PCPU_GET(cpuid)] = vl;
+}
+
+static void
+sve_init(const void *dummy __unused)
+{
+ u_int *len_list;
+ uint64_t reg;
+ int i;
+
+ if (!get_kernel_reg(ID_AA64PFR0_EL1, &reg))
+ return;
+
+ if (ID_AA64PFR0_SVE_VAL(reg) == ID_AA64PFR0_SVE_NONE)
+ return;
+
+ len_list = malloc(sizeof(*len_list) * (mp_maxid + 1), M_TEMP,
+ M_WAITOK | M_ZERO);
+ smp_rendezvous(NULL, sve_pcpu_read, NULL, len_list);
+
+ sve_max_vector_len = ZCR_LEN_BYTES(ZCR_LEN_MASK);
+ CPU_FOREACH(i) {
+ if (bootverbose)
+ printf("CPU%d SVE vector length: %u\n", i, len_list[i]);
+ sve_max_vector_len = MIN(sve_max_vector_len, len_list[i]);
+ }
+ free(len_list, M_TEMP);
+
+ if (bootverbose)
+ printf("SVE with %u byte vectors\n", sve_max_vector_len);
+
+ if (sve_max_vector_len > 0) {
+ EVENTHANDLER_REGISTER(thread_dtor, sve_thread_dtor, NULL,
+ EVENTHANDLER_PRI_ANY);
+ }
+}
+SYSINIT(sve, SI_SUB_SMP, SI_ORDER_ANY, sve_init, NULL);
+
struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -2607,6 +2607,13 @@
#define VBAR_EL12_op2 0
/* ZCR_EL1 - SVE Control Register */
+#define ZCR_EL1 MRS_REG(ZCR_EL1)
+#define ZCR_EL1_REG MRS_REG_ALT_NAME(ZCR_EL1_REG)
+#define ZCR_EL1_REG_op0 3
+#define ZCR_EL1_REG_op1 0
+#define ZCR_EL1_REG_CRn 1
+#define ZCR_EL1_REG_CRm 2
+#define ZCR_EL1_REG_op2 0
#define ZCR_LEN_SHIFT 0
#define ZCR_LEN_MASK (0xf << ZCR_LEN_SHIFT)
#define ZCR_LEN_BYTES(x) ((((x) & ZCR_LEN_MASK) + 1) * 16)
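
For example, ZCR_LEN_BYTES(0) == 16 (the 128-bit architectural
minimum) and ZCR_LEN_BYTES(ZCR_LEN_MASK) == 256 (the 2048-bit
maximum), the value sve_init starts from before clamping to the
smallest length reported by any CPU.
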
diff --git a/sys/arm64/include/pcb.h b/sys/arm64/include/pcb.h
--- a/sys/arm64/include/pcb.h
+++ b/sys/arm64/include/pcb.h
@@ -59,17 +59,19 @@
u_int pcb_flags;
#define PCB_SINGLE_STEP_SHIFT 0
#define PCB_SINGLE_STEP (1 << PCB_SINGLE_STEP_SHIFT)
- uint32_t pcb_pad1;
+ u_int pcb_sve_len; /* The SVE vector length */
struct vfpstate *pcb_fpusaved;
int pcb_fpflags;
#define PCB_FP_STARTED 0x00000001
+#define PCB_FP_SVEVALID 0x00000002
#define PCB_FP_KERN 0x40000000
#define PCB_FP_NOSAVE 0x80000000
/* The bits passed to userspace in get_fpcontext */
-#define PCB_FP_USERMASK (PCB_FP_STARTED)
+#define PCB_FP_USERMASK (PCB_FP_STARTED | PCB_FP_SVEVALID)
u_int pcb_vfpcpu; /* Last cpu this thread ran VFP code */
- uint64_t pcb_reserved[5];
+ void *pcb_svesaved;
+ uint64_t pcb_reserved[4];
/*
* The userspace VFP state. The pcb_fpusaved pointer will point to
diff --git a/sys/arm64/include/vfp.h b/sys/arm64/include/vfp.h
--- a/sys/arm64/include/vfp.h
+++ b/sys/arm64/include/vfp.h
@@ -80,6 +80,12 @@
void vfp_save_state(struct thread *, struct pcb *);
void vfp_save_state_savectx(struct pcb *);
void vfp_save_state_switch(struct thread *);
+void vfp_to_sve_sync(struct thread *);
+void sve_to_vfp_sync(struct thread *);
+
+size_t sve_max_buf_size(void);
+size_t sve_buf_size(struct thread *);
+bool sve_restore_state(struct thread *);
struct fpu_kern_ctx;
