1919 lines
56 KiB
C
1919 lines
56 KiB
C
|
/* This file contains essentially all of the process and message handling.
 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
 * There is one entry point from the outside:
 *
 *   sys_call:        a system call, i.e., the kernel is trapped with an INT
 *
 * Changes:
 *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
 *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
 *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
 *   May 24, 2005     new notification system call  (Jorrit N. Herder)
 *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
 *
 * The code here is critical to make everything work and is important for the
 * overall performance of the system. A large fraction of the code deals with
 * list manipulation. To make this both easy to understand and fast to execute
 * pointer pointers are used throughout the code. Pointer pointers prevent
 * exceptions for the head or tail of a linked list.
 *
 *  node_t *queue, *new_node;	// assume these as global variables
 *  node_t **xpp = &queue; 	// get pointer pointer to head of queue
 *  while (*xpp != NULL) 	// find last pointer of the linked list
 *      xpp = &(*xpp)->next;	// get pointer to next pointer
 *  *xpp = new_node;		// now replace the end (the NULL pointer)
 *  new_node->next = NULL;	// and mark the new end of the list
 *
 * For example, when adding a new node to the end of the list, one normally
 * makes an exception for an empty list and looks up the end of the list for
 * nonempty lists. As shown above, this is not required with pointer pointers.
 */
|
||
|
|
||
|
#include <minix/com.h>
|
||
|
#include <minix/ipcconst.h>
|
||
|
#include <stddef.h>
|
||
|
#include <signal.h>
|
||
|
#include <assert.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "kernel/kernel.h"
|
||
|
#include "vm.h"
|
||
|
#include "clock.h"
|
||
|
#include "spinlock.h"
|
||
|
#include "arch_proto.h"
|
||
|
|
||
|
#include <minix/syslib.h>
|
||
|
|
||
|
/* Scheduling and message passing functions */
|
||
|
static void idle(void);
|
||
|
/**
|
||
|
* Made public for use in clock.c (for user-space scheduling)
|
||
|
static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
|
||
|
*m_ptr, int flags);
|
||
|
*/
|
||
|
static int mini_receive(struct proc *caller_ptr, endpoint_t src,
|
||
|
message *m_ptr, int flags);
|
||
|
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
|
||
|
size);
|
||
|
static int deadlock(int function, register struct proc *caller,
|
||
|
endpoint_t src_dst_e);
|
||
|
static int try_async(struct proc *caller_ptr);
|
||
|
static int try_one(struct proc *src_ptr, struct proc *dst_ptr);
|
||
|
static struct proc * pick_proc(void);
|
||
|
static void enqueue_head(struct proc *rp);
|
||
|
|
||
|
/* all idles share the same idle_priv structure */
|
||
|
static struct priv idle_priv;
|
||
|
|
||
|
/*
 * Build the name of the idle process for CPU number 'n' in 'name': the
 * string "idle" followed by the decimal value of 'n' without leading zeros,
 * terminated by a NUL character.
 *
 * name: destination buffer. At most 8 bytes are written ("idle", up to
 *       three digits and the terminating NUL); the caller must provide at
 *       least that much room.
 * n:    CPU number. Values above 999 are clamped to 999 and negative
 *       values are clamped to 0, so one to three digits are always emitted.
 */
static void set_idle_name(char * name, int n)
{
	int pos = 4;		/* next free position, right after "idle" */
	int divisor;		/* 100, 10, 1: selects the current digit */
	int started = 0;	/* nonzero once a digit has been written */

	if (n > 999)
		n = 999;
	/* A negative number would make '0' + digit a non-digit character. */
	if (n < 0)
		n = 0;

	name[0] = 'i';
	name[1] = 'd';
	name[2] = 'l';
	name[3] = 'e';

	for (divisor = 100; divisor > 0; divisor /= 10) {
		int digit = n / divisor;

		n -= digit * divisor;

		/* Suppress leading zeros, but always emit the units digit so
		 * that CPU 0 is named "idle0".
		 */
		if (started || digit != 0 || divisor == 1) {
			started = 1;
			name[pos++] = '0' + digit;
		}
	}

	name[pos] = '\0';
}
|
||
|
|
||
|
|
||
|
/* Process pick modes. NOTE(review): not referenced in the visible part of
 * this file; confirm the intended meaning against the users of these values.
 */
#define PICK_ANY	1
#define PICK_HIGHERONLY	2

/*
 * BuildNotifyMessage(m_ptr, src, dst_ptr)
 *
 * Fill *m_ptr with a notification message: the message is zeroed, its type
 * is set to NOTIFY_MESSAGE and it is stamped with get_monotonic(). Depending
 * on the source process number 'src', extra pending state is moved out of
 * the privilege structure of 'dst_ptr' into the message:
 *   HARDWARE: the pending interrupt bits (s_int_pending), which are cleared;
 *   SYSTEM:   the pending signal set (s_sig_pending), which is emptied.
 * Any other source carries no extra payload.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * single statement and stays correct under an unbraced if/else. It must be
 * used as a statement followed by a semicolon. 'm_ptr' and 'dst_ptr' are
 * evaluated more than once, so pass expressions without side effects.
 */
#define BuildNotifyMessage(m_ptr, src, dst_ptr)				\
	do {								\
		memset((m_ptr), 0, sizeof(*(m_ptr)));			\
		(m_ptr)->m_type = NOTIFY_MESSAGE;			\
		(m_ptr)->m_notify.timestamp = get_monotonic();		\
		switch (src) {						\
		case HARDWARE:						\
			(m_ptr)->m_notify.interrupts =			\
				priv(dst_ptr)->s_int_pending;		\
			priv(dst_ptr)->s_int_pending = 0;		\
			break;						\
		case SYSTEM:						\
			memcpy(&(m_ptr)->m_notify.sigset,		\
				&priv(dst_ptr)->s_sig_pending,		\
				sizeof(sigset_t));			\
			sigemptyset(&priv(dst_ptr)->s_sig_pending);	\
			break;						\
		default:						\
			break;						\
		}							\
	} while (0)
|
||
|
|
||
|
void proc_init(void)
|
||
|
{
|
||
|
struct proc * rp;
|
||
|
struct priv *sp;
|
||
|
int i;
|
||
|
|
||
|
/* Clear the process table. Announce each slot as empty and set up
|
||
|
* mappings for proc_addr() and proc_nr() macros. Do the same for the
|
||
|
* table with privilege structures for the system processes.
|
||
|
*/
|
||
|
for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
|
||
|
rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
|
||
|
rp->p_magic = PMAGIC;
|
||
|
rp->p_nr = i; /* proc number from ptr */
|
||
|
rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
|
||
|
rp->p_scheduler = NULL; /* no user space scheduler */
|
||
|
rp->p_priority = 0; /* no priority */
|
||
|
rp->p_quantum_size_ms = 0; /* no quantum size */
|
||
|
|
||
|
/* arch-specific initialization */
|
||
|
arch_proc_reset(rp);
|
||
|
}
|
||
|
for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
|
||
|
sp->s_proc_nr = NONE; /* initialize as free */
|
||
|
sp->s_id = (sys_id_t) i; /* priv structure index */
|
||
|
ppriv_addr[i] = sp; /* priv ptr from number */
|
||
|
sp->s_sig_mgr = NONE; /* clear signal managers */
|
||
|
sp->s_bak_sig_mgr = NONE;
|
||
|
}
|
||
|
|
||
|
idle_priv.s_flags = IDL_F;
|
||
|
/* initialize IDLE structures for every CPU */
|
||
|
for (i = 0; i < CONFIG_MAX_CPUS; i++) {
|
||
|
struct proc * ip = get_cpu_var_ptr(i, idle_proc);
|
||
|
ip->p_endpoint = IDLE;
|
||
|
ip->p_priv = &idle_priv;
|
||
|
/* must not let idle ever get scheduled */
|
||
|
ip->p_rts_flags |= RTS_PROC_STOP;
|
||
|
set_idle_name(ip->p_name, i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
 * Select the address space to be active while the CPU idles. On SMP builds
 * this switches to the address space of VM; on other builds it does nothing.
 */
static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/* The bet is that VM is always alive and its pages are available,
	 * so when the CPU wakes up the kernel is mapped and no surprises
	 * happen. This only matters when more than one CPU is available.
	 */
	switch_address_space(proc_addr(VM_PROC_NR));
#endif
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* idle *
|
||
|
*===========================================================================*/
|
||
|
static void idle(void)
|
||
|
{
|
||
|
struct proc * p;
|
||
|
|
||
|
/* This function is called whenever there is no work to do.
|
||
|
* Halt the CPU, and measure how many timestamp counter ticks are
|
||
|
* spent not doing anything. This allows test setups to measure
|
||
|
* the CPU utilization of certain workloads with high precision.
|
||
|
*/
|
||
|
|
||
|
p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
|
||
|
if (priv(p)->s_flags & BILLABLE)
|
||
|
get_cpulocal_var(bill_ptr) = p;
|
||
|
|
||
|
switch_address_space_idle();
|
||
|
|
||
|
#ifdef CONFIG_SMP
|
||
|
get_cpulocal_var(cpu_is_idle) = 1;
|
||
|
/* we don't need to keep time on APs as it is handled on the BSP */
|
||
|
if (cpuid != bsp_cpu_id)
|
||
|
stop_local_timer();
|
||
|
else
|
||
|
#endif
|
||
|
{
|
||
|
/*
|
||
|
* If the timer has expired while in kernel we must
|
||
|
* rearm it before we go to sleep
|
||
|
*/
|
||
|
restart_local_timer();
|
||
|
}
|
||
|
|
||
|
/* start accounting for the idle time */
|
||
|
context_stop(proc_addr(KERNEL));
|
||
|
#if !SPROFILE
|
||
|
halt_cpu();
|
||
|
#else
|
||
|
if (!sprofiling)
|
||
|
halt_cpu();
|
||
|
else {
|
||
|
volatile int * v;
|
||
|
|
||
|
v = get_cpulocal_var_ptr(idle_interrupted);
|
||
|
interrupts_enable();
|
||
|
while (!*v)
|
||
|
arch_pause();
|
||
|
interrupts_disable();
|
||
|
*v = 0;
|
||
|
}
|
||
|
#endif
|
||
|
/*
|
||
|
* end of accounting for the idle task does not happen here, the kernel
|
||
|
* is handling stuff for quite a while before it gets back here!
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* switch_to_user *
|
||
|
*===========================================================================*/
|
||
|
void switch_to_user(void)
|
||
|
{
|
||
|
/* This function is called an instant before proc_ptr is
|
||
|
* to be scheduled again.
|
||
|
*/
|
||
|
struct proc * p;
|
||
|
#ifdef CONFIG_SMP
|
||
|
int tlb_must_refresh = 0;
|
||
|
#endif
|
||
|
|
||
|
p = get_cpulocal_var(proc_ptr);
|
||
|
/*
|
||
|
* if the current process is still runnable check the misc flags and let
|
||
|
* it run unless it becomes not runnable in the meantime
|
||
|
*/
|
||
|
if (proc_is_runnable(p))
|
||
|
goto check_misc_flags;
|
||
|
/*
|
||
|
* if a process becomes not runnable while handling the misc flags, we
|
||
|
* need to pick a new one here and start from scratch. Also if the
|
||
|
* current process wasn't runnable, we pick a new one here
|
||
|
*/
|
||
|
not_runnable_pick_new:
|
||
|
if (proc_is_preempted(p)) {
|
||
|
p->p_rts_flags &= ~RTS_PREEMPTED;
|
||
|
if (proc_is_runnable(p)) {
|
||
|
if (p->p_cpu_time_left)
|
||
|
enqueue_head(p);
|
||
|
else
|
||
|
enqueue(p);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* if we have no process to run, set IDLE as the current process for
|
||
|
* time accounting and put the cpu in an idle state. After the next
|
||
|
* timer interrupt the execution resumes here and we can pick another
|
||
|
* process. If there is still nothing runnable we "schedule" IDLE again
|
||
|
*/
|
||
|
while (!(p = pick_proc())) {
|
||
|
idle();
|
||
|
}
|
||
|
|
||
|
/* update the global variable */
|
||
|
get_cpulocal_var(proc_ptr) = p;
|
||
|
|
||
|
#ifdef CONFIG_SMP
|
||
|
if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
|
||
|
tlb_must_refresh = 1;
|
||
|
#endif
|
||
|
switch_address_space(p);
|
||
|
|
||
|
check_misc_flags:
|
||
|
|
||
|
assert(p);
|
||
|
assert(proc_is_runnable(p));
|
||
|
while (p->p_misc_flags &
|
||
|
(MF_KCALL_RESUME | MF_DELIVERMSG |
|
||
|
MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
|
||
|
|
||
|
assert(proc_is_runnable(p));
|
||
|
if (p->p_misc_flags & MF_KCALL_RESUME) {
|
||
|
kernel_call_resume(p);
|
||
|
}
|
||
|
else if (p->p_misc_flags & MF_DELIVERMSG) {
|
||
|
TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
|
||
|
p->p_name, p->p_endpoint););
|
||
|
delivermsg(p);
|
||
|
}
|
||
|
else if (p->p_misc_flags & MF_SC_DEFER) {
|
||
|
/* Perform the system call that we deferred earlier. */
|
||
|
|
||
|
assert (!(p->p_misc_flags & MF_SC_ACTIVE));
|
||
|
|
||
|
arch_do_syscall(p);
|
||
|
|
||
|
/* If the process is stopped for signal delivery, and
|
||
|
* not blocked sending a message after the system call,
|
||
|
* inform PM.
|
||
|
*/
|
||
|
if ((p->p_misc_flags & MF_SIG_DELAY) &&
|
||
|
!RTS_ISSET(p, RTS_SENDING))
|
||
|
sig_delay_done(p);
|
||
|
}
|
||
|
else if (p->p_misc_flags & MF_SC_TRACE) {
|
||
|
/* Trigger a system call leave event if this was a
|
||
|
* system call. We must do this after processing the
|
||
|
* other flags above, both for tracing correctness and
|
||
|
* to be able to use 'break'.
|
||
|
*/
|
||
|
if (!(p->p_misc_flags & MF_SC_ACTIVE))
|
||
|
break;
|
||
|
|
||
|
p->p_misc_flags &=
|
||
|
~(MF_SC_TRACE | MF_SC_ACTIVE);
|
||
|
|
||
|
/* Signal the "leave system call" event.
|
||
|
* Block the process.
|
||
|
*/
|
||
|
cause_sig(proc_nr(p), SIGTRAP);
|
||
|
}
|
||
|
else if (p->p_misc_flags & MF_SC_ACTIVE) {
|
||
|
/* If MF_SC_ACTIVE was set, remove it now:
|
||
|
* we're leaving the system call.
|
||
|
*/
|
||
|
p->p_misc_flags &= ~MF_SC_ACTIVE;
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* the selected process might not be runnable anymore. We have
|
||
|
* to checkit and schedule another one
|
||
|
*/
|
||
|
if (!proc_is_runnable(p))
|
||
|
goto not_runnable_pick_new;
|
||
|
}
|
||
|
/*
|
||
|
* check the quantum left before it runs again. We must do it only here
|
||
|
* as we are sure that a possible out-of-quantum message to the
|
||
|
* scheduler will not collide with the regular ipc
|
||
|
*/
|
||
|
if (!p->p_cpu_time_left)
|
||
|
proc_no_time(p);
|
||
|
/*
|
||
|
* After handling the misc flags the selected process might not be
|
||
|
* runnable anymore. We have to checkit and schedule another one
|
||
|
*/
|
||
|
if (!proc_is_runnable(p))
|
||
|
goto not_runnable_pick_new;
|
||
|
|
||
|
TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
|
||
|
"pc 0x%08x\n",
|
||
|
cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
|
||
|
#if DEBUG_TRACE
|
||
|
p->p_schedules++;
|
||
|
#endif
|
||
|
|
||
|
p = arch_finish_switch_to_user();
|
||
|
assert(p->p_cpu_time_left);
|
||
|
|
||
|
context_stop(proc_addr(KERNEL));
|
||
|
|
||
|
/* If the process isn't the owner of FPU, enable the FPU exception */
|
||
|
if (get_cpulocal_var(fpu_owner) != p)
|
||
|
enable_fpu_exception();
|
||
|
else
|
||
|
disable_fpu_exception();
|
||
|
|
||
|
/* If MF_CONTEXT_SET is set, don't clobber process state within
|
||
|
* the kernel. The next kernel entry is OK again though.
|
||
|
*/
|
||
|
p->p_misc_flags &= ~MF_CONTEXT_SET;
|
||
|
|
||
|
#if defined(__i386__)
|
||
|
assert(p->p_seg.p_cr3 != 0);
|
||
|
#elif defined(__arm__)
|
||
|
assert(p->p_seg.p_ttbr != 0);
|
||
|
#endif
|
||
|
#ifdef CONFIG_SMP
|
||
|
if (p->p_misc_flags & MF_FLUSH_TLB) {
|
||
|
if (tlb_must_refresh)
|
||
|
refresh_tlb();
|
||
|
p->p_misc_flags &= ~MF_FLUSH_TLB;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
restart_local_timer();
|
||
|
|
||
|
/*
|
||
|
* restore_user_context() carries out the actual mode switch from kernel
|
||
|
* to userspace. This function does not return
|
||
|
*/
|
||
|
restore_user_context(p);
|
||
|
NOT_REACHABLE;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* handler for all synchronous IPC calls
|
||
|
*/
|
||
|
static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
|
||
|
int call_nr, /* system call number and flags */
|
||
|
endpoint_t src_dst_e, /* src or dst of the call */
|
||
|
message *m_ptr) /* users pointer to a message */
|
||
|
{
|
||
|
int result; /* the system call's result */
|
||
|
int src_dst_p; /* Process slot number */
|
||
|
char *callname;
|
||
|
|
||
|
/* Check destination. RECEIVE is the only call that accepts ANY (in addition
|
||
|
* to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
|
||
|
* endpoint to corresponds to a process. In addition, it is necessary to check
|
||
|
* whether a process is allowed to send to a given destination.
|
||
|
*/
|
||
|
assert(call_nr != SENDA);
|
||
|
|
||
|
/* Only allow non-negative call_nr values less than 32 */
|
||
|
if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
|
||
|
|| !(callname = ipc_call_names[call_nr])) {
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
|
||
|
call_nr, proc_nr(caller_ptr), src_dst_e);
|
||
|
#endif
|
||
|
return(ETRAPDENIED); /* trap denied by mask or kernel */
|
||
|
}
|
||
|
|
||
|
if (src_dst_e == ANY)
|
||
|
{
|
||
|
if (call_nr != RECEIVE)
|
||
|
{
|
||
|
#if 0
|
||
|
printf("sys_call: %s by %d with bad endpoint %d\n",
|
||
|
callname,
|
||
|
proc_nr(caller_ptr), src_dst_e);
|
||
|
#endif
|
||
|
return EINVAL;
|
||
|
}
|
||
|
src_dst_p = (int) src_dst_e;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Require a valid source and/or destination process. */
|
||
|
if(!isokendpt(src_dst_e, &src_dst_p)) {
|
||
|
#if 0
|
||
|
printf("sys_call: %s by %d with bad endpoint %d\n",
|
||
|
callname,
|
||
|
proc_nr(caller_ptr), src_dst_e);
|
||
|
#endif
|
||
|
return EDEADSRCDST;
|
||
|
}
|
||
|
|
||
|
/* If the call is to send to a process, i.e., for SEND, SENDNB,
|
||
|
* SENDREC or NOTIFY, verify that the caller is allowed to send to
|
||
|
* the given destination.
|
||
|
*/
|
||
|
if (call_nr != RECEIVE)
|
||
|
{
|
||
|
if (!may_send_to(caller_ptr, src_dst_p)) {
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
printf(
|
||
|
"sys_call: ipc mask denied %s from %d to %d\n",
|
||
|
callname,
|
||
|
caller_ptr->p_endpoint, src_dst_e);
|
||
|
#endif
|
||
|
return(ECALLDENIED); /* call denied by ipc mask */
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Check if the process has privileges for the requested call. Calls to the
|
||
|
* kernel may only be SENDREC, because tasks always reply and may not block
|
||
|
* if the caller doesn't do receive().
|
||
|
*/
|
||
|
if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
|
||
|
callname, proc_nr(caller_ptr), src_dst_p);
|
||
|
#endif
|
||
|
return(ETRAPDENIED); /* trap denied by mask or kernel */
|
||
|
}
|
||
|
|
||
|
if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
|
||
|
callname, proc_nr(caller_ptr), src_dst_e);
|
||
|
#endif
|
||
|
return(ETRAPDENIED); /* trap denied by mask or kernel */
|
||
|
}
|
||
|
|
||
|
switch(call_nr) {
|
||
|
case SENDREC:
|
||
|
/* A flag is set so that notifications cannot interrupt SENDREC. */
|
||
|
caller_ptr->p_misc_flags |= MF_REPLY_PEND;
|
||
|
/* fall through */
|
||
|
case SEND:
|
||
|
result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
|
||
|
if (call_nr == SEND || result != OK)
|
||
|
break; /* done, or SEND failed */
|
||
|
/* fall through for SENDREC */
|
||
|
case RECEIVE:
|
||
|
if (call_nr == RECEIVE) {
|
||
|
caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
|
||
|
IPC_STATUS_CLEAR(caller_ptr); /* clear IPC status code */
|
||
|
}
|
||
|
result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
|
||
|
break;
|
||
|
case NOTIFY:
|
||
|
result = mini_notify(caller_ptr, src_dst_e);
|
||
|
break;
|
||
|
case SENDNB:
|
||
|
result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
|
||
|
break;
|
||
|
default:
|
||
|
result = EBADCALL; /* illegal system call */
|
||
|
}
|
||
|
|
||
|
/* Now, return the result of the system call to the caller. */
|
||
|
return(result);
|
||
|
}
|
||
|
|
||
|
/*
 * Entry point for the IPC trap of the current process.
 *
 * r1: call number
 * r2: endpoint for the synchronous calls, table size for SENDA
 * r3: user pointer to the message, or to the asynchronous message table
 *
 * Handles system call tracing first, then dispatches on the call number.
 * Returns the result of the call, EDOM for an oversized SENDA table, or
 * EBADCALL for an unknown call.
 */
int do_ipc(reg_t r1, reg_t r2, reg_t r3)
{
	struct proc *const caller_ptr = get_cpulocal_var(proc_ptr);	/* the caller */
	int call_nr = (int) r1;

	assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));

	/* bill kernel time to this process. */
	kbill_ipc = caller_ptr;

	/* If this process is subject to system call tracing, handle that
	 * first.
	 */
	if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
		/* Tracing is on and this is the first sys_call entry when
		 * MF_SC_TRACE is set but MF_SC_DEFER is not.
		 */
		if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
				MF_SC_TRACE) {
			/* The tracer must be notified before the actual system
			 * call is processed, otherwise it could not obtain the
			 * input message. Postpone the entire system call and
			 * remember its arguments.
			 */
			caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
			assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
			caller_ptr->p_misc_flags |= MF_SC_DEFER;
			caller_ptr->p_defer.r1 = r1;
			caller_ptr->p_defer.r2 = r2;
			caller_ptr->p_defer.r3 = r3;

			/* Signal the "enter system call" event. Block the
			 * process.
			 */
			cause_sig(proc_nr(caller_ptr), SIGTRAP);

			/* Preserve the return register's value. */
			return caller_ptr->p_reg.retreg;
		}

		/* MF_SC_DEFER is set: the syscall is now being resumed. */
		caller_ptr->p_misc_flags &= ~MF_SC_DEFER;

		assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));

		/* Set a flag to allow reliable tracing of leaving the system
		 * call.
		 */
		caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
	}

	if (caller_ptr->p_misc_flags & MF_DELIVERMSG) {
		panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
			caller_ptr->p_name, caller_ptr->p_endpoint);
	}

	/* Now check if the call is known and try to perform the request. The
	 * only system calls that exist in MINIX are sending and receiving
	 * messages.
	 *   - SENDREC: combines SEND and RECEIVE in a single system call
	 *   - SEND:    sender blocks until its message has been delivered
	 *   - RECEIVE: receiver blocks until an acceptable message has arrived
	 *   - NOTIFY:  asynchronous call; deliver notification or mark pending
	 *   - SENDA:   list of asynchronous send requests
	 */
	switch (call_nr) {
	case SENDREC:
	case SEND:
	case RECEIVE:
	case NOTIFY:
	case SENDNB:
	{
		/* Process accounting for scheduling */
		caller_ptr->p_accounting.ipc_sync++;

		return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
				(message *) r3);
	}
	case SENDA:
	{
		/* For SENDA the second argument is the size of the table. */
		size_t msg_size = (size_t) r2;

		/* Process accounting for scheduling */
		caller_ptr->p_accounting.ipc_async++;

		/* Limit size to something reasonable. An arbitrary choice is
		 * 16 times the number of process table entries.
		 */
		if (msg_size > 16*(NR_TASKS + NR_PROCS))
			return EDOM;
		return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
	}
	case MINIX_KERNINFO:
	{
		/* It might not be initialized yet. */
		if (!minix_kerninfo_user)
			return EBADCALL;

		arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
		return OK;
	}
	default:
		return EBADCALL;		/* illegal system call */
	}
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* deadlock *
|
||
|
*===========================================================================*/
|
||
|
static int deadlock(function, cp, src_dst_e)
|
||
|
int function; /* trap number */
|
||
|
register struct proc *cp; /* pointer to caller */
|
||
|
endpoint_t src_dst_e; /* src or dst process */
|
||
|
{
|
||
|
/* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
|
||
|
* a cyclic dependency of blocking send and receive calls. The only cyclic
|
||
|
* dependency that is not fatal is if the caller and target directly SEND(REC)
|
||
|
* and RECEIVE to each other. If a deadlock is found, the group size is
|
||
|
* returned. Otherwise zero is returned.
|
||
|
*/
|
||
|
register struct proc *xp; /* process pointer */
|
||
|
int group_size = 1; /* start with only caller */
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
static struct proc *processes[NR_PROCS + NR_TASKS];
|
||
|
processes[0] = cp;
|
||
|
#endif
|
||
|
|
||
|
while (src_dst_e != ANY) { /* check while process nr */
|
||
|
int src_dst_slot;
|
||
|
okendpt(src_dst_e, &src_dst_slot);
|
||
|
xp = proc_addr(src_dst_slot); /* follow chain of processes */
|
||
|
assert(proc_ptr_ok(xp));
|
||
|
assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
processes[group_size] = xp;
|
||
|
#endif
|
||
|
group_size ++; /* extra process in group */
|
||
|
|
||
|
/* Check whether the last process in the chain has a dependency. If it
|
||
|
* has not, the cycle cannot be closed and we are done.
|
||
|
*/
|
||
|
if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
|
||
|
return 0;
|
||
|
|
||
|
/* Now check if there is a cyclic dependency. For group sizes of two,
|
||
|
* a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
|
||
|
* or other combinations indicate a deadlock.
|
||
|
*/
|
||
|
if (src_dst_e == cp->p_endpoint) { /* possible deadlock */
|
||
|
if (group_size == 2) { /* caller and src_dst */
|
||
|
/* The function number is magically converted to flags. */
|
||
|
if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
|
||
|
return(0); /* not a deadlock */
|
||
|
}
|
||
|
}
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
{
|
||
|
int i;
|
||
|
printf("deadlock between these processes:\n");
|
||
|
for(i = 0; i < group_size; i++) {
|
||
|
printf(" %10s ", processes[i]->p_name);
|
||
|
}
|
||
|
printf("\n\n");
|
||
|
for(i = 0; i < group_size; i++) {
|
||
|
print_proc(processes[i]);
|
||
|
proc_stacktrace(processes[i]);
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
return(group_size); /* deadlock found */
|
||
|
}
|
||
|
}
|
||
|
return(0); /* not a deadlock */
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* has_pending *
|
||
|
*===========================================================================*/
|
||
|
static int has_pending(sys_map_t *map, int src_p, int asynm)
|
||
|
{
|
||
|
/* Check to see if there is a pending message from the desired source
|
||
|
* available.
|
||
|
*/
|
||
|
|
||
|
int src_id;
|
||
|
sys_id_t id = NULL_PRIV_ID;
|
||
|
#ifdef CONFIG_SMP
|
||
|
struct proc * p;
|
||
|
#endif
|
||
|
|
||
|
/* Either check a specific bit in the mask map, or find the first bit set in
|
||
|
* it (if any), depending on whether the receive was called on a specific
|
||
|
* source endpoint.
|
||
|
*/
|
||
|
if (src_p != ANY) {
|
||
|
src_id = nr_to_id(src_p);
|
||
|
if (get_sys_bit(*map, src_id)) {
|
||
|
#ifdef CONFIG_SMP
|
||
|
p = proc_addr(id_to_nr(src_id));
|
||
|
if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
|
||
|
p->p_misc_flags |= MF_SENDA_VM_MISS;
|
||
|
else
|
||
|
#endif
|
||
|
id = src_id;
|
||
|
}
|
||
|
} else {
|
||
|
/* Find a source with a pending message */
|
||
|
for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
|
||
|
if (get_sys_bits(*map, src_id) != 0) {
|
||
|
#ifdef CONFIG_SMP
|
||
|
while (src_id < NR_SYS_PROCS) {
|
||
|
while (!get_sys_bit(*map, src_id)) {
|
||
|
if (src_id == NR_SYS_PROCS)
|
||
|
goto quit_search;
|
||
|
src_id++;
|
||
|
}
|
||
|
p = proc_addr(id_to_nr(src_id));
|
||
|
/*
|
||
|
* We must not let kernel fiddle with pages of a
|
||
|
* process which are currently being changed by
|
||
|
* VM. It is dangerous! So do not report such a
|
||
|
* process as having pending async messages.
|
||
|
* Skip it.
|
||
|
*/
|
||
|
if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
|
||
|
p->p_misc_flags |= MF_SENDA_VM_MISS;
|
||
|
src_id++;
|
||
|
} else
|
||
|
goto quit_search;
|
||
|
}
|
||
|
#else
|
||
|
while (!get_sys_bit(*map, src_id)) src_id++;
|
||
|
goto quit_search;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
quit_search:
|
||
|
if (src_id < NR_SYS_PROCS) /* Found one */
|
||
|
id = src_id;
|
||
|
}
|
||
|
|
||
|
return(id);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* has_pending_notify *
|
||
|
*===========================================================================*/
|
||
|
int has_pending_notify(struct proc * caller, int src_p)
|
||
|
{
|
||
|
sys_map_t * map = &priv(caller)->s_notify_pending;
|
||
|
return has_pending(map, src_p, 0);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* has_pending_asend *
|
||
|
*===========================================================================*/
|
||
|
int has_pending_asend(struct proc * caller, int src_p)
|
||
|
{
|
||
|
sys_map_t * map = &priv(caller)->s_asyn_pending;
|
||
|
return has_pending(map, src_p, 1);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* unset_notify_pending *
|
||
|
*===========================================================================*/
|
||
|
void unset_notify_pending(struct proc * caller, int src_p)
|
||
|
{
|
||
|
sys_map_t * map = &priv(caller)->s_notify_pending;
|
||
|
unset_sys_bit(*map, src_p);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* mini_send *
|
||
|
*===========================================================================*/
|
||
|
int mini_send(
|
||
|
register struct proc *caller_ptr, /* who is trying to send a message? */
|
||
|
endpoint_t dst_e, /* to whom is message being sent? */
|
||
|
message *m_ptr, /* pointer to message buffer */
|
||
|
const int flags
|
||
|
)
|
||
|
{
|
||
|
/* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
|
||
|
* for this message, copy the message to it and unblock 'dst'. If 'dst' is
|
||
|
* not waiting at all, or is waiting for another source, queue 'caller_ptr'.
|
||
|
*/
|
||
|
register struct proc *dst_ptr;
|
||
|
register struct proc **xpp;
|
||
|
int dst_p;
|
||
|
dst_p = _ENDPOINT_P(dst_e);
|
||
|
dst_ptr = proc_addr(dst_p);
|
||
|
|
||
|
if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
|
||
|
{
|
||
|
return EDEADSRCDST;
|
||
|
}
|
||
|
|
||
|
/* Check if 'dst' is blocked waiting for this message. The destination's
|
||
|
* RTS_SENDING flag may be set when its SENDREC call blocked while sending.
|
||
|
*/
|
||
|
if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
|
||
|
int call;
|
||
|
/* Destination is indeed waiting for this message. */
|
||
|
assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
|
||
|
if (!(flags & FROM_KERNEL)) {
|
||
|
if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
|
||
|
return EFAULT;
|
||
|
} else {
|
||
|
dst_ptr->p_delivermsg = *m_ptr;
|
||
|
IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
|
||
|
}
|
||
|
|
||
|
dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
|
||
|
dst_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
|
||
|
call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
|
||
|
: (flags & NON_BLOCKING ? SENDNB : SEND));
|
||
|
IPC_STATUS_ADD_CALL(dst_ptr, call);
|
||
|
|
||
|
if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
|
||
|
dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
|
||
|
|
||
|
RTS_UNSET(dst_ptr, RTS_RECEIVING);
|
||
|
|
||
|
#if DEBUG_IPC_HOOK
|
||
|
hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
|
||
|
hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
|
||
|
#endif
|
||
|
} else {
|
||
|
if(flags & NON_BLOCKING) {
|
||
|
return(ENOTREADY);
|
||
|
}
|
||
|
|
||
|
/* Check for a possible deadlock before actually blocking. */
|
||
|
if (deadlock(SEND, caller_ptr, dst_e)) {
|
||
|
return(ELOCKED);
|
||
|
}
|
||
|
|
||
|
/* Destination is not waiting. Block and dequeue caller. */
|
||
|
if (!(flags & FROM_KERNEL)) {
|
||
|
if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
|
||
|
return EFAULT;
|
||
|
} else {
|
||
|
caller_ptr->p_sendmsg = *m_ptr;
|
||
|
/*
|
||
|
* we need to remember that this message is from kernel so we
|
||
|
* can set the delivery status flags when the message is
|
||
|
* actually delivered
|
||
|
*/
|
||
|
caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
|
||
|
}
|
||
|
|
||
|
RTS_SET(caller_ptr, RTS_SENDING);
|
||
|
caller_ptr->p_sendto_e = dst_e;
|
||
|
|
||
|
/* Process is now blocked. Put in on the destination's queue. */
|
||
|
assert(caller_ptr->p_q_link == NULL);
|
||
|
xpp = &dst_ptr->p_caller_q; /* find end of list */
|
||
|
while (*xpp) xpp = &(*xpp)->p_q_link;
|
||
|
*xpp = caller_ptr; /* add caller to end */
|
||
|
|
||
|
#if DEBUG_IPC_HOOK
|
||
|
hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
|
||
|
#endif
|
||
|
}
|
||
|
return(OK);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* mini_receive *
|
||
|
*===========================================================================*/
|
||
|
static int mini_receive(struct proc * caller_ptr,
|
||
|
endpoint_t src_e, /* which message source is wanted */
|
||
|
message * m_buff_usr, /* pointer to message buffer */
|
||
|
const int flags)
|
||
|
{
|
||
|
/* A process or task wants to get a message. If a message is already queued,
|
||
|
* acquire it and deblock the sender. If no message from the desired source
|
||
|
* is available block the caller.
|
||
|
*/
|
||
|
register struct proc **xpp;
|
||
|
int r, src_id, src_proc_nr, src_p;
|
||
|
|
||
|
assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
|
||
|
/* This is where we want our message. */
|
||
|
caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
|
||
|
|
||
|
if(src_e == ANY) src_p = ANY;
|
||
|
else
|
||
|
{
|
||
|
okendpt(src_e, &src_p);
|
||
|
if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
|
||
|
{
|
||
|
return EDEADSRCDST;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Check to see if a message from desired source is already available. The
|
||
|
* caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
|
||
|
* set, the process should be blocked.
|
||
|
*/
|
||
|
if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
|
||
|
|
||
|
/* Check if there are pending notifications, except for SENDREC. */
|
||
|
if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
|
||
|
|
||
|
/* Check for pending notifications */
|
||
|
if ((src_id = has_pending_notify(caller_ptr, src_p)) != NULL_PRIV_ID) {
|
||
|
endpoint_t hisep;
|
||
|
|
||
|
src_proc_nr = id_to_nr(src_id); /* get source proc */
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
if(src_proc_nr == NONE) {
|
||
|
printf("mini_receive: sending notify from NONE\n");
|
||
|
}
|
||
|
#endif
|
||
|
assert(src_proc_nr != NONE);
|
||
|
unset_notify_pending(caller_ptr, src_id); /* no longer pending */
|
||
|
|
||
|
/* Found a suitable source, deliver the notification message. */
|
||
|
hisep = proc_addr(src_proc_nr)->p_endpoint;
|
||
|
assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
assert(src_e == ANY || hisep == src_e);
|
||
|
|
||
|
/* assemble message */
|
||
|
BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
|
||
|
caller_ptr->p_delivermsg.m_source = hisep;
|
||
|
caller_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
|
||
|
IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
|
||
|
|
||
|
goto receive_done;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Check for pending asynchronous messages */
|
||
|
if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
|
||
|
if (src_p != ANY)
|
||
|
r = try_one(proc_addr(src_p), caller_ptr);
|
||
|
else
|
||
|
r = try_async(caller_ptr);
|
||
|
|
||
|
if (r == OK) {
|
||
|
IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
|
||
|
goto receive_done;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Check caller queue. Use pointer pointers to keep code simple. */
|
||
|
xpp = &caller_ptr->p_caller_q;
|
||
|
while (*xpp) {
|
||
|
struct proc * sender = *xpp;
|
||
|
|
||
|
if (src_e == ANY || src_p == proc_nr(sender)) {
|
||
|
int call;
|
||
|
assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
|
||
|
assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
|
||
|
|
||
|
/* Found acceptable message. Copy it and update status. */
|
||
|
assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
caller_ptr->p_delivermsg = sender->p_sendmsg;
|
||
|
caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
|
||
|
caller_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
RTS_UNSET(sender, RTS_SENDING);
|
||
|
|
||
|
call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
|
||
|
IPC_STATUS_ADD_CALL(caller_ptr, call);
|
||
|
|
||
|
/*
|
||
|
* if the message is originally from the kernel on behalf of this
|
||
|
* process, we must send the status flags accordingly
|
||
|
*/
|
||
|
if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
|
||
|
IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
|
||
|
/* we can clean the flag now, not need anymore */
|
||
|
sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
|
||
|
}
|
||
|
if (sender->p_misc_flags & MF_SIG_DELAY)
|
||
|
sig_delay_done(sender);
|
||
|
|
||
|
#if DEBUG_IPC_HOOK
|
||
|
hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
|
||
|
#endif
|
||
|
|
||
|
*xpp = sender->p_q_link; /* remove from queue */
|
||
|
sender->p_q_link = NULL;
|
||
|
goto receive_done;
|
||
|
}
|
||
|
xpp = &sender->p_q_link; /* proceed to next */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* No suitable message is available or the caller couldn't send in SENDREC.
|
||
|
* Block the process trying to receive, unless the flags tell otherwise.
|
||
|
*/
|
||
|
if ( ! (flags & NON_BLOCKING)) {
|
||
|
/* Check for a possible deadlock before actually blocking. */
|
||
|
if (deadlock(RECEIVE, caller_ptr, src_e)) {
|
||
|
return(ELOCKED);
|
||
|
}
|
||
|
|
||
|
caller_ptr->p_getfrom_e = src_e;
|
||
|
RTS_SET(caller_ptr, RTS_RECEIVING);
|
||
|
return(OK);
|
||
|
} else {
|
||
|
return(ENOTREADY);
|
||
|
}
|
||
|
|
||
|
receive_done:
|
||
|
if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
|
||
|
caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* mini_notify *
|
||
|
*===========================================================================*/
|
||
|
int mini_notify(
|
||
|
const struct proc *caller_ptr, /* sender of the notification */
|
||
|
endpoint_t dst_e /* which process to notify */
|
||
|
)
|
||
|
{
|
||
|
register struct proc *dst_ptr;
|
||
|
int src_id; /* source id for late delivery */
|
||
|
int dst_p;
|
||
|
|
||
|
if (!isokendpt(dst_e, &dst_p)) {
|
||
|
util_stacktrace();
|
||
|
printf("mini_notify: bogus endpoint %d\n", dst_e);
|
||
|
return EDEADSRCDST;
|
||
|
}
|
||
|
|
||
|
dst_ptr = proc_addr(dst_p);
|
||
|
|
||
|
/* Check to see if target is blocked waiting for this message. A process
|
||
|
* can be both sending and receiving during a SENDREC system call.
|
||
|
*/
|
||
|
if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
|
||
|
! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
|
||
|
/* Destination is indeed waiting for a message. Assemble a notification
|
||
|
* message and deliver it. Copy from pseudo-source HARDWARE, since the
|
||
|
* message is in the kernel's address space.
|
||
|
*/
|
||
|
assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
|
||
|
BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
|
||
|
dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
|
||
|
dst_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
|
||
|
IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
|
||
|
RTS_UNSET(dst_ptr, RTS_RECEIVING);
|
||
|
|
||
|
return(OK);
|
||
|
}
|
||
|
|
||
|
/* Destination is not ready to receive the notification. Add it to the
|
||
|
* bit map with pending notifications. Note the indirectness: the privilege id
|
||
|
* instead of the process number is used in the pending bit map.
|
||
|
*/
|
||
|
src_id = priv(caller_ptr)->s_id;
|
||
|
set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
|
||
|
return(OK);
|
||
|
}
|
||
|
|
||
|
/*
 * Helper macros for the asynchronous message table code below.
 *
 * A_RETR / A_RETR_FLD copy a whole table entry, or a single field of it, from
 * the table owner's address space into the local variable 'tabent';
 * A_INSRT / A_INSRT_FLD copy it back. All four expect the following names in
 * the enclosing function: 'caller_ptr' (owner of the table), 'table_v'
 * (virtual address of the table), 'tabent' (an asynmsg_t), 'r' (an int that
 * receives EFAULT) and a label 'asyn_error' that is jumped to when the copy
 * fails.
 *
 * Every macro is a single statement (do { } while(0)) and must be followed by
 * a semicolon, which makes it safe inside unbraced if/else bodies.
 */

/* Report a failed table access. The numeric arguments are cast to the exact
 * types the format string expects.
 */
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%d, tab 0x%lx)\n",__FILE__,__LINE__,	\
	(field), (caller)->p_name, (int) (entry),	\
	(int) priv(caller)->s_asynsize,	\
	(unsigned long) priv(caller)->s_asyntab)

/* Fetch one field of table entry 'entry' into tabent.field. */
#define A_RETR_FLD(entry, field) do {	\
	if (data_copy(caller_ptr->p_endpoint,	\
		table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
		KERNEL, (vir_bytes) &tabent.field,	\
		sizeof(tabent.field)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;	\
		goto asyn_error;	\
	}	\
} while(0)

/* Fetch the complete table entry 'entry' into tabent. */
#define A_RETR(entry) do {	\
	if (data_copy(	\
		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
		KERNEL, (vir_bytes) &tabent,	\
		sizeof(tabent)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;	\
		goto asyn_error;	\
	}	\
} while(0)

/* Store tabent.field back into table entry 'entry'. */
#define A_INSRT_FLD(entry, field) do {	\
	if (data_copy(KERNEL, (vir_bytes) &tabent.field,	\
		caller_ptr->p_endpoint,	\
		table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
		sizeof(tabent.field)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;	\
		goto asyn_error;	\
	}	\
} while(0)

/* Store the complete tabent back into table entry 'entry'. */
#define A_INSRT(entry) do {	\
	if (data_copy(KERNEL, (vir_bytes) &tabent,	\
		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
		sizeof(tabent)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;	\
		goto asyn_error;	\
	}	\
} while(0)
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* try_deliver_senda *
|
||
|
*===========================================================================*/
|
||
|
int try_deliver_senda(struct proc *caller_ptr,
|
||
|
asynmsg_t *table,
|
||
|
size_t size)
|
||
|
{
|
||
|
int r, dst_p, done, do_notify;
|
||
|
unsigned int i;
|
||
|
unsigned flags;
|
||
|
endpoint_t dst;
|
||
|
struct proc *dst_ptr;
|
||
|
struct priv *privp;
|
||
|
asynmsg_t tabent;
|
||
|
const vir_bytes table_v = (vir_bytes) table;
|
||
|
|
||
|
privp = priv(caller_ptr);
|
||
|
|
||
|
/* Clear table */
|
||
|
privp->s_asyntab = -1;
|
||
|
privp->s_asynsize = 0;
|
||
|
|
||
|
if (size == 0) return(OK); /* Nothing to do, just return */
|
||
|
|
||
|
/* Scan the table */
|
||
|
do_notify = FALSE;
|
||
|
done = TRUE;
|
||
|
|
||
|
/* Limit size to something reasonable. An arbitrary choice is 16
|
||
|
* times the number of process table entries.
|
||
|
*
|
||
|
* (this check has been duplicated in sys_call but is left here
|
||
|
* as a sanity check)
|
||
|
*/
|
||
|
if (size > 16*(NR_TASKS + NR_PROCS)) {
|
||
|
r = EDOM;
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
for (i = 0; i < size; i++) {
|
||
|
/* Process each entry in the table and store the result in the table.
|
||
|
* If we're done handling a message, copy the result to the sender. */
|
||
|
|
||
|
dst = NONE;
|
||
|
/* Copy message to kernel */
|
||
|
A_RETR(i);
|
||
|
flags = tabent.flags;
|
||
|
dst = tabent.dst;
|
||
|
|
||
|
if (flags == 0) continue; /* Skip empty entries */
|
||
|
|
||
|
/* 'flags' field must contain only valid bits */
|
||
|
if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
|
||
|
r = EINVAL;
|
||
|
goto asyn_error;
|
||
|
}
|
||
|
if (!(flags & AMF_VALID)) { /* Must contain message */
|
||
|
r = EINVAL;
|
||
|
goto asyn_error;
|
||
|
}
|
||
|
if (flags & AMF_DONE) continue; /* Already done processing */
|
||
|
|
||
|
r = OK;
|
||
|
if (!isokendpt(tabent.dst, &dst_p))
|
||
|
r = EDEADSRCDST; /* Bad destination, report the error */
|
||
|
else if (iskerneln(dst_p))
|
||
|
r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
|
||
|
else if (!may_send_to(caller_ptr, dst_p))
|
||
|
r = ECALLDENIED; /* Send denied by IPC mask */
|
||
|
else /* r == OK */
|
||
|
dst_ptr = proc_addr(dst_p);
|
||
|
|
||
|
/* XXX: RTS_NO_ENDPOINT should be removed */
|
||
|
if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
|
||
|
r = EDEADSRCDST;
|
||
|
}
|
||
|
|
||
|
/* Check if 'dst' is blocked waiting for this message.
|
||
|
* If AMF_NOREPLY is set, do not satisfy the receiving part of
|
||
|
* a SENDREC.
|
||
|
*/
|
||
|
if (r == OK && WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
|
||
|
(!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
|
||
|
/* Destination is indeed waiting for this message. */
|
||
|
dst_ptr->p_delivermsg = tabent.msg;
|
||
|
dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
|
||
|
dst_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
|
||
|
RTS_UNSET(dst_ptr, RTS_RECEIVING);
|
||
|
#if DEBUG_IPC_HOOK
|
||
|
hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
|
||
|
#endif
|
||
|
} else if (r == OK) {
|
||
|
/* Inform receiver that something is pending */
|
||
|
set_sys_bit(priv(dst_ptr)->s_asyn_pending,
|
||
|
priv(caller_ptr)->s_id);
|
||
|
done = FALSE;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Store results */
|
||
|
tabent.result = r;
|
||
|
tabent.flags = flags | AMF_DONE;
|
||
|
if (flags & AMF_NOTIFY)
|
||
|
do_notify = TRUE;
|
||
|
else if (r != OK && (flags & AMF_NOTIFY_ERR))
|
||
|
do_notify = TRUE;
|
||
|
A_INSRT(i); /* Copy results to caller */
|
||
|
continue;
|
||
|
|
||
|
asyn_error:
|
||
|
if (dst != NONE)
|
||
|
printf("KERNEL senda error %d to %d\n", r, dst);
|
||
|
else
|
||
|
printf("KERNEL senda error %d\n", r);
|
||
|
}
|
||
|
|
||
|
if (do_notify)
|
||
|
mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
|
||
|
|
||
|
if (!done) {
|
||
|
privp->s_asyntab = (vir_bytes) table;
|
||
|
privp->s_asynsize = size;
|
||
|
}
|
||
|
|
||
|
return(OK);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* mini_senda *
|
||
|
*===========================================================================*/
|
||
|
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
{
/* Entry point for an asynchronous send. Only callers whose privilege
 * structure has SYS_PROC set may use it; the actual work is done by
 * try_deliver_senda(). Returns EPERM for other callers, otherwise whatever
 * try_deliver_senda() returns.
 */
	if (priv(caller_ptr)->s_flags & SYS_PROC)
		return try_deliver_senda(caller_ptr, table, size);

	printf( "mini_senda: warning caller has no privilege structure\n");
	return(EPERM);
}
|
||
|
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* try_async *
|
||
|
*===========================================================================*/
|
||
|
static int try_async(caller_ptr)
|
||
|
struct proc *caller_ptr;
|
||
|
{
|
||
|
int r;
|
||
|
struct priv *privp;
|
||
|
struct proc *src_ptr;
|
||
|
sys_map_t *map;
|
||
|
|
||
|
map = &priv(caller_ptr)->s_asyn_pending;
|
||
|
|
||
|
/* Try all privilege structures */
|
||
|
for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) {
|
||
|
if (privp->s_proc_nr == NONE)
|
||
|
continue;
|
||
|
|
||
|
if (!get_sys_bit(*map, privp->s_id))
|
||
|
continue;
|
||
|
|
||
|
src_ptr = proc_addr(privp->s_proc_nr);
|
||
|
|
||
|
#ifdef CONFIG_SMP
|
||
|
/*
|
||
|
* Do not copy from a process which does not have a stable address space
|
||
|
* due to VM fiddling with it
|
||
|
*/
|
||
|
if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
|
||
|
src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
|
||
|
continue;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
|
||
|
if ((r = try_one(src_ptr, caller_ptr)) == OK)
|
||
|
return(r);
|
||
|
}
|
||
|
|
||
|
return(ESRCH);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* try_one *
|
||
|
*===========================================================================*/
|
||
|
static int try_one(struct proc *src_ptr, struct proc *dst_ptr)
|
||
|
{
|
||
|
/* Try to receive an asynchronous message from 'src_ptr' */
|
||
|
int r = EAGAIN, done, do_notify;
|
||
|
unsigned int flags, i;
|
||
|
size_t size;
|
||
|
endpoint_t dst;
|
||
|
struct proc *caller_ptr;
|
||
|
struct priv *privp;
|
||
|
asynmsg_t tabent;
|
||
|
vir_bytes table_v;
|
||
|
|
||
|
privp = priv(src_ptr);
|
||
|
if (!(privp->s_flags & SYS_PROC)) return(EPERM);
|
||
|
size = privp->s_asynsize;
|
||
|
table_v = privp->s_asyntab;
|
||
|
|
||
|
/* Clear table pending message flag. We're done unless we're not. */
|
||
|
unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
|
||
|
|
||
|
if (size == 0) return(EAGAIN);
|
||
|
if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
|
||
|
|
||
|
caller_ptr = src_ptr; /* Needed for A_ macros later on */
|
||
|
|
||
|
/* Scan the table */
|
||
|
do_notify = FALSE;
|
||
|
done = TRUE;
|
||
|
|
||
|
for (i = 0; i < size; i++) {
|
||
|
/* Process each entry in the table and store the result in the table.
|
||
|
* If we're done handling a message, copy the result to the sender.
|
||
|
* Some checks done in mini_senda are duplicated here, as the sender
|
||
|
* could've altered the contents of the table in the meantime.
|
||
|
*/
|
||
|
|
||
|
/* Copy message to kernel */
|
||
|
A_RETR(i);
|
||
|
flags = tabent.flags;
|
||
|
dst = tabent.dst;
|
||
|
|
||
|
if (flags == 0) continue; /* Skip empty entries */
|
||
|
|
||
|
/* 'flags' field must contain only valid bits */
|
||
|
if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
|
||
|
r = EINVAL;
|
||
|
else if (!(flags & AMF_VALID)) /* Must contain message */
|
||
|
r = EINVAL;
|
||
|
else if (flags & AMF_DONE) continue; /* Already done processing */
|
||
|
|
||
|
/* Clear done flag. The sender is done sending when all messages in the
|
||
|
* table are marked done or empty. However, we will know that only
|
||
|
* the next time we enter this function or when the sender decides to
|
||
|
* send additional asynchronous messages and manages to deliver them
|
||
|
* all.
|
||
|
*/
|
||
|
done = FALSE;
|
||
|
|
||
|
if (r == EINVAL)
|
||
|
goto store_result;
|
||
|
|
||
|
/* Message must be directed at receiving end */
|
||
|
if (dst != dst_ptr->p_endpoint) continue;
|
||
|
|
||
|
/* If AMF_NOREPLY is set, then this message is not a reply to a
|
||
|
* SENDREC and thus should not satisfy the receiving part of the
|
||
|
* SENDREC. This message is to be delivered later.
|
||
|
*/
|
||
|
if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
|
||
|
continue;
|
||
|
|
||
|
/* Destination is ready to receive the message; deliver it */
|
||
|
r = OK;
|
||
|
dst_ptr->p_delivermsg = tabent.msg;
|
||
|
dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
|
||
|
dst_ptr->p_misc_flags |= MF_DELIVERMSG;
|
||
|
#if DEBUG_IPC_HOOK
|
||
|
hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
|
||
|
#endif
|
||
|
|
||
|
store_result:
|
||
|
/* Store results for sender */
|
||
|
tabent.result = r;
|
||
|
tabent.flags = flags | AMF_DONE;
|
||
|
if (flags & AMF_NOTIFY) do_notify = TRUE;
|
||
|
else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
|
||
|
A_INSRT(i); /* Copy results to sender */
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (do_notify)
|
||
|
mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
|
||
|
|
||
|
if (done) {
|
||
|
privp->s_asyntab = -1;
|
||
|
privp->s_asynsize = 0;
|
||
|
} else {
|
||
|
set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
|
||
|
}
|
||
|
|
||
|
asyn_error:
|
||
|
return(r);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* cancel_async *
|
||
|
*===========================================================================*/
|
||
|
int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
|
||
|
{
|
||
|
/* Cancel asynchronous messages from src to dst, because dst is not interested
|
||
|
* in them (e.g., dst has been restarted) */
|
||
|
int done, do_notify;
|
||
|
unsigned int flags, i;
|
||
|
size_t size;
|
||
|
endpoint_t dst;
|
||
|
struct proc *caller_ptr;
|
||
|
struct priv *privp;
|
||
|
asynmsg_t tabent;
|
||
|
vir_bytes table_v;
|
||
|
|
||
|
privp = priv(src_ptr);
|
||
|
if (!(privp->s_flags & SYS_PROC)) return(EPERM);
|
||
|
size = privp->s_asynsize;
|
||
|
table_v = privp->s_asyntab;
|
||
|
|
||
|
/* Clear table pending message flag. We're done unless we're not. */
|
||
|
privp->s_asyntab = -1;
|
||
|
privp->s_asynsize = 0;
|
||
|
unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
|
||
|
|
||
|
if (size == 0) return(EAGAIN);
|
||
|
if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
|
||
|
|
||
|
caller_ptr = src_ptr; /* Needed for A_ macros later on */
|
||
|
|
||
|
/* Scan the table */
|
||
|
do_notify = FALSE;
|
||
|
done = TRUE;
|
||
|
|
||
|
|
||
|
for (i = 0; i < size; i++) {
|
||
|
/* Process each entry in the table and store the result in the table.
|
||
|
* If we're done handling a message, copy the result to the sender.
|
||
|
* Some checks done in mini_senda are duplicated here, as the sender
|
||
|
* could've altered the contents of the table in the mean time.
|
||
|
*/
|
||
|
|
||
|
int r = EDEADSRCDST; /* Cancel delivery due to dead dst */
|
||
|
|
||
|
/* Copy message to kernel */
|
||
|
A_RETR(i);
|
||
|
flags = tabent.flags;
|
||
|
dst = tabent.dst;
|
||
|
|
||
|
if (flags == 0) continue; /* Skip empty entries */
|
||
|
|
||
|
/* 'flags' field must contain only valid bits */
|
||
|
if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
|
||
|
r = EINVAL;
|
||
|
else if (!(flags & AMF_VALID)) /* Must contain message */
|
||
|
r = EINVAL;
|
||
|
else if (flags & AMF_DONE) continue; /* Already done processing */
|
||
|
|
||
|
/* Message must be directed at receiving end */
|
||
|
if (dst != dst_ptr->p_endpoint) {
|
||
|
done = FALSE;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Store results for sender */
|
||
|
tabent.result = r;
|
||
|
tabent.flags = flags | AMF_DONE;
|
||
|
if (flags & AMF_NOTIFY) do_notify = TRUE;
|
||
|
else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
|
||
|
A_INSRT(i); /* Copy results to sender */
|
||
|
}
|
||
|
|
||
|
if (do_notify)
|
||
|
mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
|
||
|
|
||
|
if (!done) {
|
||
|
privp->s_asyntab = table_v;
|
||
|
privp->s_asynsize = size;
|
||
|
}
|
||
|
|
||
|
asyn_error:
|
||
|
return(OK);
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* enqueue *
|
||
|
*===========================================================================*/
|
||
|
void enqueue(
|
||
|
register struct proc *rp /* this process is now runnable */
|
||
|
)
|
||
|
{
|
||
|
/* Add 'rp' to one of the queues of runnable processes. This function is
|
||
|
* responsible for inserting a process into one of the scheduling queues.
|
||
|
* The mechanism is implemented here. The actual scheduling policy is
|
||
|
* defined in sched() and pick_proc().
|
||
|
*
|
||
|
* This function can be used x-cpu as it always uses the queues of the cpu the
|
||
|
* process is assigned to.
|
||
|
*/
|
||
|
int q = rp->p_priority; /* scheduling queue to use */
|
||
|
struct proc **rdy_head, **rdy_tail;
|
||
|
|
||
|
assert(proc_is_runnable(rp));
|
||
|
|
||
|
assert(q >= 0);
|
||
|
|
||
|
rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
|
||
|
rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
|
||
|
|
||
|
/* Now add the process to the queue. */
|
||
|
if (!rdy_head[q]) { /* add to empty queue */
|
||
|
rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
|
||
|
rp->p_nextready = NULL; /* mark new end */
|
||
|
}
|
||
|
else { /* add to tail of queue */
|
||
|
rdy_tail[q]->p_nextready = rp; /* chain tail of queue */
|
||
|
rdy_tail[q] = rp; /* set new queue tail */
|
||
|
rp->p_nextready = NULL; /* mark new end */
|
||
|
}
|
||
|
|
||
|
if (cpuid == rp->p_cpu) {
|
||
|
/*
|
||
|
* enqueueing a process with a higher priority than the current one,
|
||
|
* it gets preempted. The current process must be preemptible. Testing
|
||
|
* the priority also makes sure that a process does not preempt itself
|
||
|
*/
|
||
|
struct proc * p;
|
||
|
p = get_cpulocal_var(proc_ptr);
|
||
|
assert(p);
|
||
|
if((p->p_priority > rp->p_priority) &&
|
||
|
(priv(p)->s_flags & PREEMPTIBLE))
|
||
|
RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
|
||
|
}
|
||
|
#ifdef CONFIG_SMP
|
||
|
/*
|
||
|
* if the process was enqueued on a different cpu and the cpu is idle, i.e.
|
||
|
* the time is off, we need to wake up that cpu and let it schedule this new
|
||
|
* process
|
||
|
*/
|
||
|
else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
|
||
|
smp_schedule(rp->p_cpu);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/* Make note of when this process was added to queue */
|
||
|
read_tsc_64(&(get_cpulocal_var(proc_ptr)->p_accounting.enter_queue));
|
||
|
|
||
|
|
||
|
#if DEBUG_SANITYCHECKS
|
||
|
assert(runqueues_ok_local());
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* enqueue_head *
|
||
|
*===========================================================================*/
|
||
|
/*
 * Put a process at the front of its run queue. This comes in handy when a
 * process is preempted and removed from its run queue, so that no currently
 * non-runnable process is left on a run queue. To be fair, the process has
 * to be put back at the front.
 */
|
||
|
static void enqueue_head(struct proc *rp)
|
||
|
{
|
||
|
const int q = rp->p_priority; /* scheduling queue to use */
|
||
|
|
||
|
struct proc **rdy_head, **rdy_tail;
|
||
|
|
||
|
assert(proc_ptr_ok(rp));
|
||
|
assert(proc_is_runnable(rp));
|
||
|
|
||
|
/*
|
||
|
* the process was runnable without its quantum expired when dequeued. A
|
||
|
* process with no time left should have been handled else and differently
|
||
|
*/
|
||
|
assert(rp->p_cpu_time_left);
|
||
|
|
||
|
assert(q >= 0);
|
||
|
|
||
|
|
||
|
rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
|
||
|
rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
|
||
|
|
||
|
/* Now add the process to the queue. */
|
||
|
if (!rdy_head[q]) { /* add to empty queue */
|
||
|
rdy_head[q] = rdy_tail[q] = rp; /* create a new queue */
|
||
|
rp->p_nextready = NULL; /* mark new end */
|
||
|
} else { /* add to head of queue */
|
||
|
rp->p_nextready = rdy_head[q]; /* chain head of queue */
|
||
|
rdy_head[q] = rp; /* set new queue head */
|
||
|
}
|
||
|
|
||
|
/* Make note of when this process was added to queue */
|
||
|
read_tsc_64(&(get_cpulocal_var(proc_ptr->p_accounting.enter_queue)));
|
||
|
|
||
|
|
||
|
/* Process accounting for scheduling */
|
||
|
rp->p_accounting.dequeues--;
|
||
|
rp->p_accounting.preempted++;
|
||
|
|
||
|
#if DEBUG_SANITYCHECKS
|
||
|
assert(runqueues_ok_local());
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* dequeue *
|
||
|
*===========================================================================*/
|
||
|
void dequeue(struct proc *rp)
|
||
|
/* this process is no longer runnable */
|
||
|
{
|
||
|
/* A process must be removed from the scheduling queues, for example, because
|
||
|
* it has blocked. If the currently active process is removed, a new process
|
||
|
* is picked to run by calling pick_proc().
|
||
|
*
|
||
|
* This function can operate x-cpu as it always removes the process from the
|
||
|
* queue of the cpu the process is currently assigned to.
|
||
|
*/
|
||
|
int q = rp->p_priority; /* queue to use */
|
||
|
struct proc **xpp; /* iterate over queue */
|
||
|
struct proc *prev_xp;
|
||
|
u64_t tsc, tsc_delta;
|
||
|
|
||
|
struct proc **rdy_tail;
|
||
|
|
||
|
assert(proc_ptr_ok(rp));
|
||
|
assert(!proc_is_runnable(rp));
|
||
|
|
||
|
/* Side-effect for kernel: check if the task's stack still is ok? */
|
||
|
assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
|
||
|
|
||
|
rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
|
||
|
|
||
|
/* Now make sure that the process is not in its ready queue. Remove the
|
||
|
* process if it is found. A process can be made unready even if it is not
|
||
|
* running by being sent a signal that kills it.
|
||
|
*/
|
||
|
prev_xp = NULL;
|
||
|
for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
|
||
|
xpp = &(*xpp)->p_nextready) {
|
||
|
if (*xpp == rp) { /* found process to remove */
|
||
|
*xpp = (*xpp)->p_nextready; /* replace with next chain */
|
||
|
if (rp == rdy_tail[q]) { /* queue tail removed */
|
||
|
rdy_tail[q] = prev_xp; /* set new tail */
|
||
|
}
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
prev_xp = *xpp; /* save previous in chain */
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Process accounting for scheduling */
|
||
|
rp->p_accounting.dequeues++;
|
||
|
|
||
|
/* this is not all that accurate on virtual machines, especially with
|
||
|
IO bound processes that only spend a short amount of time in the queue
|
||
|
at a time. */
|
||
|
if (rp->p_accounting.enter_queue) {
|
||
|
read_tsc_64(&tsc);
|
||
|
tsc_delta = tsc - rp->p_accounting.enter_queue;
|
||
|
rp->p_accounting.time_in_queue = rp->p_accounting.time_in_queue +
|
||
|
tsc_delta;
|
||
|
rp->p_accounting.enter_queue = 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
#if DEBUG_SANITYCHECKS
|
||
|
assert(runqueues_ok_local());
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* pick_proc *
|
||
|
*===========================================================================*/
|
||
|
static struct proc * pick_proc(void)
|
||
|
{
|
||
|
/* Decide who to run now. A new process is selected an returned.
|
||
|
* When a billable process is selected, record it in 'bill_ptr', so that the
|
||
|
* clock task can tell who to bill for system time.
|
||
|
*
|
||
|
* This function always uses the run queues of the local cpu!
|
||
|
*/
|
||
|
register struct proc *rp; /* process to run */
|
||
|
struct proc **rdy_head;
|
||
|
int q; /* iterate over queues */
|
||
|
|
||
|
/* Check each of the scheduling queues for ready processes. The number of
|
||
|
* queues is defined in proc.h, and priorities are set in the task table.
|
||
|
* If there are no processes ready to run, return NULL.
|
||
|
*/
|
||
|
rdy_head = get_cpulocal_var(run_q_head);
|
||
|
for (q=0; q < NR_SCHED_QUEUES; q++) {
|
||
|
if(!(rp = rdy_head[q])) {
|
||
|
TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
|
||
|
continue;
|
||
|
}
|
||
|
assert(proc_is_runnable(rp));
|
||
|
if (priv(rp)->s_flags & BILLABLE)
|
||
|
get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
|
||
|
return rp;
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* endpoint_lookup *
|
||
|
*===========================================================================*/
|
||
|
struct proc *endpoint_lookup(endpoint_t e)
{
/* Return the process table entry for endpoint 'e', or NULL if isokendpt()
 * rejects the endpoint.
 */
	int slot;

	return isokendpt(e, &slot) ? proc_addr(slot) : NULL;
}
|
||
|
|
||
|
/*===========================================================================*
|
||
|
* isokendpt_f *
|
||
|
*===========================================================================*/
|
||
|
#if DEBUG_ENABLE_IPC_WARNINGS
|
||
|
int isokendpt_f(file, line, e, p, fatalflag)
|
||
|
const char *file;
|
||
|
int line;
|
||
|
#else
|
||
|
int isokendpt_f(e, p, fatalflag)
|
||
|
#endif
|
||
|
endpoint_t e;
|
||
|
int *p;
|
||
|
const int fatalflag;
|
||
|
{
|
||
|
int ok = 0;
|
||
|
/* Convert an endpoint number into a process number.
|
||
|
* Return nonzero if the process is alive with the corresponding
|
||
|
* generation number, zero otherwise.
|
||
|
*
|
||
|
* This function is called with file and line number by the
|
||
|
* isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
|
||
|
* otherwise without. This allows us to print the where the
|
||
|
* conversion was attempted, making the errors verbose without
|
||
|
* adding code for that at every call.
|
||
|
*
|
||
|
* If fatalflag is nonzero, we must panic if the conversion doesn't
|
||
|
* succeed.
|
||
|
*/
|
||
|
*p = _ENDPOINT_P(e);
|
||
|
ok = 0;
|
||
|
if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
|
||
|
ok = 1;
|
||
|
if(!ok && fatalflag)
|
||
|
panic("invalid endpoint: %d", e);
|
||
|
return ok;
|
||
|
}
|
||
|
|
||
|
static void notify_scheduler(struct proc *p)
|
||
|
{
|
||
|
message m_no_quantum;
|
||
|
int err;
|
||
|
|
||
|
assert(!proc_kernel_scheduler(p));
|
||
|
|
||
|
/* dequeue the process */
|
||
|
RTS_SET(p, RTS_NO_QUANTUM);
|
||
|
/*
|
||
|
* Notify the process's scheduler that it has run out of
|
||
|
* quantum. This is done by sending a message to the scheduler
|
||
|
* on the process's behalf
|
||
|
*/
|
||
|
m_no_quantum.m_source = p->p_endpoint;
|
||
|
m_no_quantum.m_type = SCHEDULING_NO_QUANTUM;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_queue = cpu_time_2_ms(p->p_accounting.time_in_queue);
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_deqs = p->p_accounting.dequeues;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync = p->p_accounting.ipc_sync;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_preempt = p->p_accounting.preempted;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_cpu = cpuid;
|
||
|
m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load = cpu_load();
|
||
|
|
||
|
/* Reset accounting */
|
||
|
reset_proc_accounting(p);
|
||
|
|
||
|
if ((err = mini_send(p, p->p_scheduler->p_endpoint,
|
||
|
&m_no_quantum, FROM_KERNEL))) {
|
||
|
panic("WARNING: Scheduling: mini_send returned %d\n", err);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void proc_no_time(struct proc * p)
|
||
|
{
|
||
|
if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
|
||
|
/* this dequeues the process */
|
||
|
notify_scheduler(p);
|
||
|
}
|
||
|
else {
|
||
|
/*
|
||
|
* non-preemptible processes only need their quantum to
|
||
|
* be renewed. In fact, they by pass scheduling
|
||
|
*/
|
||
|
p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
|
||
|
#if DEBUG_RACE
|
||
|
RTS_SET(p, RTS_PREEMPTED);
|
||
|
RTS_UNSET(p, RTS_PREEMPTED);
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void reset_proc_accounting(struct proc *p)
|
||
|
{
|
||
|
p->p_accounting.preempted = 0;
|
||
|
p->p_accounting.ipc_sync = 0;
|
||
|
p->p_accounting.ipc_async = 0;
|
||
|
p->p_accounting.dequeues = 0;
|
||
|
p->p_accounting.time_in_queue = 0;
|
||
|
p->p_accounting.enter_queue = 0;
|
||
|
}
|
||
|
|
||
|
void copr_not_available_handler(void)
|
||
|
{
|
||
|
struct proc * p;
|
||
|
struct proc ** local_fpu_owner;
|
||
|
/*
|
||
|
* Disable the FPU exception (both for the kernel and for the process
|
||
|
* once it's scheduled), and initialize or restore the FPU state.
|
||
|
*/
|
||
|
|
||
|
disable_fpu_exception();
|
||
|
|
||
|
p = get_cpulocal_var(proc_ptr);
|
||
|
|
||
|
/* if FPU is not owned by anyone, do not store anything */
|
||
|
local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
|
||
|
if (*local_fpu_owner != NULL) {
|
||
|
assert(*local_fpu_owner != p);
|
||
|
save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* restore the current process' state and let it run again, do not
|
||
|
* schedule!
|
||
|
*/
|
||
|
if (restore_fpu(p) != OK) {
|
||
|
/* Restoring FPU state failed. This is always the process's own
|
||
|
* fault. Send a signal, and schedule another process instead.
|
||
|
*/
|
||
|
*local_fpu_owner = NULL; /* release FPU */
|
||
|
cause_sig(proc_nr(p), SIGFPE);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
*local_fpu_owner = p;
|
||
|
context_stop(proc_addr(KERNEL));
|
||
|
restore_user_context(p);
|
||
|
NOT_REACHABLE;
|
||
|
}
|
||
|
|
||
|
void release_fpu(struct proc * p) {
|
||
|
struct proc ** fpu_owner_ptr;
|
||
|
|
||
|
fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
|
||
|
|
||
|
if (*fpu_owner_ptr == p)
|
||
|
*fpu_owner_ptr = NULL;
|
||
|
}
|
||
|
|
||
|
void ser_dump_proc()
|
||
|
{
|
||
|
struct proc *pp;
|
||
|
|
||
|
for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
|
||
|
{
|
||
|
if (isemptyp(pp))
|
||
|
continue;
|
||
|
print_proc_recursive(pp);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void increase_proc_signals(struct proc *p)
|
||
|
{
|
||
|
p->p_signal_received++;
|
||
|
}
|