minix3/drivers/net/uds/uds.c

753 lines
19 KiB
C

/*
* Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
* This code handles requests generated by operations on /dev/uds
*
* The interface to UNIX domain sockets is similar to the interface to network
* sockets. There is a character device (/dev/uds) and this server is a
* 'driver' for that device.
*/
#include "uds.h"
static ssize_t uds_perform_write(devminor_t, endpoint_t, cp_grant_id_t, size_t,
int);
static int uds_open(devminor_t, int, endpoint_t);
static int uds_close(devminor_t);
static ssize_t uds_read(devminor_t, u64_t, endpoint_t, cp_grant_id_t, size_t,
int, cdev_id_t);
static ssize_t uds_write(devminor_t, u64_t, endpoint_t, cp_grant_id_t, size_t,
int, cdev_id_t);
static int uds_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t, int,
endpoint_t, cdev_id_t);
static int uds_cancel(devminor_t, endpoint_t, cdev_id_t);
static int uds_select(devminor_t, unsigned int, endpoint_t);
/* Character driver hooks; this table is handed to chardriver_task(). */
static struct chardriver uds_tab = {
	.cdr_open	= uds_open,	/* allocate a new socket slot */
	.cdr_close	= uds_close,	/* tear down a socket slot */
	.cdr_read	= uds_read,	/* receive data */
	.cdr_write	= uds_write,	/* send data */
	.cdr_ioctl	= uds_ioctl,	/* socket control requests */
	.cdr_cancel	= uds_cancel,	/* abort a suspended request */
	.cdr_select	= uds_select,	/* readiness query/notification */
};
/*
 * File Descriptor Table: one entry per socket, indexed by the minor number
 * that uds_open() hands out. Entries with state UDS_FREE are unused.
 */
uds_fd_t uds_fd_table[NR_FDS];
/*
 * Number of sockets that still have to be closed before the driver
 * terminates. It is set by the SIGTERM handler, decremented by uds_close(),
 * and is zero while no termination is pending.
 */
static unsigned int uds_exit_left;
/*
 * Open a new socket on /dev/uds.
 *
 * A free slot in uds_fd_table is claimed, a ring buffer is mapped for it, and
 * all socket fields are set to their initial values. The slot index becomes
 * the minor number for all later read/write/ioctl/close calls on this socket.
 *
 * Returns CDEV_CLONED | <new minor> on success, ENFILE when the table is
 * full, or ENOMEM when the ring buffer cannot be mapped.
 */
static int
uds_open(devminor_t UNUSED(orig_minor), int access,
	endpoint_t user_endpt)
{
	uds_fd_t *sock;
	devminor_t slot;
	char *ring;
	int n;

	dprintf(("UDS: uds_open() from %d\n", user_endpt));

	/*
	 * Look for an unused descriptor. Slot 0 is skipped: the new minor
	 * number must differ from the minor number of /dev/uds itself (0).
	 */
	slot = 1;
	while (slot < NR_FDS && uds_fd_table[slot].state != UDS_FREE)
		slot++;

	if (slot == NR_FDS)
		return ENFILE;

	/*
	 * The ring buffer exists only while the socket is in use, which saves
	 * memory in the common case. It is obtained with mmap rather than
	 * malloc so that the memory can actually be given back on close.
	 */
	ring = mmap(NULL, UDS_BUF, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (ring == MAP_FAILED)
		return ENOMEM;

	/* Claim the slot and fill in the socket's initial parameters. */
	sock = &uds_fd_table[slot];

	sock->state = UDS_INUSE;
	sock->owner = user_endpt;
	sock->sel_endpt = NONE;
	sock->sel_ops = 0;
	sock->buf = ring;
	sock->pos = 0;
	sock->size = 0;
	sock->mode = UDS_R | UDS_W;
	sock->type = -1;

	/* No pending connections yet. */
	for (n = 0; n < UDS_SOMAXCONN; n++)
		sock->backlog[n] = -1;
	sock->backlog_size = UDS_SOMAXCONN;

	/* No file descriptors in flight yet. */
	memset(&sock->ancillary_data, '\0', sizeof(struct ancillary));
	for (n = 0; n < OPEN_MAX; n++)
		sock->ancillary_data.fds[n] = -1;

	sock->listening = 0;
	sock->peer = -1;
	sock->child = -1;

	memset(&sock->addr, '\0', sizeof(struct sockaddr_un));
	memset(&sock->source, '\0', sizeof(struct sockaddr_un));
	memset(&sock->target, '\0', sizeof(struct sockaddr_un));

	sock->suspended = UDS_NOT_SUSPENDED;

	return CDEV_CLONED | slot;
}
/*
 * Forcibly disconnect the given socket: drop its peer link, record
 * ECONNRESET as its pending error, revive a suspended call if there is one,
 * and report all selected operations as ready.
 */
static void
uds_reset(devminor_t minor)
{
	uds_fd_t *sock = &uds_fd_table[minor];

	/* The socket no longer has a peer; remember why for the caller. */
	sock->peer = -1;
	sock->err = ECONNRESET;

	/* A process blocked on this socket must be revived now. */
	if (sock->suspended != UDS_NOT_SUSPENDED)
		uds_unsuspend(minor);

	/* Nothing more to do unless a select is outstanding. */
	if (sock->sel_ops == 0)
		return;

	/* Every call on this socket fails immediately from now on. */
	chardriver_reply_select(sock->sel_endpt, minor, sock->sel_ops);
	sock->sel_ops = 0;
}
/*
 * Close a socket: detach it from its peer or from pending connections,
 * release in-flight file descriptors and the ring buffer, and mark the slot
 * free again.
 *
 * Returns OK on success, ENXIO for an out-of-range minor, or EINVAL when the
 * slot is not in use.
 */
static int
uds_close(devminor_t minor)
{
	uds_fd_t *sock, *other;
	int slot, peer;

	dprintf(("UDS: uds_close(%d)\n", minor));

	if (minor < 0 || minor >= NR_FDS)
		return ENXIO;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE)
		return EINVAL;

	peer = sock->peer;
	if (peer != -1) {
		other = &uds_fd_table[peer];

		if (other->peer != -1) {
			/* Fully connected: disconnect the other side. */
			uds_reset(peer);
		} else {
			/*
			 * Our peer does not point back at anyone, so we are
			 * still connecting: take us out of the server's
			 * backlog.
			 */
			if (!other->listening)
				panic("connecting socket attached to non-server");

			for (slot = 0; slot < other->backlog_size; slot++) {
				if (other->backlog[slot] != minor)
					continue;
				other->backlog[slot] = -1;
				break;
			}
		}
	} else if (sock->listening) {
		/* Server socket: reset every socket waiting in the backlog. */
		for (slot = 0; slot < sock->backlog_size; slot++) {
			if (sock->backlog[slot] != -1)
				uds_reset(sock->backlog[slot]);
		}
	}

	/* Drop any file descriptors that were in flight on this socket. */
	if (sock->ancillary_data.nfiledes > 0)
		uds_clear_fds(minor, &sock->ancillary_data);

	/* Give the ring buffer memory back. */
	munmap(sock->buf, UDS_BUF);

	/* Zeroing the entry puts it back in the UDS_FREE state. */
	memset(sock, '\0', sizeof(uds_fd_t));

	/* When shutting down, exit as soon as the last socket is closed. */
	if (uds_exit_left > 0 && --uds_exit_left == 0)
		chardriver_terminate();

	return OK;
}
/*
 * Report which of the requested select operations are ready on a socket.
 * If CDEV_NOTIFY is set in 'ops', the operations that are not ready yet are
 * remembered so that the requester can be notified later.
 *
 * Returns the set of ready operations, ENXIO for an out-of-range minor, or
 * EINVAL when the slot is not in use.
 */
static int
uds_select(devminor_t minor, unsigned int ops, endpoint_t endpt)
{
	uds_fd_t *sock;
	unsigned int ready, pending;
	int slot, res, notify;

	dprintf(("UDS: uds_select(%d)\n", minor));

	if (minor < 0 || minor >= NR_FDS)
		return ENXIO;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE)
		return EINVAL;

	notify = (ops & CDEV_NOTIFY);
	ops &= (CDEV_OP_RD | CDEV_OP_WR | CDEV_OP_ERR);
	ready = 0;

	/* Readable: data, a pending connection, or a read error. */
	if (ops & CDEV_OP_RD) {
		/* Dry-run a one-byte read to see what would happen. */
		res = uds_perform_read(minor, NONE, GRANT_INVALID, 1, 1);

		if (res > 0) {
			/* Data is available. */
			ready |= CDEV_OP_RD;
		} else if (sock->listening != 1) {
			/* Anything other than "would block" is an error. */
			if (res != EDONTREPLY)
				ready |= CDEV_OP_RD;
		} else {
			/* Listening socket: look for a pending connection. */
			for (slot = 0; slot < sock->backlog_size &&
			    !(ready & CDEV_OP_RD); slot++) {
				if (sock->backlog[slot] != -1)
					ready |= CDEV_OP_RD;
			}
		}
	}

	/* Writable: a dry-run one-byte write would not block. */
	if (ops & CDEV_OP_WR) {
		res = uds_perform_write(minor, NONE, GRANT_INVALID, 1, 1);
		if (res != 0 && res != EDONTREPLY)
			ready |= CDEV_OP_WR;
	}

	/*
	 * Operations that were requested but are not ready are added to the
	 * saved set, provided the caller asked to be notified of changes.
	 */
	pending = ops & ~ready;
	if (pending && notify) {
		sock->sel_endpt = endpt;
		sock->sel_ops |= pending;
	}

	return ready;
}
/*
 * Read up to 'size' bytes from the socket's own ring buffer.
 *
 * minor   - index of the socket in uds_fd_table; not range-checked here
 * endpt   - endpoint owning 'grant' (destination of the copy)
 * grant   - grant to copy the data to
 * size    - maximum number of bytes to read
 * pretend - if nonzero, only report what a read would do: no data is copied,
 *           the buffer is left untouched and a pending ECONNRESET is kept
 *
 * Returns the number of bytes read (or readable, when pretending); 0 for a
 * zero-sized request or when the buffer is empty and the peer is shut down
 * for writing; EDONTREPLY when the call would have to block; EPIPE,
 * ECONNRESET or ENOTCONN for the corresponding socket states; or the error
 * returned by sys_safecopyto().
 */
ssize_t
uds_perform_read(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant,
	size_t size, int pretend)
{
	size_t pos, subsize;
	int r, peer;
	dprintf(("UDS: uds_perform_read(%d)\n", minor));
	peer = uds_fd_table[minor].peer;
	/* Skip reads of zero bytes. */
	if (size == 0)
		return 0;
	/* Check if the socket isn't shut down for reads. */
	if (!(uds_fd_table[minor].mode & UDS_R))
		return EPIPE;
	/* Empty buffer: fail, report end-of-stream, or block. */
	if (uds_fd_table[minor].size == 0) {
		if (peer == -1) {
			/*
			 * We're not connected. That's only a problem when this
			 * socket is connection oriented.
			 */
			if (uds_fd_table[minor].type == SOCK_STREAM ||
			    uds_fd_table[minor].type == SOCK_SEQPACKET) {
				if (uds_fd_table[minor].err == ECONNRESET) {
					/* The reset is reported only once. */
					if (!pretend)
						uds_fd_table[minor].err = 0;
					return ECONNRESET;
				} else
					return ENOTCONN;
			}
		}
		/* Check if process is reading from a closed pipe. */
		if (peer != -1 && !(uds_fd_table[peer].mode & UDS_W) &&
		    uds_fd_table[minor].size == 0)
			return 0;
		if (pretend)
			return EDONTREPLY;
		/* Sanity check: no writer may be blocked while we are empty. */
		if (peer != -1 &&
		    uds_fd_table[peer].suspended == UDS_SUSPENDED_WRITE)
			panic("writer blocked on empty socket");
		dprintf(("UDS: suspending read request on %d\n", minor));
		/* Process is reading from an empty pipe. Suspend it. */
		return EDONTREPLY;
	}
	/* How much can we get from the ring buffer? */
	if (size > uds_fd_table[minor].size)
		size = uds_fd_table[minor].size;
	if (pretend)
		return size;
	/* Get the data from the tail of the ring buffer. */
	pos = uds_fd_table[minor].pos;
	/* First chunk: from the tail up to the end of the buffer at most. */
	subsize = UDS_BUF - pos;
	if (subsize > size)
		subsize = size;
	if ((r = sys_safecopyto(endpt, grant, 0,
	    (vir_bytes) &uds_fd_table[minor].buf[pos], subsize)) != OK)
		return r;
	/* Second chunk: the data wrapped around to the buffer's start. */
	if (subsize < size) {
		if ((r = sys_safecopyto(endpt, grant, subsize,
		    (vir_bytes) uds_fd_table[minor].buf,
		    size - subsize)) != OK)
			return r;
	}
	/* Advance the buffer tail. */
	uds_fd_table[minor].pos = (pos + size) % UDS_BUF;
	uds_fd_table[minor].size -= size;
	/* Reset position if the buffer is empty (it may save a copy call). */
	if (uds_fd_table[minor].size == 0)
		uds_fd_table[minor].pos = 0;
	/* See if we can wake up a blocked writer. */
	if (peer != -1 && uds_fd_table[peer].suspended == UDS_SUSPENDED_WRITE)
		uds_unsuspend(peer);
	/* See if we can satisfy an ongoing select. */
	if (peer != -1 && (uds_fd_table[peer].sel_ops & CDEV_OP_WR) &&
	    uds_fd_table[minor].size < UDS_BUF) {
		/* A write on the peer is possible now. */
		chardriver_reply_select(uds_fd_table[peer].sel_endpt, peer,
		    CDEV_OP_WR);
		uds_fd_table[peer].sel_ops &= ~CDEV_OP_WR;
	}
	return size; /* number of bytes read */
}
/*
 * Write up to 'size' bytes into the ring buffer of the receiving socket.
 *
 * For SOCK_STREAM and SOCK_SEQPACKET sockets the receiver is the connected
 * peer; otherwise it is the first SOCK_DGRAM socket in the table that is
 * bound to this socket's target address.
 *
 * minor   - index of the sending socket in uds_fd_table; not range-checked
 * endpt   - endpoint owning 'grant' (source of the copy)
 * grant   - grant to copy the data from
 * size    - number of bytes the caller wants to write
 * pretend - if nonzero, only report what a write would do: no data is
 *           copied and a pending ECONNRESET is kept
 *
 * Returns the number of bytes written (or writable, when pretending), which
 * may be less than 'size'; 0 for a zero-sized request; EDONTREPLY when the
 * call would have to block; EPIPE, EMSGSIZE, ECONNRESET, ENOTCONN or ENOENT
 * for the corresponding conditions; or the error returned by
 * sys_safecopyfrom().
 */
static ssize_t
uds_perform_write(devminor_t minor, endpoint_t endpt, cp_grant_id_t grant,
	size_t size, int pretend)
{
	size_t subsize, pos;
	int i, r, peer;
	dprintf(("UDS: uds_perform_write(%d)\n", minor));
	/* Skip writes of zero bytes. */
	if (size == 0)
		return 0;
	/* Check if the socket isn't shut down for writes. */
	if (!(uds_fd_table[minor].mode & UDS_W))
		return EPIPE;
	/* Datagram messages must fit in the buffer entirely. */
	if (size > UDS_BUF && uds_fd_table[minor].type != SOCK_STREAM)
		return EMSGSIZE;
	if (uds_fd_table[minor].type == SOCK_STREAM ||
	    uds_fd_table[minor].type == SOCK_SEQPACKET) {
		/*
		 * If we're writing to a connection-oriented socket, then it
		 * needs a peer to write to. For disconnected sockets, writing
		 * is an error; for connecting sockets, writes should suspend.
		 */
		peer = uds_fd_table[minor].peer;
		if (peer == -1) {
			if (uds_fd_table[minor].err == ECONNRESET) {
				/* The reset is reported only once. */
				if (!pretend)
					uds_fd_table[minor].err = 0;
				return ECONNRESET;
			} else
				return ENOTCONN;
		} else if (uds_fd_table[peer].peer == -1) /* connecting */
			return EDONTREPLY;
	/*
	 * NOTE: besides SOCK_DGRAM, the branch below is also taken for any
	 * other type value, including the initial -1 set by uds_open().
	 */
	} else /* uds_fd_table[minor].type == SOCK_DGRAM */ {
		peer = -1;
		/* Locate the "peer" we want to write to. */
		for (i = 0; i < NR_FDS; i++) {
			/*
			 * Look for a SOCK_DGRAM socket that is bound on
			 * the target address.
			 */
			if (uds_fd_table[i].type == SOCK_DGRAM &&
			    uds_fd_table[i].addr.sun_family == AF_UNIX &&
			    !strncmp(uds_fd_table[minor].target.sun_path,
			    uds_fd_table[i].addr.sun_path,
			    sizeof(uds_fd_table[i].addr.sun_path))) {
				peer = i;
				break;
			}
		}
		if (peer == -1)
			return ENOENT;
	}
	/* Check if we write to a closed pipe. */
	if (!(uds_fd_table[peer].mode & UDS_R))
		return EPIPE;
	/*
	 * We have to preserve the boundary for DGRAM. If there's already a
	 * packet waiting, discard it silently and pretend it was written.
	 */
	if (uds_fd_table[minor].type == SOCK_DGRAM &&
	    uds_fd_table[peer].size > 0)
		return size;
	/*
	 * Check if the ring buffer is already full, and if the SEQPACKET
	 * message wouldn't write to an empty buffer.
	 */
	if (uds_fd_table[peer].size == UDS_BUF ||
	    (uds_fd_table[minor].type == SOCK_SEQPACKET &&
	    uds_fd_table[peer].size > 0)) {
		if (pretend)
			return EDONTREPLY;
		/* Sanity check: no reader may be blocked on a non-empty buffer. */
		if (uds_fd_table[peer].suspended == UDS_SUSPENDED_READ)
			panic("reader blocked on full socket");
		dprintf(("UDS: suspending write request on %d\n", minor));
		/*
		 * Process is writing to a full buffer (or, for SEQPACKET, a
		 * non-empty one). Suspend it.
		 */
		return EDONTREPLY;
	}
	/* How much can we add to the ring buffer? */
	if (size > UDS_BUF - uds_fd_table[peer].size)
		size = UDS_BUF - uds_fd_table[peer].size;
	if (pretend)
		return size;
	/* Put the data at the head of the ring buffer. */
	pos = (uds_fd_table[peer].pos + uds_fd_table[peer].size) % UDS_BUF;
	/* First chunk: from the head up to the end of the buffer at most. */
	subsize = UDS_BUF - pos;
	if (subsize > size)
		subsize = size;
	if ((r = sys_safecopyfrom(endpt, grant, 0,
	    (vir_bytes) &uds_fd_table[peer].buf[pos], subsize)) != OK)
		return r;
	/* Second chunk: the remainder wraps around to the buffer's start. */
	if (subsize < size) {
		if ((r = sys_safecopyfrom(endpt, grant, subsize,
		    (vir_bytes) uds_fd_table[peer].buf, size - subsize)) != OK)
			return r;
	}
	/* Advance the buffer head. */
	uds_fd_table[peer].size += size;
	/* Fill in the source address to be returned by recvfrom, recvmsg. */
	if (uds_fd_table[minor].type == SOCK_DGRAM)
		memcpy(&uds_fd_table[peer].source, &uds_fd_table[minor].addr,
		    sizeof(struct sockaddr_un));
	/* See if we can wake up a blocked reader. */
	if (uds_fd_table[peer].suspended == UDS_SUSPENDED_READ)
		uds_unsuspend(peer);
	/* See if we can satisfy an ongoing select. */
	if ((uds_fd_table[peer].sel_ops & CDEV_OP_RD) &&
	    uds_fd_table[peer].size > 0) {
		/* A read on the peer is possible now. */
		chardriver_reply_select(uds_fd_table[peer].sel_endpt, peer,
		    CDEV_OP_RD);
		uds_fd_table[peer].sel_ops &= ~CDEV_OP_RD;
	}
	return size; /* number of bytes written */
}
/*
 * Character driver read hook. Attempts the read right away; if it cannot
 * complete, the request is recorded on the socket so that it can be resumed
 * later, unless the caller asked for non-blocking I/O.
 *
 * Returns the result of uds_perform_read(), EAGAIN for a non-blocking call
 * that would block, ENXIO for an out-of-range minor, or EINVAL when the slot
 * is not in use. The 'position' argument is not used.
 */
static ssize_t
uds_read(devminor_t minor, u64_t position, endpoint_t endpt,
	cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
{
	uds_fd_t *sock;
	ssize_t res;

	dprintf(("UDS: uds_read(%d)\n", minor));

	if (minor < 0 || minor >= NR_FDS)
		return ENXIO;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE)
		return EINVAL;

	res = uds_perform_read(minor, endpt, grant, size, 0);
	if (res != EDONTREPLY)
		return res;

	/* The call could not complete: save what is needed to resume it. */
	sock->suspended = UDS_SUSPENDED_READ;
	sock->susp_endpt = endpt;
	sock->susp_grant = grant;
	sock->susp_size = size;
	sock->susp_id = id;

	/* Blocking callers stay suspended. */
	if (!(flags & CDEV_NONBLOCK))
		return res;

	/* Non-blocking callers get their request cancelled immediately. */
	uds_cancel(minor, endpt, id);
	return EAGAIN;
}
/*
 * Character driver write hook. Attempts the write right away; if it cannot
 * complete, the request is recorded on the socket so that it can be resumed
 * later, unless the caller asked for non-blocking I/O.
 *
 * Returns the result of uds_perform_write(), EAGAIN for a non-blocking call
 * that would block, ENXIO for an out-of-range minor, or EINVAL when the slot
 * is not in use. The 'position' argument is not used.
 */
static ssize_t
uds_write(devminor_t minor, u64_t position, endpoint_t endpt,
	cp_grant_id_t grant, size_t size, int flags, cdev_id_t id)
{
	uds_fd_t *sock;
	ssize_t res;

	dprintf(("UDS: uds_write(%d)\n", minor));

	if (minor < 0 || minor >= NR_FDS)
		return ENXIO;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE)
		return EINVAL;

	res = uds_perform_write(minor, endpt, grant, size, 0);
	if (res != EDONTREPLY)
		return res;

	/* The call could not complete: save what is needed to resume it. */
	sock->suspended = UDS_SUSPENDED_WRITE;
	sock->susp_endpt = endpt;
	sock->susp_grant = grant;
	sock->susp_size = size;
	sock->susp_id = id;

	/* Blocking callers stay suspended. */
	if (!(flags & CDEV_NONBLOCK))
		return res;

	/* Non-blocking callers get their request cancelled immediately. */
	uds_cancel(minor, endpt, id);
	return EAGAIN;
}
/*
 * Character driver ioctl hook. Records the calling user endpoint as the
 * socket's owner and passes the request on to uds_do_ioctl(). If that call
 * suspends, the request details are saved so it can be resumed later; a
 * non-blocking request is cancelled right away instead.
 *
 * Returns the result of uds_do_ioctl(); for a non-blocking request that would
 * block, EINPROGRESS (connect) or EAGAIN (anything else); ENXIO for an
 * out-of-range minor; or EINVAL when the slot is not in use.
 */
static int
uds_ioctl(devminor_t minor, unsigned long request, endpoint_t endpt,
	cp_grant_id_t grant, int flags, endpoint_t user_endpt, cdev_id_t id)
{
	uds_fd_t *sock;
	int res, kind;

	dprintf(("UDS: uds_ioctl(%d, %lu)\n", minor, request));

	if (minor < 0 || minor >= NR_FDS)
		return ENXIO;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE)
		return EINVAL;

	/* Keep track of who is using the socket. */
	sock->owner = user_endpt;

	/* The UDS ioctl subsystem does the real work. */
	res = uds_do_ioctl(minor, request, endpt, grant);
	if (res != EDONTREPLY)
		return res;

	/* The ioctl handler itself must have set the suspension type. */
	kind = sock->suspended;
	if (kind == UDS_NOT_SUSPENDED)
		panic("IOCTL did not actually suspend?");

	/* Save what is needed to resume the call later. */
	sock->susp_endpt = endpt;
	sock->susp_grant = grant;
	sock->susp_size = 0;	/* irrelevant */
	sock->susp_id = id;

	/* Non-blocking callers get their request cancelled immediately. */
	if (flags & CDEV_NONBLOCK) {
		uds_cancel(minor, endpt, id);
		res = (kind == UDS_SUSPENDED_CONNECT) ? EINPROGRESS : EAGAIN;
	}

	return res;
}
/*
 * Try to complete the request that is suspended on the given socket.
 *
 * Suspended reads and writes are retried; if they would still block, the
 * socket stays suspended. Suspended connect and accept calls are completed
 * with the socket's pending error value, which is then cleared. On
 * completion a reply is sent to the suspended caller and the socket is
 * marked as not suspended.
 */
void
uds_unsuspend(devminor_t minor)
{
	uds_fd_t *sock = &uds_fd_table[minor];
	int result;

	if (sock->suspended == UDS_SUSPENDED_READ) {
		result = uds_perform_read(minor, sock->susp_endpt,
		    sock->susp_grant, sock->susp_size, 0);
		if (result == EDONTREPLY)
			return;		/* still nothing to read */
	} else if (sock->suspended == UDS_SUSPENDED_WRITE) {
		result = uds_perform_write(minor, sock->susp_endpt,
		    sock->susp_grant, sock->susp_size, 0);
		if (result == EDONTREPLY)
			return;		/* still no room to write */
	} else if (sock->suspended == UDS_SUSPENDED_CONNECT ||
	    sock->suspended == UDS_SUSPENDED_ACCEPT) {
		/*
		 * The connection has already been set up by the caller, so
		 * all that is left is to unblock the process.
		 */
		result = sock->err;
		sock->err = 0;
	} else {
		panic("unknown suspension type %d", sock->suspended);
	}

	chardriver_reply_task(sock->susp_endpt, sock->susp_id, result);
	sock->suspended = UDS_NOT_SUSPENDED;
}
/*
 * Character driver cancel hook: abort the request that is suspended on a
 * socket, provided it is the one identified by 'endpt' and 'id'.
 *
 * Returns EINTR as the reply to the original request when it was cancelled,
 * or EDONTREPLY when there is no matching suspended request.
 */
static int
uds_cancel(devminor_t minor, endpoint_t endpt, cdev_id_t id)
{
	uds_fd_t *sock;
	int slot;

	dprintf(("UDS: uds_cancel(%d)\n", minor));

	if (minor < 0 || minor >= NR_FDS)
		return EDONTREPLY;

	sock = &uds_fd_table[minor];
	if (sock->state != UDS_INUSE) {
		printf("UDS: cancel request for closed minor %d\n", minor);
		return EDONTREPLY;
	}

	/* Ignore cancel requests for anything but the pending request. */
	if (sock->suspended == UDS_NOT_SUSPENDED)
		return EDONTREPLY;	/* this happens. */
	if (sock->susp_endpt != endpt || sock->susp_id != id)
		return EDONTREPLY;	/* this happens. */

	/* Undo whatever the suspended call had set up so far. */
	switch (sock->suspended) {
	case UDS_SUSPENDED_READ:
	case UDS_SUSPENDED_WRITE:
		/* No state to clean up. */
		break;

	case UDS_SUSPENDED_CONNECT:
		/* The connect itself continues asynchronously. */
		break;

	case UDS_SUSPENDED_ACCEPT:
		/* A partial accept() only sets the server's child. */
		for (slot = 0; slot < NR_FDS; slot++) {
			if (uds_fd_table[slot].child == minor)
				uds_fd_table[slot].child = -1;
		}
		break;

	default:
		panic("unknown suspension type %d", sock->suspended);
	}

	/* The call was cancelled, so the socket is no longer suspended. */
	sock->suspended = UDS_NOT_SUSPENDED;

	return EINTR;	/* reply to the original request */
}
/*
 * Initialization callback: start with an empty socket table and no pending
 * termination.
 */
static int
uds_init(int UNUSED(type), sef_init_info_t *UNUSED(info))
{
	/* An all-zero entry is implicitly in the UDS_FREE state. */
	memset(uds_fd_table, 0, sizeof(uds_fd_table));

	uds_exit_left = 0;

	return OK;
}
/*
 * Signal callback. On SIGTERM, terminate right away if no sockets are open;
 * otherwise record how many sockets are still open so that the driver can
 * terminate once the last of them has been closed. Other signals are
 * ignored.
 */
static void
uds_signal(int signo)
{
	unsigned int in_use;
	int slot;

	/* Termination is the only signal of interest. */
	if (signo != SIGTERM)
		return;

	/* Count the sockets that have to be closed before we may exit. */
	in_use = 0;
	for (slot = 0; slot < NR_FDS; slot++) {
		if (uds_fd_table[slot].state == UDS_INUSE)
			in_use++;
	}
	uds_exit_left = in_use;

	if (uds_exit_left == 0)
		chardriver_terminate();
}
/*
 * Register this driver's SEF callbacks (uds_init for a fresh start,
 * uds_signal for signals) and then let SEF perform the startup.
 */
static void
uds_startup(void)
{
	/* Register init callbacks. */
	sef_setcb_init_fresh(uds_init);
	/* No live update support for now. */
	/* Register signal callbacks. */
	sef_setcb_signal_handler(uds_signal);
	/* Let SEF perform startup. */
	sef_startup();
}
/*
 * Entry point of the UNIX domain sockets driver: perform startup, then hand
 * control to the character driver task with the UDS callback table.
 */
int
main(void)
{
	uds_startup();

	chardriver_task(&uds_tab);

	return OK;
}