PERFORCE change 98847 for review
Kip Macy
kmacy at FreeBSD.org
Fri Jun 9 04:30:31 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=98847
Change 98847 by kmacy at kmacy_storage:sun4v_work_ifc on 2006/06/09 04:28:08
revert changes that were not meant to go in in the last commit
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#6 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_socket.c#4 (text+ko) ====
@@ -63,7 +63,7 @@
.fo_kqfilter = soo_kqfilter,
.fo_stat = soo_stat,
.fo_close = soo_close,
- .fo_flags = DFLAG_PASSABLE | DFLAG_MPSAFE
+ .fo_flags = DFLAG_PASSABLE
};
/* ARGSUSED */
==== //depot/projects/kmacy_sun4v/src/sys/kern/uipc_usrreq.c#6 (text+ko) ====
@@ -88,100 +88,33 @@
struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
/*
- * Both send and receive buffers are allocated PIPSIZ bytes of buffering
- * for stream sockets, although the total for sender and receiver is
- * actually only PIPSIZ.
- * Datagram sockets really use the sendspace as the maximum datagram size,
- * and don't really want to reserve the sendspace. Their recvspace should
- * be large enough for at least one max-size datagram plus address.
- */
-#ifndef PIPSIZ
-#define PIPSIZ 8192
-#endif
-static u_long unpst_sendspace = PIPSIZ;
-static u_long unpst_recvspace = PIPSIZ;
-static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
-static u_long unpdg_recvspace = 4*1024;
-
-static int unp_rights; /* file descriptors in flight */
-
-SYSCTL_DECL(_net_local_stream);
-SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
- &unpst_sendspace, 0, "");
-SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
- &unpst_recvspace, 0, "");
-SYSCTL_DECL(_net_local_dgram);
-SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
- &unpdg_sendspace, 0, "");
-SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
- &unpdg_recvspace, 0, "");
-SYSCTL_DECL(_net_local);
-SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
-
-/*
- * Locking and synchronization:
+ * Currently, UNIX domain sockets are protected by a single subsystem lock,
+ * which covers global data structures and variables, the contents of each
+ * per-socket unpcb structure, and the so_pcb field in sockets attached to
+ * the UNIX domain. This provides for a moderate degree of parallelism, as
+ * receive operations on UNIX domain sockets do not need to acquire the
+ * subsystem lock. Finer grained locking to permit send() without acquiring
+ * a global lock would be a logical next step.
*
- * A global UNIX domain socket mutex protects all global variables in the
- * implementation, as well as the linked lists tracking the set of allocated
- * UNIX domain sockets. These variables/fields may be read lockless using
- * atomic operations if stale values are permissible; otherwise the global
- * mutex is required to read or read-modify-write. The global mutex also
- * serves to prevent deadlock when multiple PCB locks may be acquired at once
- * (see below). Finally, the global mutex protects uncounted references from
- * vnodes to sockets bound to those vnodes: to safely dereference the
- * v_socket pointer, the global mutex must be held while a full reference is
- * acquired.
+ * The UNIX domain socket lock precedes all socket layer locks, including the
+ * socket lock and socket buffer lock, permitting UNIX domain socket code to
+ * call into socket support routines without releasing its locks.
*
- * UNIX domain sockets each have one unpcb PCB associated with them from
- * pru_attach() to pru_detach() via the so_pcb pointer. The validity of that
- * reference is an invariant for the lifetime of the socket, so no lock is
- * required to dereference the so_pcb pointer if a valid socket reference is
- * held.
- *
- * Each PCB has a back-pointer to its socket, unp_socket. This pointer may
- * only be safely dereferenced as long as a valid reference to the PCB is
- * held. Typically, this reference will be from the socket, or from another
- * PCB when the referring PCB's lock is held (in order that the reference not
- * be invalidated during use). In particular, to follow
- * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn.
- *
- * Fields of PCBs are locked using a per-unpcb lock, unp_mtx. Individual
- * atomic reads without the lock may be performed "lockless", but more
- * complex reads and read-modify-writes require the mutex to be held. No
- * lock order is defined between PCB locks -- multiple PCB locks may be
- * acquired at the same time only when holding the global UNIX domain socket
- * mutex, which prevents deadlocks. To prevent inter-PCB references from
- * becoming invalid, the lock protecting the reference must be held for the
- * lifetime of use of the reference.
- *
- * Blocking with UNIX domain sockets is a tricky issue: unlike most network
- * protocols, bind() is a non-atomic operation, and connect() requires
- * potential sleeping in the protocol, due to potentially waiting on local or
- * distributed file systems. We try to separate "lookup" operations, which
- * may sleep, and the IPC operations themselves, which typically can occur
- * with relative atomicity as locks can be held over the entire operation.
- *
- * Another tricky issue is simultaneous multi-threaded or multi-process
- * access to a single UNIX domain socket. These are handled by the flags
- * UNP_CONNECTING and UNP_BINDING.
+ * Some caution is required in areas where the UNIX domain socket code enters
+ * VFS in order to create or find rendezvous points. This results in
+ * dropping of the UNIX domain socket subsystem lock, acquisition of the
+ * Giant lock, and potential sleeping. This increases the chances of races,
+ * and exposes weaknesses in the socket->protocol API by offering poor
+ * failure modes.
*/
-static struct mtx unp_global_mtx;
-
-#define UNP_GLOBAL_LOCK_INIT() mtx_init(&unp_global_mtx, \
- "unp_global_mtx", NULL, MTX_DEF)
-#define UNP_GLOBAL_LOCK() mtx_lock(&unp_global_mtx)
-#define UNP_GLOBAL_UNLOCK() mtx_unlock(&unp_global_mtx)
-#define UNP_GLOBAL_UNLOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_NOTOWNED)
-#define UNP_GLOBAL_LOCK_ASSERT() mtx_assert(&unp_global_mtx, MA_OWNED)
+static struct mtx unp_mtx;
+#define UNP_LOCK_INIT() \
+ mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
+#define UNP_LOCK() mtx_lock(&unp_mtx)
+#define UNP_UNLOCK() mtx_unlock(&unp_mtx)
+#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED)
+#define UNP_UNLOCK_ASSERT() mtx_assert(&unp_mtx, MA_NOTOWNED)
-#define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \
- "unp_mtx", "unp_mtx", \
- MTX_DUPOK|MTX_DEF|MTX_RECURSE)
-#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx)
-#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx)
-#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx)
-#define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED)
-
/*
* Garbage collection of cyclic file descriptor/socket references occurs
* asynchronously in a taskqueue context in order to avoid recursion and
@@ -190,10 +123,12 @@
*/
static struct task unp_gc_task;
+static int unp_attach(struct socket *);
static void unp_detach(struct unpcb *);
+static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
static int unp_connect(struct socket *,struct sockaddr *, struct thread *);
static int unp_connect2(struct socket *so, struct socket *so2, int);
-static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
+static void unp_disconnect(struct unpcb *);
static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
@@ -202,6 +137,8 @@
static void unp_discard(struct file *);
static void unp_freerights(struct file **, int);
static int unp_internalize(struct mbuf **, struct thread *);
+static int unp_listen(struct socket *, struct unpcb *, int,
+ struct thread *);
static void
uipc_abort(struct socket *so)
@@ -210,238 +147,83 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
-
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
unp_drop(unp, ECONNABORTED);
unp_detach(unp);
- UNP_GLOBAL_UNLOCK_ASSERT();
+ UNP_UNLOCK_ASSERT();
}
static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
const struct sockaddr *sa;
/*
- * Pass back name of connected socket, if it was bound and we are
- * still connected (our peer may have closed already!).
+ * Pass back name of connected socket,
+ * if it was bound and we are still connected
+ * (our peer may have closed already!).
*/
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
-
*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
- UNP_PCB_LOCK(unp2);
+ UNP_LOCK();
+ if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
sa = (struct sockaddr *) unp->unp_conn->unp_addr;
- bcopy(sa, *nam, sa->sa_len);
- UNP_PCB_UNLOCK(unp2);
- } else {
+ else
sa = &sun_noname;
- bcopy(sa, *nam, sa->sa_len);
- }
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ bcopy(sa, *nam, sa->sa_len);
+ UNP_UNLOCK();
return (0);
}
static int
uipc_attach(struct socket *so, int proto, struct thread *td)
{
- u_long sendspace, recvspace;
- struct unpcb *unp;
- int error;
- KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
- if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
- switch (so->so_type) {
- case SOCK_STREAM:
- sendspace = unpst_sendspace;
- recvspace = unpst_recvspace;
- break;
-
- case SOCK_DGRAM:
- sendspace = unpdg_sendspace;
- recvspace = unpdg_recvspace;
- break;
-
- default:
- panic("uipc_attach");
- }
- error = soreserve(so, sendspace, recvspace);
- if (error)
- return (error);
- }
- unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO);
- if (unp == NULL)
- return (ENOBUFS);
- LIST_INIT(&unp->unp_refs);
- UNP_PCB_LOCK_INIT(unp);
- unp->unp_socket = so;
- so->so_pcb = unp;
-
- UNP_GLOBAL_LOCK();
- unp->unp_gencnt = ++unp_gencnt;
- unp_count++;
- LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
- : &unp_shead, unp, unp_link);
- UNP_GLOBAL_UNLOCK();
-
- return (0);
+ return (unp_attach(so));
}
static int
uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
- struct sockaddr_un *soun = (struct sockaddr_un *)nam;
- struct vnode *vp;
- struct mount *mp;
- struct vattr vattr;
- int error, namelen;
- struct nameidata nd;
struct unpcb *unp;
- char *buf;
+ int error;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
-
- namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
- if (namelen <= 0)
- return (EINVAL);
-
- /*
- * We don't allow simultaneous bind() calls on a single UNIX domain
- * socket, so flag in-progress operations, and return an error if an
- * operation is already in progress.
- *
- * Historically, we have not allowed a socket to be rebound, so this
- * also returns an error. Not allowing re-binding certainly
- * simplifies the implementation and avoids a great many possible
- * failure modes.
- */
- UNP_PCB_LOCK(unp);
- if (unp->unp_vnode != NULL) {
- UNP_PCB_UNLOCK(unp);
- return (EINVAL);
- }
- if (unp->unp_flags & UNP_BINDING) {
- UNP_PCB_UNLOCK(unp);
- return (EALREADY);
- }
- unp->unp_flags |= UNP_BINDING;
- UNP_PCB_UNLOCK(unp);
-
- buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
- strlcpy(buf, soun->sun_path, namelen + 1);
-
- mtx_lock(&Giant);
-restart:
- mtx_assert(&Giant, MA_OWNED);
- NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
- buf, td);
-/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
- error = namei(&nd);
- if (error)
- goto error;
- vp = nd.ni_vp;
- if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- if (vp != NULL) {
- vrele(vp);
- error = EADDRINUSE;
- goto error;
- }
- error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
- if (error)
- goto error;
- goto restart;
- }
- VATTR_NULL(&vattr);
- vattr.va_type = VSOCK;
- vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
-#ifdef MAC
- error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
- &vattr);
-#endif
- if (error == 0) {
- VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
- error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
- }
- NDFREE(&nd, NDF_ONLY_PNBUF);
- vput(nd.ni_dvp);
- if (error) {
- vn_finished_write(mp);
- goto error;
- }
- vp = nd.ni_vp;
- ASSERT_VOP_LOCKED(vp, "uipc_bind");
- soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
-
- /*
- * XXXRW: handle race against another consumer also frobbing
- * v_socket? Or not.
- */
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
- vp->v_socket = unp->unp_socket;
- unp->unp_vnode = vp;
- unp->unp_addr = soun;
- unp->unp_flags &= ~UNP_BINDING;
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
- VOP_UNLOCK(vp, 0, td);
- vn_finished_write(mp);
- mtx_unlock(&Giant);
- free(buf, M_TEMP);
- return (0);
-
-error:
- UNP_PCB_LOCK(unp);
- unp->unp_flags &= ~UNP_BINDING;
- UNP_PCB_UNLOCK(unp);
- mtx_unlock(&Giant);
- free(buf, M_TEMP);
+ UNP_LOCK();
+ error = unp_bind(unp, nam, td);
+ UNP_UNLOCK();
return (error);
}
static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+ struct unpcb *unp;
int error;
KASSERT(td == curthread, ("uipc_connect: td != curthread"));
-
- UNP_GLOBAL_LOCK();
+ unp = sotounpcb(so);
+ KASSERT(unp != NULL, ("uipc_connect: unp == NULL"));
+ UNP_LOCK();
error = unp_connect(so, nam, td);
- UNP_GLOBAL_UNLOCK();
+ UNP_UNLOCK();
return (error);
}
int
uipc_connect2(struct socket *so1, struct socket *so2)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
int error;
- UNP_GLOBAL_LOCK();
- unp = so1->so_pcb;
+ unp = sotounpcb(so1);
KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
- UNP_PCB_LOCK(unp);
- unp2 = so2->so_pcb;
- KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
- UNP_PCB_LOCK(unp2);
+ UNP_LOCK();
error = unp_connect2(so1, so2, PRU_CONNECT2);
- UNP_PCB_UNLOCK(unp2);
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ UNP_UNLOCK();
return (error);
}
@@ -454,31 +236,21 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
-
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
unp_detach(unp);
- UNP_GLOBAL_UNLOCK_ASSERT();
+ UNP_UNLOCK_ASSERT();
}
static int
uipc_disconnect(struct socket *so)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
-
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
- unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
- }
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ UNP_LOCK();
+ unp_disconnect(unp);
+ UNP_UNLOCK();
return (0);
}
@@ -490,105 +262,81 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
-
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
if (unp->unp_vnode == NULL) {
- UNP_PCB_UNLOCK(unp);
+ UNP_UNLOCK();
return (EINVAL);
}
-
- SOCK_LOCK(so);
- error = solisten_proto_check(so);
- if (error == 0) {
- cru2x(td->td_ucred, &unp->unp_peercred);
- unp->unp_flags |= UNP_HAVEPCCACHED;
- solisten_proto(so, backlog);
- }
- SOCK_UNLOCK(so);
- UNP_PCB_UNLOCK(unp);
+ error = unp_listen(so, unp, backlog, td);
+ UNP_UNLOCK();
return (error);
}
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
const struct sockaddr *sa;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
-
*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
- UNP_PCB_LOCK(unp);
- /*
- * XXX: It seems that this test always fails even when connection is
- * established. So, this else clause is added as workaround to
- * return PF_LOCAL sockaddr.
- */
- unp2 = unp->unp_conn;
- if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
- if (unp2->unp_addr != NULL)
- sa = (struct sockaddr *) unp->unp_conn->unp_addr;
- else
- sa = &sun_noname;
- bcopy(sa, *nam, sa->sa_len);
- UNP_PCB_UNLOCK(unp2);
- } else {
+ UNP_LOCK();
+ if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
+ sa = (struct sockaddr *) unp->unp_conn->unp_addr;
+ else {
+ /*
+ * XXX: It seems that this test always fails even when
+ * connection is established. So, this else clause is
+ * added as workaround to return PF_LOCAL sockaddr.
+ */
sa = &sun_noname;
- bcopy(sa, *nam, sa->sa_len);
}
- UNP_PCB_UNLOCK(unp);
+ bcopy(sa, *nam, sa->sa_len);
+ UNP_UNLOCK();
return (0);
}
static int
uipc_rcvd(struct socket *so, int flags)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
struct socket *so2;
u_long newhiwat;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
+ UNP_LOCK();
+ switch (so->so_type) {
+ case SOCK_DGRAM:
+ panic("uipc_rcvd DGRAM?");
+ /*NOTREACHED*/
- if (so->so_type == SOCK_DGRAM)
- panic("uipc_rcvd DGRAM?");
+ case SOCK_STREAM:
+ if (unp->unp_conn == NULL)
+ break;
+ so2 = unp->unp_conn->unp_socket;
+ SOCKBUF_LOCK(&so2->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ /*
+ * Adjust backpressure on sender
+ * and wakeup any waiting to write.
+ */
+ so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
+ unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
+ newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
+ so->so_rcv.sb_cc;
+ (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
+ newhiwat, RLIM_INFINITY);
+ unp->unp_cc = so->so_rcv.sb_cc;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ sowwakeup_locked(so2);
+ break;
- if (so->so_type != SOCK_STREAM)
+ default:
panic("uipc_rcvd unknown socktype");
-
- /*
- * Adjust backpressure on sender and wakeup any waiting to write.
- *
- * The consistency requirements here are a bit complex: we must
- * acquire the lock for our own unpcb in order to prevent it from
- * disconnecting while in use, changing the unp_conn peer. We do not
- * need unp2's lock, since the unp2->unp_socket pointer will remain
- * static as long as the unp2 pcb is valid, which it will be until we
- * release unp's lock to allow a disconnect. We do need socket
- * mutexes for both socket endpoints since we manipulate fields in
- * both; we hold both locks at once since we access both
- * simultaneously.
- */
- UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if (unp2 == NULL) {
- UNP_PCB_UNLOCK(unp);
- return (0);
}
- so2 = unp2->unp_socket;
- SOCKBUF_LOCK(&so2->so_snd);
- SOCKBUF_LOCK(&so->so_rcv);
- so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
- unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
- newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - so->so_rcv.sb_cc;
- (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
- unp->unp_cc = so->so_rcv.sb_cc;
- SOCKBUF_UNLOCK(&so->so_rcv);
- sowwakeup_locked(so2);
- UNP_PCB_UNLOCK(unp);
+ UNP_UNLOCK();
return (0);
}
@@ -598,14 +346,13 @@
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
struct mbuf *control, struct thread *td)
{
- struct unpcb *unp, *unp2;
+ int error = 0;
+ struct unpcb *unp;
struct socket *so2;
u_long newhiwat;
- int error = 0;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_send: unp == NULL"));
-
if (flags & PRUS_OOB) {
error = EOPNOTSUPP;
goto release;
@@ -614,38 +361,32 @@
if (control != NULL && (error = unp_internalize(&control, td)))
goto release;
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
switch (so->so_type) {
case SOCK_DGRAM:
{
const struct sockaddr *from;
- unp2 = unp->unp_conn;
if (nam != NULL) {
- if (unp2 != NULL) {
+ if (unp->unp_conn != NULL) {
error = EISCONN;
break;
}
- UNP_PCB_UNLOCK(unp);
error = unp_connect(so, nam, td);
- UNP_PCB_LOCK(unp);
if (error)
break;
- unp2 = unp->unp_conn;
} else {
- if (unp2 == NULL) {
+ if (unp->unp_conn == NULL) {
error = ENOTCONN;
break;
}
}
- UNP_PCB_LOCK(unp2);
- so2 = unp2->unp_socket;
+ so2 = unp->unp_conn->unp_socket;
if (unp->unp_addr != NULL)
from = (struct sockaddr *)unp->unp_addr;
else
from = &sun_noname;
- if (unp2->unp_flags & UNP_WANTCRED)
+ if (unp->unp_conn->unp_flags & UNP_WANTCRED)
control = unp_addsockcred(td, control);
SOCKBUF_LOCK(&so2->so_rcv);
if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
@@ -657,22 +398,19 @@
error = ENOBUFS;
}
if (nam != NULL)
- unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
+ unp_disconnect(unp);
break;
}
case SOCK_STREAM:
/* Connect if not connected yet. */
/*
- * Note: A better implementation would complain if not equal
- * to the peer's address.
+ * Note: A better implementation would complain
+ * if not equal to the peer's address.
*/
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (nam != NULL) {
- UNP_PCB_UNLOCK(unp);
error = unp_connect(so, nam, td);
- UNP_PCB_LOCK(unp);
if (error)
break; /* XXX */
} else {
@@ -681,34 +419,22 @@
}
}
- /*
- * Lock order here has to be handled carefully: we hold the
- * global lock, so acquiring two unpcb locks is OK. We must
- * acquire both before acquiring any socket mutexes. We must
- * also acquire the local socket send mutex before the remote
- * socket receive mutex. The only tricky thing is making
- * sure to acquire the unp2 lock before the local socket send
- * lock, or we will experience deadlocks.
- */
- unp2 = unp->unp_conn;
- KASSERT(unp2 != NULL,
- ("uipc_send connected but no connection?"));
- UNP_PCB_LOCK(unp2);
SOCKBUF_LOCK(&so->so_snd);
if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
SOCKBUF_UNLOCK(&so->so_snd);
- UNP_PCB_UNLOCK(unp2);
error = EPIPE;
break;
}
- so2 = unp2->unp_socket;
+ if (unp->unp_conn == NULL)
+ panic("uipc_send connected but no connection?");
+ so2 = unp->unp_conn->unp_socket;
SOCKBUF_LOCK(&so2->so_rcv);
- if (unp2->unp_flags & UNP_WANTCRED) {
+ if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
/*
* Credentials are passed only once on
* SOCK_STREAM.
*/
- unp2->unp_flags &= ~UNP_WANTCRED;
+ unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
control = unp_addsockcred(td, control);
}
/*
@@ -719,19 +445,19 @@
if (control != NULL) {
if (sbappendcontrol_locked(&so2->so_rcv, m, control))
control = NULL;
- } else
+ } else {
sbappend_locked(&so2->so_rcv, m);
+ }
so->so_snd.sb_mbmax -=
so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
- unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
+ unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
newhiwat = so->so_snd.sb_hiwat -
- (so2->so_rcv.sb_cc - unp2->unp_cc);
+ (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
newhiwat, RLIM_INFINITY);
SOCKBUF_UNLOCK(&so->so_snd);
- unp2->unp_cc = so2->so_rcv.sb_cc;
+ unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
sorwakeup_locked(so2);
- UNP_PCB_UNLOCK(unp2);
m = NULL;
break;
@@ -747,8 +473,7 @@
socantsendmore(so);
unp_shutdown(unp);
}
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ UNP_UNLOCK();
if (control != NULL && error != 0)
unp_dispose(control);
@@ -764,28 +489,22 @@
static int
uipc_sense(struct socket *so, struct stat *sb)
{
- struct unpcb *unp, *unp2;
+ struct unpcb *unp;
struct socket *so2;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
-
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
sb->st_blksize = so->so_snd.sb_hiwat;
- unp2 = unp->unp_conn;
- if (so->so_type == SOCK_STREAM && unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
- so2 = unp2->unp_socket;
+ if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
+ so2 = unp->unp_conn->unp_socket;
sb->st_blksize += so2->so_rcv.sb_cc;
- UNP_PCB_UNLOCK(unp2);
}
sb->st_dev = NODEV;
if (unp->unp_ino == 0)
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ UNP_UNLOCK();
return (0);
}
@@ -796,13 +515,10 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
-
- UNP_GLOBAL_LOCK();
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
socantsendmore(so);
unp_shutdown(unp);
- UNP_PCB_UNLOCK(unp);
- UNP_GLOBAL_UNLOCK();
+ UNP_UNLOCK();
return (0);
}
@@ -814,15 +530,14 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
-
*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
- UNP_PCB_LOCK(unp);
+ UNP_LOCK();
if (unp->unp_addr != NULL)
sa = (struct sockaddr *) unp->unp_addr;
else
sa = &sun_noname;
bcopy(sa, *nam, sa->sa_len);
- UNP_PCB_UNLOCK(unp);
+ UNP_UNLOCK();
return (0);
}
@@ -859,13 +574,12 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
-
+ UNP_LOCK();
error = 0;
switch (sopt->sopt_dir) {
case SOPT_GET:
switch (sopt->sopt_name) {
case LOCAL_PEERCRED:
- UNP_PCB_LOCK(unp);
if (unp->unp_flags & UNP_HAVEPC)
xu = unp->unp_peercred;
else {
@@ -874,31 +588,22 @@
else
error = EINVAL;
}
- UNP_PCB_UNLOCK(unp);
if (error == 0)
error = sooptcopyout(sopt, &xu, sizeof(xu));
break;
-
case LOCAL_CREDS:
- UNP_PCB_LOCK(unp);
optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
- UNP_PCB_UNLOCK(unp);
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
-
case LOCAL_CONNWAIT:
- UNP_PCB_LOCK(unp);
optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
- UNP_PCB_UNLOCK(unp);
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
-
default:
error = EOPNOTSUPP;
break;
}
break;
-
case SOPT_SET:
switch (sopt->sopt_name) {
case LOCAL_CREDS:
@@ -908,24 +613,19 @@
if (error)
break;
-#define OPTSET(bit) do { \
- UNP_PCB_LOCK(unp); \
- if (optval) \
- unp->unp_flags |= bit; \
- else \
- unp->unp_flags &= ~bit; \
- UNP_PCB_UNLOCK(unp); \
-} while (0)
+#define OPTSET(bit) \
+ if (optval) \
+ unp->unp_flags |= bit; \
+ else \
+ unp->unp_flags &= ~bit;
switch (sopt->sopt_name) {
case LOCAL_CREDS:
OPTSET(UNP_WANTCRED);
break;
-
case LOCAL_CONNWAIT:
OPTSET(UNP_CONNWAIT);
break;
-
default:
break;
}
@@ -936,60 +636,117 @@
break;
}
break;
-
default:
error = EOPNOTSUPP;
break;
}
+ UNP_UNLOCK();
return (error);
}
+/*
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace. Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#ifndef PIPSIZ
+#define PIPSIZ 8192
+#endif
+static u_long unpst_sendspace = PIPSIZ;
+static u_long unpst_recvspace = PIPSIZ;
+static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
+static u_long unpdg_recvspace = 4*1024;
+
+static int unp_rights; /* file descriptors in flight */
+
+SYSCTL_DECL(_net_local_stream);
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
+ &unpst_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
+ &unpst_recvspace, 0, "");
+SYSCTL_DECL(_net_local_dgram);
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
+ &unpdg_sendspace, 0, "");
+SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
+ &unpdg_recvspace, 0, "");
+SYSCTL_DECL(_net_local);
+SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
+
+static int
+unp_attach(struct socket *so)
+{
+ struct unpcb *unp;
+ int error;
+
+ KASSERT(so->so_pcb == NULL, ("unp_attach: so_pcb != NULL"));
+ if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+ switch (so->so_type) {
+
+ case SOCK_STREAM:
+ error = soreserve(so, unpst_sendspace, unpst_recvspace);
+ break;
+
+ case SOCK_DGRAM:
+ error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
+ break;
+
+ default:
+ panic("unp_attach");
+ }
+ if (error)
+ return (error);
+ }
+ unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list