svn commit: r216450 - projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp

Jeff Roberson jeff at FreeBSD.org
Wed Dec 15 01:08:19 UTC 2010


Author: jeff
Date: Wed Dec 15 01:08:19 2010
New Revision: 216450
URL: http://svn.freebsd.org/changeset/base/216450

Log:
  Initial port and rewrite of Sockets Direct Protocol (IB socket layer)
  
   - sdp_main.c was GPL polluted as a result of copy & paste code from linux
     tcp sources.  It was rewritten from BSD tcp sources and copyrights were
     properly retained.
   - Remaining files are dual BSD/GPL licensed and appear to be free of
     unsafe copy & paste code.  Most linuxisms removed although the wrapper
     layer is still included as it must be for the rdma/* includes.
  
  Sponsored by:	Isilon Systems, iX Systems, and Panasas.

Modified:
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_dbg.h
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_zcopy.c

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h	Tue Dec 14 21:33:17 2010	(r216449)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h	Wed Dec 15 01:08:19 2010	(r216450)
@@ -1,16 +1,66 @@
 #ifndef _SDP_H_
 #define _SDP_H_
 
+#include "opt_ddb.h"
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/proc.h>
+#include <sys/jail.h>
+#include <sys/domain.h>
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/wait.h>
-#include <net/inet_sock.h>
-#include <net/tcp.h> /* For urgent data flags */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+
 #include <rdma/ib_verbs.h>
-#include <linux/sched.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/ib_cm.h>
+#include <rdma/sdp_socket.h>
+#include <rdma/ib_fmr_pool.h>
+
+#define	CONFIG_INFINIBAND_SDP_DEBUG		1
+#define	CONFIG_INFINIBAND_SDP_DEBUG_DATA	1
+
 #include "sdp_dbg.h"
 
+#undef LIST_HEAD
+/* From sys/queue.h */
+#define LIST_HEAD(name, type)                                           \
+struct name {                                                           \
+        struct type *lh_first;  /* first element */                     \
+}
+
 /* Interval between successive polls in the Tx routine when polling is used
    instead of interrupts (in per-core Tx rings) - should be power of 2 */
 #define SDP_TX_POLL_MODER	16
@@ -40,12 +90,13 @@
 #define SDP_MAX_SEND_SGES 9 /* same as above */
 
 /* mb inlined data len - rest will be rx'ed into frags */
-#define SDP_SKB_HEAD_SIZE (0x500 + sizeof(struct sdp_bsdh))
+#define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh))
 
 /* limit tx payload len, if the sink supports bigger buffers than the source
  * can handle.
  * or rx fragment size (limited by sge->length size) */
-#define SDP_MAX_PAYLOAD ((1 << 16) - SDP_SKB_HEAD_SIZE)
+#define	SDP_MAX_PACKET	(1 << 16)
+#define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE)
 
 #define SDP_NUM_WC 4
 
@@ -66,13 +117,14 @@
 
 struct sdp_mb_cb {
 	__u32		seq;		/* Starting sequence number	*/
-	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
-	__u8		flags;		/* TCP header flags.		*/
 	struct bzcopy_state      *bz;
 	struct rx_srcavail_state *rx_sa;
 	struct tx_srcavail_state *tx_sa;
 };
 
+#define	M_PUSH	M_PROTO1	/* Do a 'push'. */
+#define	M_URG	M_PROTO2	/* Mark as urgent (oob). */
+
 #define SDP_SKB_CB(__mb)      ((struct sdp_mb_cb *)&((__mb)->cb[0]))
 #define BZCOPY_STATE(mb)      (SDP_SKB_CB(mb)->bz)
 #define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa)
@@ -87,29 +139,17 @@ struct sdp_mb_cb {
 #define ring_posted(ring) (ring_head(ring) - ring_tail(ring))
 
 #define rx_ring_posted(ssk) ring_posted(ssk->rx_ring)
+#ifdef SDP_ZCOPY
 #define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \
 	(ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0))
-
-#define posts_handler(ssk) atomic_read(&ssk->somebody_is_doing_posts)
-#define posts_handler_get(ssk) atomic_inc(&ssk->somebody_is_doing_posts)
-#define posts_handler_put(ssk) do {\
-	atomic_dec(&ssk->somebody_is_doing_posts); \
-	sdp_do_posts(ssk); \
-} while (0)
+#else
+#define tx_ring_posted(ssk) ring_posted(ssk->tx_ring)
+#endif
 
 extern int sdp_zcopy_thresh;
-extern struct workqueue_struct *sdp_wq;
-extern struct list_head sock_list;
-extern spinlock_t sock_list_lock;
 extern int rcvbuf_initial_size;
-extern struct proto sdp_proto;
 extern struct workqueue_struct *rx_comp_wq;
-extern atomic_t sdp_current_mem_usage;
-extern spinlock_t sdp_large_sockets_lock;
 extern struct ib_client sdp_client;
-#ifdef SDPSTATS_ON
-DECLARE_PER_CPU(struct sdpstats, sdpstats);
-#endif
 
 enum sdp_mid {
 	SDP_MID_HELLO = 0x0,
@@ -264,7 +304,9 @@ struct tx_srcavail_state {
 };
 
 struct sdp_tx_ring {
+#ifdef SDP_ZCOPY
 	struct rx_srcavail_state *rdma_inflight;
+#endif
 	struct sdp_buf   	*buffer;
 	atomic_t          	head;
 	atomic_t          	tail;
@@ -274,8 +316,7 @@ struct sdp_tx_ring {
 	atomic_t 	  	credits;
 #define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits))
 
-	struct timer_list 	timer;
-	struct tasklet_struct 	tasklet;
+	struct callout		timer;
 	u16 		  	poll_cnt;
 };
 
@@ -286,9 +327,7 @@ struct sdp_rx_ring {
 	struct ib_cq 	 *cq;
 
 	int		 destroyed;
-	rwlock_t 	 destroyed_lock;
-
-	struct tasklet_struct 	tasklet;
+	struct rwlock	 destroyed_lock;
 };
 
 struct sdp_device {
@@ -318,68 +357,49 @@ struct sdp_moderation {
 	int moder_time;
 };
 
+#define	SDP_TIMEWAIT	0x0001		/* In ssk timewait state. */
+#define	SDP_DROPPED	0x0002		/* Socket has been dropped. */
+#define	SDP_SOCKREF	0x0004		/* Holding a sockref for close. */
+#define	SDP_NODELAY	0x0008		/* Disable nagle. */
+#define	SDP_NEEDFIN	0x0010		/* Send a fin on the next tx. */
+#define	SDP_DREQWAIT	0x0020		/* Waiting on DREQ. */
+#define	SDP_HAVEOOB	0x0040		/* Have OOB data. */
+#define	SDP_HADOOB	0x0080		/* Had OOB data. */
+#define	SDP_DESTROY	0x0100		/* Being destroyed. */
+
 struct sdp_sock {
-	/* sk has to be the first member of inet_sock */
-	struct inet_sock isk;
-	struct list_head sock_list;
-	struct list_head accept_queue;
-	struct list_head backlog_queue;
-	struct mbuf_head rx_ctl_q;
-	struct socket *parent;
+	LIST_ENTRY(sdp_sock) list;
+	struct socket *socket;
+	struct rdma_cm_id *id;
+	struct ib_device *ib_device;
 	struct sdp_device *sdp_dev;
-
-	int qp_active;
-	struct tx_srcavail_state *tx_sa;
-	struct rx_srcavail_state *rx_sa;
-	spinlock_t tx_sa_lock;
-	struct delayed_work srcavail_cancel_work;
-	int srcavail_cancel_mseq;
-
+	struct ib_qp *qp;
+	struct ucred *cred;
+	struct callout keep2msl;	/* 2msl and keepalive timer. */
+	struct callout nagle_timer;	/* timeout waiting for ack */
 	struct ib_ucontext context;
-
-	int max_sge;
-
-	struct work_struct rx_comp_work;
-	wait_queue_head_t wq;
-
-	struct delayed_work dreq_wait_work;
-	struct work_struct destroy_work;
-
-	int tx_compl_pending;
-	atomic_t somebody_is_doing_posts;
-
-	/* Like tcp_sock */
-	u16 urg_data;
-	u32 urg_seq;
-	u32 copied_seq;
-#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt))
-	atomic_t rcv_nxt;
-
-	int write_seq;
-	int pushed_seq;
+	in_port_t lport;
+	in_addr_t laddr;
+	in_port_t fport;
+	in_addr_t faddr;
+	int flags;
+	int state;
+	int softerror;
+	int recv_bytes;		/* Bytes per recv. buf including header */
 	int xmit_size_goal;
-	int nonagle;
-
-	int dreq_wait_timeout;
-
-	unsigned keepalive_time;
-
-	spinlock_t lock;
-
-	/* tx_head/rx_head when keepalive timer started */
-	unsigned keepalive_tx_head;
-	unsigned keepalive_rx_head;
-
-	int destructed_already;
-	int sdp_disconnect;
-	int destruct_in_process;
+	char iobc;
 
 	struct sdp_rx_ring rx_ring;
 	struct sdp_tx_ring tx_ring;
+	struct rwlock	lock;
+	struct mbuf *rx_ctl_q;
+	struct mbuf *rx_ctl_tail;
 
-	/* Data below will be reset on error */
-	struct rdma_cm_id *id;
-	struct ib_device *ib_device;
+	int qp_active;	/* XXX Flag. */
+	int max_sge;
+	struct work_struct rx_comp_work;
+#define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt))
+	atomic_t rcv_nxt;
 
 	/* SDP specific */
 	atomic_t mseq_ack;
@@ -388,39 +408,42 @@ struct sdp_sock {
 	unsigned min_bufs;	/* Low water mark to wake senders */
 
 	unsigned long nagle_last_unacked; /* mseq of latest unacked packet */
-	struct timer_list nagle_timer; /* timeout waiting for ack */
 
 	atomic_t               remote_credits;
 #define remote_credits(ssk) (atomic_read(&ssk->remote_credits))
 	int 		  poll_cq;
 
-	/* rdma specific */
-	struct ib_qp *qp;
-
 	/* SDP slow start */
-	int rcvbuf_scale; 	/* local recv buf scale for each socket */
-	int sent_request_head; 	/* mark the tx_head of the last send resize
-				   request */
-	int sent_request; 	/* 0 - not sent yet, 1 - request pending
-				   -1 - resize done succesfully */
 	int recv_request_head; 	/* mark the rx_head when the resize request
 				   was received */
-	int recv_request; 	/* flag if request to resize was recieved */
-	int recv_frags; 	/* max mb frags in recv packets */
-	int send_frags; 	/* max mb frags in send packets */
+	int recv_request; 	/* XXX flag if request to resize was received */
 
 	unsigned long tx_packets;
 	unsigned long rx_packets;
 	unsigned long tx_bytes;
 	unsigned long rx_bytes;
 	struct sdp_moderation auto_mod;
-
+#ifdef SDP_ZCOPY
+	struct tx_srcavail_state *tx_sa;
+	struct rx_srcavail_state *rx_sa;
+	spinlock_t tx_sa_lock;
+	struct delayed_work srcavail_cancel_work;
+	int srcavail_cancel_mseq;
 	/* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */
 	int zcopy_thresh;
-
-	int last_bind_err;
+#endif
 };
 
+#define	sdp_sk(so)	((struct sdp_sock *)(so->so_pcb))
+
+#define	SDP_RLOCK(ssk)		rw_rlock(&(ssk)->lock)
+#define	SDP_WLOCK(ssk)		rw_wlock(&(ssk)->lock)
+#define	SDP_RUNLOCK(ssk)	rw_runlock(&(ssk)->lock)
+#define	SDP_WUNLOCK(ssk)	rw_wunlock(&(ssk)->lock)
+#define	SDP_WLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_WLOCKED)
+#define	SDP_RLOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_RLOCKED)
+#define	SDP_LOCK_ASSERT(ssk)	rw_assert(&(ssk)->lock, RA_LOCKED)
+
 static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa)
 {
 	memset((void *)&tx_sa->busy, 0,
@@ -429,12 +452,12 @@ static inline void tx_sa_reset(struct tx
 
 static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring)
 {
-	read_unlock_bh(&rx_ring->destroyed_lock);
+	rw_runlock(&rx_ring->destroyed_lock);
 }
 
 static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring)
 {
-	read_lock_bh(&rx_ring->destroyed_lock);
+	rw_rlock(&rx_ring->destroyed_lock);
 	if (rx_ring->destroyed) {
 		rx_ring_unlock(rx_ring);
 		return 0;
@@ -444,76 +467,26 @@ static inline int rx_ring_trylock(struct
 
 static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring)
 {
-	write_lock_bh(&rx_ring->destroyed_lock);
+	rw_wlock(&rx_ring->destroyed_lock);
 	rx_ring->destroyed = 1;
-	write_unlock_bh(&rx_ring->destroyed_lock);
-}
-
-static inline struct sdp_sock *sdp_sk(const struct socket *sk)
-{
-	        return (struct sdp_sock *)sk;
-}
-
-static inline int _sdp_exch_state(const char *func, int line, struct socket *sk,
-				 int from_states, int state)
-{
-	unsigned long flags;
-	int old;
-
-	spin_lock_irqsave(&sdp_sk(sk)->lock, flags);
-
-	sdp_dbg(sk, "%s:%d - set state: %s -> %s 0x%x\n", func, line,
-		sdp_state_str(sk->sk_state),
-		sdp_state_str(state), from_states);
-
-	if ((1 << sk->sk_state) & ~from_states) {
-		sdp_warn(sk, "trying to exchange state from unexpected state "
-			"%s to state %s. expected states: 0x%x\n",
-			sdp_state_str(sk->sk_state), sdp_state_str(state),
-			from_states);
-	}
-
-	old = sk->sk_state;
-	sk->sk_state = state;
-
-	spin_unlock_irqrestore(&sdp_sk(sk)->lock, flags);
-
-	return old;
-}
-#define sdp_exch_state(sk, from_states, state) \
-	_sdp_exch_state(__func__, __LINE__, sk, from_states, state)
-
-static inline void sdp_set_error(struct socket *sk, int err)
-{
-	int ib_teardown_states = TCPF_FIN_WAIT1 | TCPF_CLOSE_WAIT
-		| TCPF_LAST_ACK;
-	sk->sk_err = -err;
-	if (sk->sk_socket)
-		sk->sk_socket->state = SS_DISCONNECTING;
-
-	if ((1 << sk->sk_state) & ib_teardown_states)
-		sdp_exch_state(sk, ib_teardown_states, TCPS_TIME_WAIT);
-	else
-		sdp_exch_state(sk, ~0, TCPS_CLOSED);
-
-	sk->sk_error_report(sk);
+	rw_wunlock(&rx_ring->destroyed_lock);
 }
 
-static inline void sdp_arm_rx_cq(struct socket *sk)
+static inline void sdp_arm_rx_cq(struct sdp_sock *ssk)
 {
-	sdp_prf(sk, NULL, "Arming RX cq");
-	sdp_dbg_data(sk, "Arming RX cq\n");
+	sdp_prf(ssk->socket, NULL, "Arming RX cq");
+	sdp_dbg_data(ssk->socket, "Arming RX cq\n");
 
-	ib_req_notify_cq(sdp_sk(sk)->rx_ring.cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP);
 }
 
-static inline void sdp_arm_tx_cq(struct socket *sk)
+static inline void sdp_arm_tx_cq(struct sdp_sock *ssk)
 {
-	sdp_prf(sk, NULL, "Arming TX cq");
-	sdp_dbg_data(sk, "Arming TX cq. credits: %d, posted: %d\n",
-		tx_credits(sdp_sk(sk)), tx_ring_posted(sdp_sk(sk)));
+	sdp_prf(ssk->socket, NULL, "Arming TX cq");
+	sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n",
+		tx_credits(ssk), tx_ring_posted(ssk));
 
-	ib_req_notify_cq(sdp_sk(sk)->tx_ring.cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP);
 }
 
 /* return the min of:
@@ -557,90 +530,68 @@ static inline char *mid2str(int mid)
 	return mid2str[mid];
 }
 
-static inline struct mbuf *sdp_stream_alloc_mb(struct socket *sk, int size,
-		gfp_t gfp)
-{
-	struct mbuf *mb;
-
-	/* The TCP header must be at least 32-bit aligned.  */
-	size = ALIGN(size, 4);
-
-	mb = alloc_mb_fclone(size + sk->sk_prot->max_header, gfp);
-	if (mb) {
-		if (sk_wmem_schedule(sk, mb->truesize)) {
-			/*
-			 * Make sure that we have exactly size bytes
-			 * available to the caller, no more, no less.
-			 */
-			mb_reserve(mb, mb_tailroom(mb) - size);
-			return mb;
-		}
-		m_freem(mb);
-	} else {
-		sk->sk_prot->enter_memory_pressure(sk);
-		sk_stream_moderate_sndbuf(sk);
-	}
-	return NULL;
-}
-
-static inline struct mbuf *sdp_alloc_mb(struct socket *sk, u8 mid, int size,
-		gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait)
 {
 	struct sdp_bsdh *h;
 	struct mbuf *mb;
 
-	if (!gfp) {
-		if (unlikely(sk->sk_allocation))
-			gfp = sk->sk_allocation;
-		else
-			gfp = GFP_KERNEL;
-	}
-
-	mb = sdp_stream_alloc_mb(sk, sizeof(struct sdp_bsdh) + size, gfp);
-	BUG_ON(!mb);
-
-        mb_header_release(mb);
-
-	h = (struct sdp_bsdh *)mb_push(mb, sizeof *h);
+	MGETHDR(mb, wait, MT_DATA);
+	if (mb == NULL)
+		return (NULL);
+	mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh);
+	h = mtod(mb, struct sdp_bsdh *);
 	h->mid = mid;
 
-	mb_reset_transport_header(mb);
-
 	return mb;
 }
-static inline struct mbuf *sdp_alloc_mb_data(struct socket *sk, gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_data(struct socket *sk, int wait)
 {
-	return sdp_alloc_mb(sk, SDP_MID_DATA, 0, gfp);
+	return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait);
 }
 
-static inline struct mbuf *sdp_alloc_mb_disconnect(struct socket *sk,
-		gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_disconnect(struct socket *sk, int wait)
 {
-	return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, gfp);
+	return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait);
 }
 
-static inline struct mbuf *sdp_alloc_mb_chrcvbuf_ack(struct socket *sk,
-		int size, gfp_t gfp)
+static inline void *
+mb_put(struct mbuf *mb, int len)
+{
+	uint8_t *data;
+
+	data = mb->m_data;
+	data += mb->m_len;
+	mb->m_len += len;
+	return (void *)data;
+}
+
+static inline struct mbuf *
+sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait)
 {
 	struct mbuf *mb;
 	struct sdp_chrecvbuf *resp_size;
 
-	mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), gfp);
-
+	mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait);
+	if (mb == NULL)
+		return (NULL);
 	resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size);
 	resp_size->size = htonl(size);
 
 	return mb;
 }
 
-static inline struct mbuf *sdp_alloc_mb_srcavail(struct socket *sk,
-	u32 len, u32 rkey, u64 vaddr, gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait)
 {
 	struct mbuf *mb;
 	struct sdp_srcah *srcah;
 
-	mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), gfp);
-
+	mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait);
+	if (mb == NULL)
+		return (NULL);
 	srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah));
 	srcah->len = htonl(len);
 	srcah->rkey = htonl(rkey);
@@ -649,29 +600,31 @@ static inline struct mbuf *sdp_alloc_mb_
 	return mb;
 }
 
-static inline struct mbuf *sdp_alloc_mb_srcavail_cancel(struct socket *sk,
-		gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait)
 {
-	return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, gfp);
+	return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait);
 }
 
-static inline struct mbuf *sdp_alloc_mb_rdmardcompl(struct socket *sk,
-	u32 len, gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait)
 {
 	struct mbuf *mb;
 	struct sdp_rrch *rrch;
 
-	mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), gfp);
-
+	mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait);
+	if (mb == NULL)
+		return (NULL);
 	rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch));
 	rrch->len = htonl(len);
 
 	return mb;
 }
 
-static inline struct mbuf *sdp_alloc_mb_sendsm(struct socket *sk, gfp_t gfp)
+static inline struct mbuf *
+sdp_alloc_mb_sendsm(struct socket *sk, int wait)
 {
-	return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, gfp);
+	return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait);
 }
 static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk)
 {
@@ -691,98 +644,33 @@ static inline int credit_update_needed(s
 }
 
 
-#ifdef SDPSTATS_ON
-
-#define SDPSTATS_MAX_HIST_SIZE 256
-struct sdpstats {
-	u32 post_send[256];
-	u32 sendmsg_bcopy_segment;
-	u32 sendmsg_bzcopy_segment;
-	u32 sendmsg_zcopy_segment;
-	u32 sendmsg;
-	u32 post_send_credits;
-	u32 sendmsg_nagle_skip;
-	u32 sendmsg_seglen[25];
-	u32 send_size[25];
-	u32 post_recv;
-	u32 rx_int_count;
-	u32 tx_int_count;
-	u32 bzcopy_poll_miss;
-	u32 send_wait_for_mem;
-	u32 send_miss_no_credits;
-	u32 rx_poll_miss;
-	u32 tx_poll_miss;
-	u32 tx_poll_hit;
-	u32 tx_poll_busy;
-	u32 memcpy_count;
-	u32 credits_before_update[64];
-	u32 zcopy_tx_timeout;
-	u32 zcopy_cross_send;
-	u32 zcopy_tx_aborted;
-	u32 zcopy_tx_error;
-};
-
-static inline void sdpstats_hist(u32 *h, u32 val, u32 maxidx, int is_log)
-{
-	int idx = is_log ? ilog2(val) : val;
-	if (idx > maxidx)
-		idx = maxidx;
-
-	h[idx]++;
-}
-
-#define SDPSTATS_COUNTER_INC(stat) do { __get_cpu_var(sdpstats).stat++; } while (0)
-#define SDPSTATS_COUNTER_ADD(stat, val) do { __get_cpu_var(sdpstats).stat += val; } while (0)
-#define SDPSTATS_COUNTER_MID_INC(stat, mid) do { __get_cpu_var(sdpstats).stat[mid]++; } \
-	while (0)
-#define SDPSTATS_HIST(stat, size) \
-	sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 1)
-
-#define SDPSTATS_HIST_LINEAR(stat, size) \
-	sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 0)
-
-#else
 #define SDPSTATS_COUNTER_INC(stat)
 #define SDPSTATS_COUNTER_ADD(stat, val)
 #define SDPSTATS_COUNTER_MID_INC(stat, mid)
 #define SDPSTATS_HIST_LINEAR(stat, size)
 #define SDPSTATS_HIST(stat, size)
-#endif
 
-static inline void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf,
-		size_t head_size, enum dma_data_direction dir)
+static inline void
+sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf,
+    enum dma_data_direction dir)
 {
-	int i;
+	struct ib_device *dev;
 	struct mbuf *mb;
-	struct ib_device *dev = ssk->ib_device;
-
-	mb = sbuf->mb;
-
-	ib_dma_unmap_single(dev, sbuf->mapping[0], head_size, dir);
+	int i;
 
-	for (i = 0; i < mb_shinfo(mb)->nr_frags; i++) {
-		ib_dma_unmap_page(dev, sbuf->mapping[i + 1],
-				  mb_shinfo(mb)->frags[i].size,
-				  dir);
-	}
+	dev = ssk->ib_device;
+	for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++)
+		ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir);
 }
 
 /* sdp_main.c */
 void sdp_set_default_moderation(struct sdp_sock *ssk);
-int sdp_init_sock(struct socket *sk);
 void sdp_start_keepalive_timer(struct socket *sk);
-void sdp_remove_sock(struct sdp_sock *ssk);
-void sdp_add_sock(struct sdp_sock *ssk);
 void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb);
 void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk);
-void sdp_reset_sk(struct socket *sk, int rc);
-void sdp_reset(struct socket *sk);
-int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed);
-void mb_entail(struct socket *sk, struct sdp_sock *ssk, struct mbuf *mb);
-
-/* sdp_proc.c */
-int __init sdp_proc_init(void);
-void sdp_proc_unregister(void);
+void sdp_abort(struct socket *sk);
+struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error);
+
 
 /* sdp_cma.c */
 int sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);
@@ -792,8 +680,7 @@ int sdp_tx_ring_create(struct sdp_sock *
 void sdp_tx_ring_destroy(struct sdp_sock *ssk);
 int sdp_xmit_poll(struct sdp_sock *ssk, int force);
 void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb);
-void sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp);
-void sdp_nagle_timeout(unsigned long data);
+void sdp_post_sends(struct sdp_sock *ssk, int wait);
 void sdp_post_keepalive(struct sdp_sock *ssk);
 
 /* sdp_rx.c */
@@ -804,8 +691,6 @@ int sdp_resize_buffers(struct sdp_sock *
 int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size);
 void sdp_do_posts(struct sdp_sock *ssk);
 void sdp_rx_comp_full(struct sdp_sock *ssk);
-void sdp_remove_large_sock(struct sdp_sock *ssk);
-void sdp_handle_disconn(struct socket *sk);
 
 /* sdp_zcopy.c */
 int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov);

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c	Tue Dec 14 21:33:17 2010	(r216449)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c	Wed Dec 15 01:08:19 2010	(r216450)
@@ -33,6 +33,8 @@
  */
 #include "sdp.h"
 
+static void sdp_nagle_timeout(void *data);
+
 #ifdef CONFIG_INFINIBAND_SDP_DEBUG_DATA
 void _dump_packet(const char *func, int line, struct socket *sk, char *str,
 		struct mbuf *mb, const struct sdp_bsdh *h)
@@ -85,7 +87,7 @@ void _dump_packet(const char *func, int 
 		srcah = (struct sdp_srcah *)(h+1);
 
 		len += snprintf(buf + len, 255-len, " | payload: 0x%lx, "
-				"len: 0x%x, rkey: 0x%x, vaddr: 0x%llx |",
+				"len: 0x%x, rkey: 0x%x, vaddr: 0x%jx |",
 				ntohl(h->len) - sizeof(struct sdp_bsdh) - 
 				sizeof(struct sdp_srcah),
 				ntohl(srcah->len), ntohl(srcah->rkey),
@@ -99,95 +101,79 @@ void _dump_packet(const char *func, int 
 }
 #endif
 
-static inline void update_send_head(struct socket *sk, struct mbuf *mb)
+static inline int
+sdp_nagle_off(struct sdp_sock *ssk, struct mbuf *mb)
 {
-	struct page *page;
-	sk->sk_send_head = mb->next;
-	if (sk->sk_send_head == (struct mbuf *)&sk->sk_write_queue) {
-		sk->sk_send_head = NULL;
-		page = sk->sk_sndmsg_page;
-		if (page) {
-			put_page(page);
-			sk->sk_sndmsg_page = NULL;
-		}
-	}
-}
 
-static inline int sdp_nagle_off(struct sdp_sock *ssk, struct mbuf *mb)
-{
-	struct sdp_bsdh *h = (struct sdp_bsdh *)mb_transport_header(mb);
+	struct sdp_bsdh *h;
+
+	h = mtod(mb, struct sdp_bsdh *);
 	int send_now =
+#ifdef SDP_ZCOPY
 		BZCOPY_STATE(mb) ||
+#endif
 		unlikely(h->mid != SDP_MID_DATA) ||
-		(ssk->nonagle & TCP_NAGLE_OFF) ||
+		(ssk->flags & SDP_NODELAY) ||
 		!ssk->nagle_last_unacked ||
-		mb->next != (struct mbuf *)&ssk->isk.sk.sk_write_queue ||
-		mb->len + sizeof(struct sdp_bsdh) >= ssk->xmit_size_goal ||
-		(SDP_SKB_CB(mb)->flags & TCPCB_FLAG_PSH);
+		mb->m_pkthdr.len >= ssk->xmit_size_goal ||
+		(mb->m_flags & M_PUSH);
 
 	if (send_now) {
 		unsigned long mseq = ring_head(ssk->tx_ring);
 		ssk->nagle_last_unacked = mseq;
 	} else {
-		if (!timer_pending(&ssk->nagle_timer)) {
-			mod_timer(&ssk->nagle_timer,
-					jiffies + SDP_NAGLE_TIMEOUT);
-			sdp_dbg_data(&ssk->isk.sk, "Starting nagle timer\n");
+		if (!callout_pending(&ssk->nagle_timer)) {
+			callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT,
+			    sdp_nagle_timeout, ssk);
+			sdp_dbg_data(ssk->socket, "Starting nagle timer\n");
 		}
 	}
-	sdp_dbg_data(&ssk->isk.sk, "send_now = %d last_unacked = %ld\n",
+	sdp_dbg_data(ssk->socket, "send_now = %d last_unacked = %ld\n",
 		send_now, ssk->nagle_last_unacked);
 
 	return send_now;
 }
 
-void sdp_nagle_timeout(unsigned long data)
+static void
+sdp_nagle_timeout(void *data)
 {
 	struct sdp_sock *ssk = (struct sdp_sock *)data;
-	struct socket *sk = &ssk->isk.sk;
+	struct socket *sk = ssk->socket;
 
 	sdp_dbg_data(sk, "last_unacked = %ld\n", ssk->nagle_last_unacked);
 
-	if (!ssk->nagle_last_unacked)
-		goto out2;
+	if (!callout_active(&ssk->nagle_timer))
+		return;
+	callout_deactivate(&ssk->nagle_timer);
 
-	/* Only process if the socket is not in use */
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		sdp_dbg_data(sk, "socket is busy - will try later\n");
+	if (!ssk->nagle_last_unacked)
 		goto out;
-	}
-
-	if (sk->sk_state == TCPS_CLOSED) {
-		bh_unlock_sock(sk);
+	if (ssk->state == TCPS_CLOSED)
 		return;
-	}
-
 	ssk->nagle_last_unacked = 0;
-	sdp_post_sends(ssk, GFP_ATOMIC);
+	sdp_post_sends(ssk, M_DONTWAIT);
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		sk_stream_write_space(&ssk->isk.sk);
+	sowwakeup(ssk->socket);
 out:
-	bh_unlock_sock(sk);
-out2:
-	if (sk->sk_send_head) /* If has pending sends - rearm */
-		mod_timer(&ssk->nagle_timer, jiffies + SDP_NAGLE_TIMEOUT);
+	if (sk->so_snd.sb_sndptr)
+		callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT,
+		    sdp_nagle_timeout, ssk);
 }
 
-void sdp_post_sends(struct sdp_sock *ssk, gfp_t gfp)
+void
+sdp_post_sends(struct sdp_sock *ssk, int wait)
 {
 	/* TODO: nonagle? */
 	struct mbuf *mb;
 	int post_count = 0;
-	struct socket *sk = &ssk->isk.sk;
+	struct socket *sk;
 
+	sk = ssk->socket;
 	if (unlikely(!ssk->id)) {
-		if (ssk->isk.sk.sk_send_head) {
-			sdp_dbg(&ssk->isk.sk,
+		if (sk->so_snd.sb_sndptr) {
+			sdp_dbg(ssk->socket,
 				"Send on socket without cmid ECONNRESET.\n");
-			/* TODO: flush send queue? */
-			sdp_reset(&ssk->isk.sk);
+			sdp_notify(ssk, ECONNRESET);
 		}
 		return;
 	}
@@ -199,39 +185,42 @@ void sdp_post_sends(struct sdp_sock *ssk
 	    ring_tail(ssk->rx_ring) >= ssk->recv_request_head &&
 	    tx_credits(ssk) >= SDP_MIN_TX_CREDITS &&
 	    sdp_tx_ring_slots_left(ssk)) {
+		mb = sdp_alloc_mb_chrcvbuf_ack(sk,
+		    ssk->recv_bytes - SDP_HEAD_SIZE, wait);
+		if (mb == NULL)
+			goto allocfail;
 		ssk->recv_request = 0;
-
-		mb = sdp_alloc_mb_chrcvbuf_ack(sk, 
-				ssk->recv_frags * PAGE_SIZE, gfp);
-
 		sdp_post_send(ssk, mb);
 		post_count++;
 	}
 
 	if (tx_credits(ssk) <= SDP_MIN_TX_CREDITS &&
-	       sdp_tx_ring_slots_left(ssk) &&
-	       ssk->isk.sk.sk_send_head &&
-		sdp_nagle_off(ssk, ssk->isk.sk.sk_send_head)) {
+	    sdp_tx_ring_slots_left(ssk) && sk->so_snd.sb_sndptr &&
+	    sdp_nagle_off(ssk, sk->so_snd.sb_sndptr)) {
 		SDPSTATS_COUNTER_INC(send_miss_no_credits);
 	}
 
 	while (tx_credits(ssk) > SDP_MIN_TX_CREDITS &&
-	       sdp_tx_ring_slots_left(ssk) &&
-	       (mb = ssk->isk.sk.sk_send_head) &&
-		sdp_nagle_off(ssk, mb)) {
-		update_send_head(&ssk->isk.sk, mb);
-		__mb_dequeue(&ssk->isk.sk.sk_write_queue);
-
+	    sdp_tx_ring_slots_left(ssk) && (mb = sk->so_snd.sb_sndptr) &&
+	    sdp_nagle_off(ssk, mb)) {
+		struct mbuf *n;
+
+		SOCKBUF_LOCK(&sk->so_snd);
+		sk->so_snd.sb_sndptr = mb->m_nextpkt;
+		sk->so_snd.sb_mb = mb->m_nextpkt;
+		for (n = mb; n != NULL; n = mb->m_next)
+			sbfree(&sk->so_snd, mb);
+		SB_EMPTY_FIXUP(&sk->so_snd);
+		SOCKBUF_UNLOCK(&sk->so_snd);
 		sdp_post_send(ssk, mb);
-
 		post_count++;
 	}
 
-	if (credit_update_needed(ssk) &&
-	    likely((1 << ssk->isk.sk.sk_state) &
-		    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1))) {
-
-		mb = sdp_alloc_mb_data(&ssk->isk.sk, gfp);
+	if (credit_update_needed(ssk) && ssk->state >= TCPS_ESTABLISHED &&
+	    ssk->state < TCPS_FIN_WAIT_2) {
+		mb = sdp_alloc_mb_data(ssk->socket, wait);
+		if (mb == NULL)
+			goto allocfail;
 		sdp_post_send(ssk, mb);
 
 		SDPSTATS_COUNTER_INC(post_send_credits);
@@ -243,17 +232,21 @@ void sdp_post_sends(struct sdp_sock *ssk
 	 * If one credit is available, an implementation shall only send SDP
 	 * messages that provide additional credits and also do not contain ULP
 	 * payload. */
-	if (unlikely(ssk->sdp_disconnect) &&
-			!ssk->isk.sk.sk_send_head &&
-			tx_credits(ssk) > 1) {
-		ssk->sdp_disconnect = 0;
-
-		mb = sdp_alloc_mb_disconnect(sk, gfp);
+	if ((ssk->flags & SDP_NEEDFIN) && !sk->so_snd.sb_sndptr &&
+	    tx_credits(ssk) > 1) {
+		mb = sdp_alloc_mb_disconnect(sk, wait);
+		if (mb == NULL)
+			goto allocfail;
+		ssk->flags &= ~SDP_NEEDFIN;
 		sdp_post_send(ssk, mb);
-
 		post_count++;
 	}
-
 	if (post_count)
 		sdp_xmit_poll(ssk, 0);
+	return;
+
+allocfail:
+	ssk->nagle_last_unacked = -1;
+	callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
+	return;
 }

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c	Tue Dec 14 21:33:17 2010	(r216449)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_cma.c	Wed Dec 15 01:08:19 2010	(r216450)
@@ -31,19 +31,6 @@
  *
  * $Id$
  */
-#include <linux/device.h>
-#include <linux/in.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/time.h>
-#include <linux/workqueue.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/rdma_cm.h>
-#include <net/tcp_states.h>
-#include <rdma/sdp_socket.h>
 #include "sdp.h"
 
 #define SDP_MAJV_MINV 0x22
@@ -56,11 +43,13 @@ enum {
 	SDP_HAH_SIZE = 180,
 };
 
-static void sdp_qp_event_handler(struct ib_event *event, void *data)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list