PERFORCE change 132604 for review
Kip Macy
kmacy at FreeBSD.org
Sat Jan 5 17:22:24 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=132604
Change 132604 by kmacy at pandemonium:kmacy:xen31 on 2008/01/06 01:21:40
get ping working
- bring the rx control flow more in line with what exists in current Linux
- switch to tracking pfn in ext_args to allow for larger than 4GB VMs
- move page management structures into netfront_tinfo (softc) so that multiple
devices can operate in parallel
Affected files ...
.. //depot/projects/xen31/sys/dev/xen/netfront/netfront.c#4 edit
Differences ...
==== //depot/projects/xen31/sys/dev/xen/netfront/netfront.c#4 (text+ko) ====
@@ -88,7 +88,11 @@
#define RX_COPY_THRESHOLD 256
+#define net_ratelimit() 0
+
struct netfront_info;
+struct netfront_rx_info;
+
static void xn_txeof(struct netfront_info *);
static void xn_rxeof(struct netfront_info *);
static void network_alloc_rx_buffers(struct netfront_info *);
@@ -127,10 +131,12 @@
static void xn_free_tx_ring(struct netfront_info *);
+static int xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf_head *list,
+ int *pages_flipped_p);
+
#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
-/* XXX: This isn't supported in FreeBSD, so ignore it for now. */
-#define TASK_UNINTERRUPTIBLE 0
#define INVALID_P2M_ENTRY (~0UL)
/*
@@ -144,10 +150,6 @@
};
-static unsigned long rx_pfn_array[NET_RX_RING_SIZE];
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-
struct net_device_stats
{
unsigned long rx_packets; /* total packets received */
@@ -225,6 +227,11 @@
int xn_if_flags;
struct callout xn_stat_ch;
+
+
+ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+ multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+ mmu_update_t rx_mmu[NET_RX_RING_SIZE];
};
#define rx_mbufs xn_cdata.xn_rx_chain
@@ -251,6 +258,11 @@
mtx_destroy(&(_sc)->tx_lock); \
sx_destroy(&(_sc)->sc_lock);
+struct netfront_rx_info {
+ struct netif_rx_response rx;
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
#define netfront_carrier_on(netif) ((netif)->carrier = 1)
#define netfront_carrier_off(netif) ((netif)->carrier = 0)
#define netfront_carrier_ok(netif) ((netif)->carrier)
@@ -315,6 +327,7 @@
#define DPRINTK(fmt, args...) \
printf("[XEN] " fmt, ##args)
+
static __inline struct mbuf*
makembuf (struct mbuf *buf)
{
@@ -331,11 +344,8 @@
m->m_pkthdr.len = buf->m_pkthdr.len;
m->m_len = buf->m_len;
m_copydata(buf, 0, buf->m_pkthdr.len, mtod(m,caddr_t) );
- /*
- * XXX this will break on > 4GB
- *
- */
- m->m_ext.ext_args = (caddr_t *)(uintptr_t)vtophys(mtod(m,caddr_t));
+
+ m->m_ext.ext_args = (caddr_t *)(uintptr_t)(vtophys(mtod(m,caddr_t)) >> PAGE_SHIFT);
return m;
}
@@ -737,7 +747,8 @@
if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
break;
- m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)vtophys(m_new->m_ext.ext_buf);
+ m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)(vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
+
id = xennet_rxidx(req_prod + i);
PANIC_IF(sc->xn_cdata.xn_rx_chain[id] != NULL);
@@ -755,13 +766,13 @@
gnttab_grant_foreign_transfer_ref(ref,
sc->xbdev->otherend_id,
pfn);
- rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
+ sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Remove this page before passing
* back to Xen.
*/
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- MULTI_update_va_mapping(&rx_mcl[i],
+ MULTI_update_va_mapping(&sc->rx_mcl[i],
vaddr, 0, 0);
}
nr_flips++;
@@ -774,7 +785,7 @@
req->id = id;
req->gref = ref;
- rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
+ sc->rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
}
KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
@@ -789,7 +800,7 @@
/* Tell the ballon driver what is going on. */
balloon_update_driver_allowance(i);
#endif
- set_xen_guest_handle(reservation.extent_start, rx_pfn_array);
+ set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
reservation.nr_extents = i;
reservation.extent_order = 0;
reservation.address_bits = 0;
@@ -798,18 +809,18 @@
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* After all PTEs have been zapped, flush the TLB. */
- rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
UVMF_TLB_FLUSH|UVMF_ALL;
/* Give away a batch of pages. */
- rx_mcl[i].op = __HYPERVISOR_memory_op;
- rx_mcl[i].args[0] = XENMEM_decrease_reservation;
- rx_mcl[i].args[1] = (unsigned long)&reservation;
+ sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
+ sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ sc->rx_mcl[i].args[1] = (unsigned long)&reservation;
/* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(rx_mcl, i+1);
+ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
- if (unlikely(rx_mcl[i].result != i))
+ if (unlikely(sc->rx_mcl[i].result != i))
panic("Unable to reduce memory reservation\n");
} else {
@@ -832,144 +843,119 @@
static void
xn_rxeof(struct netfront_info *np)
{
- struct ifnet *ifp;
- netif_rx_response_t *rx;
- RING_IDX i, rp;
- mmu_update_t *mmu = rx_mmu;
- multicall_entry_t *mcl = rx_mcl;
- struct mbuf *tail_mbuf = NULL, *head_mbuf = NULL, *m, *next;
- unsigned long mfn;
- grant_ref_t ref;
+ struct ifnet *ifp;
+ struct netfront_rx_info rinfo;
+ struct netif_rx_response *rx = &rinfo.rx;
+ struct netif_extra_info *extras = rinfo.extras;
+ RING_IDX i, rp;
+ multicall_entry_t *mcl;
+ struct mbuf *m;
+ struct mbuf_head rxq, errq, tmpq;
+ int err, pages_flipped = 0;
+
+ XN_RX_LOCK_ASSERT(np);
+ if (!netfront_carrier_ok(np))
+ return;
- XN_RX_LOCK_ASSERT(np);
- if (!netfront_carrier_ok(np))
- return;
+ mbufq_init(&tmpq);
+ mbufq_init(&errq);
+ mbufq_init(&rxq);
+
+ ifp = np->xn_ifp;
- ifp = np->xn_ifp;
-
- rp = np->rx.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for (i = np->rx.rsp_cons; i != rp; i++) {
-
- rx = RING_GET_RESPONSE(&np->rx, i);
- KASSERT(rx->id != 0, ("xn_rxeof: found free receive index of 0\n"));
- /*
- * This definitely indicates a bug, either in this driver or
- * in the backend driver. In future this should flag the bad
- * situation to the system controller to reboot the backed.
- */
- if ((ref = np->grant_rx_ref[rx->id]) == GRANT_INVALID_REF) {
- WPRINTK("Bad rx response id %d.\n", rx->id);
- continue;
- }
+ rp = np->rx.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ i = np->rx.rsp_cons;
+ while ((i != rp)) {
+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+ memset(extras, 0, sizeof(rinfo.extras));
+
+ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
+ &pages_flipped);
+
+ if (unlikely(err)) {
+ while ((m = mbufq_dequeue(&tmpq)))
+ mbufq_tail(&errq, m);
+ np->stats.rx_errors++;
+ i = np->rx.rsp_cons;
+ continue;
+ }
-
- /* Memory pressure, insufficient buffer headroom, ... */
- if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
-#if 0
- if (net_ratelimit())
- WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
- rx->id, rx->status);
-#endif
- RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id =
- rx->id;
- RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref =
- ref;
- np->rx.req_prod_pvt++;
- RING_PUSH_REQUESTS(&np->rx);
- continue;
- }
+ m = mbufq_dequeue(&tmpq);
- gnttab_release_grant_reference(&np->gref_rx_head, ref);
- np->grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+ m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */
+ m->m_pkthdr.len = m->m_len = rx->status;
+ m->m_pkthdr.rcvif = ifp;
- m = (struct mbuf *)np->xn_cdata.xn_rx_chain[rx->id];
- if (m->m_next)
- panic("mbuf is already part of a valid mbuf chain");
- add_id_to_freelist(np->xn_cdata.xn_rx_chain, rx->id);
-
- m->m_data += rx->offset;/* (rx->addr & PAGE_MASK); */
- m->m_pkthdr.len = m->m_len = rx->status;
- m->m_pkthdr.rcvif = ifp;
-
- if ( rx->flags & NETRXF_data_validated ) {
- /* Tell the stack the checksums are okay */
- m->m_pkthdr.csum_flags |=
- (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
- m->m_pkthdr.csum_data = 0xffff;
- }
+ if ( rx->flags & NETRXF_data_validated ) {
+ /* Tell the stack the checksums are okay */
+ /*
+ * XXX this isn't necessarily the case - need to add check
+ *
+ */
+
+ m->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m->m_pkthdr.csum_data = 0xffff;
+ }
- np->stats.rx_packets++;
- np->stats.rx_bytes += rx->status;
-
-
- /* Remap the page. */
- mmu->ptr = ((vm_offset_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT;
- mmu++;
- /* XXX validate me */
- mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = (unsigned long)m->m_data;
- mcl->args[1] = (mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
- mcl->args[2] = 0;
- mcl->args[3] = 0;
- mcl++;
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += rx->status;
- xen_phys_machine[((unsigned long)m->m_ext.ext_args >> PAGE_SHIFT)] =
- mfn;
+ mbufq_tail(&rxq, m);
+ np->rx.rsp_cons = ++i;
+ }
- if (unlikely(!head_mbuf))
- head_mbuf = m;
-
- if (tail_mbuf)
- tail_mbuf->m_next = m;
- tail_mbuf = m;
-
- np->xn_cdata.xn_rx_chain[rx->id] = NULL;
- }
+ if (pages_flipped) {
+ /* Some pages are no longer absent... */
+#ifdef notyet
+ balloon_update_driver_allowance(-pages_flipped);
+#endif
+ /* Do all the remapping work, and M->P updates, in one big hypercall. */
+ if (!!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = np->rx_mcl + pages_flipped;
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)np->rx_mmu;
+ mcl->args[1] = pages_flipped;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1);
+ }
+ }
- /* Do all the remapping work, and M->P updates, in one big hypercall. */
- if (likely((mcl - rx_mcl) != 0)) {
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
- mcl->args[1] = mmu - rx_mmu;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- mcl++;
- (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
- }
+ while ((m = mbufq_dequeue(&errq)))
+ m_freem(m);
+
+
+ /*
+ * Process all the mbufs after the remapping is complete.
+ * Break the mbuf chain first though.
+ */
+ while ((m = mbufq_dequeue(&rxq)) != NULL) {
+ ifp->if_ipackets++;
+
+ /*
+ * Do we really need to drop the rx lock?
+ */
+ XN_RX_UNLOCK(np);
+ /* Pass it up. */
+ (*ifp->if_input)(ifp, m);
+ XN_RX_LOCK(np);
+ }
+ np->rx.rsp_cons = i;
- /*
- * Process all the mbufs after the remapping is complete.
- * Break the mbuf chain first though.
- */
- for (m = head_mbuf; m; m = next) {
- next = m->m_next;
- m->m_next = NULL;
-
- ifp->if_ipackets++;
-
- XN_RX_UNLOCK(np);
-
- /* Pass it up. */
- (*ifp->if_input)(ifp, m);
- XN_RX_LOCK(np);
- }
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
+ ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
+ np->rx_target = np->rx_min_target;
- np->rx.rsp_cons = i;
-
- /* If we get a callback with very few responses, reduce fill target. */
- /* NB. Note exponential increase, linear decrease. */
- if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
- ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
- np->rx_target = np->rx_min_target;
-
- network_alloc_rx_buffers(np);
+ network_alloc_rx_buffers(np);
- np->rx.sring->rsp_event = i + 1;
+ np->rx.sring->rsp_event = i + 1;
}
@@ -1054,23 +1040,212 @@
struct ifnet *ifp = np->xn_ifp;
- while (np->rx.rsp_cons != np->rx.sring->rsp_prod &&
+ if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
likely(netfront_carrier_ok(np)) &&
- ifp->if_drv_flags & IFF_DRV_RUNNING) {
-
- XN_RX_LOCK(np);
- xn_rxeof(np);
- XN_RX_UNLOCK(np);
- if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
+ ifp->if_drv_flags & IFF_DRV_RUNNING))
+ return;
+ if (np->tx.rsp_cons != np->tx.sring->rsp_prod) {
XN_TX_LOCK(np);
xn_txeof(np);
XN_TX_UNLOCK(np);
+ }
+
+ XN_RX_LOCK(np);
+ xn_rxeof(np);
+ XN_RX_UNLOCK(np);
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+ !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ xn_start(ifp);
+}
+
+
+static void
+xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
+ grant_ref_t ref)
+{
+ int new = xennet_rxidx(np->rx.req_prod_pvt);
+
+ PANIC_IF(np->rx_mbufs[new] != NULL);
+ np->rx_mbufs[new] = m;
+ np->grant_rx_ref[new] = ref;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
+ np->rx.req_prod_pvt++;
+}
+
+static int
+xennet_get_extras(struct netfront_info *np,
+ struct netif_extra_info *extras, RING_IDX rp)
+
+{
+ struct netif_extra_info *extra;
+ RING_IDX cons = np->rx.rsp_cons;
+
+ int err = 0;
+
+ do {
+ struct mbuf *m;
+ grant_ref_t ref;
+
+ if (unlikely(cons + 1 == rp)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Missing extra info\n");
+#endif
+ err = -EINVAL;
+ break;
+ }
+
+ extra = (struct netif_extra_info *)
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Invalid extra type: %d\n",
+ extra->type);
+#endif
+ err = -EINVAL;
+ } else {
+ memcpy(&extras[extra->type - 1], extra,
+ sizeof(*extra));
+ }
+
+ m = xennet_get_rx_mbuf(np, cons);
+ ref = xennet_get_rx_ref(np, cons);
+ xennet_move_rx_slot(np, m, ref);
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ np->rx.rsp_cons = cons;
+ return err;
+}
+
+static int
+xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp,
+ struct mbuf_head *list,
+ int *pages_flipped_p)
+{
+ int pages_flipped = *pages_flipped_p;
+ struct mmu_update *mmu;
+ struct multicall_entry *mcl;
+ struct netif_rx_response *rx = &rinfo->rx;
+ struct netif_extra_info *extras = rinfo->extras;
+ RING_IDX cons = np->rx.rsp_cons;
+ struct mbuf *m = xennet_get_rx_mbuf(np, cons);
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
+ int max = 24 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
+ int frags = 1;
+ int err = 0;
+ unsigned long ret;
+
+ if (rx->flags & NETRXF_extra_info) {
+ err = xennet_get_extras(np, extras, rp);
+ cons = np->rx.rsp_cons;
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
- !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- xn_start(ifp);
- }
- return;
+
+ for (;;) {
+ unsigned long mfn;
+
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > PAGE_SIZE)) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("rx->offset: %x, size: %u\n",
+ rx->offset, rx->status);
+#endif
+ xennet_move_rx_slot(np, m, ref);
+ err = -EINVAL;
+ goto next;
+ }
+
+ /*
+ * This definitely indicates a bug, either in this driver or in
+ * the backend driver. In future this should flag the bad
+ * situation to the system controller to reboot the backed.
+ */
+ if (ref == GRANT_INVALID_REF) {
+#if 0
+ if (net_ratelimit())
+ WPRINTK("Bad rx response id %d.\n", rx->id);
+#endif
+ err = -EINVAL;
+ goto next;
+ }
+
+ if (!np->copying_receiver) {
+ /* Memory pressure, insufficient buffer
+ * headroom, ...
+ */
+ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
+ if (net_ratelimit())
+ WPRINTK("Unfulfilled rx req "
+ "(id=%d, st=%d).\n",
+ rx->id, rx->status);
+ xennet_move_rx_slot(np, m, ref);
+ err = -ENOMEM;
+ goto next;
+ }
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ void *vaddr = mtod(m, void *);
+ uint32_t pfn;
+
+ mcl = np->rx_mcl + pages_flipped;
+ mmu = np->rx_mmu + pages_flipped;
+
+ MULTI_update_va_mapping(mcl,
+ (unsigned long)vaddr,
+ (mfn << PAGE_SHIFT) | PG_RW | PG_V | PG_M | PG_A,
+ 0);
+ pfn = (uint32_t)m->m_ext.ext_args;
+ mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+ pages_flipped++;
+ } else {
+ ret = gnttab_end_foreign_access_ref(ref, 0);
+ PANIC_IF(!ret);
+ }
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ mbufq_tail(list, m);
+
+ next:
+ if (!(rx->flags & NETRXF_more_data))
+ break;
+
+ if (cons + frags == rp) {
+ if (net_ratelimit())
+ WPRINTK("Need more frags\n");
+ err = -ENOENT;
+ break;
+ }
+
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+ m = xennet_get_rx_mbuf(np, cons + frags);
+ ref = xennet_get_rx_ref(np, cons + frags);
+ frags++;
+ }
+
+ if (unlikely(frags > max)) {
+ if (net_ratelimit())
+ WPRINTK("Too many frags\n");
+ err = -E2BIG;
+ }
+
+ if (unlikely(err))
+ np->rx.rsp_cons = cons + frags;
+
+ *pages_flipped_p = pages_flipped;
+
+ return err;
}
static void
@@ -1164,9 +1339,7 @@
if (notify)
notify_remote_via_irq(sc->irq);
- XN_TX_LOCK(sc);
xn_txeof(sc);
- XN_TX_UNLOCK(sc);
if (RING_FULL(&sc->tx)) {
sc->tx_full = 1;
@@ -1190,8 +1363,6 @@
XN_TX_UNLOCK(sc);
}
-
-
/* equivalent of network_open() in Linux */
static void
xn_ifinit_locked(struct netfront_info *sc)
More information about the p4-projects
mailing list