svn commit: r223059 - head/sys/dev/xen/blkback
Justin T. Gibbs
gibbs at FreeBSD.org
Mon Jun 13 20:36:30 UTC 2011
Author: gibbs
Date: Mon Jun 13 20:36:29 2011
New Revision: 223059
URL: http://svn.freebsd.org/changeset/base/223059
Log:
Several enhancements to the Xen block back driver.
sys/dev/xen/blkback/blkback.c:
o Implement front-end request coalescing. This greatly improves the
performance of front-end clients that are unaware of the dynamic
request-size/number of requests negotiation available in the
FreeBSD backend driver. This required a large restructuring
in how this driver records in-flight transactions and how those
transactions are mapped into kernel KVA. For example, the driver
now includes a mini "KVA manager" that allocates ranges of
contiguous KVA to batches of requests that are physically
contiguous in the backing store so that a single bio or UIO
segment can be used to represent the I/O.
o Refuse to open any backend files or devices if the system
has yet to mount root. This avoids a panic.
o Properly handle "onlined" devices. An "onlined" backend
device stays attached to its backing store across front-end
disconnections. This feature is intended to reduce latency
when a front-end does a hand-off to another driver (e.g.
PV-aware bootloader to OS kernel) or during a VM reboot.
o Harden the driver against a pathological/buggy front-end
by carefully vetting front-end XenStore data such as the
front-end state.
o Add sysctls that report the negotiated number of
segments per request and the number of requests that
can be concurrently in flight (see the sketch immediately
below).
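As a rough illustration of the last item, read-only sysctls for the
negotiated limits could be hung off the device's sysctl tree along the
following lines. This is a minimal sketch only, not the committed code
(the actual registration falls outside the truncated diff below); it
assumes the softc stores its device_t in a dev member, uses hypothetical
node names, and relies on the <sys/sysctl.h> and <sys/bus.h> headers the
driver already pulls in.

    static void
    xbb_setup_sysctl(struct xbb_softc *xbb)
    {
            struct sysctl_ctx_list *sysctl_ctx;
            struct sysctl_oid      *sysctl_tree;

            sysctl_ctx  = device_get_sysctl_ctx(xbb->dev);
            sysctl_tree = device_get_sysctl_tree(xbb->dev);

            /* Negotiated number of concurrently outstanding requests. */
            SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
                OID_AUTO, "max_requests", CTLFLAG_RD,
                &xbb->max_requests, 0,
                "maximum outstanding requests (negotiated)");

            /* Negotiated number of segments (pages) per request. */
            SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
                OID_AUTO, "max_request_segments", CTLFLAG_RD,
                &xbb->max_request_segments, 0,
                "maximum number of pages per request (negotiated)");
    }

With something like this called at attach time, the negotiated values
would appear under the device's sysctl tree (for example dev.xbbd.<unit>,
if the driver attaches under that name) and can be read with sysctl(8)
once the front-end connects.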
Submitted by: kdm
Reviewed by: gibbs
Sponsored by: Spectra Logic Corporation
MFC after: 1 week
Modified:
head/sys/dev/xen/blkback/blkback.c
Modified: head/sys/dev/xen/blkback/blkback.c
==============================================================================
--- head/sys/dev/xen/blkback/blkback.c Mon Jun 13 20:34:12 2011 (r223058)
+++ head/sys/dev/xen/blkback/blkback.c Mon Jun 13 20:36:29 2011 (r223059)
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2010 Spectra Logic Corporation
+ * Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/mount.h>
+#include <sys/sysctl.h>
+#include <sys/bitstring.h>
#include <geom/geom.h>
@@ -153,9 +155,19 @@ MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "X
#define XBB_MAX_RING_PAGES \
BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \
* XBB_MAX_REQUESTS)
+/**
+ * The maximum number of ring pages that we can allow per request list.
+ * We limit this to the maximum number of segments per request, because
+ * that is already a reasonable number of segments to aggregate. This
+ * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST,
+ * because that would leave situations where we can't dispatch even one
+ * large request.
+ */
+#define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST
/*--------------------------- Forward Declarations ---------------------------*/
struct xbb_softc;
+struct xbb_xen_req;
static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt,
...) __attribute__((format(printf, 3, 4)));
@@ -163,16 +175,15 @@ static int xbb_shutdown(struct xbb_soft
static int xbb_detach(device_t dev);
/*------------------------------ Data Structures -----------------------------*/
-/**
- * \brief Object tracking an in-flight I/O from a Xen VBD consumer.
- */
-struct xbb_xen_req {
- /**
- * Linked list links used to aggregate idle request in the
- * request free pool (xbb->request_free_slist).
- */
- SLIST_ENTRY(xbb_xen_req) links;
+STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req);
+
+typedef enum {
+ XBB_REQLIST_NONE = 0x00,
+ XBB_REQLIST_MAPPED = 0x01
+} xbb_reqlist_flags;
+
+struct xbb_xen_reqlist {
/**
* Back reference to the parent block back instance for this
* request. Used during bio_done handling.
@@ -180,17 +191,71 @@ struct xbb_xen_req {
struct xbb_softc *xbb;
/**
- * The remote domain's identifier for this I/O request.
+ * BLKIF_OP code for this request.
+ */
+ int operation;
+
+ /**
+ * Set to BLKIF_RSP_* to indicate request status.
+ *
+ * This field allows an error status to be recorded even if the
+ * delivery of this status must be deferred. Deferred reporting
+ * is necessary, for example, when an error is detected during
+ * completion processing of one bio when other bios for this
+ * request are still outstanding.
+ */
+ int status;
+
+ /**
+ * Number of 512 byte sectors not transferred.
+ */
+ int residual_512b_sectors;
+
+ /**
+ * Starting sector number of the first request in the list.
+ */
+ off_t starting_sector_number;
+
+ /**
+ * If we're going to coalesce, the next contiguous sector would be
+ * this one.
+ */
+ off_t next_contig_sector;
+
+ /**
+ * Number of child requests in the list.
*/
- uint64_t id;
+ int num_children;
+
+ /**
+ * Number of I/O requests dispatched to the backend.
+ */
+ int pendcnt;
+
+ /**
+ * Total number of segments for requests in the list.
+ */
+ int nr_segments;
+
+ /**
+ * Flags for this particular request list.
+ */
+ xbb_reqlist_flags flags;
/**
* Kernel virtual address space reserved for this request
- * structure and used to map the remote domain's pages for
+ * list structure and used to map the remote domain's pages for
* this I/O, into our domain's address space.
*/
uint8_t *kva;
+ /**
+ * Base, psuedo-physical address, corresponding to the start
+ * of this request's kva region.
+ */
+ uint64_t gnt_base;
+
+
#ifdef XBB_USE_BOUNCE_BUFFERS
/**
* Pre-allocated domain local memory used to proxy remote
@@ -200,53 +265,91 @@ struct xbb_xen_req {
#endif
/**
- * Base, psuedo-physical address, corresponding to the start
- * of this request's kva region.
+ * Array of grant handles (one per page) used to map this request.
*/
- uint64_t gnt_base;
+ grant_handle_t *gnt_handles;
+
+ /**
+ * Device statistics request ordering type (ordered or simple).
+ */
+ devstat_tag_type ds_tag_type;
+
+ /**
+ * Device statistics request type (read, write, no_data).
+ */
+ devstat_trans_flags ds_trans_type;
+
+ /**
+ * The start time for this request.
+ */
+ struct bintime ds_t0;
+
+ /**
+ * Linked list of contiguous requests with the same operation type.
+ */
+ struct xbb_xen_req_list contig_req_list;
+
+ /**
+ * Linked list links used to aggregate idle requests in the
+ * request list free pool (xbb->reqlist_free_stailq) and pending
+ * requests waiting for execution (xbb->reqlist_pending_stailq).
+ */
+ STAILQ_ENTRY(xbb_xen_reqlist) links;
+};
+
+STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist);
+
+/**
+ * \brief Object tracking an in-flight I/O from a Xen VBD consumer.
+ */
+struct xbb_xen_req {
+ /**
+ * Linked list links used to aggregate requests into a reqlist
+ * and to store them in the request free pool.
+ */
+ STAILQ_ENTRY(xbb_xen_req) links;
+
+ /**
+ * The remote domain's identifier for this I/O request.
+ */
+ uint64_t id;
/**
* The number of pages currently mapped for this request.
*/
- int nr_pages;
+ int nr_pages;
/**
* The number of 512 byte sectors comprising this requests.
*/
- int nr_512b_sectors;
+ int nr_512b_sectors;
/**
* The number of struct bio requests still outstanding for this
* request on the backend device. This field is only used for
* device (rather than file) backed I/O.
*/
- int pendcnt;
+ int pendcnt;
/**
* BLKIF_OP code for this request.
*/
- int operation;
+ int operation;
/**
- * BLKIF_RSP status code for this request.
- *
- * This field allows an error status to be recorded even if the
- * delivery of this status must be deferred. Deferred reporting
- * is necessary, for example, when an error is detected during
- * completion processing of one bio when other bios for this
- * request are still outstanding.
+ * Storage used for non-native ring requests.
*/
- int status;
+ blkif_request_t ring_req_storage;
/**
- * Device statistics request ordering type (ordered or simple).
+ * Pointer to the Xen request in the ring.
*/
- devstat_tag_type ds_tag_type;
+ blkif_request_t *ring_req;
/**
- * Device statistics request type (read, write, no_data).
+ * Consumer index for this request.
*/
- devstat_trans_flags ds_trans_type;
+ RING_IDX req_ring_idx;
/**
* The start time for this request.
@@ -254,9 +357,9 @@ struct xbb_xen_req {
struct bintime ds_t0;
/**
- * Array of grant handles (one per page) used to map this request.
+ * Pointer back to our parent request list.
*/
- grant_handle_t *gnt_handles;
+ struct xbb_xen_reqlist *reqlist;
};
SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req);
@@ -321,7 +424,10 @@ typedef enum
XBBF_RESOURCE_SHORTAGE = 0x04,
/** Connection teardown in progress. */
- XBBF_SHUTDOWN = 0x08
+ XBBF_SHUTDOWN = 0x08,
+
+ /** A thread is already performing shutdown processing. */
+ XBBF_IN_SHUTDOWN = 0x10
} xbb_flag_t;
/** Backend device type. */
@@ -399,7 +505,7 @@ struct xbb_file_data {
* Only a single file based request is outstanding per-xbb instance,
* so we only need one of these.
*/
- struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST];
+ struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST];
#ifdef XBB_USE_BOUNCE_BUFFERS
/**
@@ -411,7 +517,7 @@ struct xbb_file_data {
* bounce-out the read data. This array serves as the temporary
* storage for this saved data.
*/
- struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQUEST];
+ struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST];
/**
* \brief Array of memoized bounce buffer kva offsets used
@@ -422,7 +528,7 @@ struct xbb_file_data {
* the request sg elements is unavoidable. We memoize the computed
* bounce address here to reduce the cost of the second walk.
*/
- void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQUEST];
+ void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST];
#endif /* XBB_USE_BOUNCE_BUFFERS */
};
@@ -437,9 +543,9 @@ union xbb_backend_data {
/**
* Function signature of backend specific I/O handlers.
*/
-typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, blkif_request_t *ring_req,
- struct xbb_xen_req *req, int nseg,
- int operation, int flags);
+typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb,
+ struct xbb_xen_reqlist *reqlist, int operation,
+ int flags);
/**
* Per-instance configuration data.
@@ -467,14 +573,23 @@ struct xbb_softc {
xbb_dispatch_t dispatch_io;
/** The number of requests outstanding on the backend device/file. */
- u_int active_request_count;
+ int active_request_count;
/** Free pool of request tracking structures. */
- struct xbb_xen_req_slist request_free_slist;
+ struct xbb_xen_req_list request_free_stailq;
/** Array, sized at connection time, of request tracking structures. */
struct xbb_xen_req *requests;
+ /** Free pool of request list structures. */
+ struct xbb_xen_reqlist_list reqlist_free_stailq;
+
+ /** List of pending request lists awaiting execution. */
+ struct xbb_xen_reqlist_list reqlist_pending_stailq;
+
+ /** Array, sized at connection time, of request list structures. */
+ struct xbb_xen_reqlist *request_lists;
+
/**
* Global pool of kva used for mapping remote domain ring
* and I/O transaction data.
@@ -487,6 +602,15 @@ struct xbb_softc {
/** The size of the global kva pool. */
int kva_size;
+ /** The size of the KVA area used for request lists. */
+ int reqlist_kva_size;
+
+ /** The number of pages of KVA used for request lists */
+ int reqlist_kva_pages;
+
+ /** Bitmap of free KVA pages */
+ bitstr_t *kva_free;
+
/**
* \brief Cached value of the front-end's domain id.
*
@@ -508,12 +632,12 @@ struct xbb_softc {
int abi;
/**
- * \brief The maximum number of requests allowed to be in
- * flight at a time.
+ * \brief The maximum number of requests and request lists allowed
+ * to be in flight at a time.
*
* This value is negotiated via the XenStore.
*/
- uint32_t max_requests;
+ u_int max_requests;
/**
* \brief The maximum number of segments (1 page per segment)
@@ -521,7 +645,15 @@ struct xbb_softc {
*
* This value is negotiated via the XenStore.
*/
- uint32_t max_request_segments;
+ u_int max_request_segments;
+
+ /**
+ * \brief Maximum number of segments per request list.
+ *
+ * This value is derived from and will generally be larger than
+ * max_request_segments.
+ */
+ u_int max_reqlist_segments;
/**
* The maximum size of any request to this back-end
@@ -529,7 +661,13 @@ struct xbb_softc {
*
* This value is negotiated via the XenStore.
*/
- uint32_t max_request_size;
+ u_int max_request_size;
+
+ /**
+ * The maximum size of any request list. This is derived directly
+ * from max_reqlist_segments.
+ */
+ u_int max_reqlist_size;
/** Various configuration and state bit flags. */
xbb_flag_t flags;
@@ -574,6 +712,7 @@ struct xbb_softc {
struct vnode *vn;
union xbb_backend_data backend;
+
/** The native sector size of the backend. */
u_int sector_size;
@@ -598,7 +737,14 @@ struct xbb_softc {
*
* Ring processing is serialized so we only need one of these.
*/
- struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQUEST];
+ struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST];
+
+ /**
+ * Temporary grant table map used in xbb_dispatch_io(). When
+ * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the
+ * stack could cause a stack overflow.
+ */
+ struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST];
/** Mutex protecting per-instance data. */
struct mtx lock;
@@ -614,8 +760,51 @@ struct xbb_softc {
int pseudo_phys_res_id;
#endif
- /** I/O statistics. */
+ /**
+ * I/O statistics from BlockBack dispatch down. These are
+ * coalesced requests, and we start them right before execution.
+ */
struct devstat *xbb_stats;
+
+ /**
+ * I/O statistics coming into BlockBack. These are the requests as
+ * we get them from BlockFront. They are started as soon as we
+ * receive a request, and completed when the I/O is complete.
+ */
+ struct devstat *xbb_stats_in;
+
+ /** Disable sending flush to the backend */
+ int disable_flush;
+
+ /** Send a real flush for every N flush requests */
+ int flush_interval;
+
+ /** Count of flush requests in the interval */
+ int flush_count;
+
+ /** Don't coalesce requests if this is set */
+ int no_coalesce_reqs;
+
+ /** Number of requests we have received */
+ uint64_t reqs_received;
+
+ /** Number of requests we have completed*/
+ uint64_t reqs_completed;
+
+ /** How many forced dispatches (i.e. without coalescing) have happend */
+ uint64_t forced_dispatch;
+
+ /** How many normal dispatches have happend */
+ uint64_t normal_dispatch;
+
+ /** How many total dispatches have happend */
+ uint64_t total_dispatch;
+
+ /** How many times we have run out of KVA */
+ uint64_t kva_shortages;
+
+ /** How many times we have run out of request structures */
+ uint64_t request_shortages;
};
/*---------------------------- Request Processing ----------------------------*/
@@ -633,21 +822,14 @@ xbb_get_req(struct xbb_softc *xbb)
struct xbb_xen_req *req;
req = NULL;
- mtx_lock(&xbb->lock);
- /*
- * Do not allow new requests to be allocated while we
- * are shutting down.
- */
- if ((xbb->flags & XBBF_SHUTDOWN) == 0) {
- if ((req = SLIST_FIRST(&xbb->request_free_slist)) != NULL) {
- SLIST_REMOVE_HEAD(&xbb->request_free_slist, links);
- xbb->active_request_count++;
- } else {
- xbb->flags |= XBBF_RESOURCE_SHORTAGE;
- }
+ mtx_assert(&xbb->lock, MA_OWNED);
+
+ if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) {
+ STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links);
+ xbb->active_request_count++;
}
- mtx_unlock(&xbb->lock);
+
return (req);
}
@@ -660,34 +842,40 @@ xbb_get_req(struct xbb_softc *xbb)
static inline void
xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req)
{
- int wake_thread;
+ mtx_assert(&xbb->lock, MA_OWNED);
- mtx_lock(&xbb->lock);
- wake_thread = xbb->flags & XBBF_RESOURCE_SHORTAGE;
- xbb->flags &= ~XBBF_RESOURCE_SHORTAGE;
- SLIST_INSERT_HEAD(&xbb->request_free_slist, req, links);
+ STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links);
xbb->active_request_count--;
- if ((xbb->flags & XBBF_SHUTDOWN) != 0) {
- /*
- * Shutdown is in progress. See if we can
- * progress further now that one more request
- * has completed and been returned to the
- * free pool.
- */
- xbb_shutdown(xbb);
- }
- mtx_unlock(&xbb->lock);
+ KASSERT(xbb->active_request_count >= 0,
+ ("xbb_release_req: negative active count"));
+}
- if (wake_thread != 0)
- taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
+/**
+ * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param req_list The list of requests to free.
+ * \param nreqs The number of items in the list.
+ */
+static inline void
+xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list,
+ int nreqs)
+{
+ mtx_assert(&xbb->lock, MA_OWNED);
+
+ STAILQ_CONCAT(&xbb->request_free_stailq, req_list);
+ xbb->active_request_count -= nreqs;
+
+ KASSERT(xbb->active_request_count >= 0,
+ ("xbb_release_reqs: negative active count"));
}
/**
* Given a page index and 512b sector offset within that page,
* calculate an offset into a request's kva region.
*
- * \param req The request structure whose kva region will be accessed.
+ * \param reqlist The request structure whose kva region will be accessed.
* \param pagenr The page index used to compute the kva offset.
* \param sector The 512b sector index used to compute the page relative
* kva offset.
@@ -695,9 +883,9 @@ xbb_release_req(struct xbb_softc *xbb, s
* \return The computed global KVA offset.
*/
static inline uint8_t *
-xbb_req_vaddr(struct xbb_xen_req *req, int pagenr, int sector)
+xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
{
- return (req->kva + (PAGE_SIZE * pagenr) + (sector << 9));
+ return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9));
}
#ifdef XBB_USE_BOUNCE_BUFFERS
@@ -705,7 +893,7 @@ xbb_req_vaddr(struct xbb_xen_req *req, i
* Given a page index and 512b sector offset within that page,
* calculate an offset into a request's local bounce memory region.
*
- * \param req The request structure whose bounce region will be accessed.
+ * \param reqlist The request structure whose bounce region will be accessed.
* \param pagenr The page index used to compute the bounce offset.
* \param sector The 512b sector index used to compute the page relative
* bounce offset.
@@ -713,9 +901,9 @@ xbb_req_vaddr(struct xbb_xen_req *req, i
* \return The computed global bounce buffer address.
*/
static inline uint8_t *
-xbb_req_bounce_addr(struct xbb_xen_req *req, int pagenr, int sector)
+xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
{
- return (req->bounce + (PAGE_SIZE * pagenr) + (sector << 9));
+ return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9));
}
#endif
@@ -724,7 +912,7 @@ xbb_req_bounce_addr(struct xbb_xen_req *
* calculate an offset into the request's memory region that the
* underlying backend device/file should use for I/O.
*
- * \param req The request structure whose I/O region will be accessed.
+ * \param reqlist The request structure whose I/O region will be accessed.
* \param pagenr The page index used to compute the I/O offset.
* \param sector The 512b sector index used to compute the page relative
* I/O offset.
@@ -736,12 +924,12 @@ xbb_req_bounce_addr(struct xbb_xen_req *
* this request.
*/
static inline uint8_t *
-xbb_req_ioaddr(struct xbb_xen_req *req, int pagenr, int sector)
+xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
{
#ifdef XBB_USE_BOUNCE_BUFFERS
- return (xbb_req_bounce_addr(req, pagenr, sector));
+ return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector));
#else
- return (xbb_req_vaddr(req, pagenr, sector));
+ return (xbb_reqlist_vaddr(reqlist, pagenr, sector));
#endif
}
@@ -750,7 +938,7 @@ xbb_req_ioaddr(struct xbb_xen_req *req,
* an offset into the local psuedo-physical address space used to map a
* front-end's request data into a request.
*
- * \param req The request structure whose pseudo-physical region
+ * \param reqlist The request list structure whose pseudo-physical region
* will be accessed.
* \param pagenr The page index used to compute the pseudo-physical offset.
* \param sector The 512b sector index used to compute the page relative
@@ -763,10 +951,126 @@ xbb_req_ioaddr(struct xbb_xen_req *req,
* this request.
*/
static inline uintptr_t
-xbb_req_gntaddr(struct xbb_xen_req *req, int pagenr, int sector)
+xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
+{
+ struct xbb_softc *xbb;
+
+ xbb = reqlist->xbb;
+
+ return ((uintptr_t)(xbb->gnt_base_addr +
+ (uintptr_t)(reqlist->kva - xbb->kva) +
+ (PAGE_SIZE * pagenr) + (sector << 9)));
+}
+
+/**
+ * Get Kernel Virtual Address space for mapping requests.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param nr_pages Number of pages needed.
+ * \param check_only If set, check for free KVA but don't allocate it.
+ * \param have_lock If set, xbb lock is already held.
+ *
+ * \return On success, a pointer to the allocated KVA region. Otherwise NULL.
+ *
+ * Note: This should be unnecessary once we have either chaining or
+ * scatter/gather support for struct bio. At that point we'll be able to
+ * put multiple addresses and lengths in one bio/bio chain and won't need
+ * to map everything into one virtual segment.
+ */
+static uint8_t *
+xbb_get_kva(struct xbb_softc *xbb, int nr_pages)
+{
+ intptr_t first_clear, num_clear;
+ uint8_t *free_kva;
+ int i;
+
+ KASSERT(nr_pages != 0, ("xbb_get_kva of zero length"));
+
+ first_clear = 0;
+ free_kva = NULL;
+
+ mtx_lock(&xbb->lock);
+
+ /*
+ * Look for the first available page. If there are none, we're done.
+ */
+ bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear);
+
+ if (first_clear == -1)
+ goto bailout;
+
+ /*
+ * Starting at the first available page, look for consecutive free
+ * pages that will satisfy the user's request.
+ */
+ for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) {
+ /*
+ * If this is true, the page is used, so we have to reset
+ * the number of clear pages and the first clear page
+ * (since it pointed to a region with an insufficient number
+ * of clear pages).
+ */
+ if (bit_test(xbb->kva_free, i)) {
+ num_clear = 0;
+ first_clear = -1;
+ continue;
+ }
+
+ if (first_clear == -1)
+ first_clear = i;
+
+ /*
+ * If this is true, we've found a large enough free region
+ * to satisfy the request.
+ */
+ if (++num_clear == nr_pages) {
+
+ bit_nset(xbb->kva_free, first_clear,
+ first_clear + nr_pages - 1);
+
+ free_kva = xbb->kva +
+ (uint8_t *)(first_clear * PAGE_SIZE);
+
+ KASSERT(free_kva >= (uint8_t *)xbb->kva &&
+ free_kva + (nr_pages * PAGE_SIZE) <=
+ (uint8_t *)xbb->ring_config.va,
+ ("Free KVA %p len %d out of range, "
+ "kva = %#jx, ring VA = %#jx\n", free_kva,
+ nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva,
+ (uintmax_t)xbb->ring_config.va));
+ break;
+ }
+ }
+
+bailout:
+
+ if (free_kva == NULL) {
+ xbb->flags |= XBBF_RESOURCE_SHORTAGE;
+ xbb->kva_shortages++;
+ }
+
+ mtx_unlock(&xbb->lock);
+
+ return (free_kva);
+}
+
+/**
+ * Free allocated KVA.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param kva_ptr Pointer to allocated KVA region.
+ * \param nr_pages Number of pages in the KVA region.
+ */
+static void
+xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages)
{
- return ((uintptr_t)(req->gnt_base
- + (PAGE_SIZE * pagenr) + (sector << 9)));
+ intptr_t start_page;
+
+ mtx_assert(&xbb->lock, MA_OWNED);
+
+ start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT;
+ bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1);
+
}
/**
@@ -775,23 +1079,23 @@ xbb_req_gntaddr(struct xbb_xen_req *req,
* \param req The request structure to unmap.
*/
static void
-xbb_unmap_req(struct xbb_xen_req *req)
+xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist)
{
- struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQUEST];
+ struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST];
u_int i;
u_int invcount;
int error;
invcount = 0;
- for (i = 0; i < req->nr_pages; i++) {
+ for (i = 0; i < reqlist->nr_segments; i++) {
- if (req->gnt_handles[i] == GRANT_REF_INVALID)
+ if (reqlist->gnt_handles[i] == GRANT_REF_INVALID)
continue;
- unmap[invcount].host_addr = xbb_req_gntaddr(req, i, 0);
+ unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0);
unmap[invcount].dev_bus_addr = 0;
- unmap[invcount].handle = req->gnt_handles[i];
- req->gnt_handles[i] = GRANT_REF_INVALID;
+ unmap[invcount].handle = reqlist->gnt_handles[i];
+ reqlist->gnt_handles[i] = GRANT_REF_INVALID;
invcount++;
}
@@ -801,6 +1105,175 @@ xbb_unmap_req(struct xbb_xen_req *req)
}
/**
+ * Allocate an internal transaction tracking structure from the free pool.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ *
+ * \return On success, a pointer to the allocated xbb_xen_reqlist structure.
+ * Otherwise NULL.
+ */
+static inline struct xbb_xen_reqlist *
+xbb_get_reqlist(struct xbb_softc *xbb)
+{
+ struct xbb_xen_reqlist *reqlist;
+
+ reqlist = NULL;
+
+ mtx_assert(&xbb->lock, MA_OWNED);
+
+ if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) {
+
+ STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links);
+ reqlist->flags = XBB_REQLIST_NONE;
+ reqlist->kva = NULL;
+ reqlist->status = BLKIF_RSP_OKAY;
+ reqlist->residual_512b_sectors = 0;
+ reqlist->num_children = 0;
+ reqlist->nr_segments = 0;
+ STAILQ_INIT(&reqlist->contig_req_list);
+ }
+
+ return (reqlist);
+}
+
+/**
+ * Return an allocated transaction tracking structure to the free pool.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param req The request list structure to free.
+ * \param wakeup If set, wakeup the work thread if freeing this reqlist
+ * during a resource shortage condition.
+ */
+static inline void
+xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
+ int wakeup)
+{
+
+ mtx_lock(&xbb->lock);
+
+ if (wakeup) {
+ wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE;
+ xbb->flags &= ~XBBF_RESOURCE_SHORTAGE;
+ }
+
+ if (reqlist->kva != NULL)
+ xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments);
+
+ xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children);
+
+ STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links);
+
+ if ((xbb->flags & XBBF_SHUTDOWN) != 0) {
+ /*
+ * Shutdown is in progress. See if we can
+ * progress further now that one more request
+ * has completed and been returned to the
+ * free pool.
+ */
+ xbb_shutdown(xbb);
+ }
+
+ mtx_unlock(&xbb->lock);
+
+ if (wakeup != 0)
+ taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
+}
+
+/**
+ * Request resources and do basic request setup.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param reqlist Pointer to reqlist pointer.
+ * \param ring_req Pointer to a block ring request.
+ * \param ring_index The ring index of this request.
+ *
+ * \return 0 for success, non-zero for failure.
+ */
+static int
+xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist,
+ blkif_request_t *ring_req, RING_IDX ring_idx)
+{
+ struct xbb_xen_reqlist *nreqlist;
+ struct xbb_xen_req *nreq;
+
+ nreqlist = NULL;
+ nreq = NULL;
+
+ mtx_lock(&xbb->lock);
+
+ /*
+ * We don't allow new resources to be allocated if we're in the
+ * process of shutting down.
+ */
+ if ((xbb->flags & XBBF_SHUTDOWN) != 0) {
+ mtx_unlock(&xbb->lock);
+ return (1);
+ }
+
+ /*
+ * Allocate a reqlist if the caller doesn't have one already.
+ */
+ if (*reqlist == NULL) {
+ nreqlist = xbb_get_reqlist(xbb);
+ if (nreqlist == NULL)
+ goto bailout_error;
+ }
+
+ /* We always allocate a request. */
+ nreq = xbb_get_req(xbb);
+ if (nreq == NULL)
+ goto bailout_error;
+
+ mtx_unlock(&xbb->lock);
+
+ if (*reqlist == NULL) {
+ *reqlist = nreqlist;
+ nreqlist->operation = ring_req->operation;
+ nreqlist->starting_sector_number = ring_req->sector_number;
+ STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist,
+ links);
+ }
+
+ nreq->reqlist = *reqlist;
+ nreq->req_ring_idx = ring_idx;
+
+ if (xbb->abi != BLKIF_PROTOCOL_NATIVE) {
+ bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req));
+ nreq->ring_req = &nreq->ring_req_storage;
+ } else {
+ nreq->ring_req = ring_req;
+ }
+
+ binuptime(&nreq->ds_t0);
+ devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0);
+ STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links);
+ (*reqlist)->num_children++;
+ (*reqlist)->nr_segments += ring_req->nr_segments;
+
+ return (0);
+
+bailout_error:
+
+ /*
+ * We're out of resources, so set the shortage flag. The next time
+ * a request is released, we'll try waking up the work thread to
+ * see if we can allocate more resources.
+ */
+ xbb->flags |= XBBF_RESOURCE_SHORTAGE;
+ xbb->request_shortages++;
+
+ if (nreq != NULL)
+ xbb_release_req(xbb, nreq);
+
+ mtx_unlock(&xbb->lock);
+
+ if (nreqlist != NULL)
+ xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0);
+
+ return (1);
+}
+
+/**
* Create and transmit a response to a blkif request.
*
* \param xbb Per-instance xbb configuration structure.
@@ -862,6 +1335,8 @@ xbb_send_response(struct xbb_softc *xbb,
more_to_do = 1;
}
+ xbb->reqs_completed++;
+
mtx_unlock(&xbb->lock);
if (more_to_do)
@@ -872,6 +1347,70 @@ xbb_send_response(struct xbb_softc *xbb,
}
/**
+ * Complete a request list.
+ *
+ * \param xbb Per-instance xbb configuration structure.
+ * \param reqlist Allocated internal request list structure.
+ */
+static void
+xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist)
+{
+ struct xbb_xen_req *nreq;
+ off_t sectors_sent;
+
+ sectors_sent = 0;
+
+ if (reqlist->flags & XBB_REQLIST_MAPPED)
+ xbb_unmap_reqlist(reqlist);
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***