PERFORCE change 106547 for review
Alexander Leidinger
netchild at FreeBSD.org
Sat Sep 23 05:09:22 PDT 2006
http://perforce.freebsd.org/chv.cgi?CH=106547
Change 106547 by netchild at netchild_magellan on 2006/09/23 12:09:15
1. Flexible on-demand linking to FreeBSD native AIO module. Only when
linux_io_xxx() is called will native AIO module be loaded.
2. Only two symbols "aio_init_aioinfo" and "aio_aqueue" of the native
AIO module need to be exported.
Submitted by: Intron <mag at intron.ac>
Requested by:	netchild (something similar to item 1 above)
Affected files ...
.. //depot/projects/linuxolator/src/sys/compat/linux/linux_aio.c#5 edit
.. //depot/projects/linuxolator/src/sys/kern/vfs_aio.c#3 edit
.. //depot/projects/linuxolator/src/sys/modules/aio/Makefile#3 edit
Differences ...
==== //depot/projects/linuxolator/src/sys/compat/linux/linux_aio.c#5 (text+ko) ====
@@ -44,6 +44,9 @@
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
+#include <sys/linker.h>
+#include <sys/sysctl.h>
+#include <sys/syscall.h>
#include <sys/sysproto.h>
#ifdef COMPAT_LINUX32
@@ -190,13 +193,13 @@
*/
#define LINUX_AIO_LOCK(p) { \
if ((p)->p_aioinfo == NULL) \
- aio_init_aioinfo(p); \
+ p_aio_init_aioinfo(p); \
mtx_lock((struct mtx *)((p)->p_aioinfo)); \
}
#define LINUX_AIO_UNLOCK(p) { \
if ((p)->p_aioinfo == NULL) \
- aio_init_aioinfo(p); \
+ p_aio_init_aioinfo(p); \
mtx_unlock((struct mtx *)((p)->p_aioinfo)); \
}
@@ -211,18 +214,43 @@
* which is NOT a normal calling way and can cause kernel crash.
*/
-/* Variables in /sys/kern/vfs_aio.c, XXX defined with "static" */
-extern int max_aio_queue_per_proc; /* sysctl */
-extern int max_queue_count; /* sysctl */
+#define NATIVE_AIO_MODULE_NAME "aio"
+static struct mod_depend native_aio_module_depend = {1, 1, 1};
+static linker_file_t native_aio_module_handle = NULL;
+
+/* Mirror of sysctls in /sys/kern/vfs_aio.c */
+#define NATIVE_AIO_SYSCTL_CAPACITY_PROC "vfs.aio.max_aio_queue_per_proc"
+static int native_aio_capacity_proc;
+#define NATIVE_AIO_SYSCTL_CAPACITY_SYS "vfs.aio.max_aio_queue"
+static int native_aio_capacity_sys;
/* For declaration of aio_aqueue(), defined in /sys/kern/vfs_aio.c */
struct aioliojob;
/* Functions in /sys/kern/vfs_aio.c, XXX defined with "static" */
-extern void aio_init_aioinfo(struct proc *p);
-extern int aio_aqueue(struct thread *td, struct aiocb *job,
+#define GET_INTERNAL_FUNC_POINTER(s) { \
+ * ((caddr_t *) & p_ ## s) = linker_file_lookup_symbol( \
+ native_aio_module_handle, #s, FALSE); \
+ if (p_ ## s == NULL) \
+ break; \
+}
+static void (*p_aio_init_aioinfo) (struct proc *p);
+static int (*p_aio_aqueue) (struct thread *td, struct aiocb *job,
struct aioliojob *lio, int type, int osigev);
+/* System calls in /sys/kern/vfs_aio.c */
+#define DEFINE_SYSCALL_POINTER_VARIABLE(s) \
+ static int (* p_ ## s) (struct thread *, struct s ## _args *)
+#define GET_SYSCALL_POINTER(s) { \
+ * ((sy_call_t **) & p_ ## s) = sysent[SYS_ ## s].sy_call; \
+ if ((sy_call_t *) p_ ## s == (sy_call_t *)lkmressys) \
+ break; \
+}
+DEFINE_SYSCALL_POINTER_VARIABLE(aio_return);
+DEFINE_SYSCALL_POINTER_VARIABLE(aio_suspend);
+DEFINE_SYSCALL_POINTER_VARIABLE(aio_cancel);
+DEFINE_SYSCALL_POINTER_VARIABLE(aio_error);
+
static int user_mem_rw_verify(void *p, size_t s)
{
char buf[256];
@@ -394,6 +422,87 @@
return nerr;
}
+/*
+ * Lazily bind to the FreeBSD native "aio" module: take a reference on the
+ * module (loading it if necessary), then resolve the two internal functions
+ * and the four syscall entry points this Linux AIO shim forwards to.
+ * Returns 0 on success, or an errno value; on partial failure the module
+ * reference is dropped so a later call can retry cleanly.
+ * NOTE(review): the td argument is currently unused here -- confirm whether
+ * it is kept only for symmetry with the LINK_TO_NATIVE_AIO_MODULE() callers.
+ */
+static int link_to_native_aio_module(struct thread *td)
+{
+	int nerr;
+
+	if (native_aio_module_handle != NULL)
+	{	/* Linking has been done successfully. */
+
+		return 0;
+	}
+
+	/* Load the module, or bump its refcount if it is already loaded. */
+	nerr = linker_reference_module(NATIVE_AIO_MODULE_NAME,
+		&native_aio_module_depend, &native_aio_module_handle);
+	if (nerr)
+		return nerr;
+
+	/* do { } while (0) so the lookup macros can bail out via "break". */
+	do {
+		nerr = EINVAL;
+
+		/* Kernel internal functions */
+		GET_INTERNAL_FUNC_POINTER(aio_init_aioinfo);
+		GET_INTERNAL_FUNC_POINTER(aio_aqueue);
+
+		/* System calls */
+		GET_SYSCALL_POINTER(aio_return);
+		GET_SYSCALL_POINTER(aio_suspend);
+		GET_SYSCALL_POINTER(aio_cancel);
+		GET_SYSCALL_POINTER(aio_error);
+
+		nerr = 0;
+	} while (0);
+
+	if (nerr)
+	{
+		/* Undo the reference taken above; leave handle NULL for retry. */
+		linker_release_module(NULL, NULL, native_aio_module_handle);
+		native_aio_module_handle = NULL;
+
+		printf(LMSG("Unable to link to the native module \""
+			NATIVE_AIO_MODULE_NAME "\" correctly."));
+
+		return nerr;
+	}
+
+	return 0;
+}
+
+#define LINK_TO_NATIVE_AIO_MODULE() \
+ if (link_to_native_aio_module(td)) { \
+ printf(LMSG("Please load the correct module \"" \
+ NATIVE_AIO_MODULE_NAME "\" correctly " \
+ "to provide FreeBSD " \
+ "native Asynchronous I/O support.")); \
+ return ENOSYS; \
+}
+
+/*
+ * Refresh the local mirrors of the native AIO capacity sysctls
+ * (vfs.aio.max_aio_queue_per_proc and vfs.aio.max_aio_queue), which are
+ * static in vfs_aio.c and therefore must be read through the sysctl
+ * interface rather than referenced directly.
+ * Returns 0 on success or the errno from kernel_sysctlbyname().
+ */
+static int mirror_native_aio_sysctl(struct thread *td)
+{
+	int nerr = 0;
+	size_t l;	/* kernel_sysctlbyname() takes size_t *oldlenp, not int * */
+
+	l = sizeof(native_aio_capacity_proc);
+	nerr = kernel_sysctlbyname(td, NATIVE_AIO_SYSCTL_CAPACITY_PROC,
+		&native_aio_capacity_proc, &l, NULL, 0,
+		NULL, 0);
+	if (nerr)
+		return nerr;
+
+	l = sizeof(native_aio_capacity_sys);
+	nerr = kernel_sysctlbyname(td, NATIVE_AIO_SYSCTL_CAPACITY_SYS,
+		&native_aio_capacity_sys, &l, NULL, 0,
+		NULL, 0);
+	if (nerr)
+		return nerr;
+
+	DPRINTF(NATIVE_AIO_SYSCTL_CAPACITY_PROC "=%d, "
+		NATIVE_AIO_SYSCTL_CAPACITY_SYS "=%d",
+		native_aio_capacity_proc,
+		native_aio_capacity_sys);
+
+	return nerr;
+}
+
/* Linux system call io_setup(2) */
int linux_io_setup(struct thread *td, struct linux_io_setup_args *args)
{
@@ -404,18 +513,29 @@
int nerr = 0, nr, nrall, nq, arg_nr_reqs;
DARGPRINTF("%u, %p", args->nr_reqs, args->ctxp);
+ LINK_TO_NATIVE_AIO_MODULE();
+ nerr = mirror_native_aio_sysctl(td);
+ if (nerr)
+ {
+ printf(LMSG("linux_io_setup(): Unable to query sysctls "
+ NATIVE_AIO_SYSCTL_CAPACITY_PROC
+ " and/or " NATIVE_AIO_SYSCTL_CAPACITY_SYS
+ " ."));
+ return nerr;
+ }
/* Signed integer is a little safer than unsigned */
arg_nr_reqs = args->nr_reqs;
if (arg_nr_reqs <= 0)
return EINVAL;
- if (arg_nr_reqs > max_aio_queue_per_proc
- || arg_nr_reqs > max_queue_count)
+ if (arg_nr_reqs > native_aio_capacity_proc
+ || arg_nr_reqs > native_aio_capacity_sys)
{
printf(LMSG("linux_io_setup(): Please increase sysctls "
- "vfs.aio.max_aio_queue_per_proc "
- "and/or vfs.aio.max_aio_queue. "));
+ NATIVE_AIO_SYSCTL_CAPACITY_PROC
+ " and/or " NATIVE_AIO_SYSCTL_CAPACITY_SYS
+ " ."));
return ENOMEM;
}
@@ -455,12 +575,12 @@
nq, nr, nrall);
/* Check whether there are enough resources for requested queue */
- if (arg_nr_reqs > max_aio_queue_per_proc - nr
- || arg_nr_reqs > max_queue_count - nrall) {
+ if (arg_nr_reqs > native_aio_capacity_proc - nr
+ || arg_nr_reqs > native_aio_capacity_sys - nrall) {
printf(LMSG("linux_io_setup(): "
"Please increase sysctls "
- "vfs.aio.max_aio_queue_per_proc "
- "and/or vfs.aio.max_aio_queue. "
+ NATIVE_AIO_SYSCTL_CAPACITY_PROC
+ " and/or " NATIVE_AIO_SYSCTL_CAPACITY_SYS " ."
"Besides %d queues of %d requests totally "
"for this process, and %d requests' queues "
"totally for the whole system, "
@@ -519,6 +639,7 @@
struct aio_return_args aioretargs;
DARGPRINTF("%lx", (unsigned long)args->ctx);
+ LINK_TO_NATIVE_AIO_MODULE();
p = td->td_proc;
@@ -570,7 +691,7 @@
/* Cancel FreeBSD native clone */
cancelargs.fd = preq->req_linux.aio_fildes;
cancelargs.aiocbp = preq->req_pbsd;
- aio_cancel(td, &cancelargs);
+ p_aio_cancel(td, &cancelargs);
DPRINTF("aio_cancel() returned %ld", (long)td->td_retval[0]);
if (td->td_retval[0] == AIO_NOTCANCELED)
printf(LMSG("linux_io_destroy(): Asynchronous IO "
@@ -584,7 +705,7 @@
if (td->td_retval[0] == AIO_ALLDONE) {
aioretargs.aiocbp = preq->req_pbsd;
- aio_return(td, &aioretargs);
+ p_aio_return(td, &aioretargs);
DPRINTF("aio_return(%p) returned %ld",
aioretargs.aiocbp,
(long)td->td_retval[0]);
@@ -640,6 +761,7 @@
(unsigned long) args->ctx_id,
(long)args->min_nr, (long)args->nr,
args->events, args->timeout);
+ LINK_TO_NATIVE_AIO_MODULE();
if (args->nr <= 0)
return EINVAL;
@@ -731,7 +853,7 @@
break;
aioerrargs.aiocbp = preq->req_pbsd;
- aio_error(td, &aioerrargs);
+ p_aio_error(td, &aioerrargs);
aio_ret = td->td_retval[0];
td->td_retval[0] = 0;
@@ -750,7 +872,7 @@
LINUX_AIO_REQ_UNHOOK(pctx, preq);
aioretargs.aiocbp = preq->req_pbsd;
- aio_err = aio_return(td, &aioretargs);
+ aio_err = p_aio_return(td, &aioretargs);
aio_ret = td->td_retval[0];
td->td_retval[0] = 0;
@@ -846,7 +968,7 @@
aiosusargs.timeout = NULL;
}
- aio_err = aio_suspend(td, &aiosusargs);
+ aio_err = p_aio_suspend(td, &aiosusargs);
DPRINTF("aio_suspend(%p, %d, %p) returned %ld",
aiosusargs.aiocbp, aiosusargs.nent,
aiosusargs.timeout, (long)aio_err);
@@ -911,7 +1033,9 @@
struct linux_iocb *porig;
struct aiocb iocb, *piocb;
- DARGPRINTF("%lx, %ld, %p", (unsigned long)args->ctx_id, (long)args->nr, args->iocbpp);
+ DARGPRINTF("%lx, %ld, %p", (unsigned long)args->ctx_id,
+ (long)args->nr, args->iocbpp);
+ LINK_TO_NATIVE_AIO_MODULE();
if(args->nr <= 0)
return EINVAL;
@@ -977,7 +1101,7 @@
DUMP_FREEBSD_AIOCB(piocb, 1);
/* Submit user space control block */
- nerr = aio_aqueue(td, piocb, NULL, iocb.aio_lio_opcode, 0);
+ nerr = p_aio_aqueue(td, piocb, NULL, iocb.aio_lio_opcode, 0);
if (nerr != 0) {
user_free(td, piocb, sizeof(*piocb));
break;
@@ -1022,7 +1146,9 @@
struct linux_io_event evt;
struct aio_cancel_args aiocnclargs;
- DARGPRINTF("%lx, %p, %p", (unsigned long)args->ctx_id, args->iocb, args->result);
+ DARGPRINTF("%lx, %p, %p", (unsigned long)args->ctx_id,
+ args->iocb, args->result);
+ LINK_TO_NATIVE_AIO_MODULE();
nerr = copyin(args->iocb, &lcb, sizeof(lcb));
if (nerr != 0)
@@ -1086,7 +1212,7 @@
/* Cancel FreeBSD native clone */
aiocnclargs.fd = preq->req_linux.aio_fildes;
aiocnclargs.aiocbp = preq->req_pbsd;
- aio_cancel(td, &aiocnclargs);
+ p_aio_cancel(td, &aiocnclargs);
DPRINTF("aio_cancel() returned %ld", (long)td->td_retval[0]);
if (td->td_retval[0] == AIO_CANCELED) {
@@ -1204,6 +1330,16 @@
mtx_destroy(&linux_aio_context_list_mtx);
uma_zdestroy(linux_aio_request_zone);
uma_zdestroy(linux_aio_context_zone);
+ if (native_aio_module_handle != NULL)
+ {
+ /*
+ * linker_release_module() cannot be used here.
+ * It tries to hold "kld_sx", conflicting against
+ * module_unload().
+ */
+ linker_file_unload(native_aio_module_handle,
+ LINKER_UNLOAD_NORMAL);
+ }
break;
case MOD_SHUTDOWN:
break;
@@ -1221,4 +1357,3 @@
};
DECLARE_MODULE(linuxaio, linux_aio_mod, SI_SUB_VFS, SI_ORDER_ANY);
-MODULE_DEPEND(linuxaio, aio, 1, 1, 1);
==== //depot/projects/linuxolator/src/sys/kern/vfs_aio.c#3 (text+ko) ====
@@ -139,7 +139,7 @@
SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs,
0, "Preferred number of ready kernel threads for async IO");
-int max_queue_count = MAX_AIO_QUEUE;
+static int max_queue_count = MAX_AIO_QUEUE;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0,
"Maximum number of aio requests to queue, globally");
@@ -172,7 +172,7 @@
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc,
0, "Maximum active aio requests per process (stored in the process)");
-int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
+static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW,
&max_aio_queue_per_proc, 0,
"Maximum queued aio requests per process (stored in the process)");
==== //depot/projects/linuxolator/src/sys/modules/aio/Makefile#3 (text+ko) ====
@@ -5,8 +5,6 @@
KMOD= aio
SRCS= vfs_aio.c opt_vfs_aio.h vnode_if.h
-EXPORT_SYMS= max_aio_queue_per_proc max_queue_count \
- aio_init_aioinfo aio_aqueue aio_suspend \
- aio_cancel aio_return aio_error
+EXPORT_SYMS= aio_init_aioinfo aio_aqueue
.include <bsd.kmod.mk>
More information about the p4-projects
mailing list