PERFORCE change 161467 for review
Marko Zec
zec at FreeBSD.org
Sat May 2 08:44:32 UTC 2009
http://perforce.freebsd.org/chv.cgi?CH=161467
Change 161467 by zec at zec_amdx2 on 2009/05/02 08:44:24
IFC @ 161464
Affected files ...
.. //depot/projects/vimage/src/sys/amd64/amd64/local_apic.c#17 integrate
.. //depot/projects/vimage/src/sys/amd64/amd64/mp_machdep.c#21 integrate
.. //depot/projects/vimage/src/sys/amd64/conf/GENERIC#29 integrate
.. //depot/projects/vimage/src/sys/amd64/isa/clock.c#10 integrate
.. //depot/projects/vimage/src/sys/amd64/linux32/linux32_sysvec.c#15 integrate
.. //depot/projects/vimage/src/sys/compat/linux/linux_futex.c#10 integrate
.. //depot/projects/vimage/src/sys/conf/NOTES#48 integrate
.. //depot/projects/vimage/src/sys/conf/files#65 integrate
.. //depot/projects/vimage/src/sys/conf/options#47 integrate
.. //depot/projects/vimage/src/sys/dev/acpica/acpi.c#19 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-all.h#14 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-disk.c#12 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-queue.c#12 integrate
.. //depot/projects/vimage/src/sys/dev/sk/if_sk.c#9 integrate
.. //depot/projects/vimage/src/sys/dev/usb/wlan/if_ural.c#7 integrate
.. //depot/projects/vimage/src/sys/i386/conf/GENERIC#32 integrate
.. //depot/projects/vimage/src/sys/i386/i386/local_apic.c#17 integrate
.. //depot/projects/vimage/src/sys/i386/i386/mp_machdep.c#20 integrate
.. //depot/projects/vimage/src/sys/i386/linux/linux_sysvec.c#11 integrate
.. //depot/projects/vimage/src/sys/isa/atrtc.c#2 integrate
.. //depot/projects/vimage/src/sys/kern/kern_osd.c#4 integrate
.. //depot/projects/vimage/src/sys/mips/include/pmap.h#4 integrate
.. //depot/projects/vimage/src/sys/mips/mips/pmap.c#11 integrate
.. //depot/projects/vimage/src/sys/modules/Makefile#46 integrate
.. //depot/projects/vimage/src/sys/net/ieee8023ad_lacp.c#11 integrate
.. //depot/projects/vimage/src/sys/net/if_bridge.c#29 integrate
.. //depot/projects/vimage/src/sys/net/if_gif.h#13 integrate
.. //depot/projects/vimage/src/sys/net/if_lagg.c#22 integrate
.. //depot/projects/vimage/src/sys/net/route.c#49 integrate
.. //depot/projects/vimage/src/sys/net/vnet.h#24 integrate
.. //depot/projects/vimage/src/sys/net80211/ieee80211_ddb.c#22 integrate
.. //depot/projects/vimage/src/sys/netinet/igmp.c#37 integrate
.. //depot/projects/vimage/src/sys/netinet/ip_fw.h#30 integrate
.. //depot/projects/vimage/src/sys/netinet/tcp_timewait.c#32 integrate
.. //depot/projects/vimage/src/sys/netinet/vinet.h#57 integrate
.. //depot/projects/vimage/src/sys/netinet6/udp6_usrreq.c#42 integrate
.. //depot/projects/vimage/src/sys/netinet6/vinet6.h#36 integrate
.. //depot/projects/vimage/src/sys/netipsec/vipsec.h#26 integrate
.. //depot/projects/vimage/src/sys/pc98/conf/GENERIC#21 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_atalk.c#3 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_audit.c#6 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_cred.c#3 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_inet.c#13 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_inet6.c#5 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_internal.h#10 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_net.c#8 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_pipe.c#8 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_posix_sem.c#9 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_posix_shm.c#4 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_priv.c#5 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_process.c#11 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_socket.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_system.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_msg.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_sem.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_shm.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_vfs.c#11 integrate
Differences ...
==== //depot/projects/vimage/src/sys/amd64/amd64/local_apic.c#17 (text+ko) ====
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/local_apic.c,v 1.52 2009/02/21 23:15:34 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/local_apic.c,v 1.54 2009/05/01 20:53:37 mav Exp $");
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
@@ -112,7 +112,7 @@
u_long la_stat_ticks;
u_long la_prof_ticks;
/* Include IDT_SYSCALL to make indexing easier. */
- u_int la_ioint_irqs[APIC_NUM_IOINTS + 1];
+ int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static lapics[MAX_APIC_ID + 1];
/* XXX: should thermal be an NMI? */
@@ -254,6 +254,8 @@
lapics[apic_id].la_lvts[i] = lvts[i];
lapics[apic_id].la_lvts[i].lvt_active = 0;
}
+ for (i = 0; i <= APIC_NUM_IOINTS; i++)
+ lapics[apic_id].la_ioint_irqs[i] = -1;
lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
IRQ_TIMER;
@@ -363,11 +365,15 @@
lapic_setup_clock(void)
{
u_long value;
+ int i;
/* Can't drive the timer without a local APIC. */
if (lapic == NULL)
return (0);
+ if (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)
+ return (0);
+
/* Start off with a divisor of 2 (power on reset default). */
lapic_timer_divisor = 2;
@@ -807,7 +813,7 @@
*/
mtx_lock_spin(&icu_lock);
for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
- if (lapics[apic_id].la_ioint_irqs[vector] != 0)
+ if (lapics[apic_id].la_ioint_irqs[vector] != -1)
continue;
lapics[apic_id].la_ioint_irqs[vector] = irq;
mtx_unlock_spin(&icu_lock);
@@ -847,7 +853,7 @@
for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
/* Vector is in use, end run. */
- if (lapics[apic_id].la_ioint_irqs[vector] != 0) {
+ if (lapics[apic_id].la_ioint_irqs[vector] != -1) {
run = 0;
first = 0;
continue;
@@ -932,7 +938,7 @@
sched_bind(td, apic_cpuid(apic_id));
thread_unlock(td);
mtx_lock_spin(&icu_lock);
- lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0;
+ lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
mtx_unlock_spin(&icu_lock);
thread_lock(td);
sched_unbind(td);
@@ -944,11 +950,15 @@
u_int
apic_idt_to_irq(u_int apic_id, u_int vector)
{
+ int irq;
KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
("Vector %u does not map to an IRQ line", vector));
- return (lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]);
+ irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
+ if (irq < 0)
+ irq = 0;
+ return (irq);
}
#ifdef DDB
@@ -974,7 +984,7 @@
db_printf("Interrupts bound to lapic %u\n", apic_id);
for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
irq = lapics[apic_id].la_ioint_irqs[i];
- if (irq == 0 || irq == IRQ_SYSCALL)
+ if (irq == -1 || irq == IRQ_SYSCALL)
continue;
db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
if (irq == IRQ_TIMER)
==== //depot/projects/vimage/src/sys/amd64/amd64/mp_machdep.c#21 (text+ko) ====
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/mp_machdep.c,v 1.304 2009/04/29 06:54:40 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/mp_machdep.c,v 1.305 2009/04/30 22:10:04 jkim Exp $");
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
@@ -292,6 +292,10 @@
static void
topo_probe(void)
{
+ static int cpu_topo_probed = 0;
+
+ if (cpu_topo_probed)
+ return;
logical_cpus = logical_cpus_mask = 0;
if (cpu_high >= 0xb)
@@ -299,9 +303,10 @@
else if (cpu_high)
topo_probe_0x4();
if (cpu_cores == 0)
- cpu_cores = mp_ncpus;
+ cpu_cores = mp_ncpus > 0 ? mp_ncpus : 1;
if (cpu_logical == 0)
cpu_logical = 1;
+ cpu_topo_probed = 1;
}
struct cpu_group *
@@ -313,6 +318,7 @@
* Determine whether any threading flags are
* necessry.
*/
+ topo_probe();
if (cpu_logical > 1 && hyperthreading_cpus)
cg_flags = CG_FLAG_HTT;
else if (cpu_logical > 1)
==== //depot/projects/vimage/src/sys/amd64/conf/GENERIC#29 (text+ko) ====
@@ -16,7 +16,7 @@
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
-# $FreeBSD: src/sys/amd64/conf/GENERIC,v 1.523 2009/04/10 00:40:48 jfv Exp $
+# $FreeBSD: src/sys/amd64/conf/GENERIC,v 1.524 2009/05/01 17:20:16 sam Exp $
cpu HAMMER
ident GENERIC
@@ -292,8 +292,10 @@
device ulpt # Printer
device umass # Disks/Mass storage - Requires scbus and da
device ums # Mouse
+device rum # Ralink Technology RT2501USB wireless NICs
+device uath # Atheros AR5523 wireless NICs
device ural # Ralink Technology RT2500USB wireless NICs
-device rum # Ralink Technology RT2501USB wireless NICs
+device zyd # ZyDAS zb1211/zb1211b wireless NICs
device urio # Diamond Rio 500 MP3 player
# USB Serial devices
device uark # Technologies ARK3116 based serial adapters
==== //depot/projects/vimage/src/sys/amd64/isa/clock.c#10 (text+ko) ====
@@ -33,7 +33,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/isa/clock.c,v 1.243 2008/04/22 19:38:27 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/isa/clock.c,v 1.244 2009/05/01 21:43:04 mav Exp $");
/*
* Routines to handle clock hardware.
@@ -376,6 +376,17 @@
mtx_unlock_spin(&clock_lock);
}
+static void
+i8254_restore(void)
+{
+
+ mtx_lock_spin(&clock_lock);
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
+ outb(TIMER_CNTR0, i8254_real_max_count & 0xff);
+ outb(TIMER_CNTR0, i8254_real_max_count >> 8);
+ mtx_unlock_spin(&clock_lock);
+}
+
/* This is separate from startrtclock() so that it can be called early. */
void
i8254_init(void)
@@ -558,6 +569,14 @@
return(0);
}
+static int
+attimer_resume(device_t dev)
+{
+
+ i8254_restore();
+ return(0);
+}
+
static device_method_t attimer_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, attimer_probe),
@@ -565,7 +584,7 @@
DEVMETHOD(device_detach, bus_generic_detach),
DEVMETHOD(device_shutdown, bus_generic_shutdown),
DEVMETHOD(device_suspend, bus_generic_suspend),
- DEVMETHOD(device_resume, bus_generic_resume),
+ DEVMETHOD(device_resume, attimer_resume),
{ 0, 0 }
};
==== //depot/projects/vimage/src/sys/amd64/linux32/linux32_sysvec.c#15 (text+ko) ====
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.46 2009/04/05 09:27:19 dchagin Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.47 2009/05/01 15:36:02 dchagin Exp $");
#include "opt_compat.h"
#ifndef COMPAT_IA32
@@ -128,7 +128,7 @@
static void linux32_fixlimit(struct rlimit *rl, int which);
extern LIST_HEAD(futex_list, futex) futex_list;
-extern struct sx futex_sx;
+extern struct mtx futex_mtx;
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
@@ -1117,7 +1117,7 @@
mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
sx_init(&emul_shared_lock, "emuldata->shared lock");
LIST_INIT(&futex_list);
- sx_init(&futex_sx, "futex protection lock");
+ mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
linux_proc_exit, NULL, 1000);
linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
@@ -1149,7 +1149,7 @@
linux_device_unregister_handler(*ldhp);
mtx_destroy(&emul_lock);
sx_destroy(&emul_shared_lock);
- sx_destroy(&futex_sx);
+ mtx_destroy(&futex_mtx);
EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
==== //depot/projects/vimage/src/sys/compat/linux/linux_futex.c#10 (text+ko) ====
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/compat/linux/linux_futex.c,v 1.19 2009/04/19 13:48:42 dchagin Exp $");
+__FBSDID("$FreeBSD: src/sys/compat/linux/linux_futex.c,v 1.20 2009/05/01 15:36:02 dchagin Exp $");
#if 0
__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
#endif
@@ -62,419 +62,284 @@
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_emul.h>
+MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
+MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
+
struct futex;
struct waiting_proc {
- struct thread *wp_t;
- struct futex *wp_new_futex;
+ uint32_t wp_flags;
+ struct futex *wp_futex;
TAILQ_ENTRY(waiting_proc) wp_list;
};
+
struct futex {
- void *f_uaddr;
- int f_refcount;
+ struct sx f_lck;
+ uint32_t *f_uaddr;
+ uint32_t f_refcount;
LIST_ENTRY(futex) f_list;
TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
};
LIST_HEAD(futex_list, futex) futex_list;
-struct sx futex_sx; /* this protects the LIST of futexes */
-#define FUTEX_LOCK sx_xlock(&futex_sx)
-#define FUTEX_UNLOCK sx_xunlock(&futex_sx)
+#define FUTEX_LOCK(f) sx_xlock(&(f)->f_lck)
+#define FUTEX_UNLOCK(f) sx_xunlock(&(f)->f_lck)
+#define FUTEX_INIT(f) sx_init_flags(&(f)->f_lck, "ftlk", 0)
+#define FUTEX_DESTROY(f) sx_destroy(&(f)->f_lck)
+#define FUTEX_ASSERT_LOCKED(f) sx_assert(&(f)->f_lck, SA_XLOCKED)
-#define FUTEX_LOCKED 1
-#define FUTEX_UNLOCKED 0
+struct mtx futex_mtx; /* protects the futex list */
+#define FUTEXES_LOCK mtx_lock(&futex_mtx)
+#define FUTEXES_UNLOCK mtx_unlock(&futex_mtx)
-#define FUTEX_SYSTEM_LOCK mtx_lock(&Giant)
-#define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant)
+/* flags for futex_get() */
+#define FUTEX_CREATE_WP 0x1 /* create waiting_proc */
+#define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */
+#define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */
-static struct futex *futex_get(void *, int);
-static void futex_put(struct futex *);
-static int futex_sleep(struct futex *, struct thread *, unsigned long);
-static int futex_wake(struct futex *, int, struct futex *, int);
-static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr);
+/* wp_flags */
+#define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list
+ * of futex where thread sleep to wp_list
+ * of another futex.
+ */
+#define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex
+ * wp_list to prevent double wakeup.
+ */
/* support.s */
-int futex_xchgl(int oparg, caddr_t uaddr, int *oldval);
-int futex_addl(int oparg, caddr_t uaddr, int *oldval);
-int futex_orl(int oparg, caddr_t uaddr, int *oldval);
-int futex_andl(int oparg, caddr_t uaddr, int *oldval);
-int futex_xorl(int oparg, caddr_t uaddr, int *oldval);
+int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
-int
-linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
+static void
+futex_put(struct futex *f, struct waiting_proc *wp)
{
- int val;
- int ret;
- struct l_timespec timeout = {0, 0};
- int error = 0;
- struct futex *f;
- struct futex *newf;
- int timeout_hz;
- struct timeval tv = {0, 0};
- struct futex *f2;
- int op_ret;
- struct linux_emuldata *em;
+
+ FUTEX_ASSERT_LOCKED(f);
+ if (wp != NULL) {
+ if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
+ TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+ free(wp, M_FUTEX_WP);
+ }
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf(ARGS(futex, "%p, %i, %i, *, %p, %i"), args->uaddr, args->op,
- args->val, args->uaddr2, args->val3);
-#endif
+ FUTEXES_LOCK;
+ if (--f->f_refcount == 0) {
+ LIST_REMOVE(f, f_list);
+ FUTEXES_UNLOCK;
+ FUTEX_UNLOCK(f);
- /*
- * Our implementation provides only privates futexes. Most of the apps
- * should use private futexes but don't claim so. Therefore we treat
- * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
- * in most cases (ie. when futexes are not shared on file descriptor
- * or between different processes.).
- */
- args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
+ FUTEX_DESTROY(f);
+ free(f, M_FUTEX);
+ return;
+ }
- switch (args->op) {
- case LINUX_FUTEX_WAIT:
- FUTEX_SYSTEM_LOCK;
+ FUTEXES_UNLOCK;
+ FUTEX_UNLOCK(f);
+}
- if ((error = copyin(args->uaddr,
- &val, sizeof(val))) != 0) {
- FUTEX_SYSTEM_UNLOCK;
- return error;
- }
+static int
+futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
+{
+ struct futex *f, *tmpf;
- if (val != args->val) {
- FUTEX_SYSTEM_UNLOCK;
- return EWOULDBLOCK;
- }
+ *newf = tmpf = NULL;
- if (args->timeout != NULL) {
- if ((error = copyin(args->timeout,
- &timeout, sizeof(timeout))) != 0) {
- FUTEX_SYSTEM_UNLOCK;
- return error;
+retry:
+ FUTEXES_LOCK;
+ LIST_FOREACH(f, &futex_list, f_list) {
+ if (f->f_uaddr == uaddr) {
+ if (tmpf != NULL) {
+ FUTEX_UNLOCK(tmpf);
+ FUTEX_DESTROY(tmpf);
+ free(tmpf, M_FUTEX);
+ }
+ if (flags & FUTEX_DONTEXISTS) {
+ FUTEXES_UNLOCK;
+ return (EINVAL);
}
- }
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAIT %d: val = %d, uaddr = %p, "
- "*uaddr = %d, timeout = %d.%09lu\n",
- td->td_proc->p_pid, args->val,
- args->uaddr, val, timeout.tv_sec,
- (unsigned long)timeout.tv_nsec);
-#endif
- tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000;
- timeout_hz = tvtohz(&tv);
+ /*
+ * Increment refcount of the found futex to
+ * prevent it from deallocation before FUTEX_LOCK()
+ */
+ ++f->f_refcount;
+ FUTEXES_UNLOCK;
- if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
- timeout_hz = 0;
- /*
- * If the user process requests a non null timeout,
- * make sure we do not turn it into an infinite
- * timeout because timeout_hz gets null.
- *
- * We use a minimal timeout of 1/hz. Maybe it would
- * make sense to just return ETIMEDOUT without sleeping.
- */
- if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
- (timeout_hz == 0))
- timeout_hz = 1;
-
-
- f = futex_get(args->uaddr, FUTEX_UNLOCKED);
- ret = futex_sleep(f, td, timeout_hz);
- futex_put(f);
-
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAIT %d: uaddr = %p, "
- "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret);
-#endif
-
- FUTEX_SYSTEM_UNLOCK;
- switch (ret) {
- case EWOULDBLOCK: /* timeout */
- return ETIMEDOUT;
- break;
- case EINTR: /* signal */
- return EINTR;
- break;
- case 0: /* FUTEX_WAKE received */
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAIT %d: uaddr = %p, "
- "got FUTEX_WAKE\n",
- td->td_proc->p_pid, args->uaddr);
-#endif
- return 0;
- break;
- default:
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAIT: unexpected ret = %d\n",
- ret);
-#endif
- break;
+ FUTEX_LOCK(f);
+ *newf = f;
+ return (0);
}
+ }
- /* NOTREACHED */
- break;
+ if (flags & FUTEX_DONTCREATE) {
+ FUTEXES_UNLOCK;
+ return (0);
+ }
- case LINUX_FUTEX_WAKE:
- FUTEX_SYSTEM_LOCK;
+ if (tmpf == NULL) {
+ FUTEXES_UNLOCK;
+ tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
+ tmpf->f_uaddr = uaddr;
+ tmpf->f_refcount = 1;
+ FUTEX_INIT(tmpf);
+ TAILQ_INIT(&tmpf->f_waiting_proc);
/*
- * XXX: Linux is able to cope with different addresses
- * corresponding to the same mapped memory in the sleeping
- * and waker process(es).
+ * Lock the new futex before an insert into the futex_list
+ * to prevent futex usage by other.
*/
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n",
- td->td_proc->p_pid, args->uaddr, args->val);
-#endif
- f = futex_get(args->uaddr, FUTEX_UNLOCKED);
- td->td_retval[0] = futex_wake(f, args->val, NULL, 0);
- futex_put(f);
+ FUTEX_LOCK(tmpf);
+ goto retry;
+ }
- FUTEX_SYSTEM_UNLOCK;
- break;
+ LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
+ FUTEXES_UNLOCK;
- case LINUX_FUTEX_CMP_REQUEUE:
- FUTEX_SYSTEM_LOCK;
+ *newf = tmpf;
+ return (0);
+}
- if ((error = copyin(args->uaddr,
- &val, sizeof(val))) != 0) {
- FUTEX_SYSTEM_UNLOCK;
- return error;
- }
+static int
+futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
+ uint32_t flags)
+{
+ int error;
- if (val != args->val3) {
- FUTEX_SYSTEM_UNLOCK;
- return EAGAIN;
- }
+ if (flags & FUTEX_CREATE_WP) {
+ *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
+ (*wp)->wp_flags = 0;
+ }
+ error = futex_get0(uaddr, f, flags);
+ if (error) {
+ if (flags & FUTEX_CREATE_WP)
+ free(*wp, M_FUTEX_WP);
+ return (error);
+ }
+ if (flags & FUTEX_CREATE_WP) {
+ TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
+ (*wp)->wp_futex = *f;
+ }
- f = futex_get(args->uaddr, FUTEX_UNLOCKED);
- newf = futex_get(args->uaddr2, FUTEX_UNLOCKED);
- td->td_retval[0] = futex_wake(f, args->val, newf,
- (int)(unsigned long)args->timeout);
- futex_put(f);
- futex_put(newf);
-
- FUTEX_SYSTEM_UNLOCK;
- break;
-
- case LINUX_FUTEX_WAKE_OP:
- FUTEX_SYSTEM_LOCK;
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, "
- "val = %x, uaddr2 = %p, val3 = %x\n",
- td->td_proc->p_pid, args->uaddr, args->op,
- args->val, args->uaddr2, args->val3);
-#endif
- f = futex_get(args->uaddr, FUTEX_UNLOCKED);
- f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED);
-
- /*
- * This function returns positive number as results and
- * negative as errors
- */
- op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("futex_atomic_op ret %d\n", op_ret);
-#endif
- if (op_ret < 0) {
- /* XXX: We don't handle the EFAULT yet. */
- if (op_ret != -EFAULT) {
- futex_put(f);
- futex_put(f2);
- FUTEX_SYSTEM_UNLOCK;
- return (-op_ret);
- }
-
- futex_put(f);
- futex_put(f2);
-
- FUTEX_SYSTEM_UNLOCK;
- return (EFAULT);
- }
-
- ret = futex_wake(f, args->val, NULL, 0);
- futex_put(f);
- if (op_ret > 0) {
- op_ret = 0;
- /*
- * Linux abuses the address of the timespec parameter
- * as the number of retries.
- */
- op_ret += futex_wake(f2,
- (int)(unsigned long)args->timeout, NULL, 0);
- ret += op_ret;
- }
- futex_put(f2);
- td->td_retval[0] = ret;
-
- FUTEX_SYSTEM_UNLOCK;
- break;
-
- case LINUX_FUTEX_LOCK_PI:
- /* not yet implemented */
- return (ENOSYS);
-
- case LINUX_FUTEX_UNLOCK_PI:
- /* not yet implemented */
- return (ENOSYS);
-
- case LINUX_FUTEX_TRYLOCK_PI:
- /* not yet implemented */
- return (ENOSYS);
-
- case LINUX_FUTEX_REQUEUE:
-
- /*
- * Glibc does not use this operation since Jun 2004 (2.3.3),
- * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
- * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
- * FUTEX_REQUEUE returned EINVAL.
- */
- em = em_find(td->td_proc, EMUL_DONTLOCK);
- if (em->used_requeue == 0) {
- printf("linux(%s (%d)) sys_futex: "
- "unsupported futex_requeue op\n",
- td->td_proc->p_comm, td->td_proc->p_pid);
- em->used_requeue = 1;
- }
- return (EINVAL);
-
- default:
- printf("linux_sys_futex: unknown op %d\n",
- args->op);
- return (ENOSYS);
- }
- return (0);
+ return (error);
}
-static struct futex *
-futex_get(void *uaddr, int locked)
+static int
+futex_sleep(struct futex *f, struct waiting_proc *wp, unsigned long timeout)
{
- struct futex *f;
+ int error;
- if (locked == FUTEX_UNLOCKED)
- FUTEX_LOCK;
- LIST_FOREACH(f, &futex_list, f_list) {
- if (f->f_uaddr == uaddr) {
- f->f_refcount++;
- if (locked == FUTEX_UNLOCKED)
- FUTEX_UNLOCK;
- return f;
- }
+ FUTEX_ASSERT_LOCKED(f);
+ error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
+ if (wp->wp_flags & FUTEX_WP_REQUEUED) {
+ KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
+ futex_put(f, NULL);
+ f = wp->wp_futex;
+ FUTEX_LOCK(f);
}
- f = malloc(sizeof(*f), M_LINUX, M_WAITOK);
- f->f_uaddr = uaddr;
- f->f_refcount = 1;
- TAILQ_INIT(&f->f_waiting_proc);
- LIST_INSERT_HEAD(&futex_list, f, f_list);
- if (locked == FUTEX_UNLOCKED)
- FUTEX_UNLOCK;
-
- return f;
+ futex_put(f, wp);
+ return (error);
}
-static void
-futex_put(f)
- struct futex *f;
+static int
+futex_wake(struct futex *f, int n)
{
- FUTEX_LOCK;
- f->f_refcount--;
- if (f->f_refcount == 0) {
- LIST_REMOVE(f, f_list);
- free(f, M_LINUX);
+ struct waiting_proc *wp, *wpt;
+ int count = 0;
+
+ FUTEX_ASSERT_LOCKED(f);
+ TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
+ wp->wp_flags |= FUTEX_WP_REMOVED;
+ TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+ wakeup_one(wp);
+ if (++count == n)
+ break;
}
- FUTEX_UNLOCK;
- return;
+ return (count);
}
static int
-futex_sleep(struct futex *f, struct thread *td, unsigned long timeout)
+futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
{
- struct waiting_proc *wp;
- int ret;
+ struct waiting_proc *wp, *wpt;
+ int count = 0;
- wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK);
- wp->wp_t = td;
- wp->wp_new_futex = NULL;
- FUTEX_LOCK;
- TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
- FUTEX_UNLOCK;
+ FUTEX_ASSERT_LOCKED(f);
+ FUTEX_ASSERT_LOCKED(f2);
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX --> %d tlseep timeout = %ld\n",
- td->td_proc->p_pid, timeout);
-#endif
- ret = tsleep(wp, PCATCH | PZERO, "linuxfutex", timeout);
-#ifdef DEBUG
- if (ldebug(sys_futex))
- printf("FUTEX -> %d tsleep returns %d\n",
- td->td_proc->p_pid, ret);
-#endif
+ TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
+ if (++count <= n) {
+ wp->wp_flags |= FUTEX_WP_REMOVED;
+ TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+ wakeup_one(wp);
+ } else {
+ wp->wp_flags |= FUTEX_WP_REQUEUED;
+ /* Move wp to wp_list of f2 futex */
+ TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+ TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
- FUTEX_LOCK;
- TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
- FUTEX_UNLOCK;
-
- /* if we got woken up in futex_wake */
- if ((ret == 0) && (wp->wp_new_futex != NULL)) {
- /* suspend us on the new futex */
- ret = futex_sleep(wp->wp_new_futex, td, timeout);
- /* and release the old one */
- futex_put(wp->wp_new_futex);
+ /*
+ * Thread which sleeps on wp after waking should
+ * acquire f2 lock, so increment refcount of f2 to
+ * prevent it from premature deallocation.
+ */
+ wp->wp_futex = f2;
+ FUTEXES_LOCK;
+ ++f2->f_refcount;
+ FUTEXES_UNLOCK;
+ if (count - n >= n2)
+ break;
+ }
}
- free(wp, M_LINUX);
-
- return ret;
+ return (count);
}
static int
-futex_wake(struct futex *f, int n, struct futex *newf, int n2)
+futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts)
{
- struct waiting_proc *wp;
- int count;
+ struct l_timespec timeout = {0, 0};
+ struct timeval tv = {0, 0};
+ int timeout_hz;
+ int error;
+
+ if (ts != NULL) {
+ error = copyin(ts, &timeout, sizeof(timeout));
+ if (error)
+ return (error);
+ }
+
+ tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000;
+ timeout_hz = tvtohz(&tv);
+
+ if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
+ timeout_hz = 0;
/*
- * Linux is very strange it wakes up N threads for
- * all operations BUT requeue ones where its N+1
- * mimic this.
+ * If the user process requests a non null timeout,
+ * make sure we do not turn it into an infinite
+ * timeout because timeout_hz gets null.
+ *
+ * We use a minimal timeout of 1/hz. Maybe it would
+ * make sense to just return ETIMEDOUT without sleeping.
*/
- count = newf ? 0 : 1;
+ if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
+ (timeout_hz == 0))
+ timeout_hz = 1;
- FUTEX_LOCK;
- TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) {
- if (count <= n) {
- wakeup_one(wp);
- count++;
- } else {
- if (newf != NULL) {
- /* futex_put called after tsleep */
- wp->wp_new_futex = futex_get(newf->f_uaddr,
- FUTEX_LOCKED);
- wakeup_one(wp);
- if (count - n >= n2)
- break;
- }
- }
- }
- FUTEX_UNLOCK;
+ error = futex_sleep(f, wp, timeout_hz);
+ if (error == EWOULDBLOCK)
+ error = ETIMEDOUT;
- return count;
+ return (error);
}
static int
-futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr)
+futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -536,14 +401,237 @@
}
int
+linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
+{
+ int op_ret, val, ret, nrwake;
+ struct linux_emuldata *em;
+ struct waiting_proc *wp;
+ struct futex *f, *f2;
+ int error = 0;
+
+ /*
+ * Our implementation provides only privates futexes. Most of the apps
+ * should use private futexes but don't claim so. Therefore we treat
+ * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
+ * in most cases (ie. when futexes are not shared on file descriptor
+ * or between different processes.).
+ */
+ args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
+
+ switch (args->op) {
+ case LINUX_FUTEX_WAIT:
+
+#ifdef DEBUG
+ if (ldebug(sys_futex))
+ printf(ARGS(sys_futex, "futex_wait val %d uaddr %p"),
+ args->val, args->uaddr);
+#endif
+ error = futex_get(args->uaddr, &wp, &f, FUTEX_CREATE_WP);
+ if (error)
+ return (error);
+ error = copyin(args->uaddr, &val, sizeof(val));
+ if (error) {
+ futex_put(f, wp);
+ return (error);
+ }
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects
mailing list