svn commit: r185162 - in head: . sys/amd64/include sys/arm/include sys/conf sys/dev/bce sys/dev/cxgb sys/dev/cxgb/sys sys/dev/cxgb/ulp/iw_cxgb sys/dev/mxge sys/dev/nxge sys/i386/include sys/i386/in...

Kostik Belousov kostikbel at gmail.com
Sun Nov 23 07:41:48 PST 2008


On Sun, Nov 23, 2008 at 12:51:58AM +0000, Kip Macy wrote:
> On Sat, Nov 22, 2008 at 11:08 PM, Scott Long <scottl at samsco.org> wrote:
> > Kostik Belousov wrote:
> >>
> >> On Sat, Nov 22, 2008 at 03:05:22PM -0700, Scott Long wrote:
> >>>
> >>> A neat hack would be for the kernel linker to scan the text and do a
> >>> drop-in replacement of the opcode that is appropriate for the platform.
> >>> I can't see how a CPU_XXX definition would work because it's just a
> >>> compile time construct, one that can be included with any kernel
> >>> compile.
> >>
> >> Yes, it is possible to do that. Less drastic change is to directly
> >> check features. I moved slow code to separate section to eliminate
> >> unconditional jump in fast path.
> >> Only compile-tested.
> >>
> >
> > As long as it works, I think it's a step in the right direction; I'm
> > assuming that cpu_feature is a symbol filled in at runtime and not a
> > macro for the cpuid instruction, right?
> >
> > Scott
> >
> 
> i386/include/md_var.h:
> <..>
> extern  u_int   cpu_exthigh;
> extern  u_int   cpu_feature;
> extern  u_int   cpu_feature2;
> extern  u_int   amd_feature;
> extern  u_int   amd_feature2;
> <...>
> 
> I'm not thrilled with it, but we can revisit the issue if it makes a
> measurable difference on someone's workload.

Below is the updated patch. It includes changes made after private comments
by bde@ and uses symbolic definitions for the bits in the features words.
I thought about accessing a per-CPU word for serialized instruction in the
slow path, but decided that it does not beneficial.

diff --git a/sys/conf/ldscript.i386 b/sys/conf/ldscript.i386
index a94f32f..49d9636 100644
--- a/sys/conf/ldscript.i386
+++ b/sys/conf/ldscript.i386
@@ -45,6 +45,7 @@ SECTIONS
   .text      :
   {
     *(.text)
+    *(.text.offpath)
     *(.stub)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
diff --git a/sys/dev/iir/iir.c b/sys/dev/iir/iir.c
index cbcb449..a01b685 100644
--- a/sys/dev/iir/iir.c
+++ b/sys/dev/iir/iir.c
@@ -403,7 +403,7 @@ iir_init(struct gdt_softc *gdt)
 		gdt->oem_name[7]='\0';
 	} else {
 		/* Old method, based on PCI ID */
-		if (gdt->sc_vendor == INTEL_VENDOR_ID)
+		if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR)
             strcpy(gdt->oem_name,"Intel  ");
         else 
        	    strcpy(gdt->oem_name,"ICP    ");
@@ -1447,7 +1447,7 @@ iir_action( struct cam_sim *sim, union ccb *ccb )
                   (bus == gdt->sc_virt_bus ? 127 : gdt->sc_bus_id[bus]);
               cpi->base_transfer_speed = 3300;
               strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
-              if (gdt->sc_vendor == INTEL_VENDOR_ID)
+              if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR)
                   strncpy(cpi->hba_vid, "Intel Corp.", HBA_IDLEN);
               else
                   strncpy(cpi->hba_vid, "ICP vortex ", HBA_IDLEN);
diff --git a/sys/dev/iir/iir.h b/sys/dev/iir/iir.h
index dca493d..dbae7d2 100644
--- a/sys/dev/iir/iir.h
+++ b/sys/dev/iir/iir.h
@@ -63,7 +63,7 @@
 #define GDT_DEVICE_ID_MAX       0x2ff
 #define GDT_DEVICE_ID_NEWRX     0x300
 
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IIR     0x8086
 #define INTEL_DEVICE_ID_IIR     0x600
 
 #define GDT_MAXBUS              6       /* XXX Why not 5? */
diff --git a/sys/dev/iir/iir_ctrl.c b/sys/dev/iir/iir_ctrl.c
index e5fbac9..2af2aad 100644
--- a/sys/dev/iir/iir_ctrl.c
+++ b/sys/dev/iir/iir_ctrl.c
@@ -278,7 +278,7 @@ iir_ioctl(struct cdev *dev, u_long cmd, caddr_t cmdarg, int flags, d_thread_t *
                 return (ENXIO);
             /* only RP controllers */
             p->ext_type = 0x6000 | gdt->sc_device;
-            if (gdt->sc_vendor == INTEL_VENDOR_ID) {
+            if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR) {
                 p->oem_id = OEM_ID_INTEL;
                 p->type = 0xfd;
                 /* new -> subdevice into ext_type */
diff --git a/sys/dev/iir/iir_pci.c b/sys/dev/iir/iir_pci.c
index 528b7d7..0f420a1 100644
--- a/sys/dev/iir/iir_pci.c
+++ b/sys/dev/iir/iir_pci.c
@@ -164,7 +164,7 @@ MODULE_DEPEND(iir, cam, 1, 1, 1);
 static int
 iir_pci_probe(device_t dev)
 {
-    if (pci_get_vendor(dev) == INTEL_VENDOR_ID &&
+    if (pci_get_vendor(dev) == INTEL_VENDOR_ID_IIR &&
         pci_get_device(dev) == INTEL_DEVICE_ID_IIR) {
         device_set_desc(dev, "Intel Integrated RAID Controller");
         return (BUS_PROBE_DEFAULT);
diff --git a/sys/dev/ixgb/if_ixgb.c b/sys/dev/ixgb/if_ixgb.c
index c1d6858..df2f51a 100644
--- a/sys/dev/ixgb/if_ixgb.c
+++ b/sys/dev/ixgb/if_ixgb.c
@@ -72,8 +72,8 @@ char            ixgb_copyright[] = "Copyright (c) 2001-2004 Intel Corporation.";
 static ixgb_vendor_info_t ixgb_vendor_info_array[] =
 {
 	/* Intel(R) PRO/10000 Network Connection */
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
 };
diff --git a/sys/dev/ixgb/ixgb_ids.h b/sys/dev/ixgb/ixgb_ids.h
index a224f63..aa7dab8 100644
--- a/sys/dev/ixgb/ixgb_ids.h
+++ b/sys/dev/ixgb/ixgb_ids.h
@@ -40,7 +40,7 @@
 ** The Device and Vendor IDs for 10 Gigabit MACs
 **********************************************************************/
 
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IXGB    0x8086
 #define INTEL_SUBVENDOR_ID      0x8086
 
 
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index f6bcf0c..dbdc945 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,21 +32,47 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-
-#if defined(I686_CPU)
-#define mb()	__asm__ __volatile__ ("mfence;": : :"memory")
-#define wmb()	__asm__ __volatile__ ("sfence;": : :"memory")
-#define rmb()	__asm__ __volatile__ ("lfence;": : :"memory")
-#else
-/*
- * do we need a serializing instruction?
- */
-#define mb()
-#define wmb()
-#define rmb()
+#if defined(_KERNEL)
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#define	mb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	mfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
+#define	wmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	sfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_XMM) : "memory")
+#define	rmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	lfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
 #endif
 
-
 /*
  * Various simple operations on memory, each of which is atomic in the
  * presence of interrupts and multiple processors.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/svn-src-head/attachments/20081123/12130489/attachment.pgp


More information about the svn-src-head mailing list