svn commit: r185162 - in head: . sys/amd64/include sys/arm/include sys/conf sys/dev/bce sys/dev/cxgb sys/dev/cxgb/sys sys/dev/cxgb/ulp/iw_cxgb sys/dev/mxge sys/dev/nxge sys/i386/include sys/i386/in...

Kostik Belousov kostikbel at gmail.com
Mon Dec 1 13:42:25 PST 2008


On Mon, Dec 01, 2008 at 02:07:06PM -0500, John Baldwin wrote:
> On Sunday 23 November 2008 10:41:38 am Kostik Belousov wrote:
> > On Sun, Nov 23, 2008 at 12:51:58AM +0000, Kip Macy wrote:
> > > On Sat, Nov 22, 2008 at 11:08 PM, Scott Long <scottl at samsco.org> wrote:
> > > > Kostik Belousov wrote:
> > > >>
> > > >> On Sat, Nov 22, 2008 at 03:05:22PM -0700, Scott Long wrote:
> > > >>>
> > > >>> A neat hack would be for the kernel linker to scan the text and do a
> > > >>> drop-in replacement of the opcode that is appropriate for the 
> platform.
> > > >>> I can't see how a CPU_XXX definition would work because it's just a
> > > >>> compile time construct, one that can be included with any kernel
> > > >>> compile.
> > > >>
> > > >> Yes, it is possible to do that. Less drastic change is to directly
> > > >> check features. I moved slow code to separate section to eliminate
> > > >> unconditional jump in fast path.
> > > >> Only compile-tested.
> > > >>
> > > >
> > > > As long as it works, I think it's a step in the right direction; I'm
> > > > assuming that cpu_feature is a symbol filled in at runtime and not a
> > > > macro for the cpuid instruction, right?
> > > >
> > > > Scott
> > > >
> > > 
> > > i386/include/md_var.h:
> > > <..>
> > > extern  u_int   cpu_exthigh;
> > > extern  u_int   cpu_feature;
> > > extern  u_int   cpu_feature2;
> > > extern  u_int   amd_feature;
> > > extern  u_int   amd_feature2;
> > > <...>
> > > 
> > > I'm not thrilled with it, but we can revisit the issue if it makes a
> > > measurable difference on someone's workload.
> > 
> > Below is the updated patch. It includes changes made after private comments
> > by bde@ and uses symbolic definitions for the bits in the features words.
> > I thought about accessing a per-CPU word for serialized instruction in the
> > slow path, but decided that it does not beneficial.\
> 
> Is the branch really better than just doing what the atomic operations for 
> mutexes, etc. do and just use 'lock addl $0,%esp' for a barrier in all cases 
> on i386 and only bother with using the fancier instructions on amd64?  Even 
> amd64 doesn't use *fence yet for the atomic ops actually.  I have had a patch 
> to use it for years, but during testing there was no discernable difference 
> between the existing 'lock addl' approach vs '*fence'.  I'd much rather just 
> use 486 code for all i386 machines than add a branch, esp. if 
> the "optimization" the branch is doing isn't an actual optimization.

I do not insist. The branch is done only for arches that has no fence
instructions. The section for the slow code is put after the main text,
that should, according to Intel documentation, be predicted as not taken
for the first execution.

For reference, below is the latest version of the patch. I postponed
the commit, in particular, because it touches ixgb, and Jack did
not responded.

diff --git a/sys/conf/ldscript.i386 b/sys/conf/ldscript.i386
index a94f32f..49d9636 100644
--- a/sys/conf/ldscript.i386
+++ b/sys/conf/ldscript.i386
@@ -45,6 +45,7 @@ SECTIONS
   .text      :
   {
     *(.text)
+    *(.text.offpath)
     *(.stub)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
diff --git a/sys/dev/iir/iir.c b/sys/dev/iir/iir.c
index cbcb449..a01b685 100644
--- a/sys/dev/iir/iir.c
+++ b/sys/dev/iir/iir.c
@@ -403,7 +403,7 @@ iir_init(struct gdt_softc *gdt)
 		gdt->oem_name[7]='\0';
 	} else {
 		/* Old method, based on PCI ID */
-		if (gdt->sc_vendor == INTEL_VENDOR_ID)
+		if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR)
             strcpy(gdt->oem_name,"Intel  ");
         else 
        	    strcpy(gdt->oem_name,"ICP    ");
@@ -1447,7 +1447,7 @@ iir_action( struct cam_sim *sim, union ccb *ccb )
                   (bus == gdt->sc_virt_bus ? 127 : gdt->sc_bus_id[bus]);
               cpi->base_transfer_speed = 3300;
               strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
-              if (gdt->sc_vendor == INTEL_VENDOR_ID)
+              if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR)
                   strncpy(cpi->hba_vid, "Intel Corp.", HBA_IDLEN);
               else
                   strncpy(cpi->hba_vid, "ICP vortex ", HBA_IDLEN);
diff --git a/sys/dev/iir/iir.h b/sys/dev/iir/iir.h
index dca493d..dbae7d2 100644
--- a/sys/dev/iir/iir.h
+++ b/sys/dev/iir/iir.h
@@ -63,7 +63,7 @@
 #define GDT_DEVICE_ID_MAX       0x2ff
 #define GDT_DEVICE_ID_NEWRX     0x300
 
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IIR     0x8086
 #define INTEL_DEVICE_ID_IIR     0x600
 
 #define GDT_MAXBUS              6       /* XXX Why not 5? */
diff --git a/sys/dev/iir/iir_ctrl.c b/sys/dev/iir/iir_ctrl.c
index e5fbac9..2af2aad 100644
--- a/sys/dev/iir/iir_ctrl.c
+++ b/sys/dev/iir/iir_ctrl.c
@@ -278,7 +278,7 @@ iir_ioctl(struct cdev *dev, u_long cmd, caddr_t cmdarg, int flags, d_thread_t *
                 return (ENXIO);
             /* only RP controllers */
             p->ext_type = 0x6000 | gdt->sc_device;
-            if (gdt->sc_vendor == INTEL_VENDOR_ID) {
+            if (gdt->sc_vendor == INTEL_VENDOR_ID_IIR) {
                 p->oem_id = OEM_ID_INTEL;
                 p->type = 0xfd;
                 /* new -> subdevice into ext_type */
diff --git a/sys/dev/iir/iir_pci.c b/sys/dev/iir/iir_pci.c
index 528b7d7..0f420a1 100644
--- a/sys/dev/iir/iir_pci.c
+++ b/sys/dev/iir/iir_pci.c
@@ -164,7 +164,7 @@ MODULE_DEPEND(iir, cam, 1, 1, 1);
 static int
 iir_pci_probe(device_t dev)
 {
-    if (pci_get_vendor(dev) == INTEL_VENDOR_ID &&
+    if (pci_get_vendor(dev) == INTEL_VENDOR_ID_IIR &&
         pci_get_device(dev) == INTEL_DEVICE_ID_IIR) {
         device_set_desc(dev, "Intel Integrated RAID Controller");
         return (BUS_PROBE_DEFAULT);
diff --git a/sys/dev/ixgb/if_ixgb.c b/sys/dev/ixgb/if_ixgb.c
index c1d6858..df2f51a 100644
--- a/sys/dev/ixgb/if_ixgb.c
+++ b/sys/dev/ixgb/if_ixgb.c
@@ -72,8 +72,8 @@ char            ixgb_copyright[] = "Copyright (c) 2001-2004 Intel Corporation.";
 static ixgb_vendor_info_t ixgb_vendor_info_array[] =
 {
 	/* Intel(R) PRO/10000 Network Connection */
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
 };
diff --git a/sys/dev/ixgb/ixgb_ids.h b/sys/dev/ixgb/ixgb_ids.h
index a224f63..aa7dab8 100644
--- a/sys/dev/ixgb/ixgb_ids.h
+++ b/sys/dev/ixgb/ixgb_ids.h
@@ -40,7 +40,7 @@
 ** The Device and Vendor IDs for 10 Gigabit MACs
 **********************************************************************/
 
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IXGB    0x8086
 #define INTEL_SUBVENDOR_ID      0x8086
 
 
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index f6bcf0c..dbdc945 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,21 +32,47 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-
-#if defined(I686_CPU)
-#define mb()	__asm__ __volatile__ ("mfence;": : :"memory")
-#define wmb()	__asm__ __volatile__ ("sfence;": : :"memory")
-#define rmb()	__asm__ __volatile__ ("lfence;": : :"memory")
-#else
-/*
- * do we need a serializing instruction?
- */
-#define mb()
-#define wmb()
-#define rmb()
+#if defined(_KERNEL)
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#define	mb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	mfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
+#define	wmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	sfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_XMM) : "memory")
+#define	rmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	lfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
 #endif
 
-
 /*
  * Various simple operations on memory, each of which is atomic in the
  * presence of interrupts and multiple processors.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/svn-src-all/attachments/20081201/b038063e/attachment.pgp


More information about the svn-src-all mailing list