[PATCH] Simplify in*() and out*() functions of AMD64 and i386

Christoph Mallon christoph.mallon at gmx.de
Wed Apr 8 13:25:05 PDT 2009


Hi amd64@ and i386@,

attached is a patch which simplifies the in*() and out*() functions for
I/O port access on AMD64 and i386. It removes the unnecessary case
distinction in inb() and outb(), which was used to generate better code
for ports < 256. The distinction is unnecessary because GCC provides an
asm input constraint for exactly this purpose ("N"). The stale comment,
which claims that no such constraint exists, is removed, too. The
{in,out}{w,l}() functions now use this constraint as well. They had no
special case before, so better code is now generated for them.
Furthermore, the unnecessary "cld" is removed from {in,out}s{b,w,l}(),
because the ABI guarantees that the direction flag is clear. All in all,
the code for in/out gets a bit simpler.


Comments are welcome

	Christoph
-------------- next part --------------
Index: sys/i386/include/cpufunc.h
===================================================================
--- sys/i386/include/cpufunc.h	(Revision 190841)
+++ sys/i386/include/cpufunc.h	(Arbeitskopie)
@@ -170,177 +170,97 @@
 	__asm __volatile("hlt");
 }
 
-#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3
-
-#define	inb(port)		inbv(port)
-#define	outb(port, data)	outbv(port, data)
-
-#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */
-
-/*
- * The following complications are to get around gcc not having a
- * constraint letter for the range 0..255.  We still put "d" in the
- * constraint because "i" isn't a valid constraint when the port
- * isn't constant.  This only matters for -O0 because otherwise
- * the non-working version gets optimized away.
- * 
- * Use an expression-statement instead of a conditional expression
- * because gcc-2.6.0 would promote the operands of the conditional
- * and produce poor code for "if ((inb(var) & const1) == const2)".
- *
- * The unnecessary test `(port) < 0x10000' is to generate a warning if
- * the `port' has type u_short or smaller.  Such types are pessimal.
- * This actually only works for signed types.  The range check is
- * careful to avoid generating warnings.
- */
-#define	inb(port) __extension__ ({					\
-	u_char	_data;							\
-	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
-	    && (port) < 0x10000)					\
-		_data = inbc(port);					\
-	else								\
-		_data = inbv(port);					\
-	_data; })
-
-#define	outb(port, data) (						\
-	__builtin_constant_p(port) && ((port) & 0xffff) < 0x100		\
-	&& (port) < 0x10000						\
-	? outbc(port, data) : outbv(port, data))
-
-static __inline u_char
-inbc(u_int port)
+static inline u_char
+inb(u_short port)
 {
-	u_char	data;
-
-	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
-	return (data);
+	u_char data;
+	__asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port));
+	return data;
 }
 
-static __inline void
-outbc(u_int port, u_char data)
+static inline u_int
+inl(u_short port)
 {
-	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
-}
-
-#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P  && __GNUCLIKE_ASM >= 3*/
-
-static __inline u_char
-inbv(u_int port)
-{
-	u_char	data;
-	/*
-	 * We use %%dx and not %1 here because i/o is done at %dx and not at
-	 * %edx, while gcc generates inferior code (movw instead of movl)
-	 * if we tell it to load (u_short) port.
-	 */
-	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+	u_int data;
+	__asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port));
 	return (data);
 }
 
-static __inline u_int
-inl(u_int port)
+static inline void
+insb(u_short port, void *addr, size_t cnt)
 {
-	u_int	data;
-
-	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
-	return (data);
+	__asm volatile("rep; insb"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
-static __inline void
-insb(u_int port, void *addr, size_t cnt)
+static inline void
+insw(u_short port, void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; insb"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
+	__asm volatile("rep; insw"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
-static __inline void
-insw(u_int port, void *addr, size_t cnt)
+static inline void
+insl(u_short port, void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; insw"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
+	__asm volatile("rep; insl"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
 static __inline void
-insl(u_int port, void *addr, size_t cnt)
-{
-	__asm __volatile("cld; rep; insl"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
-}
-
-static __inline void
 invd(void)
 {
 	__asm __volatile("invd");
 }
 
-static __inline u_short
-inw(u_int port)
+static inline u_short
+inw(u_short port)
 {
-	u_short	data;
-
-	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
+	u_short data;
+	__asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port));
 	return (data);
 }
 
-static __inline void
-outbv(u_int port, u_char data)
+static inline void
+outb(u_short port, u_char data)
 {
-	u_char	al;
-	/*
-	 * Use an unnecessary assignment to help gcc's register allocator.
-	 * This make a large difference for gcc-1.40 and a tiny difference
-	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
-	 * best results.  gcc-2.6.0 can't handle this.
-	 */
-	al = data;
-	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+	__asm volatile("outb %0, %1" : : "a" (data), "Nd" (port));
 }
 
-static __inline void
-outl(u_int port, u_int data)
+static inline void
+outl(u_short port, u_int data)
 {
-	/*
-	 * outl() and outw() aren't used much so we haven't looked at
-	 * possible micro-optimizations such as the unnecessary
-	 * assignment for them.
-	 */
-	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
+	__asm volatile("outl %0, %1" : : "a" (data), "Nd" (port));
 }
 
-static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
+static inline void
+outsb(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsb"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
+static inline void
+outsw(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsw"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
+static inline void
+outsl(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsl"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outw(u_int port, u_short data)
+static inline void
+outw(u_short port, u_short data)
 {
-	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
+	__asm volatile("outw %0, %1" : : "a" (data), "Nd" (port));
 }
 
 static __inline void
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c	(Revision 190841)
+++ sys/i386/i386/machdep.c	(Arbeitskopie)
@@ -3555,45 +3555,24 @@
 #ifdef KDB
 
 /*
- * Provide inb() and outb() as functions.  They are normally only
- * available as macros calling inlined functions, thus cannot be
- * called from the debugger.
- *
- * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
+ * Provide inb() and outb() as functions.  They are normally only available as
+ * inline functions, thus cannot be called from the debugger.
  */
 
-#undef inb
-#undef outb
-
 /* silence compiler warnings */
-u_char inb(u_int);
-void outb(u_int, u_char);
+u_char inb_(u_short);
+void outb_(u_short, u_char);
 
 u_char
-inb(u_int port)
+inb_(u_short port)
 {
-	u_char	data;
-	/*
-	 * We use %%dx and not %1 here because i/o is done at %dx and not at
-	 * %edx, while gcc generates inferior code (movw instead of movl)
-	 * if we tell it to load (u_short) port.
-	 */
-	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
-	return (data);
+	return inb(port);
 }
 
 void
-outb(u_int port, u_char data)
+outb_(u_short port, u_char data)
 {
-	u_char	al;
-	/*
-	 * Use an unnecessary assignment to help gcc's register allocator.
-	 * This make a large difference for gcc-1.40 and a tiny difference
-	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
-	 * best results.  gcc-2.6.0 can't handle this.
-	 */
-	al = data;
-	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+	outb(port, data);
 }
 
 #endif /* KDB */
Index: sys/amd64/include/cpufunc.h
===================================================================
--- sys/amd64/include/cpufunc.h	(Revision 190841)
+++ sys/amd64/include/cpufunc.h	(Arbeitskopie)
@@ -164,177 +164,97 @@
 	__asm __volatile("hlt");
 }
 
-#if !defined(__GNUCLIKE_BUILTIN_CONSTANT_P) || __GNUCLIKE_ASM < 3
-
-#define	inb(port)		inbv(port)
-#define	outb(port, data)	outbv(port, data)
-
-#else /* __GNUCLIKE_BUILTIN_CONSTANT_P && __GNUCLIKE_ASM >= 3 */
-
-/*
- * The following complications are to get around gcc not having a
- * constraint letter for the range 0..255.  We still put "d" in the
- * constraint because "i" isn't a valid constraint when the port
- * isn't constant.  This only matters for -O0 because otherwise
- * the non-working version gets optimized away.
- * 
- * Use an expression-statement instead of a conditional expression
- * because gcc-2.6.0 would promote the operands of the conditional
- * and produce poor code for "if ((inb(var) & const1) == const2)".
- *
- * The unnecessary test `(port) < 0x10000' is to generate a warning if
- * the `port' has type u_short or smaller.  Such types are pessimal.
- * This actually only works for signed types.  The range check is
- * careful to avoid generating warnings.
- */
-#define	inb(port) __extension__ ({					\
-	u_char	_data;							\
-	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
-	    && (port) < 0x10000)					\
-		_data = inbc(port);					\
-	else								\
-		_data = inbv(port);					\
-	_data; })
-
-#define	outb(port, data) (						\
-	__builtin_constant_p(port) && ((port) & 0xffff) < 0x100		\
-	&& (port) < 0x10000						\
-	? outbc(port, data) : outbv(port, data))
-
-static __inline u_char
-inbc(u_int port)
+static inline u_char
+inb(u_short port)
 {
-	u_char	data;
-
-	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
-	return (data);
+	u_char data;
+	__asm volatile("inb %1, %0" : "=a" (data) : "Nd" (port));
+	return data;
 }
 
-static __inline void
-outbc(u_int port, u_char data)
+static inline u_int
+inl(u_short port)
 {
-	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
-}
-
-#endif /* __GNUCLIKE_BUILTIN_CONSTANT_P  && __GNUCLIKE_ASM >= 3*/
-
-static __inline u_char
-inbv(u_int port)
-{
-	u_char	data;
-	/*
-	 * We use %%dx and not %1 here because i/o is done at %dx and not at
-	 * %edx, while gcc generates inferior code (movw instead of movl)
-	 * if we tell it to load (u_short) port.
-	 */
-	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
+	u_int data;
+	__asm volatile("inl %1, %0" : "=a" (data) : "Nd" (port));
 	return (data);
 }
 
-static __inline u_int
-inl(u_int port)
+static inline void
+insb(u_short port, void *addr, size_t cnt)
 {
-	u_int	data;
-
-	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
-	return (data);
+	__asm volatile("rep; insb"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
-static __inline void
-insb(u_int port, void *addr, size_t cnt)
+static inline void
+insw(u_short port, void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; insb"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
+	__asm volatile("rep; insw"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
-static __inline void
-insw(u_int port, void *addr, size_t cnt)
+static inline void
+insl(u_short port, void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; insw"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
+	__asm volatile("rep; insl"
+	    : "+D" (addr), "+c" (cnt)
+	    : "d" (port)
+	    : "memory");
 }
 
 static __inline void
-insl(u_int port, void *addr, size_t cnt)
-{
-	__asm __volatile("cld; rep; insl"
-			 : "+D" (addr), "+c" (cnt)
-			 : "d" (port)
-			 : "memory");
-}
-
-static __inline void
 invd(void)
 {
 	__asm __volatile("invd");
 }
 
-static __inline u_short
-inw(u_int port)
+static inline u_short
+inw(u_short port)
 {
-	u_short	data;
-
-	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
+	u_short data;
+	__asm volatile("inw %1, %0" : "=a" (data) : "Nd" (port));
 	return (data);
 }
 
-static __inline void
-outbv(u_int port, u_char data)
+static inline void
+outb(u_short port, u_char data)
 {
-	u_char	al;
-	/*
-	 * Use an unnecessary assignment to help gcc's register allocator.
-	 * This make a large difference for gcc-1.40 and a tiny difference
-	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
-	 * best results.  gcc-2.6.0 can't handle this.
-	 */
-	al = data;
-	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+	__asm volatile("outb %0, %1" : : "a" (data), "Nd" (port));
 }
 
-static __inline void
-outl(u_int port, u_int data)
+static inline void
+outl(u_short port, u_int data)
 {
-	/*
-	 * outl() and outw() aren't used much so we haven't looked at
-	 * possible micro-optimizations such as the unnecessary
-	 * assignment for them.
-	 */
-	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
+	__asm volatile("outl %0, %1" : : "a" (data), "Nd" (port));
 }
 
-static __inline void
-outsb(u_int port, const void *addr, size_t cnt)
+static inline void
+outsb(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsb"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsb" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outsw(u_int port, const void *addr, size_t cnt)
+static inline void
+outsw(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsw"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsw" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outsl(u_int port, const void *addr, size_t cnt)
+static inline void
+outsl(u_short port, const void *addr, size_t cnt)
 {
-	__asm __volatile("cld; rep; outsl"
-			 : "+S" (addr), "+c" (cnt)
-			 : "d" (port));
+	__asm volatile("rep; outsl" : "+S" (addr), "+c" (cnt) : "d" (port));
 }
 
-static __inline void
-outw(u_int port, u_short data)
+static inline void
+outw(u_short port, u_short data)
 {
-	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
+	__asm volatile("outw %0, %1" : : "a" (data), "Nd" (port));
 }
 
 static __inline void
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c	(Revision 190841)
+++ sys/amd64/amd64/machdep.c	(Arbeitskopie)
@@ -2178,45 +2178,24 @@
 #ifdef KDB
 
 /*
- * Provide inb() and outb() as functions.  They are normally only
- * available as macros calling inlined functions, thus cannot be
- * called from the debugger.
- *
- * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
+ * Provide inb() and outb() as functions.  They are normally only available as
+ * inline functions, thus cannot be called from the debugger.
  */
 
-#undef inb
-#undef outb
-
 /* silence compiler warnings */
-u_char inb(u_int);
-void outb(u_int, u_char);
+u_char inb_(u_short);
+void outb_(u_short, u_char);
 
 u_char
-inb(u_int port)
+inb_(u_short port)
 {
-	u_char	data;
-	/*
-	 * We use %%dx and not %1 here because i/o is done at %dx and not at
-	 * %edx, while gcc generates inferior code (movw instead of movl)
-	 * if we tell it to load (u_short) port.
-	 */
-	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
-	return (data);
+	return inb(port);
 }
 
 void
-outb(u_int port, u_char data)
+outb_(u_short port, u_char data)
 {
-	u_char	al;
-	/*
-	 * Use an unnecessary assignment to help gcc's register allocator.
-	 * This make a large difference for gcc-1.40 and a tiny difference
-	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
-	 * best results.  gcc-2.6.0 can't handle this.
-	 */
-	al = data;
-	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
+	outb(port, data);
 }
 
 #endif /* KDB */


More information about the freebsd-amd64 mailing list