[Openal-devel] x86 cpu capabilities detection routine to be committed

Prakash Punnoor prakash at punnoor.de
Mon Sep 26 07:00:57 PDT 2005


Hi,

I gathered some info and worked out a bit of infrastructure for OpenAL
so that routines can safely use SIMD code when available by checking for
_alHave[SIMD](). The caps detection does this first via compile time
detection and on x86 via runtime detection, as well. Furthermore SIMD
usage can be controlled via env vars.

The detection is initialized via a call to _alDetectCPUCaps(). I put it
into alcCreateContext(), as I consider this to be OpenAL's "main"
function. I tried to minimize scattering of ifdefs in al code and as
such I added a stub for the portable variant.

Unless someone has major objections to this (the configure.ac file is
volatile though...), I will commit this shortly and then prepare my mmx
mixing routines for it and commit, as well.

I attached the patch as inlining would mangle it. (I hope the mailing
list sw doesn't act up...)


Cheers,
-- 
(°=                 =°)
//\ Prakash Punnoor /\\
V_/                 \_V
-------------- next part --------------
diff -Nurd openal/linux/configure.ac openal.det/linux/configure.ac
--- openal/linux/configure.ac	2005-09-25 23:45:42.577586824 +0200
+++ openal.det/linux/configure.ac	2005-09-26 15:23:42.760208712 +0200
@@ -188,15 +188,32 @@
 
 dnl Special objs for architectures
 case "$target" in
-    *i386* | *i486* | *i586* | *i686* )
-    ARCH_OBJS="\$(x86_OBJS)"
-    ;;
-    *x86_64* )
-    ARCH_OBJS="\$(x86_OBJS)"
-    ;;
+    *i386* | *i486* | *i586* | *i686*)
+        ARCH_OBJS="\$(x86_OBJS)"
+
+        AC_PATH_PROG(NASM, nasm, no)
+        if test "${NASM}" != "no"; then
+            AC_DEFINE(HAVE_NASM, 1, have nasm)
+            AC_DEFINE(HAVE_CPU_CAPS_DETECTION, 1, detect cpu capabilities)
+            ARCH_OBJS="$ARCH_OBJS \$(x86_NASM_OBJS)"
+            INCLUDES="$INCLUDES -Iarch/i386"
+
+            case "$target" in
+            *linux*)
+                NASM_FORMAT="-f elf"
+                ;;
+            *cygwin* | *mingw*)
+                NASM_FORMAT="-f win32 -DWIN32"
+                ;;
+            esac
+        fi
+        ;;
+    *x86_64*)
+        ARCH_OBJS="\$(x86_OBJS)"
+        ;;
     *)
-    ARCH_OBJS="\$(PORTABLE_OBJS)"
-    ;;
+        ARCH_OBJS="\$(PORTABLE_OBJS)"
+        ;;
 esac
 
 dnl Check for headers/libs required for native backends
@@ -756,6 +773,7 @@
 
 AC_SUBST([DO_MINOR_LIBS])
 AC_SUBST([NASM])
+AC_SUBST([NASM_FORMAT])
 AC_SUBST([RANLIB])
 AC_SUBST([CC])
 AC_SUBST([CFLAGS])
diff -Nurd openal/linux/src/alc/alc_context.c openal.det/linux/src/alc/alc_context.c
--- openal/linux/src/alc/alc_context.c	2005-09-21 17:20:37.000000000 +0200
+++ openal.det/linux/src/alc/alc_context.c	2005-09-26 15:15:08.459394320 +0200
@@ -22,6 +22,7 @@
 #include "al_buffer.h"
 #include "al_filter.h"
 #include "al_distance.h"
+#include "al_cpu_caps.h"
 
 #include "alc/alc_error.h"
 #include "alc/alc_device.h"
@@ -453,7 +454,9 @@
 
 		return NULL;
 	}
-
+	
+	_alDetectCPUCaps();
+	
 	if( al_contexts.items == 0 ) {
 		/*
 		 * This is the first context to be created.  Initialize the
diff -Nurd openal/linux/src/al_cpu_caps.h openal.det/linux/src/al_cpu_caps.h
--- openal/linux/src/al_cpu_caps.h	1970-01-01 01:00:00.000000000 +0100
+++ openal.det/linux/src/al_cpu_caps.h	2005-09-26 15:35:39.146301472 +0200
@@ -0,0 +1,10 @@
+#ifndef AL_CPU_CAPS_H
+#define AL_CPU_CAPS_H
+
+void _alDetectCPUCaps(void); /* detect SIMD caps, called from alcCreateContext(); empty stub in the portable build */
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch/i386/x86_cpu_caps_prk.h" /* path relative to this header: -Iarch/i386 is only added for i386 with nasm */
+#endif
+
+#endif /* AL_CPU_CAPS_H */
diff -Nurd openal/linux/src/arch/i386/x86_cpu_caps_detect_prk.nasm openal.det/linux/src/arch/i386/x86_cpu_caps_detect_prk.nasm
--- openal/linux/src/arch/i386/x86_cpu_caps_detect_prk.nasm	1970-01-01 01:00:00.000000000 +0100
+++ openal.det/linux/src/arch/i386/x86_cpu_caps_detect_prk.nasm	2005-09-26 15:41:33.875374456 +0200
@@ -0,0 +1,79 @@
+;***************************************************************************
+;*   Copyright (C) 2005 by Prakash Punnoor                                 *
+;*   prakash at punnoor.de                                                    *
+;*                                                                         *
+;*   This program is free software; you can redistribute it and/or modify  *
+;*   it under the terms of the GNU Library General Public License as       *
+;*   published by the Free Software Foundation; either version 2 of the    *
+;*   License, or (at your option) any later version.                       *
+;*                                                                         *
+;*   This program is distributed in the hope that it will be useful,       *
+;*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+;*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+;*   GNU General Public License for more details.                          *
+;*                                                                         *
+;*   You should have received a copy of the GNU Library General Public     *
+;*   License along with this program; if not, write to the                 *
+;*   Free Software Foundation, Inc.,                                       *
+;*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+;***************************************************************************
+
+; int _alDetectx86CPUCaps(uint* caps1, uint* caps2, uint* caps3);
+; derived from loki_cpuinfo.c, 1997-98 by H. Dietz and R. Fisher
+; using infos from sandpile.org
+;
+; 32-bit routine: the three pointer arguments are read from the stack.
+; *caps1 = CPUID(1).EDX, *caps2 = CPUID(1).ECX, *caps3 = CPUID(0x80000001).EDX,
+; or 0 for *caps3 when the CPU does not implement that extended leaf.
+; Returns 0 (outputs untouched) if no CPUID is available, 1 otherwise.
+global __alDetectx86CPUCaps
+global _alDetectx86CPUCaps
+
+__alDetectx86CPUCaps:
+_alDetectx86CPUCaps:
+
+; CPUID is available iff bit 21 (ID) of EFLAGS can be toggled
+pushf
+pop	eax
+mov	ecx, eax
+
+xor	eax, 0x200000
+push	eax
+popf
+
+pushf
+pop	eax
+
+xor	ecx, eax
+xor	eax, eax
+test	ecx, 0x200000
+jz	.Return
+
+; standard CPUID
+push ebx		; cpuid overwrites ebx, which the caller expects preserved
+mov	eax, 1
+cpuid
+mov	eax, [esp + 8]	;caps1 - MMX, SSE, SSE2
+mov	[eax], edx
+mov	eax, [esp + 12]	;caps2 - SSE3
+mov	[eax], ecx
+
+; extended CPUID - leaf 0x80000000 reports the highest extended leaf;
+; CPUs without extended leaves answer with unrelated data, so never
+; trust leaf 0x80000001 unless it is reported as implemented
+mov	eax, 0x80000000
+cpuid
+xor	edx, edx	; default: no extended capabilities
+cmp	eax, 0x80000001
+jb	.StoreCaps3
+mov	eax, 0x80000001
+cpuid
+.StoreCaps3:
+mov	eax, [esp + 16]	;caps3 - 3DNOW!, 3DNOW!EXT, CYRIX-MMXEXT, AMD-MMX-SSE
+mov	[eax], edx
+pop ebx
+mov	eax, 1		; CPUID present
+
+; End
+.Return:
+ret
diff -Nurd openal/linux/src/arch/i386/x86_cpu_caps_prk.c openal.det/linux/src/arch/i386/x86_cpu_caps_prk.c
--- openal/linux/src/arch/i386/x86_cpu_caps_prk.c	1970-01-01 01:00:00.000000000 +0100
+++ openal.det/linux/src/arch/i386/x86_cpu_caps_prk.c	2005-09-26 15:43:56.245730872 +0200
@@ -0,0 +1,155 @@
+/***************************************************************************
+ *   Copyright (C) 2005 by Prakash Punnoor                                 *
+ *   prakash at punnoor.de                                                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU Library General Public License as       *
+ *   published by the Free Software Foundation; either version 2 of the    *
+ *   License, or (at your option) any later version.                       *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU Library General Public     *
+ *   License along with this program; if not, write to the                 *
+ *   Free Software Foundation, Inc.,                                       *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+ ***************************************************************************/
+#include "al_siteconfig.h"
+#include <string.h>
+#include <stdlib.h>
+#include "al_cpu_caps.h"
+#include "al_debug.h"
+
+typedef unsigned int uint;
+int _alDetectx86CPUCaps(uint* caps1, uint* caps2, uint* caps3); /* NASM routine; returns 0 if CPUID is unavailable */
+
+	/* caps1: bit positions in EDX of CPUID leaf 1 */
+#define MMX_BIT             23
+#define SSE_BIT             25
+#define SSE2_BIT            26
+
+	/* caps2: bit positions in ECX of CPUID leaf 1 */
+#define SSE3_BIT             0
+
+	/* caps3: bit positions in EDX of CPUID leaf 0x80000001 */
+#define	AMD_3DNOW_BIT       31
+#define	AMD_3DNOWEXT_BIT    30
+#define AMD_SSE_MMX_BIT     22
+#define CYRIX_MMXEXT_BIT    24
+
+/* detected features (all off until _alDetectCPUCaps() runs) and user permissions (all on by default) */
+struct x86cpu_caps_s x86cpu_caps = { 0, 0, 0, 0, 0, 0, 0, 0};
+struct x86cpu_caps_s x86cpu_caps_use = { 1, 1, 1, 1, 1, 1, 1, 1};
+
+/*
+ * Fill x86cpu_caps (what the CPU offers) and x86cpu_caps_use (what the
+ * OPENAL_DISABLE_* environment variables still permit), then log both.
+ */
+void _alDetectCPUCaps(void)
+{
+	/* compile time detection: extensions the compiler itself was told to target */
+#ifdef __MMX__
+	x86cpu_caps.mmx = 1;
+#endif
+#ifdef __SSE__
+	x86cpu_caps.sse = 1;
+#endif
+#ifdef __SSE2__
+	x86cpu_caps.sse2 = 1;
+#endif
+#ifdef __SSE3__
+	x86cpu_caps.sse3 = 1;
+#endif
+#ifdef __3dNOW__
+	x86cpu_caps.amd_3dnow = 1;
+#ifdef __athlon__
+	x86cpu_caps.amd_sse_mmx = 1;
+#endif
+#endif
+#ifdef __3dNOW_A__
+	x86cpu_caps.amd_3dnowext = 1;
+#endif
+	
+	/* runtime detection: CPUID results overwrite the compile time values */
+#ifdef HAVE_CPU_CAPS_DETECTION
+	{
+		uint caps1 = 0, caps2 = 0, caps3 = 0;
+		
+		if (_alDetectx86CPUCaps(&caps1, &caps2, &caps3)) {
+			x86cpu_caps.mmx = (caps1 >> MMX_BIT) & 1;
+			x86cpu_caps.sse = (caps1 >> SSE_BIT) & 1;
+			x86cpu_caps.sse2 = (caps1 >> SSE2_BIT) & 1;
+			
+			x86cpu_caps.sse3 = (caps2 >> SSE3_BIT) & 1;
+			
+			x86cpu_caps.amd_3dnow = (caps3 >> AMD_3DNOW_BIT) & 1;
+			x86cpu_caps.amd_3dnowext = (caps3 >> AMD_3DNOWEXT_BIT) & 1;
+			x86cpu_caps.amd_sse_mmx = (caps3 >> AMD_SSE_MMX_BIT) & 1;
+			/* FIXME: For Cyrix MMXEXT detect Cyrix CPU first! */
+			/*
+			x86cpu_caps.cyrix_mmxext = (caps3 >> CYRIX_MMXEXT_BIT) & 1;
+			*/
+		}
+	}
+#endif /*HAVE_CPU_CAPS_DETECTION*/
+	
+	/* check environment vars: OPENAL_DISABLE_<X>=<n> forbids <X> when n is non-zero */
+	{
+		char *env;
+		
+		env = getenv("OPENAL_DISABLE_MMX");
+		if (env)
+			x86cpu_caps_use.mmx = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_SSE");
+		if (env)
+			x86cpu_caps_use.sse = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_SSE2");
+		if (env)
+			x86cpu_caps_use.sse2 = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_SSE3");
+		if (env)
+			x86cpu_caps_use.sse3 = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_3DNOW");
+		if (env)
+			x86cpu_caps_use.amd_3dnow = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_3DNOWEXT");
+		if (env)
+			x86cpu_caps_use.amd_3dnowext = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_SSE_MMX");
+		if (env)
+			x86cpu_caps_use.amd_sse_mmx = !atoi(env);
+		
+		env = getenv("OPENAL_DISABLE_SIMD");
+		if (env && atoi(env)) /* master switch; "0" must not disable, same as the switches above */
+			memset(&x86cpu_caps_use, 0, sizeof x86cpu_caps_use);
+	}
+
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"mmx found %i  use %i\n",
+	         x86cpu_caps.mmx, x86cpu_caps_use.mmx);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"sse found %i  use %i\n",
+	         x86cpu_caps.sse, x86cpu_caps_use.sse);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"sse2 found %i  use %i\n",
+	         x86cpu_caps.sse2, x86cpu_caps_use.sse2);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"sse3 found %i  use %i\n",
+	         x86cpu_caps.sse3, x86cpu_caps_use.sse3);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"amd_3dnow found %i  use %i\n",
+	         x86cpu_caps.amd_3dnow, x86cpu_caps_use.amd_3dnow);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"amd_3dnowext found %i  use %i\n",
+	         x86cpu_caps.amd_3dnowext, x86cpu_caps_use.amd_3dnowext);
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"amd_sse_mmx found %i  use %i\n",
+	         x86cpu_caps.amd_sse_mmx, x86cpu_caps_use.amd_sse_mmx);
+	/*
+	_alDebug(ALD_CONFIG, __FILE__, __LINE__,"cyrix_mmxext found %i  use %i\n",
+	         x86cpu_caps.cyrix_mmxext, x86cpu_caps_use.cyrix_mmxext);
+	*/
+
+}
diff -Nurd openal/linux/src/arch/i386/x86_cpu_caps_prk.h openal.det/linux/src/arch/i386/x86_cpu_caps_prk.h
--- openal/linux/src/arch/i386/x86_cpu_caps_prk.h	1970-01-01 01:00:00.000000000 +0100
+++ openal.det/linux/src/arch/i386/x86_cpu_caps_prk.h	2005-09-26 15:44:09.104776000 +0200
@@ -0,0 +1,81 @@
+/***************************************************************************
+ *   Copyright (C) 2005 by Prakash Punnoor                                 *
+ *   prakash at punnoor.de                                                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU Library General Public License as       *
+ *   published by the Free Software Foundation; either version 2 of the    *
+ *   License, or (at your option) any later version.                       *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU Library General Public     *
+ *   License along with this program; if not, write to the                 *
+ *   Free Software Foundation, Inc.,                                       *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+ ***************************************************************************/
+#ifndef X86_CPU_CAPS_H
+#define X86_CPU_CAPS_H
+
+struct x86cpu_caps_s { /* one flag per SIMD extension; field order is relied on by positional initializers */
+	int mmx;
+	int sse;
+	int sse2;
+	int sse3;
+	int amd_3dnow;
+	int amd_3dnowext;
+	int amd_sse_mmx; /* the "AMD-MMX-SSE" bit of the extended CPUID flags */
+	int cyrix_mmxext; /* placeholder: Cyrix detection is still commented out in the detection code */
+};
+
+extern struct x86cpu_caps_s x86cpu_caps; /* features found by _alDetectCPUCaps() */
+extern struct x86cpu_caps_s x86cpu_caps_use; /* features still allowed after the OPENAL_DISABLE_* env vars */
+
+/*
+ * Run-time SIMD queries for code that wants to pick an optimized path.
+ * Each helper combines the detected flag in x86cpu_caps with the
+ * matching permission flag in x86cpu_caps_use, so callers need only a
+ * single check.  Both operands hold 0 or 1, hence the bitwise AND.
+ * Until _alDetectCPUCaps() has run every helper yields 0, because
+ * x86cpu_caps starts out zeroed.
+ */
+
+static __inline int _alHaveMMX(void)
+{
+	return x86cpu_caps_use.mmx & x86cpu_caps.mmx;
+}
+
+static __inline int _alHaveSSE(void)
+{
+	return x86cpu_caps_use.sse & x86cpu_caps.sse;
+}
+
+static __inline int _alHaveSSE2(void)
+{
+	return x86cpu_caps_use.sse2 & x86cpu_caps.sse2;
+}
+
+static __inline int _alHaveSSE3(void)
+{
+	return x86cpu_caps_use.sse3 & x86cpu_caps.sse3;
+}
+
+static __inline int _alHave3DNOW(void)
+{
+	return x86cpu_caps_use.amd_3dnow & x86cpu_caps.amd_3dnow;
+}
+
+static __inline int _alHave3DNOWEXT(void)
+{
+	return x86cpu_caps_use.amd_3dnowext & x86cpu_caps.amd_3dnowext;
+}
+
+static __inline int _alHaveSSEMMX(void)
+{
+	return x86cpu_caps_use.amd_sse_mmx & x86cpu_caps.amd_sse_mmx;
+}
+
+#endif /* X86_CPU_CAPS_H */
diff -Nurd openal/linux/src/arch/portable/cpu_caps.c openal.det/linux/src/arch/portable/cpu_caps.c
--- openal/linux/src/arch/portable/cpu_caps.c	1970-01-01 01:00:00.000000000 +0100
+++ openal.det/linux/src/arch/portable/cpu_caps.c	2005-09-26 15:50:30.005870232 +0200
@@ -0,0 +1,6 @@
+#include "al_cpu_caps.h"
+
+/* Portable build: nothing to detect, but alcCreateContext() calls this unconditionally, so the symbol must exist. */
+void _alDetectCPUCaps(void)
+{
+}
diff -Nurd openal/linux/src/Makefile.in openal.det/linux/src/Makefile.in
--- openal/linux/src/Makefile.in	2005-09-25 23:44:06.243231872 +0200
+++ openal.det/linux/src/Makefile.in	2005-09-26 15:19:10.334623696 +0200
@@ -9,6 +9,8 @@
 LIBS=-L. @LIBS@
 SO_EXT=@SHARED_LIBRARY_EXT@
 RANLIB=@RANLIB@
+NASM=@NASM@
+NASMFLAGS=@NASM_FORMAT@
 
 AL_OBJS=al_able.o       \
 	al_bpool.o 	\
@@ -49,8 +51,10 @@
 # loaded with load-extension via alrc)
 EXT_DLL_OBJS=$(addsuffix .o, $(addprefix extensions/al_ext_, ))
 
-x86_OBJS=arch/i386/floatmul.o
-PORTABLE_OBJS=arch/portable/floatmul.o
+x86_OBJS=arch/i386/floatmul.o arch/i386/x86_cpu_caps_prk.o
+x86_NASM_OBJS=arch/i386/x86_cpu_caps_detect_prk.o
+
+PORTABLE_OBJS=arch/portable/floatmul.o arch/portable/cpu_caps.o
 
 # backend specific stuff
 INTERFACE_OBJS=arch/interface/interface_sound.o
@@ -85,6 +89,8 @@
 	$(CPP) $(DEFINES) $< | $(AS) -o $@
 %.o : %.c al_siteconfig.h ../config.h
 	$(CC) $(DEFINES) $(INC) $(CFLAGS) -c $< -o $@
+%.o : %.nasm
+	$(NASM) $(NASMFLAGS) $< -o $@
 
 %.def : %.a libopenal.a
 	dlltool -a $< --output-def $@


More information about the Openal-devel mailing list