Actual source code: sseenabled.c

  1: #include "petscsys.h" /*I "petscsys.h" I*/

  3: #ifdef PETSC_HAVE_SSE

  5: #include PETSC_HAVE_SSE
  6: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  8: #include <string.h>

 12: PetscErrorCode PetscSSEHardwareTest(PetscTruth *flag)
 13: {
 15:   char *vendor;
 16:   char Intel[13]="GenuineIntel";
 17:   char AMD[13]  ="AuthenticAMD";

 20:   PetscMalloc(13*sizeof(char),&vendor);
 21:   strcpy(vendor,"************");
 22:   CPUID_GET_VENDOR(vendor);
 23:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 24:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 25:     /* to denote availability of SSE Support */
 26:     unsigned long myeax,myebx,myecx,myedx;
 27:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 28:     if (myedx & SSE_FEATURE_FLAG) {
 29:       *flag = PETSC_TRUE;
 30:     } else {
 31:       *flag = PETSC_FALSE;
 32:     }
 33:   }
 34:   PetscFree(vendor);
 35:   return(0);
 36: }

 38: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
 40: /* 
 41:    Early versions of the Linux kernel disable SSE hardware because
 42:    they do not know how to preserve the SSE state at a context switch.
 43:    To detect this feature, try an sse instruction in another process.  
 44:    If it works, great!  If not, an illegal instruction signal will be thrown,
 45:    so catch it and return an error code. 
 46: */
 47: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 49: static void PetscSSEDisabledHandler(int sig) {
 50:   signal(SIGILL,SIG_IGN);
 51:   exit(-1);
 52: }

 56: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscTruth *flag)
 57: {
 58:   int status, pid = 0;
 60:   signal(SIGILL,PetscSSEDisabledHandler);
 61:   pid = fork();
 62:   if (pid==0) {
 63:     SSE_SCOPE_BEGIN;
 64:       XOR_PS(XMM0,XMM0);
 65:     SSE_SCOPE_END;
 66:     exit(0);
 67:   } else {
 68:     wait(&status);
 69:   }
 70:   if (!status) {
 71:     *flag = PETSC_TRUE;
 72:   } else {
 73:     *flag = PETSC_FALSE;
 74:   }
 75:   return(0);
 76: }

 78: #else
 79: /* 
 80:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 81:    Windows ME/2000 doesn't disable SSE Hardware 
 82: */
 83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 84: #endif 

 88: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscTruth *flag)
 89: {
 91:   if (flag) {
 92:     *flag = PETSC_TRUE;
 93:   }
 94:   return(0);
 95: }

 97: #else  /* Not defined PETSC_HAVE_SSE */

 99: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
100: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

104: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscTruth *flag)
105: {
107:   if (flag) {
108:     *flag = PETSC_FALSE;
109:   }
110:   return(0);
111: }

113: #endif /* defined PETSC_HAVE_SSE */

117: /*@C
118:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction 
119:      set can be used.  Some operating systems do not allow the use of these instructions despite
120:      hardware availability.

122:      Collective on MPI_Comm

124:      Input Parameter:
125: .    comm - the MPI Communicator

127:      Output Parameters:
128: .    lflag - Local Flag:  PETSC_TRUE if enabled in this process
129: .    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

131:      Notes:
132:      PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.

134:      Options Database Keys:
135: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

137:      Level: developer
138: @*/
139: static PetscTruth petsc_sse_local_is_untested  = PETSC_TRUE;
140: static PetscTruth petsc_sse_enabled_local      = PETSC_FALSE;
141: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
142: static PetscTruth petsc_sse_enabled_global     = PETSC_FALSE;
143: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
145:   PetscTruth disabled_option;


149:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
150:     disabled_option = PETSC_FALSE;

152:     PetscOptionsName("-disable_sse",
153:                             "Disable use of hand tuned Intel SSE implementations <true,false>.",
154:                             "PetscSSEIsEnabled",&disabled_option);
155:     if (disabled_option) {
156:       petsc_sse_local_is_untested  = PETSC_FALSE;
157:       petsc_sse_enabled_local      = PETSC_FALSE;
158:       petsc_sse_global_is_untested = PETSC_FALSE;
159:       petsc_sse_enabled_global     = PETSC_FALSE;
160:     }

162:     if (petsc_sse_local_is_untested) {
163:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
164:       if (petsc_sse_enabled_local) {
165:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
166:       }
167:       petsc_sse_local_is_untested = PETSC_FALSE;
168:     }

170:     if (gflag && petsc_sse_global_is_untested) {
171:       MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
172:       petsc_sse_global_is_untested = PETSC_FALSE;
173:     }
174:   }

176:   if (lflag) {
177:     *lflag = petsc_sse_enabled_local;
178:   }
179:   if (gflag) {
180:     *gflag = petsc_sse_enabled_global;
181:   }
182:   return(0);
183: }