escript  Revision_
LocalOps.h
Go to the documentation of this file.
00001 
00002 /*****************************************************************************
00003 *
00004 * Copyright (c) 2003-2014 by University of Queensland
00005 * http://www.uq.edu.au
00006 *
00007 * Primary Business: Queensland, Australia
00008 * Licensed under the Open Software License version 3.0
00009 * http://www.opensource.org/licenses/osl-3.0.php
00010 *
00011 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
00012 * Development 2012-2013 by School of Earth Sciences
00013 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
00014 *
00015 *****************************************************************************/
00016 
00017 
00018 #if !defined escript_LocalOps_H
00019 #define escript_LocalOps_H
00020 #if defined(_WIN32) && defined(__INTEL_COMPILER)
00021 #   include <mathimf.h>
00022 #else
00023 #   include <cmath>
00024 #endif
00025 #ifndef M_PI
00026 #   define M_PI           3.14159265358979323846  /* pi */
00027 #endif
00028 
00029 
00038 namespace escript {
00039 
00044 inline
00045 bool nancheck(double d)
00046 {
00047         // Q: so why not just test d!=d?
00048         // A: Coz it doesn't always work [I've checked].
00049         // One theory is that the optimizer skips the test.
00050 #ifdef isnan
00051     return isnan(d);
00052 #elif defined _isnan
00053     return _isnan(d);
00054 #else
00055     return false;
00056 #endif
00057 }
00058 
00063 inline
00064 double makeNaN()
00065 {
00066 #ifdef nan
00067     return nan("");
00068 #else
00069     return sqrt(-1.);
00070 #endif
00071 
00072 }
00073 
00074 
00082 inline
00083 void eigenvalues1(const double A00,double* ev0) {
00084 
00085    *ev0=A00;
00086 
00087 }
00098 inline
00099 void eigenvalues2(const double A00,const double A01,const double A11,
00100                  double* ev0, double* ev1) {
00101       const register double trA=(A00+A11)/2.;
00102       const register double A_00=A00-trA;
00103       const register double A_11=A11-trA;
00104       const register double s=sqrt(A01*A01-A_00*A_11);
00105       *ev0=trA-s;
00106       *ev1=trA+s;
00107 }
00122 inline
00123 void eigenvalues3(const double A00, const double A01, const double A02,
00124                                    const double A11, const double A12,
00125                                                      const double A22,
00126                  double* ev0, double* ev1,double* ev2) {
00127 
00128       const register double trA=(A00+A11+A22)/3.;
00129       const register double A_00=A00-trA;
00130       const register double A_11=A11-trA;
00131       const register double A_22=A22-trA;
00132       const register double A01_2=A01*A01;
00133       const register double A02_2=A02*A02;
00134       const register double A12_2=A12*A12;
00135       const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
00136       if (p<=0.) {
00137          *ev2=trA;
00138          *ev1=trA;
00139          *ev0=trA;
00140 
00141       } else {
00142          const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
00143          const register double sq_p=sqrt(p/3.);
00144          register double z=-q/(2*pow(sq_p,3));
00145          if (z<-1.) {
00146             z=-1.;
00147          } else if (z>1.) {
00148             z=1.;
00149          }
00150          const register double alpha_3=acos(z)/3.;
00151          *ev2=trA+2.*sq_p*cos(alpha_3);
00152          *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
00153          *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
00154       }
00155 }
00165 inline
00166 void  eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
00167 {
00168       eigenvalues1(A00,ev0);
00169       *V00=1.;
00170       return;
00171 }
00183 inline
00184 void  vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
00185                       double* V0, double*V1)
00186 {
00187       register double absA00=fabs(A00);
00188       register double absA10=fabs(A10);
00189       register double absA01=fabs(A01);
00190       register double absA11=fabs(A11);
00191       register double m=absA11>absA10 ? absA11 : absA10;
00192       if (absA00>m || absA01>m) {
00193          *V0=-A01;
00194          *V1=A00;
00195       } else {
00196          if (m<=0) {
00197            *V0=1.;
00198            *V1=0.;
00199          } else {
00200            *V0=A11;
00201            *V1=-A10;
00202          }
00203      }
00204 }
00223 inline
00224 void  vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
00225                                 const double A01,const double A11,const double A21,
00226                                 const double A02,const double A12,const double A22,
00227                                 double* V0,double* V1,double* V2)
00228 {
00229     double TEMP0,TEMP1;
00230     register const double I00=1./A00;
00231     register const double IA10=I00*A10;
00232     register const double IA20=I00*A20;
00233     vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
00234                     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
00235     *V0=-(A10*TEMP0+A20*TEMP1);
00236     *V1=A00*TEMP0;
00237     *V2=A00*TEMP1;
00238 }
00239 
00257 inline
00258 void  eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
00259                                     double* ev0, double* ev1,
00260                                     double* V00, double* V10, double* V01, double* V11,
00261                                     const double tol)
00262 {
00263      double TEMP0,TEMP1;
00264      eigenvalues2(A00,A01,A11,ev0,ev1);
00265      const register double absev0=fabs(*ev0);
00266      const register double absev1=fabs(*ev1);
00267      register double max_ev=absev0>absev1 ? absev0 : absev1;
00268      if (fabs((*ev0)-(*ev1))<tol*max_ev) {
00269         *V00=1.;
00270         *V10=0.;
00271         *V01=0.;
00272         *V11=1.;
00273      } else {
00274         vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
00275         const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
00276         if (TEMP0<0.) {
00277             *V00=-TEMP0*scale;
00278             *V10=-TEMP1*scale;
00279             if (TEMP1<0.) {
00280                *V01=  *V10;
00281                *V11=-(*V00);
00282             } else {
00283                *V01=-(*V10);
00284                *V11= (*V00);
00285             }
00286         } else if (TEMP0>0.) {
00287             *V00=TEMP0*scale;
00288             *V10=TEMP1*scale;
00289             if (TEMP1<0.) {
00290                *V01=-(*V10);
00291                *V11= (*V00);
00292             } else {
00293                *V01= (*V10);
00294                *V11=-(*V00);
00295             }
00296         } else {
00297            *V00=0.;
00298            *V10=1;
00299            *V11=0.;
00300            *V01=1.;
00301        }
00302    }
00303 }
00312 inline
00313 void  normalizeVector3(double* V0,double* V1,double* V2)
00314 {
00315     register double s;
00316     if (*V0>0) {
00317         s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00318         *V0*=s;
00319         *V1*=s;
00320         *V2*=s;
00321     } else if (*V0<0)  {
00322         s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00323         *V0*=s;
00324         *V1*=s;
00325         *V2*=s;
00326     } else {
00327         if (*V1>0) {
00328             s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00329             *V1*=s;
00330             *V2*=s;
00331         } else if (*V1<0)  {
00332             s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00333             *V1*=s;
00334             *V2*=s;
00335         } else {
00336             *V2=1.;
00337         }
00338     }
00339 }
00366 inline
00367 void  eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
00368                                     const double A11, const double A12, const double A22,
00369                                     double* ev0, double* ev1, double* ev2,
00370                                     double* V00, double* V10, double* V20,
00371                                     double* V01, double* V11, double* V21,
00372                                     double* V02, double* V12, double* V22,
00373                                     const double tol)
00374 {
00375       register const double absA01=fabs(A01);
00376       register const double absA02=fabs(A02);
00377       register const double m=absA01>absA02 ? absA01 : absA02;
00378       if (m<=0) {
00379         double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
00380         eigenvalues_and_eigenvectors2(A11,A12,A22,
00381                                       &TEMP_EV0,&TEMP_EV1,
00382                                       &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
00383         if (A00<=TEMP_EV0) {
00384             *V00=1.;
00385             *V10=0.;
00386             *V20=0.;
00387             *V01=0.;
00388             *V11=TEMP_V00;
00389             *V21=TEMP_V10;
00390             *V02=0.;
00391             *V12=TEMP_V01;
00392             *V22=TEMP_V11;
00393             *ev0=A00;
00394             *ev1=TEMP_EV0;
00395             *ev2=TEMP_EV1;
00396         } else if (A00>TEMP_EV1) {
00397             *V02=1.;
00398             *V12=0.;
00399             *V22=0.;
00400             *V00=0.;
00401             *V10=TEMP_V00;
00402             *V20=TEMP_V10;
00403             *V01=0.;
00404             *V11=TEMP_V01;
00405             *V21=TEMP_V11;
00406             *ev0=TEMP_EV0;
00407             *ev1=TEMP_EV1;
00408             *ev2=A00;
00409         } else {
00410             *V01=1.;
00411             *V11=0.;
00412             *V21=0.;
00413             *V00=0.;
00414             *V10=TEMP_V00;
00415             *V20=TEMP_V10;
00416             *V02=0.;
00417             *V12=TEMP_V01;
00418             *V22=TEMP_V11;
00419             *ev0=TEMP_EV0;
00420             *ev1=A00;
00421             *ev2=TEMP_EV1;
00422         }
00423       } else {
00424          eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
00425          const register double absev0=fabs(*ev0);
00426          const register double absev1=fabs(*ev1);
00427          const register double absev2=fabs(*ev2);
00428          register double max_ev=absev0>absev1 ? absev0 : absev1;
00429          max_ev=max_ev>absev2 ? max_ev : absev2;
00430          const register double d_01=fabs((*ev0)-(*ev1));
00431          const register double d_12=fabs((*ev1)-(*ev2));
00432          const register double max_d=d_01>d_12 ? d_01 : d_12;
00433          if (max_d<=tol*max_ev) {
00434              *V00=1.;
00435              *V10=0;
00436              *V20=0;
00437              *V01=0;
00438              *V11=1.;
00439              *V21=0;
00440              *V02=0;
00441              *V12=0;
00442              *V22=1.;
00443          } else {
00444             const register double S00=A00-(*ev0);
00445             const register double absS00=fabs(S00);
00446             if (absS00>m) {
00447                 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
00448             } else if (absA02<m) {
00449                 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
00450             } else {
00451                 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
00452             }
00453             normalizeVector3(V00,V10,V20);;
00454             const register double T00=A00-(*ev2);
00455             const register double absT00=fabs(T00);
00456             if (absT00>m) {
00457                  vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
00458             } else if (absA02<m) {
00459                  vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
00460             } else {
00461                  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
00462             }
00463             const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
00464             *V02-=dot*(*V00);
00465             *V12-=dot*(*V10);
00466             *V22-=dot*(*V20);
00467             normalizeVector3(V02,V12,V22);
00468             *V01=(*V10)*(*V22)-(*V12)*(*V20);
00469             *V11=(*V20)*(*V02)-(*V00)*(*V22);
00470             *V21=(*V00)*(*V12)-(*V02)*(*V10);
00471             normalizeVector3(V01,V11,V21);
00472          }
00473    }
00474 }
00475 
00476 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
00477 // SM is the product of the last axis_offset entries in arg_0.getShape().
00478 inline
00479 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
00480 {
00481   if (transpose == 0) {
00482     for (int i=0; i<SL; i++) {
00483       for (int j=0; j<SR; j++) {
00484         double sum = 0.0;
00485         for (int l=0; l<SM; l++) {
00486       sum += A[i+SL*l] * B[l+SM*j];
00487         }
00488         C[i+SL*j] = sum;
00489       }
00490     }
00491   }
00492   else if (transpose == 1) {
00493     for (int i=0; i<SL; i++) {
00494       for (int j=0; j<SR; j++) {
00495         double sum = 0.0;
00496         for (int l=0; l<SM; l++) {
00497       sum += A[i*SM+l] * B[l+SM*j];
00498         }
00499         C[i+SL*j] = sum;
00500       }
00501     }
00502   }
00503   else if (transpose == 2) {
00504     for (int i=0; i<SL; i++) {
00505       for (int j=0; j<SR; j++) {
00506         double sum = 0.0;
00507         for (int l=0; l<SM; l++) {
00508       sum += A[i+SL*l] * B[l*SR+j];
00509         }
00510         C[i+SL*j] = sum;
00511       }
00512     }
00513   }
00514 }
00515 
00516 template <typename UnaryFunction>
00517 inline void tensor_unary_operation(const int size,
00518                  const double *arg1,
00519                  double * argRes,
00520                  UnaryFunction operation)
00521 {
00522   for (int i = 0; i < size; ++i) {
00523     argRes[i] = operation(arg1[i]);
00524   }
00525   return;
00526 }
00527 
00528 template <typename BinaryFunction>
00529 inline void tensor_binary_operation(const int size,
00530                  const double *arg1,
00531                  const double *arg2,
00532                  double * argRes,
00533                  BinaryFunction operation)
00534 {
00535   for (int i = 0; i < size; ++i) {
00536     argRes[i] = operation(arg1[i], arg2[i]);
00537   }
00538   return;
00539 }
00540 
00541 template <typename BinaryFunction>
00542 inline void tensor_binary_operation(const int size,
00543                  double arg1,
00544                  const double *arg2,
00545                  double *argRes,
00546                  BinaryFunction operation)
00547 {
00548   for (int i = 0; i < size; ++i) {
00549     argRes[i] = operation(arg1, arg2[i]);
00550   }
00551   return;
00552 }
00553 
00554 template <typename BinaryFunction>
00555 inline void tensor_binary_operation(const int size,
00556                  const double *arg1,
00557                  double arg2,
00558                  double *argRes,
00559                  BinaryFunction operation)
00560 {
00561   for (int i = 0; i < size; ++i) {
00562     argRes[i] = operation(arg1[i], arg2);
00563   }
00564   return;
00565 }
00566 
00567 } // end of namespace
00568 #endif