escript
Revision_
|
00001 00002 /***************************************************************************** 00003 * 00004 * Copyright (c) 2003-2014 by University of Queensland 00005 * http://www.uq.edu.au 00006 * 00007 * Primary Business: Queensland, Australia 00008 * Licensed under the Open Software License version 3.0 00009 * http://www.opensource.org/licenses/osl-3.0.php 00010 * 00011 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 00012 * Development 2012-2013 by School of Earth Sciences 00013 * Development from 2014 by Centre for Geoscience Computing (GeoComp) 00014 * 00015 *****************************************************************************/ 00016 00017 00018 #if !defined escript_LocalOps_H 00019 #define escript_LocalOps_H 00020 #if defined(_WIN32) && defined(__INTEL_COMPILER) 00021 # include <mathimf.h> 00022 #else 00023 # include <cmath> 00024 #endif 00025 #ifndef M_PI 00026 # define M_PI 3.14159265358979323846 /* pi */ 00027 #endif 00028 00029 00038 namespace escript { 00039 00044 inline 00045 bool nancheck(double d) 00046 { 00047 // Q: so why not just test d!=d? 00048 // A: Coz it doesn't always work [I've checked]. 00049 // One theory is that the optimizer skips the test. 00050 #ifdef isnan 00051 return isnan(d); 00052 #elif defined _isnan 00053 return _isnan(d); 00054 #else 00055 return false; 00056 #endif 00057 } 00058 00063 inline 00064 double makeNaN() 00065 { 00066 #ifdef nan 00067 return nan(""); 00068 #else 00069 return sqrt(-1.); 00070 #endif 00071 00072 } 00073 00074 00082 inline 00083 void eigenvalues1(const double A00,double* ev0) { 00084 00085 *ev0=A00; 00086 00087 } 00098 inline 00099 void eigenvalues2(const double A00,const double A01,const double A11, 00100 double* ev0, double* ev1) { 00101 const register double trA=(A00+A11)/2.; 00102 const register double A_00=A00-trA; 00103 const register double A_11=A11-trA; 00104 const register double s=sqrt(A01*A01-A_00*A_11); 00105 *ev0=trA-s; 00106 *ev1=trA+s; 00107 } 00122 inline 00123 void eigenvalues3(const double A00, const double A01, const double A02, 00124 const double A11, const double A12, 00125 const double A22, 00126 double* ev0, double* ev1,double* ev2) { 00127 00128 const register double trA=(A00+A11+A22)/3.; 00129 const register double A_00=A00-trA; 00130 const register double A_11=A11-trA; 00131 const register double A_22=A22-trA; 00132 const register double A01_2=A01*A01; 00133 const register double A02_2=A02*A02; 00134 const register double A12_2=A12*A12; 00135 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.; 00136 if (p<=0.) { 00137 *ev2=trA; 00138 *ev1=trA; 00139 *ev0=trA; 00140 00141 } else { 00142 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02); 00143 const register double sq_p=sqrt(p/3.); 00144 register double z=-q/(2*pow(sq_p,3)); 00145 if (z<-1.) { 00146 z=-1.; 00147 } else if (z>1.) { 00148 z=1.; 00149 } 00150 const register double alpha_3=acos(z)/3.; 00151 *ev2=trA+2.*sq_p*cos(alpha_3); 00152 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.); 00153 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.); 00154 } 00155 } 00165 inline 00166 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol) 00167 { 00168 eigenvalues1(A00,ev0); 00169 *V00=1.; 00170 return; 00171 } 00183 inline 00184 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11, 00185 double* V0, double*V1) 00186 { 00187 register double absA00=fabs(A00); 00188 register double absA10=fabs(A10); 00189 register double absA01=fabs(A01); 00190 register double absA11=fabs(A11); 00191 register double m=absA11>absA10 ? absA11 : absA10; 00192 if (absA00>m || absA01>m) { 00193 *V0=-A01; 00194 *V1=A00; 00195 } else { 00196 if (m<=0) { 00197 *V0=1.; 00198 *V1=0.; 00199 } else { 00200 *V0=A11; 00201 *V1=-A10; 00202 } 00203 } 00204 } 00223 inline 00224 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20, 00225 const double A01,const double A11,const double A21, 00226 const double A02,const double A12,const double A22, 00227 double* V0,double* V1,double* V2) 00228 { 00229 double TEMP0,TEMP1; 00230 register const double I00=1./A00; 00231 register const double IA10=I00*A10; 00232 register const double IA20=I00*A20; 00233 vectorInKernel2(A11-IA10*A01,A12-IA10*A02, 00234 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1); 00235 *V0=-(A10*TEMP0+A20*TEMP1); 00236 *V1=A00*TEMP0; 00237 *V2=A00*TEMP1; 00238 } 00239 00257 inline 00258 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11, 00259 double* ev0, double* ev1, 00260 double* V00, double* V10, double* V01, double* V11, 00261 const double tol) 00262 { 00263 double TEMP0,TEMP1; 00264 eigenvalues2(A00,A01,A11,ev0,ev1); 00265 const register double absev0=fabs(*ev0); 00266 const register double absev1=fabs(*ev1); 00267 register double max_ev=absev0>absev1 ? absev0 : absev1; 00268 if (fabs((*ev0)-(*ev1))<tol*max_ev) { 00269 *V00=1.; 00270 *V10=0.; 00271 *V01=0.; 00272 *V11=1.; 00273 } else { 00274 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1); 00275 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1); 00276 if (TEMP0<0.) { 00277 *V00=-TEMP0*scale; 00278 *V10=-TEMP1*scale; 00279 if (TEMP1<0.) { 00280 *V01= *V10; 00281 *V11=-(*V00); 00282 } else { 00283 *V01=-(*V10); 00284 *V11= (*V00); 00285 } 00286 } else if (TEMP0>0.) { 00287 *V00=TEMP0*scale; 00288 *V10=TEMP1*scale; 00289 if (TEMP1<0.) { 00290 *V01=-(*V10); 00291 *V11= (*V00); 00292 } else { 00293 *V01= (*V10); 00294 *V11=-(*V00); 00295 } 00296 } else { 00297 *V00=0.; 00298 *V10=1; 00299 *V11=0.; 00300 *V01=1.; 00301 } 00302 } 00303 } 00312 inline 00313 void normalizeVector3(double* V0,double* V1,double* V2) 00314 { 00315 register double s; 00316 if (*V0>0) { 00317 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2)); 00318 *V0*=s; 00319 *V1*=s; 00320 *V2*=s; 00321 } else if (*V0<0) { 00322 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2)); 00323 *V0*=s; 00324 *V1*=s; 00325 *V2*=s; 00326 } else { 00327 if (*V1>0) { 00328 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2)); 00329 *V1*=s; 00330 *V2*=s; 00331 } else if (*V1<0) { 00332 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2)); 00333 *V1*=s; 00334 *V2*=s; 00335 } else { 00336 *V2=1.; 00337 } 00338 } 00339 } 00366 inline 00367 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02, 00368 const double A11, const double A12, const double A22, 00369 double* ev0, double* ev1, double* ev2, 00370 double* V00, double* V10, double* V20, 00371 double* V01, double* V11, double* V21, 00372 double* V02, double* V12, double* V22, 00373 const double tol) 00374 { 00375 register const double absA01=fabs(A01); 00376 register const double absA02=fabs(A02); 00377 register const double m=absA01>absA02 ? absA01 : absA02; 00378 if (m<=0) { 00379 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1; 00380 eigenvalues_and_eigenvectors2(A11,A12,A22, 00381 &TEMP_EV0,&TEMP_EV1, 00382 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol); 00383 if (A00<=TEMP_EV0) { 00384 *V00=1.; 00385 *V10=0.; 00386 *V20=0.; 00387 *V01=0.; 00388 *V11=TEMP_V00; 00389 *V21=TEMP_V10; 00390 *V02=0.; 00391 *V12=TEMP_V01; 00392 *V22=TEMP_V11; 00393 *ev0=A00; 00394 *ev1=TEMP_EV0; 00395 *ev2=TEMP_EV1; 00396 } else if (A00>TEMP_EV1) { 00397 *V02=1.; 00398 *V12=0.; 00399 *V22=0.; 00400 *V00=0.; 00401 *V10=TEMP_V00; 00402 *V20=TEMP_V10; 00403 *V01=0.; 00404 *V11=TEMP_V01; 00405 *V21=TEMP_V11; 00406 *ev0=TEMP_EV0; 00407 *ev1=TEMP_EV1; 00408 *ev2=A00; 00409 } else { 00410 *V01=1.; 00411 *V11=0.; 00412 *V21=0.; 00413 *V00=0.; 00414 *V10=TEMP_V00; 00415 *V20=TEMP_V10; 00416 *V02=0.; 00417 *V12=TEMP_V01; 00418 *V22=TEMP_V11; 00419 *ev0=TEMP_EV0; 00420 *ev1=A00; 00421 *ev2=TEMP_EV1; 00422 } 00423 } else { 00424 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2); 00425 const register double absev0=fabs(*ev0); 00426 const register double absev1=fabs(*ev1); 00427 const register double absev2=fabs(*ev2); 00428 register double max_ev=absev0>absev1 ? absev0 : absev1; 00429 max_ev=max_ev>absev2 ? max_ev : absev2; 00430 const register double d_01=fabs((*ev0)-(*ev1)); 00431 const register double d_12=fabs((*ev1)-(*ev2)); 00432 const register double max_d=d_01>d_12 ? d_01 : d_12; 00433 if (max_d<=tol*max_ev) { 00434 *V00=1.; 00435 *V10=0; 00436 *V20=0; 00437 *V01=0; 00438 *V11=1.; 00439 *V21=0; 00440 *V02=0; 00441 *V12=0; 00442 *V22=1.; 00443 } else { 00444 const register double S00=A00-(*ev0); 00445 const register double absS00=fabs(S00); 00446 if (absS00>m) { 00447 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20); 00448 } else if (absA02<m) { 00449 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20); 00450 } else { 00451 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20); 00452 } 00453 normalizeVector3(V00,V10,V20);; 00454 const register double T00=A00-(*ev2); 00455 const register double absT00=fabs(T00); 00456 if (absT00>m) { 00457 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22); 00458 } else if (absA02<m) { 00459 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22); 00460 } else { 00461 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22); 00462 } 00463 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20); 00464 *V02-=dot*(*V00); 00465 *V12-=dot*(*V10); 00466 *V22-=dot*(*V20); 00467 normalizeVector3(V02,V12,V22); 00468 *V01=(*V10)*(*V22)-(*V12)*(*V20); 00469 *V11=(*V20)*(*V02)-(*V00)*(*V22); 00470 *V21=(*V00)*(*V12)-(*V02)*(*V10); 00471 normalizeVector3(V01,V11,V21); 00472 } 00473 } 00474 } 00475 00476 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR) 00477 // SM is the product of the last axis_offset entries in arg_0.getShape(). 00478 inline 00479 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose) 00480 { 00481 if (transpose == 0) { 00482 for (int i=0; i<SL; i++) { 00483 for (int j=0; j<SR; j++) { 00484 double sum = 0.0; 00485 for (int l=0; l<SM; l++) { 00486 sum += A[i+SL*l] * B[l+SM*j]; 00487 } 00488 C[i+SL*j] = sum; 00489 } 00490 } 00491 } 00492 else if (transpose == 1) { 00493 for (int i=0; i<SL; i++) { 00494 for (int j=0; j<SR; j++) { 00495 double sum = 0.0; 00496 for (int l=0; l<SM; l++) { 00497 sum += A[i*SM+l] * B[l+SM*j]; 00498 } 00499 C[i+SL*j] = sum; 00500 } 00501 } 00502 } 00503 else if (transpose == 2) { 00504 for (int i=0; i<SL; i++) { 00505 for (int j=0; j<SR; j++) { 00506 double sum = 0.0; 00507 for (int l=0; l<SM; l++) { 00508 sum += A[i+SL*l] * B[l*SR+j]; 00509 } 00510 C[i+SL*j] = sum; 00511 } 00512 } 00513 } 00514 } 00515 00516 template <typename UnaryFunction> 00517 inline void tensor_unary_operation(const int size, 00518 const double *arg1, 00519 double * argRes, 00520 UnaryFunction operation) 00521 { 00522 for (int i = 0; i < size; ++i) { 00523 argRes[i] = operation(arg1[i]); 00524 } 00525 return; 00526 } 00527 00528 template <typename BinaryFunction> 00529 inline void tensor_binary_operation(const int size, 00530 const double *arg1, 00531 const double *arg2, 00532 double * argRes, 00533 BinaryFunction operation) 00534 { 00535 for (int i = 0; i < size; ++i) { 00536 argRes[i] = operation(arg1[i], arg2[i]); 00537 } 00538 return; 00539 } 00540 00541 template <typename BinaryFunction> 00542 inline void tensor_binary_operation(const int size, 00543 double arg1, 00544 const double *arg2, 00545 double *argRes, 00546 BinaryFunction operation) 00547 { 00548 for (int i = 0; i < size; ++i) { 00549 argRes[i] = operation(arg1, arg2[i]); 00550 } 00551 return; 00552 } 00553 00554 template <typename BinaryFunction> 00555 inline void tensor_binary_operation(const int size, 00556 const double *arg1, 00557 double arg2, 00558 double *argRes, 00559 BinaryFunction operation) 00560 { 00561 for (int i = 0; i < size; ++i) { 00562 argRes[i] = operation(arg1[i], arg2); 00563 } 00564 return; 00565 } 00566 00567 } // end of namespace 00568 #endif