escript  Revision_
BlockOps.h
Go to the documentation of this file.
00001 
00002 /*****************************************************************************
00003 *
00004 * Copyright (c) 2003-2014 by University of Queensland
00005 * http://www.uq.edu.au
00006 *
00007 * Primary Business: Queensland, Australia
00008 * Licensed under the Open Software License version 3.0
00009 * http://www.opensource.org/licenses/osl-3.0.php
00010 *
00011 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
00012 * Development 2012-2013 by School of Earth Sciences
00013 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
00014 *
00015 *****************************************************************************/
00016 
00017 #ifndef __PASO_BLOCKOPS_H__
00018 #define __PASO_BLOCKOPS_H__
00019 
00020 #include "Paso.h"
00021 #include <cstring> // memcpy
00022 
00023 #ifdef USE_LAPACK
00024    #ifdef MKL_LAPACK
00025       #include <mkl_lapack.h>
00026       #include <mkl_cblas.h>
00027    #else
00028       extern "C" {
00029       #include <clapack.h>
00030       #include <cblas.h>
00031       }
00032    #endif
00033 #endif
00034 
00035 namespace paso {
00036 
00037 inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
00038 {
00039     memcpy((void*)R, (void*)V, N*sizeof(double));
00040 }
00041 
00043 inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
00044 {
00045     const double S1 = V[0];
00046     const double S2 = V[1];
00047     const double A11 = mat[0];
00048     const double A12 = mat[2];
00049     const double A21 = mat[1];
00050     const double A22 = mat[3];
00051     R[0] -= A11 * S1 + A12 * S2;
00052     R[1] -= A21 * S1 + A22 * S2;
00053 }
00054 
00056 inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
00057 {
00058     const double S1 = V[0];
00059     const double S2 = V[1];
00060     const double S3 = V[2];
00061     const double A11 = mat[0];
00062     const double A21 = mat[1];
00063     const double A31 = mat[2];
00064     const double A12 = mat[3];
00065     const double A22 = mat[4];
00066     const double A32 = mat[5];
00067     const double A13 = mat[6];
00068     const double A23 = mat[7];
00069     const double A33 = mat[8];
00070     R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
00071     R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
00072     R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
00073 }
00074 
00075 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a LAPACK version to enable operations on block sizes > 3.")
00076 
00078 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
00079 {
00080 #ifdef USE_LAPACK
00081     cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
00082 #else
00083     PASO_MISSING_CLAPACK;
00084 #endif
00085 }
00086 
00087 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
00088 {
00089 #ifdef USE_LAPACK
00090     cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
00091 #else
00092     PASO_MISSING_CLAPACK;
00093 #endif
00094 }
00095 
00096 inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
00097 {
00098     const double A11 = A[0];
00099     const double A12 = A[2];
00100     const double A21 = A[1];
00101     const double A22 = A[3];
00102     double D = A11*A22-A12*A21;
00103     if (std::abs(D) > 0) {
00104         D = 1./D;
00105         invA[0] =  A22*D;
00106         invA[1] = -A21*D;
00107         invA[2] = -A12*D;
00108         invA[3] =  A11*D;
00109     } else {
00110         *failed = 1;
00111     }
00112 }
00113 
00114 inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
00115 {
00116     const double A11 = A[0];
00117     const double A21 = A[1];
00118     const double A31 = A[2];
00119     const double A12 = A[3];
00120     const double A22 = A[4];
00121     const double A32 = A[5];
00122     const double A13 = A[6];
00123     const double A23 = A[7];
00124     const double A33 = A[8];
00125     double D = A11*(A22*A33-A23*A32) +
00126                A12*(A31*A23-A21*A33) +
00127                A13*(A21*A32-A31*A22);
00128     if (std::abs(D) > 0) {
00129         D = 1./D;
00130         invA[0] = (A22*A33-A23*A32)*D;
00131         invA[1] = (A31*A23-A21*A33)*D;
00132         invA[2] = (A21*A32-A31*A22)*D;
00133         invA[3] = (A13*A32-A12*A33)*D;
00134         invA[4] = (A11*A33-A31*A13)*D;
00135         invA[5] = (A12*A31-A11*A32)*D;
00136         invA[6] = (A12*A23-A13*A22)*D;
00137         invA[7] = (A13*A21-A11*A23)*D;
00138         invA[8] = (A11*A22-A12*A21)*D;
00139     } else {
00140         *failed = 1;
00141     }
00142 }
00143 
00145 inline void BlockOps_invM_N(dim_t N, double* mat, int* pivot, int* failed)
00146 {
00147 #ifdef USE_LAPACK
00148 #ifdef MKL_LAPACK
00149     int res = 0;
00150     dgetrf(&N, &N, mat, &N, pivot, &res);
00151     if (res != 0)
00152         *failed = 1;
00153 #else
00154     int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
00155     if (res != 0)
00156         *failed = 1;
00157 #endif // MKL_LAPACK
00158 #else
00159     PASO_MISSING_CLAPACK;
00160 #endif
00161 }
00162 
00164 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, int* pivot, int* failed)
00165 {
00166 #ifdef USE_LAPACK
00167 #ifdef MKL_LAPACK
00168     int res = 0;
00169     int ONE = 1;
00170     dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
00171     if (res != 0)
00172         *failed = 1;
00173 #else
00174     int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
00175     if (res != 0)
00176         *failed = 1;
00177 #endif // MKL_LAPACK
00178 #else
00179     PASO_MISSING_CLAPACK;
00180 #endif
00181 }
00182 
00184 inline void BlockOps_MViP_2(const double* mat, double* V)
00185 {
00186     const double S1 = V[0];
00187     const double S2 = V[1];
00188     const double A11 = mat[0];
00189     const double A12 = mat[2];
00190     const double A21 = mat[1];
00191     const double A22 = mat[3];
00192     V[0] = A11 * S1 + A12 * S2;
00193     V[1] = A21 * S1 + A22 * S2;
00194 }
00195 
00197 inline void BlockOps_MViP_3(const double* mat, double* V)
00198 {
00199     const double S1 = V[0];
00200     const double S2 = V[1];
00201     const double S3 = V[2];
00202     const double A11 = mat[0];
00203     const double A21 = mat[1];
00204     const double A31 = mat[2];
00205     const double A12 = mat[3];
00206     const double A22 = mat[4];
00207     const double A32 = mat[5];
00208     const double A13 = mat[6];
00209     const double A23 = mat[7];
00210     const double A33 = mat[8];
00211     V[0] = A11 * S1 + A12 * S2 + A13 * S3;
00212     V[1] = A21 * S1 + A22 * S2 + A23 * S3;
00213     V[2] = A31 * S1 + A32 * S2 + A33 * S3;
00214 }
00215 
00216 inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
00217                               index_t* pivot, double* x)
00218 {
00219     if (n_block == 1) {
00220 #pragma omp parallel for
00221         for (dim_t i=0; i<n; ++i)
00222             x[i] *= D[i];
00223     } else if (n_block == 2) {
00224 #pragma omp parallel for
00225         for (dim_t i=0; i<n; ++i)
00226             BlockOps_MViP_2(&D[4*i], &x[2*i]);
00227     } else if (n_block == 3) {
00228 #pragma omp parallel for
00229         for (dim_t i=0; i<n; ++i)
00230             BlockOps_MViP_3(&D[9*i], &x[3*i]);
00231     } else {
00232         int failed = 0;
00233 #pragma omp parallel for
00234         for (dim_t i=0; i<n; ++i) {
00235             const dim_t block_size = n_block*n_block;
00236             BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
00237         }
00238         if (failed > 0) {
00239             Esys_setError(ZERO_DIVISION_ERROR, "BlockOps_solveAll: solution failed.");
00240         }
00241     }
00242 }
00243 
00244 } // namespace paso
00245 
00246 #endif // __PASO_BLOCKOPS_H__
00247