escript  Revision_Unversioneddirectory
BlockOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 
17 #ifndef __PASO_BLOCKOPS_H__
18 #define __PASO_BLOCKOPS_H__
19 
20 #include "Paso.h"
21 #include <cstring> // memcpy
22 
23 #ifdef USE_LAPACK
24  #ifdef MKL_LAPACK
25  #include <mkl_lapack.h>
26  #include <mkl_cblas.h>
27  #else
28  extern "C" {
29  #include <clapack.h>
30  #include <cblas.h>
31  }
32  #endif
33 #endif
34 
35 namespace paso {
36 
37 inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
38 {
39  memcpy((void*)R, (void*)V, N*sizeof(double));
40 }
41 
43 inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
44 {
45  const double S1 = V[0];
46  const double S2 = V[1];
47  const double A11 = mat[0];
48  const double A12 = mat[2];
49  const double A21 = mat[1];
50  const double A22 = mat[3];
51  R[0] -= A11 * S1 + A12 * S2;
52  R[1] -= A21 * S1 + A22 * S2;
53 }
54 
56 inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
57 {
58  const double S1 = V[0];
59  const double S2 = V[1];
60  const double S3 = V[2];
61  const double A11 = mat[0];
62  const double A21 = mat[1];
63  const double A31 = mat[2];
64  const double A12 = mat[3];
65  const double A22 = mat[4];
66  const double A32 = mat[5];
67  const double A13 = mat[6];
68  const double A23 = mat[7];
69  const double A33 = mat[8];
70  R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
71  R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
72  R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
73 }
74 
75 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a LAPACK version to enable operations on block sizes > 3.")
76 
78 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
79 {
80 #ifdef USE_LAPACK
81  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
82 #else
84 #endif
85 }
86 
87 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
88 {
89 #ifdef USE_LAPACK
90  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
91 #else
93 #endif
94 }
95 
96 inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
97 {
98  const double A11 = A[0];
99  const double A12 = A[2];
100  const double A21 = A[1];
101  const double A22 = A[3];
102  double D = A11*A22-A12*A21;
103  if (std::abs(D) > 0) {
104  D = 1./D;
105  invA[0] = A22*D;
106  invA[1] = -A21*D;
107  invA[2] = -A12*D;
108  invA[3] = A11*D;
109  } else {
110  *failed = 1;
111  }
112 }
113 
114 inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
115 {
116  const double A11 = A[0];
117  const double A21 = A[1];
118  const double A31 = A[2];
119  const double A12 = A[3];
120  const double A22 = A[4];
121  const double A32 = A[5];
122  const double A13 = A[6];
123  const double A23 = A[7];
124  const double A33 = A[8];
125  double D = A11*(A22*A33-A23*A32) +
126  A12*(A31*A23-A21*A33) +
127  A13*(A21*A32-A31*A22);
128  if (std::abs(D) > 0) {
129  D = 1./D;
130  invA[0] = (A22*A33-A23*A32)*D;
131  invA[1] = (A31*A23-A21*A33)*D;
132  invA[2] = (A21*A32-A31*A22)*D;
133  invA[3] = (A13*A32-A12*A33)*D;
134  invA[4] = (A11*A33-A31*A13)*D;
135  invA[5] = (A12*A31-A11*A32)*D;
136  invA[6] = (A12*A23-A13*A22)*D;
137  invA[7] = (A13*A21-A11*A23)*D;
138  invA[8] = (A11*A22-A12*A21)*D;
139  } else {
140  *failed = 1;
141  }
142 }
143 
145 inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
146 {
147 #ifdef USE_LAPACK
148 #ifdef MKL_LAPACK
149  int res = 0;
150  dgetrf(&N, &N, mat, &N, pivot, &res);
151  if (res != 0)
152  *failed = 1;
153 #else
154  int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
155  if (res != 0)
156  *failed = 1;
157 #endif // MKL_LAPACK
158 #else
160 #endif
161 }
162 
164 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
165 {
166 #ifdef USE_LAPACK
167 #ifdef MKL_LAPACK
168  int res = 0;
169  int ONE = 1;
170  dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
171  if (res != 0)
172  *failed = 1;
173 #else
174  int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
175  if (res != 0)
176  *failed = 1;
177 #endif // MKL_LAPACK
178 #else
180 #endif
181 }
182 
184 inline void BlockOps_MViP_2(const double* mat, double* V)
185 {
186  const double S1 = V[0];
187  const double S2 = V[1];
188  const double A11 = mat[0];
189  const double A12 = mat[2];
190  const double A21 = mat[1];
191  const double A22 = mat[3];
192  V[0] = A11 * S1 + A12 * S2;
193  V[1] = A21 * S1 + A22 * S2;
194 }
195 
197 inline void BlockOps_MViP_3(const double* mat, double* V)
198 {
199  const double S1 = V[0];
200  const double S2 = V[1];
201  const double S3 = V[2];
202  const double A11 = mat[0];
203  const double A21 = mat[1];
204  const double A31 = mat[2];
205  const double A12 = mat[3];
206  const double A22 = mat[4];
207  const double A32 = mat[5];
208  const double A13 = mat[6];
209  const double A23 = mat[7];
210  const double A33 = mat[8];
211  V[0] = A11 * S1 + A12 * S2 + A13 * S3;
212  V[1] = A21 * S1 + A22 * S2 + A23 * S3;
213  V[2] = A31 * S1 + A32 * S2 + A33 * S3;
214 }
215 
216 inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
217  index_t* pivot, double* x)
218 {
219  if (n_block == 1) {
220 #pragma omp parallel for
221  for (dim_t i=0; i<n; ++i)
222  x[i] *= D[i];
223  } else if (n_block == 2) {
224 #pragma omp parallel for
225  for (dim_t i=0; i<n; ++i)
226  BlockOps_MViP_2(&D[4*i], &x[2*i]);
227  } else if (n_block == 3) {
228 #pragma omp parallel for
229  for (dim_t i=0; i<n; ++i)
230  BlockOps_MViP_3(&D[9*i], &x[3*i]);
231  } else {
232  int failed = 0;
233 #pragma omp parallel for
234  for (dim_t i=0; i<n; ++i) {
235  const dim_t block_size = n_block*n_block;
236  BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
237  }
238  if (failed > 0) {
239  Esys_setError(ZERO_DIVISION_ERROR, "BlockOps_solveAll: solution failed.");
240  }
241  }
242 }
243 
244 } // namespace paso
245 
246 #endif // __PASO_BLOCKOPS_H__
247 
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:37
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:145
Definition: error.h:49
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:164
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:184
void Esys_setError(Esys_ErrorCodeType err, __const char *msg)
Definition: error.cpp:38
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:96
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:56
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:114
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:87
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:43
Definition: AMG.cpp:38
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:75
static dim_t N
Definition: SparseMatrix_saveHB.cpp:37
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:216
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:78
int index_t
Definition: types.h:24
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:120
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:197
index_t dim_t
Definition: types.h:27