escript  Revision_Unversioneddirectory
LocalOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 
17 
18 #if !defined escript_LocalOps_H
19 #define escript_LocalOps_H
20 #include <cmath>
21 #ifndef M_PI
22 # define M_PI 3.14159265358979323846 /* pi */
23 #endif
24 
25 
34 namespace escript {
35 
40 inline
41 bool nancheck(double d)
42 {
43  using namespace std;
44  // Q: so why not just test d!=d?
45  // A: Coz it doesn't always work [I've checked].
46  // One theory is that the optimizer skips the test.
47 #if defined _isnan
48  return _isnan(d);
49 #else
50  return isnan(d); // isNan should be a function in C++ land
51 #endif
52 }
53 
58 inline
59 double makeNaN()
60 {
61 #ifdef nan
62  return nan("");
63 #else
64  return sqrt(-1.);
65 #endif
66 
67 }
68 
69 
77 inline
78 void eigenvalues1(const double A00,double* ev0) {
79 
80  *ev0=A00;
81 
82 }
93 inline
94 void eigenvalues2(const double A00,const double A01,const double A11,
95  double* ev0, double* ev1) {
96  const register double trA=(A00+A11)/2.;
97  const register double A_00=A00-trA;
98  const register double A_11=A11-trA;
99  const register double s=sqrt(A01*A01-A_00*A_11);
100  *ev0=trA-s;
101  *ev1=trA+s;
102 }
117 inline
118 void eigenvalues3(const double A00, const double A01, const double A02,
119  const double A11, const double A12,
120  const double A22,
121  double* ev0, double* ev1,double* ev2) {
122 
123  const register double trA=(A00+A11+A22)/3.;
124  const register double A_00=A00-trA;
125  const register double A_11=A11-trA;
126  const register double A_22=A22-trA;
127  const register double A01_2=A01*A01;
128  const register double A02_2=A02*A02;
129  const register double A12_2=A12*A12;
130  const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
131  if (p<=0.) {
132  *ev2=trA;
133  *ev1=trA;
134  *ev0=trA;
135 
136  } else {
137  const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
138  const register double sq_p=sqrt(p/3.);
139  register double z=-q/(2*pow(sq_p,3));
140  if (z<-1.) {
141  z=-1.;
142  } else if (z>1.) {
143  z=1.;
144  }
145  const register double alpha_3=acos(z)/3.;
146  *ev2=trA+2.*sq_p*cos(alpha_3);
147  *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
148  *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
149  }
150 }
160 inline
161 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
162 {
163  eigenvalues1(A00,ev0);
164  *V00=1.;
165  return;
166 }
178 inline
179 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
180  double* V0, double*V1)
181 {
182  register double absA00=fabs(A00);
183  register double absA10=fabs(A10);
184  register double absA01=fabs(A01);
185  register double absA11=fabs(A11);
186  register double m=absA11>absA10 ? absA11 : absA10;
187  if (absA00>m || absA01>m) {
188  *V0=-A01;
189  *V1=A00;
190  } else {
191  if (m<=0) {
192  *V0=1.;
193  *V1=0.;
194  } else {
195  *V0=A11;
196  *V1=-A10;
197  }
198  }
199 }
218 inline
219 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
220  const double A01,const double A11,const double A21,
221  const double A02,const double A12,const double A22,
222  double* V0,double* V1,double* V2)
223 {
224  double TEMP0,TEMP1;
225  register const double I00=1./A00;
226  register const double IA10=I00*A10;
227  register const double IA20=I00*A20;
228  vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
229  A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
230  *V0=-(A10*TEMP0+A20*TEMP1);
231  *V1=A00*TEMP0;
232  *V2=A00*TEMP1;
233 }
234 
252 inline
253 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
254  double* ev0, double* ev1,
255  double* V00, double* V10, double* V01, double* V11,
256  const double tol)
257 {
258  double TEMP0,TEMP1;
259  eigenvalues2(A00,A01,A11,ev0,ev1);
260  const register double absev0=fabs(*ev0);
261  const register double absev1=fabs(*ev1);
262  register double max_ev=absev0>absev1 ? absev0 : absev1;
263  if (fabs((*ev0)-(*ev1))<tol*max_ev) {
264  *V00=1.;
265  *V10=0.;
266  *V01=0.;
267  *V11=1.;
268  } else {
269  vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
270  const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
271  if (TEMP0<0.) {
272  *V00=-TEMP0*scale;
273  *V10=-TEMP1*scale;
274  if (TEMP1<0.) {
275  *V01= *V10;
276  *V11=-(*V00);
277  } else {
278  *V01=-(*V10);
279  *V11= (*V00);
280  }
281  } else if (TEMP0>0.) {
282  *V00=TEMP0*scale;
283  *V10=TEMP1*scale;
284  if (TEMP1<0.) {
285  *V01=-(*V10);
286  *V11= (*V00);
287  } else {
288  *V01= (*V10);
289  *V11=-(*V00);
290  }
291  } else {
292  *V00=0.;
293  *V10=1;
294  *V11=0.;
295  *V01=1.;
296  }
297  }
298 }
307 inline
308 void normalizeVector3(double* V0,double* V1,double* V2)
309 {
310  register double s;
311  if (*V0>0) {
312  s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
313  *V0*=s;
314  *V1*=s;
315  *V2*=s;
316  } else if (*V0<0) {
317  s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
318  *V0*=s;
319  *V1*=s;
320  *V2*=s;
321  } else {
322  if (*V1>0) {
323  s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
324  *V1*=s;
325  *V2*=s;
326  } else if (*V1<0) {
327  s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
328  *V1*=s;
329  *V2*=s;
330  } else {
331  *V2=1.;
332  }
333  }
334 }
361 inline
362 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
363  const double A11, const double A12, const double A22,
364  double* ev0, double* ev1, double* ev2,
365  double* V00, double* V10, double* V20,
366  double* V01, double* V11, double* V21,
367  double* V02, double* V12, double* V22,
368  const double tol)
369 {
370  register const double absA01=fabs(A01);
371  register const double absA02=fabs(A02);
372  register const double m=absA01>absA02 ? absA01 : absA02;
373  if (m<=0) {
374  double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
375  eigenvalues_and_eigenvectors2(A11,A12,A22,
376  &TEMP_EV0,&TEMP_EV1,
377  &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
378  if (A00<=TEMP_EV0) {
379  *V00=1.;
380  *V10=0.;
381  *V20=0.;
382  *V01=0.;
383  *V11=TEMP_V00;
384  *V21=TEMP_V10;
385  *V02=0.;
386  *V12=TEMP_V01;
387  *V22=TEMP_V11;
388  *ev0=A00;
389  *ev1=TEMP_EV0;
390  *ev2=TEMP_EV1;
391  } else if (A00>TEMP_EV1) {
392  *V02=1.;
393  *V12=0.;
394  *V22=0.;
395  *V00=0.;
396  *V10=TEMP_V00;
397  *V20=TEMP_V10;
398  *V01=0.;
399  *V11=TEMP_V01;
400  *V21=TEMP_V11;
401  *ev0=TEMP_EV0;
402  *ev1=TEMP_EV1;
403  *ev2=A00;
404  } else {
405  *V01=1.;
406  *V11=0.;
407  *V21=0.;
408  *V00=0.;
409  *V10=TEMP_V00;
410  *V20=TEMP_V10;
411  *V02=0.;
412  *V12=TEMP_V01;
413  *V22=TEMP_V11;
414  *ev0=TEMP_EV0;
415  *ev1=A00;
416  *ev2=TEMP_EV1;
417  }
418  } else {
419  eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
420  const double absev0=fabs(*ev0);
421  const double absev1=fabs(*ev1);
422  const double absev2=fabs(*ev2);
423  double max_ev=absev0>absev1 ? absev0 : absev1;
424  max_ev=max_ev>absev2 ? max_ev : absev2;
425  const double d_01=fabs((*ev0)-(*ev1));
426  const double d_12=fabs((*ev1)-(*ev2));
427  const double max_d=d_01>d_12 ? d_01 : d_12;
428  if (max_d<=tol*max_ev) {
429  *V00=1.;
430  *V10=0;
431  *V20=0;
432  *V01=0;
433  *V11=1.;
434  *V21=0;
435  *V02=0;
436  *V12=0;
437  *V22=1.;
438  } else {
439  const double S00=A00-(*ev0);
440  const double absS00=fabs(S00);
441  if (absS00>m) {
442  vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
443  } else if (absA02<m) {
444  vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
445  } else {
446  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
447  }
448  normalizeVector3(V00,V10,V20);;
449  const double T00=A00-(*ev2);
450  const double absT00=fabs(T00);
451  if (absT00>m) {
452  vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
453  } else if (absA02<m) {
454  vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
455  } else {
456  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
457  }
458  const double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
459  *V02-=dot*(*V00);
460  *V12-=dot*(*V10);
461  *V22-=dot*(*V20);
462  normalizeVector3(V02,V12,V22);
463  *V01=(*V10)*(*V22)-(*V12)*(*V20);
464  *V11=(*V20)*(*V02)-(*V00)*(*V22);
465  *V21=(*V00)*(*V12)-(*V02)*(*V10);
466  normalizeVector3(V01,V11,V21);
467  }
468  }
469 }
470 
471 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
472 // SM is the product of the last axis_offset entries in arg_0.getShape().
473 inline
474 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
475 {
476  if (transpose == 0) {
477  for (int i=0; i<SL; i++) {
478  for (int j=0; j<SR; j++) {
479  double sum = 0.0;
480  for (int l=0; l<SM; l++) {
481  sum += A[i+SL*l] * B[l+SM*j];
482  }
483  C[i+SL*j] = sum;
484  }
485  }
486  }
487  else if (transpose == 1) {
488  for (int i=0; i<SL; i++) {
489  for (int j=0; j<SR; j++) {
490  double sum = 0.0;
491  for (int l=0; l<SM; l++) {
492  sum += A[i*SM+l] * B[l+SM*j];
493  }
494  C[i+SL*j] = sum;
495  }
496  }
497  }
498  else if (transpose == 2) {
499  for (int i=0; i<SL; i++) {
500  for (int j=0; j<SR; j++) {
501  double sum = 0.0;
502  for (int l=0; l<SM; l++) {
503  sum += A[i+SL*l] * B[l*SR+j];
504  }
505  C[i+SL*j] = sum;
506  }
507  }
508  }
509 }
510 
511 template <typename UnaryFunction>
512 inline void tensor_unary_operation(const int size,
513  const double *arg1,
514  double * argRes,
515  UnaryFunction operation)
516 {
517  for (int i = 0; i < size; ++i) {
518  argRes[i] = operation(arg1[i]);
519  }
520  return;
521 }
522 
523 template <typename BinaryFunction>
524 inline void tensor_binary_operation(const int size,
525  const double *arg1,
526  const double *arg2,
527  double * argRes,
528  BinaryFunction operation)
529 {
530  for (int i = 0; i < size; ++i) {
531  argRes[i] = operation(arg1[i], arg2[i]);
532  }
533  return;
534 }
535 
536 template <typename BinaryFunction>
537 inline void tensor_binary_operation(const int size,
538  double arg1,
539  const double *arg2,
540  double *argRes,
541  BinaryFunction operation)
542 {
543  for (int i = 0; i < size; ++i) {
544  argRes[i] = operation(arg1, arg2[i]);
545  }
546  return;
547 }
548 
549 template <typename BinaryFunction>
550 inline void tensor_binary_operation(const int size,
551  const double *arg1,
552  double arg2,
553  double *argRes,
554  BinaryFunction operation)
555 {
556  for (int i = 0; i < size; ++i) {
557  argRes[i] = operation(arg1[i], arg2);
558  }
559  return;
560 }
561 
562 } // end of namespace
563 #endif
Definition: AbstractContinuousDomain.cpp:24
void tensor_unary_operation(const int size, const double *arg1, double *argRes, UnaryFunction operation)
Definition: LocalOps.h:512
void transpose(const DataTypes::ValueType &in, const DataTypes::ShapeType &inShape, DataTypes::ValueType::size_type inOffset, DataTypes::ValueType &ev, const DataTypes::ShapeType &evShape, DataTypes::ValueType::size_type evOffset, int axis_offset)
Transpose each data point of this Data object around the given axis.
Definition: DataMaths.h:394
void vectorInKernel2(const double A00, const double A10, const double A01, const double A11, double *V0, double *V1)
returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension i...
Definition: LocalOps.h:179
double makeNaN()
returns a NaN.
Definition: LocalOps.h:59
STL namespace.
#define M_PI
Definition: LocalOps.h:22
void eigenvalues2(const double A00, const double A01, const double A11, double *ev0, double *ev1)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:94
bool nancheck(double d)
acts as a wrapper to isnan.
Definition: LocalOps.h:41
void scale(dim_t N, double *x, double a)
x = a*x
Definition: PasoUtil.h:93
void matrix_matrix_product(const int SL, const int SM, const int SR, const double *A, const double *B, double *C, int transpose)
Definition: LocalOps.h:474
void eigenvalues_and_eigenvectors1(const double A00, double *ev0, double *V00, const double tol)
solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:161
void eigenvalues1(const double A00, double *ev0)
solves a 1x1 eigenvalue A*V=ev*V problem
Definition: LocalOps.h:78
void normalizeVector3(double *V0, double *V1, double *V2)
nomalizes a 3-d vector such that length is one and first non-zero component is positive.
Definition: LocalOps.h:308
void eigenvalues_and_eigenvectors2(const double A00, const double A01, const double A11, double *ev0, double *ev1, double *V00, double *V10, double *V01, double *V11, const double tol)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are ordered by increasing valu...
Definition: LocalOps.h:253
void eigenvalues3(const double A00, const double A01, const double A02, const double A11, const double A12, const double A22, double *ev0, double *ev1, double *ev2)
solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:118
void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02, const double A11, const double A12, const double A22, double *ev0, double *ev1, double *ev2, double *V00, double *V10, double *V20, double *V01, double *V11, double *V21, double *V02, double *V12, double *V22, const double tol)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are ordered by increasing valu...
Definition: LocalOps.h:362
void vectorInKernel3__nonZeroA00(const double A00, const double A10, const double A20, const double A01, const double A11, const double A21, const double A02, const double A12, const double A22, double *V0, double *V1, double *V2)
returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]] assuming that ...
Definition: LocalOps.h:219
void tensor_binary_operation(const int size, const double *arg1, const double *arg2, double *argRes, BinaryFunction operation)
Definition: LocalOps.h:524