Free Electron
MatrixSqrt.h
Go to the documentation of this file.
1 /* Copyright (C) 2003-2021 Free Electron Organization
2  Any use of this software requires a license. If a valid license
3  was not distributed with this file, visit freeelectron.org. */
4 
5 /** @file */
6 
7 #ifndef __geometry_MatrixSqrt_h__
8 #define __geometry_MatrixSqrt_h__
9 
10 #define FE_MSQ_DEBUG FALSE
11 #define FE_MSQ_VERIFY (FE_CODEGEN<=FE_DEBUG)
12 #define FE_MSQ_MAX_ERROR_R 1e-3
13 #define FE_MSQ_MAX_ERROR_T 1e-2
14 
15 //* 0 = Denman-Beavers two inverses per iteration
16 //* 1 = Meini one inverse per iteration
17 //* 2 = Schulz no inverses
18 #define FE_MSQ_METHOD 0
19 
20 //* NOTE Meini and Shulz use the (3,3) element, so a 3x4 typename will fail
21 
22 //* NOTE Schulz doesn't seem to converge very well
23 
24 namespace fe
25 {
26 namespace ext
27 {
28 
29 /**************************************************************************//**
30  @brief solve A=B*B for B, given A
31 
32  @ingroup geometry
33 
34  Uses Denman-Beavers square root iteration.
35 
36  B will be set with best result even with a convergence exception,
37  so the exception could be caught and ignored.
38 *//***************************************************************************/
39 template <typename MATRIX>
41 {
42  public:
43  MatrixSqrt(void):
44  m_iterations(8)
45  {}
46 
47  void solve(MATRIX& B, const MATRIX& A) const;
48 
49  void setIterations(U32 iterations) { m_iterations=iterations; }
50 
51  private:
52  U32 m_iterations;
53 };
54 
55 template <typename MATRIX>
56 inline void MatrixSqrt<MATRIX>::solve(MATRIX& B, const MATRIX& A) const
57 {
58 #if FE_MSQ_DEBUG
59  feLog("\nA\n%s\n",c_print(A));
60 #endif
61 
62  MATRIX AA=A;
63  MATRIX correction;
64 
65  //* cancellation protection
66  const BWORD fix0=(fabs(AA(0,0)+1)<1e-3);
67  const BWORD fix1=(fabs(AA(1,1)+1)<1e-3);
68  const BWORD fix2=(fabs(AA(2,2)+1)<1e-3);
69  const BWORD fix=(fix0 || fix1 || fix2);
70 
71  if(fix)
72  {
73  Matrix<3,4,F64> doubleY=AA;
74 
75  Quaternion<F64> quat=doubleY;
76 
77  F64 radians;
78  Vector<3,F64> axis;
79  quat.computeAngleAxis(radians,axis);
80 
81  const F64 tiny=0.03;
82  const F64 tinyAngle=tiny*radians;
83 
84  Quaternion<F64> forward(0.5*tinyAngle,axis);
85  const Matrix<3,4,F64> correct64(forward);
86  correction=correct64;
87  const SpatialVector forwardT=0.5*tiny*doubleY.translation();
88  translate(correction,forwardT);
89 
90  Quaternion<F64> reverse(-tinyAngle,axis);
91  const Matrix<3,4,F64> tweak64(reverse);
92  MATRIX tweak=tweak64;
93  const SpatialVector reverseT= -tiny*doubleY.translation();
94  translate(tweak,reverseT);
95 
96 #if FE_MSQ_METHOD!=0
97  correction(3,3)=1;
98  tweak(3,3)=1;
99 #endif
100 
101 // feLog("ty\n%s\nyt\n%s\n",
102 // c_print(tweak*AA),
103 // c_print(AA*tweak));
104 
105  AA=tweak*AA;
106 
107 #if FE_MSQ_DEBUG
108  feLog("\ntweak\n%s\n",c_print(tweak));
109  feLog("\ncorrection\n%s\n",c_print(correction));
110 #endif
111  }
112 
113  MATRIX Y[2];
114  MATRIX Z[2];
115  U32 current=0;
116 
117  Y[0]=AA;
118  setIdentity(Z[0]);
119 
120 #if FE_MSQ_METHOD==0
121  //* Denman-Beavers
122  MATRIX invY;
123  MATRIX invZ;
124 #elif FE_MSQ_METHOD==1
125  //* Meini
126  Y[1]=Y[0];
127  Y[0]=Z[0]-Y[1]; //* I-A'
128  Z[0]=2*(Z[0]+Y[1]); //* 2(I+A')
129 
130  MATRIX invZ;
131 #else
132  //* Schulz
133 
134  MATRIX I3;
135  setIdentity(I3);
136  I3*=3;
137 #endif
138 
139  U32 iteration;
140  for(iteration=0;iteration<m_iterations;iteration++)
141  {
142  U32 last=current;
143  current= !current;
144 
145 #if FE_MSQ_DEBUG
146  feLog("\n>>>> iteration %d\nY\n%s\nZ\n%s\n",iteration,
147  c_print(Y[last]),
148  c_print(Z[last]));
149  feLog("Y*Y\n%s\nZ*Z\n%s\n",
150  c_print(Y[last]*Y[last]),
151  c_print(Z[last]*Z[last]));
152 #endif
153 
154 #if FE_MSQ_METHOD==0
155 
156  //* Denman-Beavers (1976)
157 
158  invert(invY,Y[last]);
159  invert(invZ,Z[last]);
160 
161 #if FE_MSQ_DEBUG
162  feLog("invY\n%s\n",
163  c_print(invY));
164  feLog("invZ\n%s\n",
165  c_print(invZ));
166  feLog("Y+invZ\n%s\nZ+invY\n%s\n",
167  c_print(Y[last]+invZ),
168  c_print(Z[last]+invY));
169 #endif
170 
171  Y[current]=0.5*(Y[last]+invZ);
172  Z[current]=0.5*(Z[last]+invY);
173 
174 #elif FE_MSQ_METHOD==1
175 
176  //* Meini (2004)
177 
178  invert(invZ,Z[last]);
179 
180 #if FE_MSQ_DEBUG
181  feLog("invZ\n%s\n",
182  c_print(invZ));
183 #endif
184 
185  Y[current]= -1*Y[last]*invZ*Y[last];
186  Z[current]=Z[last]+2*Y[current];
187 
188 #else
189 
190  //* Schulz
191 
192  MATRIX I3ZY=I3-Z[last]*Y[last];
193 
194  Y[current]=0.5*Y[last]*I3ZY;
195  Z[current]=0.5*I3ZY*Z[last];
196 
197 #endif
198  }
199 
200 #if FE_MSQ_METHOD==0
201  //* Denman-Beavers
202  MATRIX& R=Y[current];
203 #elif FE_MSQ_METHOD==1
204  //* Meini
205  MATRIX R=0.25*Z[current];
206  R(3,3)=1;
207 #else
208  //* Schulz
209  MATRIX& R=Y[current];
210  R(3,3)=1;
211 #endif
212 
213 #if FE_MSQ_DEBUG
214  feLog("\nA\n%s\n",c_print(A));
215  if(fix)
216  {
217  feLog("\nA'\n%s\n",c_print(AA));
218  }
219  feLog("\nB'\n%s\nB'*B'\n%s\n",c_print(R),c_print(R*R));
220 #endif
221 
222  if(fix)
223  {
224  B=correction*R;
225 
226 #if FE_MSQ_DEBUG
227  feLog("\ncorrection\n%s\n",c_print(correction));
228  feLog("\ncorrected\n%s\n",c_print(B));
229  feLog("\nsquared\n%s\n",c_print(B*B));
230 #endif
231  }
232  else
233  {
234  B=R;
235  }
236 
237 #if FE_MSQ_VERIFY
238  BWORD invalid=FALSE;
239  MATRIX diff=AA-R*R;
240  F32 sumR=0.0f;
241  F32 sumT=0.0f;
242  for(U32 m=0;m<width(diff);m++)
243  {
244  U32 n;
245  for(n=0;n<height(diff)-1;n++)
246  {
247  if(FE_INVALID_SCALAR(diff(m,n)))
248  {
249  invalid=TRUE;
250  }
251  sumR+=fabs(diff(m,n));
252  }
253  if(FE_INVALID_SCALAR(diff(m,n)))
254  {
255  invalid=TRUE;
256  }
257  sumT+=fabs(diff(m,n));
258  }
259 #endif
260 #if FE_MSQ_VERIFY && FE_MSQ_DEBUG
261  feLog("\ndiff\n%s\ncomponent sumR=%.6G\n",c_print(diff),sumR);
262 #endif
263 #if FE_MSQ_VERIFY
264  if(invalid || sumR>FE_MSQ_MAX_ERROR_R || sumT>FE_MSQ_MAX_ERROR_T)
265  {
266  feLog("MatrixSqrt< %s >::solve"
267  " error of %.6G,%.6G exceeded limit of %.6G,%.6G\n",
268  FE_TYPESTRING(MATRIX).c_str(),
269  sumR,sumT,FE_MSQ_MAX_ERROR_R,FE_MSQ_MAX_ERROR_T);
270  feLog("\nA'\n%s\n",c_print(AA));
271  feLog("\nB'\n%s\nB'*B'\n%s\n",c_print(R),c_print(R*R));
272 
273  feX("MatrixSqrt<>::solve","failed to converge");
274  }
275 #endif
276 }
277 
278 } /* namespace ext */
279 } /* namespace fe */
280 
281 #endif /* __geometry_MatrixSqrt_h__ */
kernel
Definition: namespace.dox:3
Matrix< 4, 4, T > & invert(Matrix< 4, 4, T > &a_inverted, const Matrix< 4, 4, T > &a_matrix)
4x4 full matrix inversion
Definition: Matrix.h:1033
General template for fixed size numeric matrices.
Definition: Matrix.h:42
Dense vector - size fixed by template.
Definition: Vector.h:19
solve A=B*B for B, given A
Definition: MatrixSqrt.h:40
Four-dimensional complex number.
Definition: Quaternion.h:32