ATLAS Offline Software
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative Struct Reference

#include <GPUClusterInfoAndMomentsCalculatorImpl.h>

Collaboration diagram for ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative:

Public Member Functions

CUDA_HOS_DEV RealSymmetricMatrixSolverIterative (const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
 
CUDA_HOS_DEV void tridiagonalize (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)
 
CUDA_HOS_DEV void compute_iteration (const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])
 
CUDA_HOS_DEV void compute (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
 
CUDA_HOS_DEV void get_solution (float(&eigenvalues)[3], float(&eigenvectors)[3][3], const float tolerance=s_typical_tolerance, const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
 Get the full eigenvalues and eigenvectors for this matrix. More...
 

Public Attributes

float a
 
float b
 
float c
 
float d
 
float e
 
float f
 
float scale
 

Static Public Attributes

static constexpr float s_typical_tolerance = std::numeric_limits<float>::min()
 
static constexpr int s_typical_max_iterations = 90
 
static constexpr float s_typical_near_zero = std::numeric_limits<float>::min()
 
static constexpr float s_typical_epsilon = std::numeric_limits<float>::epsilon()
 

Detailed Description

Definition at line 130 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

Constructor & Destructor Documentation

◆ RealSymmetricMatrixSolverIterative()

CUDA_HOS_DEV ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::RealSymmetricMatrixSolverIterative ( const float  a_orig,
const float  b_orig,
const float  c_orig,
const float  d_orig,
const float  e_orig,
const float  f_orig 
)
inline

Definition at line 140 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

141  {
142  using namespace std;
143 
144  const float max_ab = max( fabsf(a_orig), fabsf(b_orig) );
145  const float max_cd = max( fabsf(c_orig), fabsf(d_orig) );
146  const float max_ef = max( fabsf(e_orig), fabsf(f_orig) );
147  scale = max(max_ab, max(max_cd, max_ef) );
148  if (scale == 0.f)
149  {
150  scale = 1.f;
151  }
152  const float inv_scale = 1.0f / scale;
153  a = a_orig * inv_scale;
154  b = b_orig * inv_scale;
155  c = c_orig * inv_scale;
156  d = d_orig * inv_scale;
157  e = e_orig * inv_scale;
158  f = f_orig * inv_scale;
159  }

Member Function Documentation

◆ compute()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute ( float(&)  temp_diag[3],
float(&)  temp_subdiag[2],
float(&)  temp_mat[3][3],
const float  near_zero = s_typical_near_zero,
const float  epsilon = s_typical_epsilon,
const int  max_iter = s_typical_max_iterations 
)
inline

Definition at line 359 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

365  {
366  int iter_count = 0;
367  int start = 0, end = 2;
368 
369  const float precision_inv = 1.f / epsilon;
370 
371  while (end > 0)
372  {
373  for (int i = start; i < end; ++i)
374  {
375  if (fabsf(temp_subdiag[i]) < near_zero)
376  {
377  temp_subdiag[i] = 0.f;
378  }
379  else
380  {
381  const float scaled_subdiag = precision_inv * temp_subdiag[i];
382  if (scaled_subdiag * scaled_subdiag <= fabsf(temp_diag[i]) + fabsf(temp_diag[i + 1]))
383  {
384  temp_subdiag[i] = 0.f;
385  }
386  }
387  }
388 
389  while (end > 0 && temp_subdiag[end - 1] == 0.f)
390  {
391  --end;
392  }
393 
394  if (end <= 0)
395  {
396  break;
397  }
398 
399  ++iter_count;
400 
401  if (iter_count > max_iter)
402  {
403  printf("OUT OF ITERS! %d %d\n", start, end);
404  break;
405  }
406 
407  start = end - 1;
408 
409  while (start > 0 && temp_subdiag[start - 1] != 0.f)
410  {
411  --start;
412  }
413 
414  compute_iteration(start, end, temp_diag, temp_subdiag, temp_mat);
415  }
416 
417  //No need to sort eigenvalues and eigenvectors:
418  //we are going to check them all anyway.
419  }

◆ compute_iteration()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute_iteration ( const int  start,
const int  end,
float(&)  temp_diag[3],
float(&)  temp_subdiag[2],
float(&)  temp_mat[3][3] 
)
inline

Definition at line 228 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

233  {
234  const float td = (temp_diag[end - 1] - temp_diag[end]) * 0.5f;
235 
236  const float ee = temp_subdiag[end - 1];
237 
238  float mu = temp_diag[end];
239 
240  if (td == 0.f)
241  {
242  mu -= fabsf(ee);
243  }
244  else if (ee != 0.f)
245  {
246  const float ee_2 = ee * ee;
247 
248  const float h = hypot(td, ee);
249 
250  const float factor = td + h * ((td >= 0.f) - (td < 0.f));
251 
252  if (ee_2 == 0.f)
253  {
254  mu -= ee / (factor / ee);
255  }
256  else
257  {
258  mu -= ee_2 / factor;
259  }
260  }
261 
262  float x = temp_diag[start] - mu;
263  float z = temp_subdiag[start];
264 
265  for (int k = start; k < end && z != 0.f; ++k)
266  {
267  float givens_c, givens_s;
268 
269  /*if (z == 0.f)
270  {
271  givens_c = ((x >= 0.f) - (x < 0.f));
272  givens_s = 0.f;
273  }
274  else */if (x == 0.f)
275  {
276  givens_c = 0.f;
277  givens_s = (z < 0.f) - (z >= 0.f);
278  }
279  else if (fabsf(x) >= fabsf(z))
280  {
281  const float t = z / x;
282  const float u = hypot(1.f, fabsf(t)) * ((x >= 0.f) - (x < 0.f));
283 
284  givens_c = 1.f / u;
285  givens_s = -t * givens_c;
286  }
287  else
288  {
289  const float t = x / z;
290  const float u = hypot(1.f, fabsf(t)) * ((z >= 0.f) - (z < 0.f));
291 
292  givens_s = -1.f / u;
293  givens_c = -t * givens_s;
294  }
295 
296  const float sdk = product_sum_cornea_harrison_tang(givens_s,
297  temp_diag[k],
298  givens_c,
299  temp_subdiag[k]);
300 
301  const float dkp1 = product_sum_cornea_harrison_tang(givens_s,
302  temp_subdiag[k],
303  givens_c,
304  temp_diag[k + 1]);
305 
306  temp_diag[k] = product_sum_cornea_harrison_tang(givens_c,
308  temp_diag[k],
309  -givens_s,
310  temp_subdiag[k]),
311  -givens_s,
313  temp_subdiag[k],
314  -givens_s,
315  temp_diag[k + 1])
316  );
317  temp_diag[k + 1] = product_sum_cornea_harrison_tang(givens_s, sdk, givens_c, dkp1);
318  temp_subdiag[k] = product_sum_cornea_harrison_tang(givens_c, sdk, -givens_s, dkp1);
319 
320  if (k > start)
321  {
322  temp_subdiag[k - 1] = product_sum_cornea_harrison_tang(givens_c,
323  temp_subdiag[k - 1],
324  -givens_s,
325  z);
326  }
327 
328  x = temp_subdiag[k];
329 
330  if (k < end - 1)
331  {
332  z = -givens_s * temp_subdiag[k + 1];
333 
334  temp_subdiag[k + 1] *= givens_c;
335  }
336 
337  //We could skip if (c, s) == (1, 0)
338  //Also, apply on the right means
339  //we have to consider -s instead of s.
340  for (int i = 0; i < 3; ++i)
341  {
342  float & c_1 = temp_mat[k] [i];
343  float & c_2 = temp_mat[k + 1][i];
344 
345  const float c_1_old = c_1;
346  const float c_2_old = c_2;
347 
348  c_1 = product_sum_cornea_harrison_tang(givens_c, c_1_old, -givens_s, c_2_old);
349  c_2 = product_sum_cornea_harrison_tang(givens_s, c_1_old, givens_c, c_2_old);
350  }
351  }
352  }

◆ get_solution()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::get_solution ( float(&)  eigenvalues[3],
float(&)  eigenvectors[3][3],
const float  tolerance = s_typical_tolerance,
const float  near_zero = s_typical_near_zero,
const float  epsilon = s_typical_epsilon,
const int  max_iter = s_typical_max_iterations 
)
inline

Get the full eigenvalues and eigenvectors for this matrix.

If rescale_and_reshift_values is true, the eigenvalues are scaled and shifted back to their proper value, given the original matrix.

Definition at line 425 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

431  {
432  float temp_subdiag[2];
433 
434  tridiagonalize(eigenvalues, temp_subdiag, eigenvectors, tolerance);
435  //eigenvalues and eigenvectors are used temporarily to store the diagonal and the matrix.
436 
437  compute(eigenvalues, temp_subdiag, eigenvectors, near_zero, epsilon, max_iter);
438 
439  eigenvalues[0] *= scale;
440  eigenvalues[1] *= scale;
441  eigenvalues[2] *= scale;
442  }

◆ tridiagonalize()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::tridiagonalize ( float(&)  temp_diag[3],
float(&)  temp_subdiag[2],
float(&)  temp_mat[3][3],
const float  tolerance = s_typical_tolerance 
)
inline

Definition at line 165 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

166  {
167  using namespace std;
168 
169  temp_diag[0] = a;
170  if (f * f <= tolerance)
171  {
172  temp_diag[1] = b;
173  temp_diag[2] = c;
174 
175  temp_subdiag[0] = d;
176  temp_subdiag[1] = e;
177 
178  temp_mat[0][0] = 1.f;
179  temp_mat[0][1] = 0.f;
180  temp_mat[0][2] = 0.f;
181 
182  temp_mat[1][0] = 0.f;
183  temp_mat[1][1] = 1.f;
184  temp_mat[1][2] = 0.f;
185 
186  temp_mat[2][0] = 0.f;
187  temp_mat[2][1] = 0.f;
188  temp_mat[2][2] = 1.f;
189  }
190  else
191  {
192  const float beta = hypot(d, f);
193 
194  const float inv_beta = 1.f / beta;
195 
196  const float em_0_1 = d * inv_beta;
197  const float em_0_2 = f * inv_beta;
198 
199  const float q_w_1 = 2 * em_0_1 * e;
200  const float q_c_1 = fmaf(2 * em_0_1, e, -q_w_1);
201  const float q_w_2 = em_0_2 * c;
202  const float q_c_2 = fmaf(em_0_2, c, -q_w_2);
203  const float q_w_3 = em_0_2 * b;
204  const float q_c_3 = fmaf(em_0_2, b, -q_w_3);
205 
206  const float q = sum_kahan_babushka_neumaier(q_w_1, q_w_2, -q_w_3, q_c_1, q_c_2, -q_c_3);
207 
208  temp_diag[1] = fmaf( em_0_2, q, b);
209  temp_diag[2] = fmaf(-em_0_2, q, c);
210 
211  temp_subdiag[0] = beta;
212  temp_subdiag[1] = fmaf(-em_0_1, q, e);
213 
214  temp_mat[0][0] = 1.f;
215  temp_mat[0][1] = 0.f;
216  temp_mat[0][2] = 0.f;
217 
218  temp_mat[1][0] = 0.f;
219  temp_mat[1][1] = em_0_1;
220  temp_mat[1][2] = em_0_2;
221 
222  temp_mat[2][0] = 0.f;
223  temp_mat[2][1] = em_0_2;
224  temp_mat[2][2] = -em_0_1;
225  }
226  }

Member Data Documentation

◆ a

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::a

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ b

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::b

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ c

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::c

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ d

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::d

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ e

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::e

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ f

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::f

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_epsilon

constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_epsilon = std::numeric_limits<float>::epsilon()
staticconstexpr

Definition at line 357 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_max_iterations

constexpr int ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_max_iterations = 90
staticconstexpr

Definition at line 355 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_near_zero

constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_near_zero = std::numeric_limits<float>::min()
staticconstexpr

Definition at line 356 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_tolerance

constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_tolerance = std::numeric_limits<float>::min()
staticconstexpr

Definition at line 161 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ scale

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::scale

Definition at line 133 of file GPUClusterInfoAndMomentsCalculatorImpl.h.


The documentation for this struct was generated from the following file:
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
mergePhysValFiles.start
start
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:13
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::a
float a
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
read_hist_ntuple.t
t
Definition: read_hist_ntuple.py:5
x
#define x
Trk::u
@ u
Enums for curvilinear frames.
Definition: ParamDefs.h:77
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:92
ClusterMomentsCalculator::product_sum_cornea_harrison_tang
CUDA_HOS_DEV float product_sum_cornea_harrison_tang(const float a, const float b, const float c, const float d)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:54
lumiFormat.i
int i
Definition: lumiFormat.py:85
z
#define z
h
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::b
float b
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::c
float c
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
tolerance
Definition: suep_shower.h:17
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute_iteration
CUDA_HOS_DEV void compute_iteration(const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:228
ClusterMomentsCalculator::sum_kahan_babushka_neumaier
CUDA_HOS_DEV float sum_kahan_babushka_neumaier(const Floats &... fs)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:40
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::scale
float scale
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute
CUDA_HOS_DEV void compute(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:359
extractSporadic.q
list q
Definition: extractSporadic.py:97
MuonParameters::beta
@ beta
Definition: MuonParamDefs.h:144
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::f
float f
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
CaloNoise_fillDB.mu
mu
Definition: CaloNoise_fillDB.py:51
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::tridiagonalize
CUDA_HOS_DEV void tridiagonalize(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:165
fitman.k
k
Definition: fitman.py:528
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::e
float e
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::d
float d
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:133