ATLAS Offline Software
Loading...
Searching...
No Matches
GPUClusterInfoAndMomentsCalculatorImpl.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3//
4// Dear emacs, this is -*- c++ -*-
5//
6
7#ifndef CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATOR_CUDA_H
8#define CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATOR_CUDA_H
9
12#include "CaloRecGPU/Helpers.h"
13
15
16#include <cmath>
17#include <type_traits>
18
20{
22 //Following the Eigen implementation too...
23 {
24 float a, b, c, d, e, f, scale;
25 // +-- --+
26 // | a d f |
27 // | d b e |
28 // | f e c |
29 // +-- --+
30
31 CUDA_HOS_DEV RealSymmetricMatrixSolverIterative(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
32 {
33 using namespace std;
34
35 const float max_ab = max( fabsf(a_orig), fabsf(b_orig) );
36 const float max_cd = max( fabsf(c_orig), fabsf(d_orig) );
37 const float max_ef = max( fabsf(e_orig), fabsf(f_orig) );
38 scale = max(max_ab, max(max_cd, max_ef) );
39 if (scale == 0.f)
40 {
41 scale = 1.f;
42 }
43 const float inv_scale = 1.0f / scale;
44 a = a_orig * inv_scale;
45 b = b_orig * inv_scale;
46 c = c_orig * inv_scale;
47 d = d_orig * inv_scale;
48 e = e_orig * inv_scale;
49 f = f_orig * inv_scale;
50 }
51
52 static constexpr float s_typical_tolerance = std::numeric_limits<float>::min();
53
54 //Uses temp_diag to store the diagonal
55 //and temp_mat to store the tridiagonalized form of the matrix
56 CUDA_HOS_DEV void tridiagonalize(float (&temp_diag)[3], float (&temp_subdiag)[2], float (&temp_mat)[3][3], const float tolerance = s_typical_tolerance)
57 {
58 using namespace std;
59
60 temp_diag[0] = a;
61 if (f * f <= tolerance)
62 {
63 temp_diag[1] = b;
64 temp_diag[2] = c;
65
66 temp_subdiag[0] = d;
67 temp_subdiag[1] = e;
68
69 temp_mat[0][0] = 1.f;
70 temp_mat[0][1] = 0.f;
71 temp_mat[0][2] = 0.f;
72
73 temp_mat[1][0] = 0.f;
74 temp_mat[1][1] = 1.f;
75 temp_mat[1][2] = 0.f;
76
77 temp_mat[2][0] = 0.f;
78 temp_mat[2][1] = 0.f;
79 temp_mat[2][2] = 1.f;
80 }
81 else
82 {
83 const float beta = hypot(d, f);
84
85 const float inv_beta = 1.f / beta;
86
87 const float em_0_1 = d * inv_beta;
88 const float em_0_2 = f * inv_beta;
89
90 const float q_w_1 = 2 * em_0_1 * e;
91 const float q_c_1 = fmaf(2 * em_0_1, e, -q_w_1);
92 const float q_w_2 = em_0_2 * c;
93 const float q_c_2 = fmaf(em_0_2, c, -q_w_2);
94 const float q_w_3 = em_0_2 * b;
95 const float q_c_3 = fmaf(em_0_2, b, -q_w_3);
96
97 const float q = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(q_w_1, q_w_2, -q_w_3, q_c_1, q_c_2, -q_c_3);
98
99 temp_diag[1] = fmaf( em_0_2, q, b);
100 temp_diag[2] = fmaf(-em_0_2, q, c);
101
102 temp_subdiag[0] = beta;
103 temp_subdiag[1] = fmaf(-em_0_1, q, e);
104
105 temp_mat[0][0] = 1.f;
106 temp_mat[0][1] = 0.f;
107 temp_mat[0][2] = 0.f;
108
109 temp_mat[1][0] = 0.f;
110 temp_mat[1][1] = em_0_1;
111 temp_mat[1][2] = em_0_2;
112
113 temp_mat[2][0] = 0.f;
114 temp_mat[2][1] = em_0_2;
115 temp_mat[2][2] = -em_0_1;
116 }
117 }
118
119 CUDA_HOS_DEV void compute_iteration(const int start,
120 const int end,
121 float (&temp_diag)[3],
122 float (&temp_subdiag)[2],
123 float (&temp_mat)[3][3])
124 {
125 const float td = (temp_diag[end - 1] - temp_diag[end]) * 0.5f;
126
127 const float ee = temp_subdiag[end - 1];
128
129 float mu = temp_diag[end];
130
131 if (td == 0.f)
132 {
133 mu -= fabsf(ee);
134 }
135 else if (ee != 0.f)
136 {
137 const float ee_2 = ee * ee;
138
139 const float h = hypot(td, ee);
140
141 const float factor = td + h * ((td >= 0.f) - (td < 0.f));
142
143 if (ee_2 == 0.f)
144 {
145 mu -= ee / (factor / ee);
146 }
147 else
148 {
149 mu -= ee_2 / factor;
150 }
151 }
152
153 float x = temp_diag[start] - mu;
154 float z = temp_subdiag[start];
155
156 for (int k = start; k < end && z != 0.f; ++k)
157 {
158 float givens_c, givens_s;
159
160 /*if (z == 0.f)
161 {
162 givens_c = ((x >= 0.f) - (x < 0.f));
163 givens_s = 0.f;
164 }
165 else */if (x == 0.f)
166 {
167 givens_c = 0.f;
168 givens_s = (z < 0.f) - (z >= 0.f);
169 }
170 else if (fabsf(x) >= fabsf(z))
171 {
172 const float t = z / x;
173 const float u = hypot(1.f, fabsf(t)) * ((x >= 0.f) - (x < 0.f));
174
175 givens_c = 1.f / u;
176 givens_s = -t * givens_c;
177 }
178 else
179 {
180 const float t = x / z;
181 const float u = hypot(1.f, fabsf(t)) * ((z >= 0.f) - (z < 0.f));
182
183 givens_s = -1.f / u;
184 givens_c = -t * givens_s;
185 }
186
188 temp_diag[k],
189 givens_c,
190 temp_subdiag[k]);
191
193 temp_subdiag[k],
194 givens_c,
195 temp_diag[k + 1]);
196
199 temp_diag[k],
200 -givens_s,
201 temp_subdiag[k]),
202 -givens_s,
204 temp_subdiag[k],
205 -givens_s,
206 temp_diag[k + 1])
207 );
208 temp_diag[k + 1] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_s, sdk, givens_c, dkp1);
209 temp_subdiag[k] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c, sdk, -givens_s, dkp1);
210
211 if (k > start)
212 {
213 temp_subdiag[k - 1] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c,
214 temp_subdiag[k - 1],
215 -givens_s,
216 z);
217 }
218
219 x = temp_subdiag[k];
220
221 if (k < end - 1)
222 {
223 z = -givens_s * temp_subdiag[k + 1];
224
225 temp_subdiag[k + 1] *= givens_c;
226 }
227
228 //We could skip if (c, s) == (1, 0)
229 //Also, apply on the right means
230 //we have to consider -s instead of s.
231 for (int i = 0; i < 3; ++i)
232 {
233 float & c_1 = temp_mat[k] [i];
234 float & c_2 = temp_mat[k + 1][i];
235
236 const float c_1_old = c_1;
237 const float c_2_old = c_2;
238
239 c_1 = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c, c_1_old, -givens_s, c_2_old);
240 c_2 = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_s, c_1_old, givens_c, c_2_old);
241 }
242 }
243 }
244
245
246 static constexpr int s_typical_max_iterations = 90;
247 static constexpr float s_typical_near_zero = std::numeric_limits<float>::min();
248 static constexpr float s_typical_epsilon = std::numeric_limits<float>::epsilon();
249
250 CUDA_HOS_DEV void compute(float (&temp_diag)[3],
251 float (&temp_subdiag)[2],
252 float (&temp_mat)[3][3],
253 const float near_zero = s_typical_near_zero,
254 const float epsilon = s_typical_epsilon,
255 const int max_iter = s_typical_max_iterations)
256 {
257 int iter_count = 0;
258 int start = 0, end = 2;
259
260 const float precision_inv = 1.f / epsilon;
261
262 while (end > 0)
263 {
264 for (int i = start; i < end; ++i)
265 {
266 if (fabsf(temp_subdiag[i]) < near_zero)
267 {
268 temp_subdiag[i] = 0.f;
269 }
270 else
271 {
272 const float scaled_subdiag = precision_inv * temp_subdiag[i];
273 if (scaled_subdiag * scaled_subdiag <= fabsf(temp_diag[i]) + fabsf(temp_diag[i + 1]))
274 {
275 temp_subdiag[i] = 0.f;
276 }
277 }
278 }
279
280 while (end > 0 && temp_subdiag[end - 1] == 0.f)
281 {
282 --end;
283 }
284
285 if (end <= 0)
286 {
287 break;
288 }
289
290 ++iter_count;
291
292 if (iter_count > max_iter)
293 {
294 //printf("OUT OF ITERS! %d %d\n", start, end);
295 break;
296 }
297
298 start = end - 1;
299
300 while (start > 0 && temp_subdiag[start - 1] != 0.f)
301 {
302 --start;
303 }
304
305 compute_iteration(start, end, temp_diag, temp_subdiag, temp_mat);
306 }
307
308 //No need to sort eigenvalues and eigenvectors:
309 //we are going to check them all anyway.
310 }
311
316 CUDA_HOS_DEV void get_solution(float (&eigenvalues)[3],
317 float (&eigenvectors)[3][3],
318 const float tolerance = s_typical_tolerance,
319 const float near_zero = s_typical_near_zero,
320 const float epsilon = s_typical_epsilon,
321 const int max_iter = s_typical_max_iterations )
322 {
323 float temp_subdiag[2];
324
325 tridiagonalize(eigenvalues, temp_subdiag, eigenvectors, tolerance);
326 //eigenvalues and eigenvectors are used temporarily to store the diagonal and the matrix.
327
328 compute(eigenvalues, temp_subdiag, eigenvectors, near_zero, epsilon, max_iter);
329
330 eigenvalues[0] *= scale;
331 eigenvalues[1] *= scale;
332 eigenvalues[2] *= scale;
333 }
334 };
335
337//Taken from the Eigen implementation of direct_selfadjoint_eigenvalues
338 {
339 float a, b, c, d, e, f, shift, scale;
340 // +-- --+
341 // | a d f |
342 // | d b e |
343 // | f e c |
344 // +-- --+
345
346 CUDA_HOS_DEV RealSymmetricMatrixSolver(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
347 {
348 using namespace std;
349
350 shift = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(a_orig, b_orig, c_orig) / 3.f;
351 a = a_orig - shift;
352 b = b_orig - shift;
353 c = c_orig - shift;
354 const float max_ab = max( fabsf(a), fabsf(b) );
355 const float max_cd = max( fabsf(c), fabsf(d_orig) );
356 const float max_ef = max( fabsf(e_orig), fabsf(f_orig) );
357 scale = max(max_ab, max(max_cd, max_ef) );
358 if (scale == 0.f)
359 {
360 scale = 1.f;
361 }
362 const float inv_scale = 1.0f / scale;
363 a *= inv_scale;
364 b *= inv_scale;
365 c *= inv_scale;
366 d = d_orig * inv_scale;
367 e = e_orig * inv_scale;
368 f = f_orig * inv_scale;
369 }
370
374 CUDA_HOS_DEV void get_eigenvalues(float & e_1, float & e_2, float & e_3) const
375 {
376 using namespace std;
377
378 const float ab = a * b;
379 const float corr_ab = fmaf(a, b, -ab);
380 const float dd = d * d;
381 const float corr_dd = fmaf(d, d, -dd);
382 const float ac = a * c;
383 const float corr_ac = fmaf(a, c, -ac);
384 const float ff = f * f;
385 const float corr_ff = fmaf(f, f, -ff);
386 const float bc = b * c;
387 const float corr_bc = fmaf(b, c, -bc);
388 const float ee = e * e;
389 const float corr_ee = fmaf(e, e, -ee);
390
391 const float c_0 = fmaf(2 * d, f * e,
394 //Consider using some other strategy to select the best pairs
395 //to minimize error here?
396
397 const float c_1 = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(ab, -dd, ac, -ff, bc, -ee, corr_ab, -corr_dd, corr_ac, -corr_ff, corr_bc, corr_ee);
398
400
401 constexpr float inv_3 = 1.f / 3.f;
402
403 const float c_2_over_3 = c_2 * inv_3;
404
405 const float a_over_3 = max(fma(c_2, c_2_over_3, -c_1) * inv_3, 0.f);
406
407 const float half_b = 0.5f * (fma(c_2_over_3, fma(2.f * c_2_over_3, c_2_over_3, -c_1), c_0));
408
409 const float q = max(CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(a_over_3, a_over_3 * a_over_3, -half_b, half_b), 0.f);
410
411 //None of what we are doing here is the best choice.
412 //We would need to perform a proper, formal, numerical analysis
413 //to figure out all possible errors and so on.
414
415 const float rho = sqrtf(a_over_3);
416
417#ifdef __CUDA_ARCH__
418 const float theta = atan2f(1.0f, rsqrtf(q) * half_b) * inv_3;
419#else
420 const float theta = atan2f(sqrtf(q), half_b) * inv_3;
421#endif
422
423#ifdef __CUDA_ARCH__
424 float sin_theta, cos_theta;
425 sincosf(theta, &sin_theta, &cos_theta);
426#else
427 const float sin_theta = sinf(theta);
428 const float cos_theta = cosf(theta);
429#endif
430
431 const float sqrt_3 = sqrtf(3.f);
432
433 e_1 = fma(-rho, fma( sqrt_3, sin_theta, cos_theta), c_2_over_3);
434 e_2 = fma(-rho, fma(-sqrt_3, sin_theta, cos_theta), c_2_over_3);
435 e_3 = fma( rho, 2.0f * cos_theta, c_2_over_3);
436 }
437
438 CUDA_HOS_DEV void extract_one(const float eigenvalue, float (&res)[3], float (&representative)[3]) const
439 {
440 using namespace std;
441
442 const float diag[3] = { a - eigenvalue,
443 b - eigenvalue,
444 c - eigenvalue
445 };
446
447 float vec_1[3], vec_2[3];
448
449 int max_diag = -1;
450 float max_value = -1.f;
451
452 for (int i = 0; i < 3; ++i)
453 {
454 const float this_diag = fabsf(diag[i]);
455 if (this_diag >= max_value)
456 {
457 max_diag = i;
458 max_value = this_diag;
459 }
460 }
461
462 if (max_diag == 0)
463 {
464 representative[0] = diag[0];
465 representative[1] = d;
466 representative[2] = f;
467
468 vec_1[0] = d;
469 vec_1[1] = diag[1];
470 vec_1[2] = e;
471
472 vec_2[0] = f;
473 vec_2[1] = e;
474 vec_2[2] = diag[2];
475 }
476 else if (max_diag == 1)
477 {
478 representative[0] = d;
479 representative[1] = diag[1];
480 representative[2] = e;
481
482 vec_1[0] = f;
483 vec_1[1] = e;
484 vec_1[2] = diag[2];
485
486 vec_2[0] = diag[0];
487 vec_2[1] = d;
488 vec_2[2] = f;
489
490 }
491 else /*if (max_diag == 2)*/
492 {
493 representative[0] = f;
494 representative[1] = e;
495 representative[2] = diag[2];
496
497 vec_1[0] = diag[0];
498 vec_1[1] = d;
499 vec_1[2] = f;
500
501 vec_2[0] = d;
502 vec_2[1] = diag[1];
503 vec_2[2] = e;
504 }
505
507 CaloRecGPU::Helpers::corrected_cross_product(vec_1, representative, vec_2);
508 //Can safely override previous value...
509
510#ifdef __CUDA_ARCH__
511 const float norm_1 = rnorm3df(res[0], res[1], res[2]);
512 const float norm_2 = rnorm3df(vec_1[0], vec_1[1], vec_1[2]);
513#else
514 const float norm_1 = 1.f / hypot(res[0], res[1], res[2]);
515 const float norm_2 = 1.f / hypot(vec_1[0], vec_1[1], vec_1[2]);
516#endif
517
518 if (norm_1 <= norm_2)
519 //Greater magnitude -> multiply by a smaller value
520 {
521 res[0] *= norm_1;
522 res[1] *= norm_1;
523 res[2] *= norm_1;
524 }
525 else
526 {
527 res[0] = vec_1[0] * norm_2;
528 res[1] = vec_1[1] * norm_2;
529 res[2] = vec_1[2] * norm_2;
530 }
531 }
532
533 static constexpr float s_typical_epsilon = std::numeric_limits<float>::epsilon();
534
538 CUDA_HOS_DEV void get_eigenvectors(float (&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon = s_typical_epsilon) const
539 {
540 using namespace std;
541
542 if (e_3 - e_1 <= epsilon)
543 {
544 res[0][0] = 1.f;
545 res[0][1] = 0.f;
546 res[0][2] = 0.f;
547
548 res[1][0] = 0.f;
549 res[1][1] = 1.f;
550 res[1][2] = 0.f;
551
552 res[2][0] = 0.f;
553 res[2][1] = 0.f;
554 res[2][2] = 1.f;
555 }
556 else
557 {
558 const float d_0 = e_3 - e_2;
559 const float d_1 = e_2 - e_1;
560
561 const float d_min = min(d_0, d_1);
562 const float d_max = max(d_0, d_1);
563
564 int k, j;
565 float first_e, second_e;
566
567 if (d_0 > d_1)
568 {
569 k = 2;
570 j = 0;
571
572 first_e = e_3;
573 second_e = e_1;
574 }
575 else
576 {
577 k = 0;
578 j = 2;
579
580 first_e = e_1;
581 second_e = e_3;
582 }
583
584 extract_one(first_e, res[k], res[j]);
585#if USE_ORIGINAL_EIGEN
586 if (d_min <= 2 * epsilon * d1)
587 {
588#ifdef __CUDA_ARCH__
589 const float base_norm = rnorm3df(res[j][0], res[j][1], res[j][2]);
590#else
591 const float base_norm = 1.f / hypot(res[j][0], res[j][1], res[j][2]);
592#endif
593 const float extra_factor = 1.f - CaloRecGPU::Helpers::corrected_dot_product(res[k], res[j]);
594
595 const float norm = base_norm / extra_factor;
596
597 res[j][0] *= norm;
598 res[j][1] *= norm;
599 res[j][2] *= norm;
600 }
601#else
602 if (d_min <= 2 * epsilon * d_max)
603 //Eigen has d0 <= 2 * eps <= d1, but d0 is overwritten while d1 isn't...
604 {
605 //Eigen speaks about ortho-normalization
606 //but does eivecs.col(l) -= eivecs.col(k).dot(eivecs.col(l))*eivecs.col(l)
607 //which... does not ortho-normalize anything.
608
609 const float prod = CaloRecGPU::Helpers::corrected_dot_product(res[k], res[j]);
610
611 res[j][0] -= res[k][0] * prod;
612 res[j][1] -= res[k][1] * prod;
613 res[j][2] -= res[k][2] * prod;
614
615#ifdef __CUDA_ARCH__
616 const float norm = rnorm3df(res[j][0], res[j][1], res[j][2]);
617#else
618 const float norm = 1.f / hypot(res[j][0], res[j][1], res[j][2]);
619#endif
620 res[j][0] *= norm;
621 res[j][1] *= norm;
622 res[j][2] *= norm;
623 }
624#endif
625 else
626 {
627 float extra_vector[3];
628
629 extract_one(second_e, res[j], extra_vector);
630 }
631
633
634#ifdef __CUDA_ARCH__
635 const float norm = rnorm3df(res[1][0], res[1][1], res[1][2]);
636#else
637 const float norm = 1.f / hypot(res[1][0], res[1][1], res[1][2]);
638#endif
639
640 res[1][0] *= norm;
641 res[1][1] *= norm;
642 res[1][2] *= norm;
643 }
644 }
645
650 CUDA_HOS_DEV void get_solution(float (&eigenvalues)[3], float (&eigenvectors)[3][3], bool rescale_and_reshift_values = true, const float epsilon = s_typical_epsilon)
651 {
652 get_eigenvalues(eigenvalues[0], eigenvalues[1], eigenvalues[2]);
653 get_eigenvectors(eigenvectors, eigenvalues[0], eigenvalues[1], eigenvalues[2], epsilon);
654
655 if (rescale_and_reshift_values)
656 {
657 eigenvalues[0] = fmaf(eigenvalues[0], scale, shift);
658 eigenvalues[1] = fmaf(eigenvalues[1], scale, shift);
659 eigenvalues[2] = fmaf(eigenvalues[2], scale, shift);
660 }
661 }
662 };
663
675
689
691
692 constexpr unsigned int num_time_measurements = 11;
693
695 const CaloRecGPU::ConstantDataHolder & instance_data,
696 const CMCOptionsHolder & options,
697 const IGPUKernelSizeOptimizer & optimizer,
698 size_t (&times)[num_time_measurements],
699 const bool synchronize = false,
701}
702
703#endif
Scalar theta() const
theta method
std::pair< std::vector< unsigned int >, bool > res
#define x
#define z
#define min(a, b)
Definition cfImp.cxx:40
#define max(a, b)
Definition cfImp.cxx:41
Header file for AthHistogramAlgorithm.
Holds CPU and GPU versions of the geometry and cell noise information, which are assumed to be consta...
Definition DataHolders.h:27
Holds the mutable per-event information (clusters and cells) and provides utilities to convert betwee...
Definition DataHolders.h:73
Interface for GPU kernel size optimization (allowing adjustment of kernel sizes to the properties of ...
SimpleHolder< T, MemoryContext::CUDAGPU, true > CUDA_object
Holds an object of type T in CUDA GPU memory.
static CUDA_HOS_DEV float product_sum_cornea_harrison_tang(const float a, const float b, const float c, const float d)
static CUDA_HOS_DEV float corrected_dot_product(const float a_1, const float a_2, const float a_3, const float b_1, const float b_2, const float b_3)
SimpleHolder< T, MemoryContext::CPU, true > CPU_object
Holds an object of type T in CPU memory.
static CUDA_HOS_DEV void corrected_cross_product(float(&res)[3], const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)
CUDA_HOS_DEV float sum_kahan_babushka_neumaier(const Floats &... fs)
Adds a list of floats using the Kahan-Babushka-Neumaier algorithm for greater precision (at the cost ...
void register_kernels(IGPUKernelSizeOptimizer &optimizer)
void calculateClusterPropertiesAndMoments(CaloRecGPU::EventDataHolder &holder, const CaloRecGPU::ConstantDataHolder &instance_data, const CMCOptionsHolder &options, const IGPUKernelSizeOptimizer &optimizer, size_t(&times)[num_time_measurements], const bool synchronize=false, CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder stream={})
STL namespace.
void sendToGPU(const bool clear_CPU=false)
CaloRecGPU::Helpers::CUDA_object< ClusterMomentCalculationOptions > m_options_dev
CaloRecGPU::Helpers::CPU_object< ClusterMomentCalculationOptions > m_options
CUDA_HOS_DEV RealSymmetricMatrixSolverIterative(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
CUDA_HOS_DEV void get_solution(float(&eigenvalues)[3], float(&eigenvectors)[3][3], const float tolerance=s_typical_tolerance, const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
Get the full eigenvalues and eigenvectors for this matrix.
CUDA_HOS_DEV void tridiagonalize(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)
CUDA_HOS_DEV void compute(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
CUDA_HOS_DEV void compute_iteration(const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])
CUDA_HOS_DEV void get_eigenvalues(float &e_1, float &e_2, float &e_3) const
Calculate shifted and scaled eigenvalues of the matrix, in ascending value.
CUDA_HOS_DEV void get_solution(float(&eigenvalues)[3], float(&eigenvectors)[3][3], bool rescale_and_reshift_values=true, const float epsilon=s_typical_epsilon)
Get the full eigenvalues and eigenvectors for this matrix.
CUDA_HOS_DEV void get_eigenvectors(float(&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon=s_typical_epsilon) const
Calculate the eigenvectors of the matrix, using the (possibly unscaled) eigenvalues e_1,...
CUDA_HOS_DEV void extract_one(const float eigenvalue, float(&res)[3], float(&representative)[3]) const
CUDA_HOS_DEV RealSymmetricMatrixSolver(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)