ATLAS Offline Software
Loading...
Searching...
No Matches
ClusterMomentsCalculator::RealSymmetricMatrixSolver Struct Reference

#include <GPUClusterInfoAndMomentsCalculatorImpl.h>

Collaboration diagram for ClusterMomentsCalculator::RealSymmetricMatrixSolver:

Public Member Functions

CUDA_HOS_DEV RealSymmetricMatrixSolver (const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
CUDA_HOS_DEV void get_eigenvalues (float &e_1, float &e_2, float &e_3) const
 Calculate shifted and scaled eigenvalues of the matrix, in ascending value.
CUDA_HOS_DEV void extract_one (const float eigenvalue, float(&res)[3], float(&representative)[3]) const
CUDA_HOS_DEV void get_eigenvectors (float(&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon=s_typical_epsilon) const
 Calculate the eigenvectors of the matrix, using the (possibly unscaled) eigenvalues e_1, e_2, e_3 (in ascending order of magnitude) and epsilon to guard against undefined cases.
CUDA_HOS_DEV void get_solution (float(&eigenvalues)[3], float(&eigenvectors)[3][3], bool rescale_and_reshift_values=true, const float epsilon=s_typical_epsilon)
 Get the full eigenvalues and eigenvectors for this matrix.

Public Attributes

float a
float b
float c
float d
float e
float f
float shift
float scale

Static Public Attributes

static constexpr float s_typical_epsilon = std::numeric_limits<float>::epsilon()

Detailed Description

Definition at line 336 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

Constructor & Destructor Documentation

◆ RealSymmetricMatrixSolver()

CUDA_HOS_DEV ClusterMomentsCalculator::RealSymmetricMatrixSolver::RealSymmetricMatrixSolver ( const float a_orig,
const float b_orig,
const float c_orig,
const float d_orig,
const float e_orig,
const float f_orig )
inline

Definition at line 346 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

347 {
348 using namespace std;
349
350 shift = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(a_orig, b_orig, c_orig) / 3.f;
351 a = a_orig - shift;
352 b = b_orig - shift;
353 c = c_orig - shift;
354 const float max_ab = max( fabsf(a), fabsf(b) );
355 const float max_cd = max( fabsf(c), fabsf(d_orig) );
356 const float max_ef = max( fabsf(e_orig), fabsf(f_orig) );
357 scale = max(max_ab, max(max_cd, max_ef) );
358 if (scale == 0.f)
359 {
360 scale = 1.f;
361 }
362 const float inv_scale = 1.0f / scale;
363 a *= inv_scale;
364 b *= inv_scale;
365 c *= inv_scale;
366 d = d_orig * inv_scale;
367 e = e_orig * inv_scale;
368 f = f_orig * inv_scale;
369 }
#define max(a, b)
Definition cfImp.cxx:41
CUDA_HOS_DEV float sum_kahan_babushka_neumaier(const Floats &... fs)
Adds a list of floats using the Kahan-Babushka-Neumaier algorithm for greater precision (at the cost ...

Member Function Documentation

◆ extract_one()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolver::extract_one ( const float eigenvalue,
float(&) res[3],
float(&) representative[3] ) const
inline

Definition at line 438 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

439 {
440 using namespace std;
441
442 const float diag[3] = { a - eigenvalue,
443 b - eigenvalue,
444 c - eigenvalue
445 };
446
447 float vec_1[3], vec_2[3];
448
449 int max_diag = -1;
450 float max_value = -1.f;
451
452 for (int i = 0; i < 3; ++i)
453 {
454 const float this_diag = fabsf(diag[i]);
455 if (this_diag >= max_value)
456 {
457 max_diag = i;
458 max_value = this_diag;
459 }
460 }
461
462 if (max_diag == 0)
463 {
464 representative[0] = diag[0];
465 representative[1] = d;
466 representative[2] = f;
467
468 vec_1[0] = d;
469 vec_1[1] = diag[1];
470 vec_1[2] = e;
471
472 vec_2[0] = f;
473 vec_2[1] = e;
474 vec_2[2] = diag[2];
475 }
476 else if (max_diag == 1)
477 {
478 representative[0] = d;
479 representative[1] = diag[1];
480 representative[2] = e;
481
482 vec_1[0] = f;
483 vec_1[1] = e;
484 vec_1[2] = diag[2];
485
486 vec_2[0] = diag[0];
487 vec_2[1] = d;
488 vec_2[2] = f;
489
490 }
491 else /*if (max_diag == 2)*/
492 {
493 representative[0] = f;
494 representative[1] = e;
495 representative[2] = diag[2];
496
497 vec_1[0] = diag[0];
498 vec_1[1] = d;
499 vec_1[2] = f;
500
501 vec_2[0] = d;
502 vec_2[1] = diag[1];
503 vec_2[2] = e;
504 }
505
507 CaloRecGPU::Helpers::corrected_cross_product(vec_1, representative, vec_2);
508 //Can safely override previous value...
509
510#ifdef __CUDA_ARCH__
511 const float norm_1 = rnorm3df(res[0], res[1], res[2]);
512 const float norm_2 = rnorm3df(vec_1[0], vec_1[1], vec_1[2]);
513#else
514 const float norm_1 = 1.f / hypot(res[0], res[1], res[2]);
515 const float norm_2 = 1.f / hypot(vec_1[0], vec_1[1], vec_1[2]);
516#endif
517
518 if (norm_1 <= norm_2)
519 //Greater magnitude -> multiply by a smaller value
520 {
521 res[0] *= norm_1;
522 res[1] *= norm_1;
523 res[2] *= norm_1;
524 }
525 else
526 {
527 res[0] = vec_1[0] * norm_2;
528 res[1] = vec_1[1] * norm_2;
529 res[2] = vec_1[2] * norm_2;
530 }
531 }
std::pair< std::vector< unsigned int >, bool > res
static CUDA_HOS_DEV void corrected_cross_product(float(&res)[3], const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)

◆ get_eigenvalues()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_eigenvalues ( float & e_1,
float & e_2,
float & e_3 ) const
inline

Calculate shifted and scaled eigenvalues of the matrix, in ascending value.

To get the actual eigenvalues, you should multiply by scale and then add shift.

Definition at line 374 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

375 {
376 using namespace std;
377
378 const float ab = a * b;
379 const float corr_ab = fmaf(a, b, -ab);
380 const float dd = d * d;
381 const float corr_dd = fmaf(d, d, -dd);
382 const float ac = a * c;
383 const float corr_ac = fmaf(a, c, -ac);
384 const float ff = f * f;
385 const float corr_ff = fmaf(f, f, -ff);
386 const float bc = b * c;
387 const float corr_bc = fmaf(b, c, -bc);
388 const float ee = e * e;
389 const float corr_ee = fmaf(e, e, -ee);
390
391 const float c_0 = fmaf(2 * d, f * e,
394 //Consider using some other strategy to select the best pairs
395 //to minimize error here?
396
397 const float c_1 = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(ab, -dd, ac, -ff, bc, -ee, corr_ab, -corr_dd, corr_ac, -corr_ff, corr_bc, corr_ee);
398
400
401 constexpr float inv_3 = 1.f / 3.f;
402
403 const float c_2_over_3 = c_2 * inv_3;
404
405 const float a_over_3 = max(fma(c_2, c_2_over_3, -c_1) * inv_3, 0.f);
406
407 const float half_b = 0.5f * (fma(c_2_over_3, fma(2.f * c_2_over_3, c_2_over_3, -c_1), c_0));
408
409 const float q = max(CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(a_over_3, a_over_3 * a_over_3, -half_b, half_b), 0.f);
410
411 //None of what we are doing here is the best choice.
412 //We would need to perform a proper, formal, numerical analysis
413 //to figure out all possible errors and so on.
414
415 const float rho = sqrtf(a_over_3);
416
417#ifdef __CUDA_ARCH__
418 const float theta = atan2f(1.0f, rsqrtf(q) * half_b) * inv_3;
419#else
420 const float theta = atan2f(sqrtf(q), half_b) * inv_3;
421#endif
422
423#ifdef __CUDA_ARCH__
424 float sin_theta, cos_theta;
425 sincosf(theta, &sin_theta, &cos_theta);
426#else
427 const float sin_theta = sinf(theta);
428 const float cos_theta = cosf(theta);
429#endif
430
431 const float sqrt_3 = sqrtf(3.f);
432
433 e_1 = fma(-rho, fma( sqrt_3, sin_theta, cos_theta), c_2_over_3);
434 e_2 = fma(-rho, fma(-sqrt_3, sin_theta, cos_theta), c_2_over_3);
435 e_3 = fma( rho, 2.0f * cos_theta, c_2_over_3);
436 }
Scalar theta() const
theta method
static CUDA_HOS_DEV float product_sum_cornea_harrison_tang(const float a, const float b, const float c, const float d)

◆ get_eigenvectors()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_eigenvectors ( float(&) res[3][3],
const float e_1,
const float e_2,
const float e_3,
const float epsilon = s_typical_epsilon ) const
inline

Calculate the eigenvectors of the matrix, using the (possibly unscaled) eigenvalues e_1, e_2, e_3 (in ascending order of magnitude) and epsilon to guard against undefined cases.

Definition at line 538 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

539 {
540 using namespace std;
541
542 if (e_3 - e_1 <= epsilon)
543 {
544 res[0][0] = 1.f;
545 res[0][1] = 0.f;
546 res[0][2] = 0.f;
547
548 res[1][0] = 0.f;
549 res[1][1] = 1.f;
550 res[1][2] = 0.f;
551
552 res[2][0] = 0.f;
553 res[2][1] = 0.f;
554 res[2][2] = 1.f;
555 }
556 else
557 {
558 const float d_0 = e_3 - e_2;
559 const float d_1 = e_2 - e_1;
560
561 const float d_min = min(d_0, d_1);
562 const float d_max = max(d_0, d_1);
563
564 int k, j;
565 float first_e, second_e;
566
567 if (d_0 > d_1)
568 {
569 k = 2;
570 j = 0;
571
572 first_e = e_3;
573 second_e = e_1;
574 }
575 else
576 {
577 k = 0;
578 j = 2;
579
580 first_e = e_1;
581 second_e = e_3;
582 }
583
584 extract_one(first_e, res[k], res[j]);
585#if USE_ORIGINAL_EIGEN
586 if (d_min <= 2 * epsilon * d1)
587 {
588#ifdef __CUDA_ARCH__
589 const float base_norm = rnorm3df(res[j][0], res[j][1], res[j][2]);
590#else
591 const float base_norm = 1.f / hypot(res[j][0], res[j][1], res[j][2]);
592#endif
593 const float extra_factor = 1.f - CaloRecGPU::Helpers::corrected_dot_product(res[k], res[j]);
594
595 const float norm = base_norm / extra_factor;
596
597 res[j][0] *= norm;
598 res[j][1] *= norm;
599 res[j][2] *= norm;
600 }
601#else
602 if (d_min <= 2 * epsilon * d_max)
603 //Eigen has d0 <= 2 * eps <= d1, but d0 is overwritten while d1 isn't...
604 {
605 //Eigen speaks about ortho-normalization
606 //but does eivecs.col(l) -= eivecs.col(k).dot(eivecs.col(l))*eivecs.col(l)
607 //which... does not ortho-normalize anything.
608
609 const float prod = CaloRecGPU::Helpers::corrected_dot_product(res[k], res[j]);
610
611 res[j][0] -= res[k][0] * prod;
612 res[j][1] -= res[k][1] * prod;
613 res[j][2] -= res[k][2] * prod;
614
615#ifdef __CUDA_ARCH__
616 const float norm = rnorm3df(res[j][0], res[j][1], res[j][2]);
617#else
618 const float norm = 1.f / hypot(res[j][0], res[j][1], res[j][2]);
619#endif
620 res[j][0] *= norm;
621 res[j][1] *= norm;
622 res[j][2] *= norm;
623 }
624#endif
625 else
626 {
627 float extra_vector[3];
628
629 extract_one(second_e, res[j], extra_vector);
630 }
631
633
634#ifdef __CUDA_ARCH__
635 const float norm = rnorm3df(res[1][0], res[1][1], res[1][2]);
636#else
637 const float norm = 1.f / hypot(res[1][0], res[1][1], res[1][2]);
638#endif
639
640 res[1][0] *= norm;
641 res[1][1] *= norm;
642 res[1][2] *= norm;
643 }
644 }
#define min(a, b)
Definition cfImp.cxx:40
static CUDA_HOS_DEV float corrected_dot_product(const float a_1, const float a_2, const float a_3, const float b_1, const float b_2, const float b_3)
CUDA_HOS_DEV void extract_one(const float eigenvalue, float(&res)[3], float(&representative)[3]) const

◆ get_solution()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_solution ( float(&) eigenvalues[3],
float(&) eigenvectors[3][3],
bool rescale_and_reshift_values = true,
const float epsilon = s_typical_epsilon )
inline

Get the full eigenvalues and eigenvectors for this matrix.

If rescale_and_reshift_values is true, the eigenvalues are scaled and shifted back to their proper value, given the original matrix.

Definition at line 650 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

651 {
652 get_eigenvalues(eigenvalues[0], eigenvalues[1], eigenvalues[2]);
653 get_eigenvectors(eigenvectors, eigenvalues[0], eigenvalues[1], eigenvalues[2], epsilon);
654
655 if (rescale_and_reshift_values)
656 {
657 eigenvalues[0] = fmaf(eigenvalues[0], scale, shift);
658 eigenvalues[1] = fmaf(eigenvalues[1], scale, shift);
659 eigenvalues[2] = fmaf(eigenvalues[2], scale, shift);
660 }
661 }
CUDA_HOS_DEV void get_eigenvalues(float &e_1, float &e_2, float &e_3) const
Calculate shifted and scaled eigenvalues of the matrix, in ascending value.
CUDA_HOS_DEV void get_eigenvectors(float(&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon=s_typical_epsilon) const
Calculate the eigenvectors of the matrix, using the (possibly unscaled) eigenvalues e_1,...

Member Data Documentation

◆ a

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::a

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ b

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::b

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ c

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::c

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ d

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::d

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ e

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::e

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ f

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::f

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_epsilon

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::s_typical_epsilon = std::numeric_limits<float>::epsilon()
staticconstexpr

Definition at line 533 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ scale

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::scale

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ shift

float ClusterMomentsCalculator::RealSymmetricMatrixSolver::shift

Definition at line 339 of file GPUClusterInfoAndMomentsCalculatorImpl.h.


The documentation for this struct was generated from the following file: