#include <GPUClusterInfoAndMomentsCalculatorImpl.h>

Collaboration diagram for ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative:

Public Member Functions
CUDA_HOS_DEV	RealSymmetricMatrixSolverIterative (const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)

CUDA_HOS_DEV void	tridiagonalize (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)

CUDA_HOS_DEV void	compute_iteration (const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])

CUDA_HOS_DEV void	compute (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)

CUDA_HOS_DEV void	get_solution (float(&eigenvalues)[3], float(&eigenvectors)[3][3], const float tolerance=s_typical_tolerance, const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
	Get the full eigenvalues and eigenvectors for this matrix. More...

Public Attributes
float	a

float	b

float	c

float	d

float	e

float	f

float	scale

Static Public Attributes
static constexpr float	s_typical_tolerance = std::numeric_limits<float>::min()

static constexpr int	s_typical_max_iterations = 90

static constexpr float	s_typical_near_zero = std::numeric_limits<float>::min()

static constexpr float	s_typical_epsilon = std::numeric_limits<float>::epsilon()

Detailed Description

Definition at line 130 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

Constructor & Destructor Documentation

◆ RealSymmetricMatrixSolverIterative()

CUDA_HOS_DEV ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::RealSymmetricMatrixSolverIterative	(	const float	a_orig,
		const float	b_orig,
		const float	c_orig,
		const float	d_orig,
		const float	e_orig,
		const float	f_orig
	)

inline

Definition at line 140 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

     {
       using namespace std;
  
       const float max_ab = max( fabsf(a_orig),  fabsf(b_orig) );
       const float max_cd = max( fabsf(c_orig),  fabsf(d_orig) );
       const float max_ef = max( fabsf(e_orig),  fabsf(f_orig) );
       scale = max(max_ab, max(max_cd, max_ef) );
       if (scale == 0.f)
         {
           scale = 1.f;
         }
       const float inv_scale = 1.0f / scale;
       a = a_orig * inv_scale;
       b = b_orig * inv_scale;
       c = c_orig * inv_scale;
       d = d_orig * inv_scale;
       e = e_orig * inv_scale;
       f = f_orig * inv_scale;
     }

Member Function Documentation

◆ compute()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute	(	float(&)	temp_diag[3],
		float(&)	temp_subdiag[2],
		float(&)	temp_mat[3][3],
		const float	near_zero = `s_typical_near_zero`,
		const float	epsilon = `s_typical_epsilon`,
		const int	max_iter = `s_typical_max_iterations`
	)

inline

Definition at line 359 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

     {
       int iter_count = 0;
       int start = 0, end = 2;
  
       const float precision_inv = 1.f / epsilon;
  
       while (end > 0)
         {
           for (int i = start; i < end; ++i)
             {
               if (fabsf(temp_subdiag[i]) < near_zero)
                 {
                   temp_subdiag[i] = 0.f;
                 }
               else
                 {
                   const float scaled_subdiag = precision_inv * temp_subdiag[i];
                   if (scaled_subdiag * scaled_subdiag <= fabsf(temp_diag[i]) + fabsf(temp_diag[i + 1]))
                     {
                       temp_subdiag[i] = 0.f;
                     }
                 }
             }
  
           while (end > 0 && temp_subdiag[end - 1] == 0.f)
             {
               --end;
             }
  
           if (end <= 0)
             {
               break;
             }
  
           ++iter_count;
  
           if (iter_count > max_iter)
             {
               printf("OUT OF ITERS! %d %d\n", start, end);
               break;
             }
  
           start = end - 1;
  
           while (start > 0 && temp_subdiag[start - 1] != 0.f)
             {
               --start;
             }
  
           compute_iteration(start, end, temp_diag, temp_subdiag, temp_mat);
         }
  
       //No need to sort eigenvalues and eigenvectors:
       //we are going to check them all anyway.
     }

◆ compute_iteration()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute_iteration	(	const int	start,
		const int	end,
		float(&)	temp_diag[3],
		float(&)	temp_subdiag[2],
		float(&)	temp_mat[3][3]
	)

inline

Definition at line 228 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

     {
       //Performs one shifted sweep over the block of the tridiagonal matrix
       //delimited by the diagonal indices [start, end]: temp_diag and temp_subdiag
       //are updated in place and every rotation used is also applied to temp_mat.
       //NOTE(review): product_sum_cornea_harrison_tang is defined elsewhere;
       //from its use here it presumably evaluates p * q + r * s for arguments
       //(p, q, r, s) with reduced rounding error - confirm against its definition.

       //Half the difference between the last two diagonal entries of the block.
       const float td = (temp_diag[end - 1] - temp_diag[end]) * 0.5f;
  
       //Last sub-diagonal entry of the block.
       const float ee = temp_subdiag[end - 1];
  
       //Shift: starts from the last diagonal entry and is corrected below.
       //NOTE(review): this looks like the usual Wilkinson shift - confirm.
       float mu = temp_diag[end];
  
       if (td == 0.f)
         {
           mu -= fabsf(ee);
         }
       else if (ee != 0.f)
         {
           const float ee_2 = ee * ee;
  
           const float h = hypot(td, ee);
  
           //td plus h carrying the sign of td (+1 for td >= 0, -1 for td < 0).
           const float factor = td + h * ((td >= 0.f) - (td < 0.f));
  
           if (ee_2 == 0.f)
             {
               //ee is non-zero but its square evaluated to zero:
               //divide in two steps instead of using the square.
               mu -= ee / (factor / ee);
             }
           else
             {
               mu -= ee_2 / factor;
             }
         }
  
       //Pair of values that defines the first rotation.
       float x = temp_diag[start] - mu;
       float z = temp_subdiag[start];
  
       //One rotation per k; the sweep stops early as soon as z is exactly zero.
       for (int k = start; k < end && z != 0.f; ++k)
         {
           float givens_c, givens_s;
  
           //The z == 0 case below is disabled: the loop condition already excludes it.
            /*if (z == 0.f)
             {
               givens_c = ((x >= 0.f) - (x < 0.f));
               givens_s = 0.f;
             }
           else */if (x == 0.f)
             {
               givens_c = 0.f;
               givens_s = (z < 0.f) - (z >= 0.f);
             }
           else if (fabsf(x) >= fabsf(z))
             {
               //|x| >= |z|: the ratio t = z / x has magnitude at most 1.
               const float t = z / x;
               const float u = hypot(1.f, fabsf(t)) * ((x >= 0.f) - (x < 0.f));
               
               givens_c = 1.f / u;
               givens_s = -t * givens_c;
             }
           else
             {
               //|x| < |z|: the ratio t = x / z has magnitude below 1.
               const float t = x / z;
               const float u = hypot(1.f, fabsf(t)) * ((z >= 0.f) - (z < 0.f));
               
               givens_s = -1.f / u;
               givens_c = -t * givens_s;
             }
  
           //Intermediate values built from the not-yet-updated entries;
           //they are reused for temp_diag[k + 1] and temp_subdiag[k] below.
           const float sdk  = product_sum_cornea_harrison_tang(givens_s,
                                                               temp_diag[k],
                                                               givens_c,
                                                               temp_subdiag[k]);
                                                               
           const float dkp1 = product_sum_cornea_harrison_tang(givens_s,
                                                               temp_subdiag[k],
                                                               givens_c,
                                                               temp_diag[k + 1]);
  
           //temp_diag[k] must be updated before temp_diag[k + 1] and temp_subdiag[k]:
           //its expression still reads their old values.
           temp_diag[k] = product_sum_cornea_harrison_tang(givens_c,
                                                           product_sum_cornea_harrison_tang(givens_c,
                                                                                            temp_diag[k],
                                                                                            -givens_s,
                                                                                            temp_subdiag[k]),
                                                           -givens_s,
                                                           product_sum_cornea_harrison_tang(givens_c,
                                                                                            temp_subdiag[k],
                                                                                            -givens_s,
                                                                                            temp_diag[k + 1])
                                                          );
           temp_diag[k + 1] = product_sum_cornea_harrison_tang(givens_s, sdk,  givens_c, dkp1);
           temp_subdiag[k] = product_sum_cornea_harrison_tang(givens_c, sdk, -givens_s, dkp1);
  
           //From the second rotation on, the previous sub-diagonal entry
           //is combined with the z carried over from the previous step.
           if (k > start)
             {
               temp_subdiag[k - 1] = product_sum_cornea_harrison_tang(givens_c,
                                                                      temp_subdiag[k - 1],
                                                                      -givens_s,
                                                                      z);
             }
  
           //Values defining the next rotation.
           x = temp_subdiag[k];
  
           if (k < end - 1)
             {
               z = -givens_s * temp_subdiag[k + 1];
  
               temp_subdiag[k + 1] *= givens_c;
             }
  
           //We could skip if (c, s) == (1, 0)
           //Also, apply on the right means
           //we have to consider -s instead of s.
           //The rotation mixes rows k and k + 1 of temp_mat, one column at a time.
           for (int i = 0; i < 3; ++i)
             {
               float & c_1 = temp_mat[k]    [i];
               float & c_2 = temp_mat[k + 1][i];
               
               //Both updates must use the values from before the rotation.
               const float c_1_old = c_1;
               const float c_2_old = c_2;
                
               c_1 = product_sum_cornea_harrison_tang(givens_c, c_1_old, -givens_s, c_2_old);
               c_2 = product_sum_cornea_harrison_tang(givens_s, c_1_old,  givens_c, c_2_old);
             }
         }
     }

◆ get_solution()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::get_solution	(	float(&)	eigenvalues[3],
		float(&)	eigenvectors[3][3],
		const float	tolerance = `s_typical_tolerance`,
		const float	near_zero = `s_typical_near_zero`,
		const float	epsilon = `s_typical_epsilon`,
		const int	max_iter = `s_typical_max_iterations`
	)

inline

Get the full eigenvalues and eigenvectors for this matrix.

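The eigenvalues are always multiplied back by the stored scale factor before being returned, so they correspond to the original (unscaled) matrix. Note that this signature has no rescale_and_reshift_values parameter and that the function body applies no shift.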

Definition at line 425 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

     {
       float temp_subdiag[2];
  
       tridiagonalize(eigenvalues, temp_subdiag, eigenvectors, tolerance);
       //eigenvalues and eigenvectors are used temporarily to store the diagonal and the matrix.
  
       compute(eigenvalues, temp_subdiag, eigenvectors, near_zero, epsilon, max_iter);
  
       eigenvalues[0] *= scale;
       eigenvalues[1] *= scale;
       eigenvalues[2] *= scale;
     }

◆ tridiagonalize()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::tridiagonalize	(	float(&)	temp_diag[3],
		float(&)	temp_subdiag[2],
		float(&)	temp_mat[3][3],
		const float	tolerance = `s_typical_tolerance`
	)

inline

Definition at line 165 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

     {
       using namespace std;
  
       temp_diag[0] = a;
       if (f * f <= tolerance)
         {
           temp_diag[1] = b;
           temp_diag[2] = c;
  
           temp_subdiag[0] = d;
           temp_subdiag[1] = e;
  
           temp_mat[0][0] =    1.f;
           temp_mat[0][1] =    0.f;
           temp_mat[0][2] =    0.f;
           
           temp_mat[1][0] =    0.f;
           temp_mat[1][1] =    1.f;
           temp_mat[1][2] =    0.f;
           
           temp_mat[2][0] =    0.f;
           temp_mat[2][1] =    0.f;
           temp_mat[2][2] =    1.f;
         }
       else
         {
           const float beta = hypot(d, f);
           
           const float inv_beta = 1.f / beta;
  
           const float em_0_1 = d * inv_beta;
           const float em_0_2 = f * inv_beta;
  
           const float q_w_1 = 2 * em_0_1 * e;
           const float q_c_1 = fmaf(2 * em_0_1, e, -q_w_1);
           const float q_w_2 = em_0_2 * c;
           const float q_c_2 = fmaf(em_0_2, c, -q_w_2);
           const float q_w_3 = em_0_2 * b;
           const float q_c_3 = fmaf(em_0_2, b, -q_w_3);
  
           const float q = sum_kahan_babushka_neumaier(q_w_1, q_w_2, -q_w_3, q_c_1, q_c_2, -q_c_3);
  
           temp_diag[1] = fmaf( em_0_2, q, b);
           temp_diag[2] = fmaf(-em_0_2, q, c);
  
           temp_subdiag[0] = beta;
           temp_subdiag[1] = fmaf(-em_0_1, q, e);
  
           temp_mat[0][0] =    1.f;
           temp_mat[0][1] =    0.f;
           temp_mat[0][2] =    0.f;
           
           temp_mat[1][0] =    0.f;
           temp_mat[1][1] =  em_0_1;
           temp_mat[1][2] =  em_0_2;
           
           temp_mat[2][0] =    0.f;
           temp_mat[2][1] =  em_0_2;
           temp_mat[2][2] = -em_0_1;
         }
     }