#include <GPUClusterInfoAndMomentsCalculatorImpl.h>
|
CUDA_HOS_DEV | RealSymmetricMatrixSolverIterative (const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig) |
|
CUDA_HOS_DEV void | tridiagonalize (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance) |
|
CUDA_HOS_DEV void | compute_iteration (const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3]) |
|
CUDA_HOS_DEV void | compute (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations) |
|
CUDA_HOS_DEV void | get_solution (float(&eigenvalues)[3], float(&eigenvectors)[3][3], const float tolerance=s_typical_tolerance, const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations) |
| Get the full eigenvalues and eigenvectors for this matrix.
|
|
|
float | a |
|
float | b |
|
float | c |
|
float | d |
|
float | e |
|
float | f |
|
float | scale |
|
◆ RealSymmetricMatrixSolverIterative()
CUDA_HOS_DEV ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::RealSymmetricMatrixSolverIterative |
( |
const float |
a_orig, |
|
|
const float |
b_orig, |
|
|
const float |
c_orig, |
|
|
const float |
d_orig, |
|
|
const float |
e_orig, |
|
|
const float |
f_orig |
|
) |
| |
|
inline |
Definition at line 140 of file GPUClusterInfoAndMomentsCalculatorImpl.h.
144 const float max_ab =
max( fabsf(a_orig), fabsf(b_orig) );
145 const float max_cd =
max( fabsf(c_orig), fabsf(d_orig) );
146 const float max_ef =
max( fabsf(e_orig), fabsf(f_orig) );
152 const float inv_scale = 1.0f /
scale;
153 a = a_orig * inv_scale;
154 b = b_orig * inv_scale;
155 c = c_orig * inv_scale;
156 d = d_orig * inv_scale;
157 e = e_orig * inv_scale;
158 f = f_orig * inv_scale;
◆ compute()
Definition at line 359 of file GPUClusterInfoAndMomentsCalculatorImpl.h.
369 const float precision_inv = 1.f / epsilon;
375 if (fabsf(temp_subdiag[
i]) < near_zero)
377 temp_subdiag[
i] = 0.f;
381 const float scaled_subdiag = precision_inv * temp_subdiag[
i];
382 if (scaled_subdiag * scaled_subdiag <= fabsf(temp_diag[
i]) + fabsf(temp_diag[
i + 1]))
384 temp_subdiag[
i] = 0.f;
389 while (
end > 0 && temp_subdiag[
end - 1] == 0.
f)
401 if (iter_count > max_iter)
403 printf(
"OUT OF ITERS! %d %d\n",
start,
end);
409 while (
start > 0 && temp_subdiag[
start - 1] != 0.
f)
◆ compute_iteration()
CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute_iteration |
( |
const int |
start, |
|
|
const int |
end, |
|
|
float(&) |
temp_diag[3], |
|
|
float(&) |
temp_subdiag[2], |
|
|
float(&) |
temp_mat[3][3] |
|
) |
| |
|
inline |
Definition at line 228 of file GPUClusterInfoAndMomentsCalculatorImpl.h.
234 const float td = (temp_diag[
end - 1] - temp_diag[
end]) * 0.5
f;
236 const float ee = temp_subdiag[
end - 1];
238 float mu = temp_diag[
end];
246 const float ee_2 = ee * ee;
248 const float h = hypot(td, ee);
250 const float factor = td +
h * ((td >= 0.f) - (td < 0.
f));
254 mu -= ee / (factor / ee);
263 float z = temp_subdiag[
start];
267 float givens_c, givens_s;
277 givens_s = (
z < 0.f) - (
z >= 0.
f);
279 else if (fabsf(
x) >= fabsf(
z))
281 const float t =
z /
x;
282 const float u = hypot(1.
f, fabsf(
t)) * ((
x >= 0.f) - (
x < 0.
f));
285 givens_s = -
t * givens_c;
289 const float t =
x /
z;
290 const float u = hypot(1.
f, fabsf(
t)) * ((
z >= 0.f) - (
z < 0.
f));
293 givens_c = -
t * givens_s;
332 z = -givens_s * temp_subdiag[
k + 1];
334 temp_subdiag[
k + 1] *= givens_c;
340 for (
int i = 0;
i < 3; ++
i)
342 float & c_1 = temp_mat[
k] [
i];
343 float & c_2 = temp_mat[
k + 1][
i];
345 const float c_1_old = c_1;
346 const float c_2_old = c_2;
◆ get_solution()
Get the full eigenvalues and eigenvectors for this matrix.
If rescale_and_reshift_values is true, the eigenvalues are scaled and shifted back to their proper values, given the original matrix.
Definition at line 425 of file GPUClusterInfoAndMomentsCalculatorImpl.h.
432 float temp_subdiag[2];
437 compute(eigenvalues, temp_subdiag, eigenvectors, near_zero, epsilon, max_iter);
439 eigenvalues[0] *=
scale;
440 eigenvalues[1] *=
scale;
441 eigenvalues[2] *=
scale;
◆ tridiagonalize()
CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::tridiagonalize |
( |
float(&) |
temp_diag[3], |
|
|
float(&) |
temp_subdiag[2], |
|
|
float(&) |
temp_mat[3][3], |
|
|
const float |
tolerance = s_typical_tolerance |
|
) |
| |
|
inline |
Definition at line 165 of file GPUClusterInfoAndMomentsCalculatorImpl.h.
178 temp_mat[0][0] = 1.f;
179 temp_mat[0][1] = 0.f;
180 temp_mat[0][2] = 0.f;
182 temp_mat[1][0] = 0.f;
183 temp_mat[1][1] = 1.f;
184 temp_mat[1][2] = 0.f;
186 temp_mat[2][0] = 0.f;
187 temp_mat[2][1] = 0.f;
188 temp_mat[2][2] = 1.f;
192 const float beta = hypot(
d,
f);
194 const float inv_beta = 1.f /
beta;
196 const float em_0_1 =
d * inv_beta;
197 const float em_0_2 =
f * inv_beta;
199 const float q_w_1 = 2 * em_0_1 *
e;
200 const float q_c_1 = fmaf(2 * em_0_1,
e, -q_w_1);
201 const float q_w_2 = em_0_2 *
c;
202 const float q_c_2 = fmaf(em_0_2,
c, -q_w_2);
203 const float q_w_3 = em_0_2 *
b;
204 const float q_c_3 = fmaf(em_0_2,
b, -q_w_3);
208 temp_diag[1] = fmaf( em_0_2,
q,
b);
209 temp_diag[2] = fmaf(-em_0_2,
q,
c);
211 temp_subdiag[0] =
beta;
212 temp_subdiag[1] = fmaf(-em_0_1,
q,
e);
214 temp_mat[0][0] = 1.f;
215 temp_mat[0][1] = 0.f;
216 temp_mat[0][2] = 0.f;
218 temp_mat[1][0] = 0.f;
219 temp_mat[1][1] = em_0_1;
220 temp_mat[1][2] = em_0_2;
222 temp_mat[2][0] = 0.f;
223 temp_mat[2][1] = em_0_2;
224 temp_mat[2][2] = -em_0_1;
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::a |
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::b |
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::c |
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::d |
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::e |
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::f |
◆ s_typical_epsilon
constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_epsilon = std::numeric_limits<float>::epsilon() |
|
static constexpr |
◆ s_typical_max_iterations
constexpr int ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_max_iterations = 90 |
|
static constexpr |
◆ s_typical_near_zero
constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_near_zero = std::numeric_limits<float>::min() |
|
static constexpr |
◆ s_typical_tolerance
constexpr float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_tolerance = std::numeric_limits<float>::min() |
|
static constexpr |
◆ scale
float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::scale |
The documentation for this struct was generated from the following file: