ATLAS Offline Software
Loading...
Searching...
No Matches
ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative Struct Reference

#include <GPUClusterInfoAndMomentsCalculatorImpl.h>

Collaboration diagram for ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative:

Public Member Functions

CUDA_HOS_DEV RealSymmetricMatrixSolverIterative (const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
CUDA_HOS_DEV void tridiagonalize (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)
CUDA_HOS_DEV void compute_iteration (const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])
CUDA_HOS_DEV void compute (float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
CUDA_HOS_DEV void get_solution (float(&eigenvalues)[3], float(&eigenvectors)[3][3], const float tolerance=s_typical_tolerance, const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)
 Get the full eigenvalues and eigenvectors for this matrix.

Public Attributes

float a
float b
float c
float d
float e
float f
float scale

Static Public Attributes

static constexpr float s_typical_tolerance = std::numeric_limits<float>::min()
static constexpr int s_typical_max_iterations = 90
static constexpr float s_typical_near_zero = std::numeric_limits<float>::min()
static constexpr float s_typical_epsilon = std::numeric_limits<float>::epsilon()

Detailed Description

Definition at line 21 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

Constructor & Destructor Documentation

◆ RealSymmetricMatrixSolverIterative()

CUDA_HOS_DEV ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::RealSymmetricMatrixSolverIterative ( const float a_orig,
const float b_orig,
const float c_orig,
const float d_orig,
const float e_orig,
const float f_orig )
inline

Definition at line 31 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

32 {
33 using namespace std;
34
35 const float max_ab = max( fabsf(a_orig), fabsf(b_orig) );
36 const float max_cd = max( fabsf(c_orig), fabsf(d_orig) );
37 const float max_ef = max( fabsf(e_orig), fabsf(f_orig) );
38 scale = max(max_ab, max(max_cd, max_ef) );
39 if (scale == 0.f)
40 {
41 scale = 1.f;
42 }
43 const float inv_scale = 1.0f / scale;
44 a = a_orig * inv_scale;
45 b = b_orig * inv_scale;
46 c = c_orig * inv_scale;
47 d = d_orig * inv_scale;
48 e = e_orig * inv_scale;
49 f = f_orig * inv_scale;
50 }
#define max(a, b)
Definition cfImp.cxx:41

Member Function Documentation

◆ compute()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute ( float(&) temp_diag[3],
float(&) temp_subdiag[2],
float(&) temp_mat[3][3],
const float near_zero = s_typical_near_zero,
const float epsilon = s_typical_epsilon,
const int max_iter = s_typical_max_iterations )
inline

Definition at line 250 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

256 {
257 int iter_count = 0;
258 int start = 0, end = 2;
259
260 const float precision_inv = 1.f / epsilon;
261
262 while (end > 0)
263 {
264 for (int i = start; i < end; ++i)
265 {
266 if (fabsf(temp_subdiag[i]) < near_zero)
267 {
268 temp_subdiag[i] = 0.f;
269 }
270 else
271 {
272 const float scaled_subdiag = precision_inv * temp_subdiag[i];
273 if (scaled_subdiag * scaled_subdiag <= fabsf(temp_diag[i]) + fabsf(temp_diag[i + 1]))
274 {
275 temp_subdiag[i] = 0.f;
276 }
277 }
278 }
279
280 while (end > 0 && temp_subdiag[end - 1] == 0.f)
281 {
282 --end;
283 }
284
285 if (end <= 0)
286 {
287 break;
288 }
289
290 ++iter_count;
291
292 if (iter_count > max_iter)
293 {
294 //printf("OUT OF ITERS! %d %d\n", start, end);
295 break;
296 }
297
298 start = end - 1;
299
300 while (start > 0 && temp_subdiag[start - 1] != 0.f)
301 {
302 --start;
303 }
304
305 compute_iteration(start, end, temp_diag, temp_subdiag, temp_mat);
306 }
307
308 //No need to sort eigenvalues and eigenvectors:
309 //we are going to check them all anyway.
310 }
CUDA_HOS_DEV void compute_iteration(const int start, const int end, float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3])

◆ compute_iteration()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::compute_iteration ( const int start,
const int end,
float(&) temp_diag[3],
float(&) temp_subdiag[2],
float(&) temp_mat[3][3] )
inline

Definition at line 119 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

124 {
125 const float td = (temp_diag[end - 1] - temp_diag[end]) * 0.5f;
126
127 const float ee = temp_subdiag[end - 1];
128
129 float mu = temp_diag[end];
130
131 if (td == 0.f)
132 {
133 mu -= fabsf(ee);
134 }
135 else if (ee != 0.f)
136 {
137 const float ee_2 = ee * ee;
138
139 const float h = hypot(td, ee);
140
141 const float factor = td + h * ((td >= 0.f) - (td < 0.f));
142
143 if (ee_2 == 0.f)
144 {
145 mu -= ee / (factor / ee);
146 }
147 else
148 {
149 mu -= ee_2 / factor;
150 }
151 }
152
153 float x = temp_diag[start] - mu;
154 float z = temp_subdiag[start];
155
156 for (int k = start; k < end && z != 0.f; ++k)
157 {
158 float givens_c, givens_s;
159
160 /*if (z == 0.f)
161 {
162 givens_c = ((x >= 0.f) - (x < 0.f));
163 givens_s = 0.f;
164 }
165 else */if (x == 0.f)
166 {
167 givens_c = 0.f;
168 givens_s = (z < 0.f) - (z >= 0.f);
169 }
170 else if (fabsf(x) >= fabsf(z))
171 {
172 const float t = z / x;
173 const float u = hypot(1.f, fabsf(t)) * ((x >= 0.f) - (x < 0.f));
174
175 givens_c = 1.f / u;
176 givens_s = -t * givens_c;
177 }
178 else
179 {
180 const float t = x / z;
181 const float u = hypot(1.f, fabsf(t)) * ((z >= 0.f) - (z < 0.f));
182
183 givens_s = -1.f / u;
184 givens_c = -t * givens_s;
185 }
186
188 temp_diag[k],
189 givens_c,
190 temp_subdiag[k]);
191
193 temp_subdiag[k],
194 givens_c,
195 temp_diag[k + 1]);
196
199 temp_diag[k],
200 -givens_s,
201 temp_subdiag[k]),
202 -givens_s,
204 temp_subdiag[k],
205 -givens_s,
206 temp_diag[k + 1])
207 );
208 temp_diag[k + 1] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_s, sdk, givens_c, dkp1);
209 temp_subdiag[k] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c, sdk, -givens_s, dkp1);
210
211 if (k > start)
212 {
213 temp_subdiag[k - 1] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c,
214 temp_subdiag[k - 1],
215 -givens_s,
216 z);
217 }
218
219 x = temp_subdiag[k];
220
221 if (k < end - 1)
222 {
223 z = -givens_s * temp_subdiag[k + 1];
224
225 temp_subdiag[k + 1] *= givens_c;
226 }
227
228 //We could skip if (c, s) == (1, 0)
229 //Also, apply on the right means
230 //we have to consider -s instead of s.
231 for (int i = 0; i < 3; ++i)
232 {
233 float & c_1 = temp_mat[k] [i];
234 float & c_2 = temp_mat[k + 1][i];
235
236 const float c_1_old = c_1;
237 const float c_2_old = c_2;
238
239 c_1 = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_c, c_1_old, -givens_s, c_2_old);
240 c_2 = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(givens_s, c_1_old, givens_c, c_2_old);
241 }
242 }
243 }
#define x
#define z
static CUDA_HOS_DEV float product_sum_cornea_harrison_tang(const float a, const float b, const float c, const float d)
@ u
Enums for curvilinear frames.
Definition ParamDefs.h:77

◆ get_solution()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::get_solution ( float(&) eigenvalues[3],
float(&) eigenvectors[3][3],
const float tolerance = s_typical_tolerance,
const float near_zero = s_typical_near_zero,
const float epsilon = s_typical_epsilon,
const int max_iter = s_typical_max_iterations )
inline

Get the full eigenvalues and eigenvectors for this matrix.

If rescale_and_reshift_values is true, the eigenvalues are scaled and shifted back to their proper value, given the original matrix.

Definition at line 316 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

322 {
323 float temp_subdiag[2];
324
325 tridiagonalize(eigenvalues, temp_subdiag, eigenvectors, tolerance);
326 //eigenvalues and eigenvectors are used temporarily to store the diagonal and the matrix.
327
328 compute(eigenvalues, temp_subdiag, eigenvectors, near_zero, epsilon, max_iter);
329
330 eigenvalues[0] *= scale;
331 eigenvalues[1] *= scale;
332 eigenvalues[2] *= scale;
333 }
constexpr double tolerance
CUDA_HOS_DEV void tridiagonalize(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float tolerance=s_typical_tolerance)
CUDA_HOS_DEV void compute(float(&temp_diag)[3], float(&temp_subdiag)[2], float(&temp_mat)[3][3], const float near_zero=s_typical_near_zero, const float epsilon=s_typical_epsilon, const int max_iter=s_typical_max_iterations)

◆ tridiagonalize()

CUDA_HOS_DEV void ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::tridiagonalize ( float(&) temp_diag[3],
float(&) temp_subdiag[2],
float(&) temp_mat[3][3],
const float tolerance = s_typical_tolerance )
inline

Definition at line 56 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

57 {
58 using namespace std;
59
60 temp_diag[0] = a;
61 if (f * f <= tolerance)
62 {
63 temp_diag[1] = b;
64 temp_diag[2] = c;
65
66 temp_subdiag[0] = d;
67 temp_subdiag[1] = e;
68
69 temp_mat[0][0] = 1.f;
70 temp_mat[0][1] = 0.f;
71 temp_mat[0][2] = 0.f;
72
73 temp_mat[1][0] = 0.f;
74 temp_mat[1][1] = 1.f;
75 temp_mat[1][2] = 0.f;
76
77 temp_mat[2][0] = 0.f;
78 temp_mat[2][1] = 0.f;
79 temp_mat[2][2] = 1.f;
80 }
81 else
82 {
83 const float beta = hypot(d, f);
84
85 const float inv_beta = 1.f / beta;
86
87 const float em_0_1 = d * inv_beta;
88 const float em_0_2 = f * inv_beta;
89
90 const float q_w_1 = 2 * em_0_1 * e;
91 const float q_c_1 = fmaf(2 * em_0_1, e, -q_w_1);
92 const float q_w_2 = em_0_2 * c;
93 const float q_c_2 = fmaf(em_0_2, c, -q_w_2);
94 const float q_w_3 = em_0_2 * b;
95 const float q_c_3 = fmaf(em_0_2, b, -q_w_3);
96
97 const float q = CaloRecGPU::Helpers::sum_kahan_babushka_neumaier(q_w_1, q_w_2, -q_w_3, q_c_1, q_c_2, -q_c_3);
98
99 temp_diag[1] = fmaf( em_0_2, q, b);
100 temp_diag[2] = fmaf(-em_0_2, q, c);
101
102 temp_subdiag[0] = beta;
103 temp_subdiag[1] = fmaf(-em_0_1, q, e);
104
105 temp_mat[0][0] = 1.f;
106 temp_mat[0][1] = 0.f;
107 temp_mat[0][2] = 0.f;
108
109 temp_mat[1][0] = 0.f;
110 temp_mat[1][1] = em_0_1;
111 temp_mat[1][2] = em_0_2;
112
113 temp_mat[2][0] = 0.f;
114 temp_mat[2][1] = em_0_2;
115 temp_mat[2][2] = -em_0_1;
116 }
117 }
CUDA_HOS_DEV float sum_kahan_babushka_neumaier(const Floats &... fs)
Adds a list of floats using the Kahan-Babushka-Neumaier algorithm for greater precision (at the cost ...

Member Data Documentation

◆ a

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::a

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ b

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::b

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ c

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::c

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ d

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::d

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ e

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::e

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ f

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::f

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_epsilon

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_epsilon = std::numeric_limits<float>::epsilon()
staticconstexpr

Definition at line 248 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_max_iterations

int ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_max_iterations = 90
staticconstexpr

Definition at line 246 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_near_zero

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_near_zero = std::numeric_limits<float>::min()
staticconstexpr

Definition at line 247 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ s_typical_tolerance

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::s_typical_tolerance = std::numeric_limits<float>::min()
staticconstexpr

Definition at line 52 of file GPUClusterInfoAndMomentsCalculatorImpl.h.

◆ scale

float ClusterMomentsCalculator::RealSymmetricMatrixSolverIterative::scale

Definition at line 24 of file GPUClusterInfoAndMomentsCalculatorImpl.h.


The documentation for this struct was generated from the following file: