ATLAS Offline Software
GPUClusterInfoAndMomentsCalculatorImpl.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2023 CERN for the benefit of the ATLAS collaboration
3 //
4 // Dear emacs, this is -*- c++ -*-
5 //
6 
7 #ifndef CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATOR_CUDA_H
8 #define CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATOR_CUDA_H
9 
11 #include "CaloRecGPU/DataHolders.h"
12 #include "CaloRecGPU/Helpers.h"
13 
15 
16 #include <cmath>
17 
19 {
20 
22  //Taken from the Eigen implementation of direct_selfadjoint_eigenvalues
23  {
24  float a, b, c, d, e, f, shift, scale;
25  // +-- --+
26  // | a d f |
27  // | d b e |
28  // | f e c |
29  // +-- --+
30 
31  CUDA_HOS_DEV RealSymmetricMatrixSolver(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
32  {
33  using namespace std;
34 
35  shift = (a_orig + b_orig + c_orig) / 3.f;
36  a = a_orig - shift;
37  b = b_orig - shift;
38  c = c_orig - shift;
39  scale = max(fabsf(a), max(fabsf(b), max(fabsf(c), max(fabsf(d_orig), max(fabsf(e_orig), fabsf(f_orig))))));
40  if (scale == 0.f)
41  {
42  scale = 1.f;
43  }
44  a /= scale;
45  b /= scale;
46  c /= scale;
47  d = d_orig / scale;
48  e = e_orig / scale;
49  f = f_orig / scale;
50  }
51 
55  CUDA_HOS_DEV void get_eigenvalues(float & e_1, float & e_2, float & e_3) const
56  {
57  using namespace std;
58 
59  const float c_0 = a * b * c + 2.f * d * f * e - a * e * e - b * f * f - c * d * d;
60  const float c_1 = a * b - d * d + a * c - f * f + b * c - e * e;
61  const float c_2 = a + b + c;
62 
63  constexpr float inv_3 = 1.f / 3.f;
64 
65  const float c_2_over_3 = c_2 * inv_3;
66 
67  const float a_over_3 = max((c_2 * c_2_over_3 - c_1) * inv_3, 0.f);
68 
69  const float half_b = 0.5f * (c_0 + c_2_over_3 * (2.f * c_2_over_3 * c_2_over_3 - c_1));
70 
71  const float q = max(a_over_3 * a_over_3 * a_over_3 - half_b * half_b, 0.f);
72 
73  const float rho = sqrtf(a_over_3);
74 
75 #ifdef __CUDA_ARCH__
76  const float theta = atan2f(1.0f, rsqrtf(q) * half_b) * inv_3;
77 #else
78  const float theta = atan2f(sqrtf(q), half_b) * inv_3;
79 #endif
80 
81 #ifdef __CUDA_ARCH__
82  float sin_theta, cos_theta;
83  sincosf(theta, &sin_theta, &cos_theta);
84 #else
85  const float sin_theta = sinf(theta);
86  const float cos_theta = cosf(theta);
87 #endif
88 
89  const float sqrt_3 = sqrtf(3.f);
90 
91  e_1 = c_2_over_3 - rho * (cos_theta + sqrt_3 * sin_theta);
92  e_2 = c_2_over_3 - rho * (cos_theta - sqrt_3 * sin_theta);
93  e_3 = c_2_over_3 + 2.f * rho * cos_theta;
94  }
95 
96  CUDA_HOS_DEV static void cross_prod(float (&res)[3], const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)
97  {
98  res[0] = a2 * b3 - a3 * b2;
99  res[1] = a3 * b1 - a1 * b3;
100  res[2] = a1 * b2 - a2 * b1;
101  }
102 
103  CUDA_HOS_DEV static void cross_prod(float (&res)[3], const float (&x)[3], const float (&y)[3])
104  {
105  cross_prod(res, x[0], x[1], x[2], y[0], y[1], y[2]);
106  }
107 
108  CUDA_HOS_DEV static float dot_prod(const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)
109  {
110  return a1 * b1 + a2 * b2 + a3 * b3;
111  }
112 
113  CUDA_HOS_DEV static float dot_prod(const float (&x)[3], const float (&y)[3])
114  {
115  return dot_prod(x[0], x[1], x[2], y[0], y[1], y[2]);
116  }
117 
118  CUDA_HOS_DEV void extract_one(const float eigenvalue, float (&res)[3], float (&representative)[3]) const
119  {
120  using namespace std;
121 
122  const float diag_0 = a - eigenvalue;
123  const float diag_1 = b - eigenvalue;
124  const float diag_2 = c - eigenvalue;
125 
126  float vec_1[3], vec_2[3];
127 
128  if (fabsf(diag_0) > fabsf(diag_1) && fabsf(diag_0) > fabsf(diag_2))
129  {
130  representative[0] = diag_0;
131  representative[1] = d;
132  representative[2] = f;
133 
134  vec_1[0] = d;
135  vec_1[1] = diag_1;
136  vec_1[2] = e;
137 
138  vec_2[0] = f;
139  vec_2[1] = e;
140  vec_2[2] = diag_2;
141  }
142  else if (/*(fabsf(diag_0) <= fabsf(diag_1) || fabsf(diag_0) <= fabsf(diag_2)) &&*/ fabsf(diag_1) > fabsf(diag_2))
143  {
144  representative[0] = d;
145  representative[1] = diag_1;
146  representative[2] = e;
147 
148  vec_1[0] = f;
149  vec_1[1] = e;
150  vec_1[2] = diag_2;
151 
152  vec_2[0] = diag_0;
153  vec_2[1] = d;
154  vec_2[2] = f;
155 
156  }
157  else /*if ((fabsf(diag_0) <= fabsf(diag_1) || fabsf(diag_0) <= fabsf(diag_2)) && fabsf(diag_1) <= fabsf(diag_2))*/
158  {
159  representative[0] = f;
160  representative[1] = e;
161  representative[2] = diag_2;
162 
163  vec_1[0] = diag_0;
164  vec_1[1] = d;
165  vec_1[2] = f;
166 
167  vec_2[0] = d;
168  vec_2[1] = diag_1;
169  vec_2[2] = e;
170  }
171 
172  cross_prod(res, representative, vec_1);
173  cross_prod(vec_1, representative, vec_2);
174  //Can safely override previous value...
175 
176 #ifdef __CUDA_ARCH__
177  const float norm_1 = rnorm3df(res[0], res[1], res[2]);
178  const float norm_2 = rnorm3df(vec_1[0], vec_1[1], vec_1[2]);
179 #else
180  const float norm_1 = 1.f / hypot(res[0], res[1], res[2]);
181  const float norm_2 = 1.f / hypot(vec_1[0], vec_1[1], vec_1[2]);
182 #endif
183 
184  if (norm_1 <= norm_2)
185  //Greater magnitude -> multiply by a smaller value
186  {
187  res[0] *= norm_1;
188  res[1] *= norm_1;
189  res[2] *= norm_1;
190  }
191  else
192  {
193  res[0] = vec_1[0] * norm_2;
194  res[1] = vec_1[1] * norm_2;
195  res[2] = vec_1[2] * norm_2;
196  }
197  }
198 
199  static constexpr float s_typical_epsilon = 5e-4;
200 
204  CUDA_HOS_DEV void get_eigenvectors(float (&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon = s_typical_epsilon) const
205  {
206  using namespace std;
207 
208  if (e_3 - e_1 <= epsilon)
209  {
210  res[0][0] = 1.f;
211  res[0][1] = 0.f;
212  res[0][2] = 0.f;
213 
214  res[1][0] = 0.f;
215  res[1][1] = 1.f;
216  res[1][2] = 0.f;
217 
218  res[2][0] = 0.f;
219  res[2][1] = 0.f;
220  res[2][2] = 1.f;
221  }
222  else
223  {
224  const float d_0 = e_3 - e_2;
225  const float d_1 = e_2 - e_1;
226 
227  const float d_min = min(d_0, d_1);
228 
229  int k, j;
230  float first_e, second_e;
231 
232  if (d_0 > d_1)
233  {
234  k = 2;
235  j = 0;
236 
237  first_e = e_3;
238  second_e = e_1;
239  }
240  else
241  {
242  k = 0;
243  j = 2;
244 
245  first_e = e_1;
246  second_e = e_3;
247  }
248 
249  extract_one(first_e, res[k], res[j]);
250 
251  if (d_min <= 2 * epsilon * d_1)
252  {
253 #ifdef __CUDA_ARCH__
254  const float base_norm = rnorm3df(res[j][0], res[j][1], res[j][2]);
255 #else
256  const float base_norm = 1.f / hypot(res[j][0], res[j][1], res[j][2]);
257 #endif
258  const float extra_factor = 1.f - dot_prod(res[k], res[j]);
259 
260  const float norm = base_norm / extra_factor;
261 
262  res[j][0] *= norm;
263  res[j][1] *= norm;
264  res[j][2] *= norm;
265  }
266  else
267  {
268  float extra_vector[3];
269 
270  extract_one(second_e, res[j], extra_vector);
271  }
272 
273  cross_prod(res[1], res[2], res[0]);
274 
275 #ifdef __CUDA_ARCH__
276  const float norm = rnorm3df(res[1][0], res[1][1], res[1][2]);
277 #else
278  const float norm = 1.f / hypot(res[1][0], res[1][1], res[1][2]);
279 #endif
280 
281  res[1][0] *= norm;
282  res[1][1] *= norm;
283  res[1][2] *= norm;
284  }
285  }
286 
291  CUDA_HOS_DEV void get_solution(float (&eigenvalues)[3], float (&eigenvectors)[3][3], bool rescale_and_reshift_values = true, const float epsilon = s_typical_epsilon)
292  {
293  get_eigenvalues(eigenvalues[0], eigenvalues[1], eigenvalues[2]);
294  get_eigenvectors(eigenvectors, eigenvalues[0], eigenvalues[1], eigenvalues[2], epsilon);
295 
296  if (rescale_and_reshift_values)
297  {
298  eigenvalues[0] = eigenvalues[0] * scale + shift;
299  eigenvalues[1] = eigenvalues[1] * scale + shift;
300  eigenvalues[2] = eigenvalues[2] * scale + shift;
301  }
302  }
303  };
304 
306  {
315  };
316 
318  {
320 
322 
323  void allocate()
324  {
325  m_options.allocate();
326  }
327 
328  void sendToGPU(const bool clear_CPU = false);
329  };
330 
332 
334  const CaloRecGPU::ConstantDataHolder & instance_data,
335  const CMCOptionsHolder & options,
336  const IGPUKernelSizeOptimizer & optimizer,
337  const bool synchronize = false,
339  const bool defer_instead_of_oversize = false);
340 }
341 #endif
ClusterMomentsCalculator::ClusterMomentCalculationOptions::min_l_longitudinal
float min_l_longitudinal
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:313
PlotCalibFromCool.norm
norm
Definition: PlotCalibFromCool.py:100
IGPUKernelSizeOptimizer.h
max
constexpr double max()
Definition: ap_fixedTest.cxx:33
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_solution
CUDA_HOS_DEV void get_solution(float(&eigenvalues)[3], float(&eigenvectors)[3][3], bool rescale_and_reshift_values=true, const float epsilon=s_typical_epsilon)
Get the full eigenvalues and eigenvectors for this matrix.
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:291
CaloRecGPU::Helpers::SimpleHolder
Holds one objects of type \T in memory context Context.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1067
ClusterMomentsCalculator::RealSymmetricMatrixSolver::a
float a
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
ClusterMomentsCalculator::CMCOptionsHolder::sendToGPU
void sendToGPU(const bool clear_CPU=false)
ClusterMomentsCalculator::RealSymmetricMatrixSolver::dot_prod
static CUDA_HOS_DEV float dot_prod(const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:108
ClusterMomentsCalculator::ClusterMomentCalculationOptions::use_abs_energy
bool use_abs_energy
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:307
x
#define x
AthenaPoolTestWrite.stream
string stream
Definition: AthenaPoolTestWrite.py:12
ClusterMomentsCalculator
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:19
ClusterMomentsCalculator::RealSymmetricMatrixSolver::extract_one
CUDA_HOS_DEV void extract_one(const float eigenvalue, float(&res)[3], float(&representative)[3]) const
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:118
ClusterMomentsCalculator::CMCOptionsHolder::allocate
void allocate()
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:323
ClusterMomentsCalculator::RealSymmetricMatrixSolver::e
float e
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
CaloRecGPU::EventDataHolder
Definition: DataHolders.h:35
CUDA_HOS_DEV
#define CUDA_HOS_DEV
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:101
Helpers.h
ClusterMomentsCalculator::ClusterMomentCalculationOptions::min_r_lateral
float min_r_lateral
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:314
res
std::pair< std::vector< unsigned int >, bool > res
Definition: JetGroupProductTest.cxx:14
python.AtlRunQueryLib.options
options
Definition: AtlRunQueryLib.py:379
ClusterMomentsCalculator::RealSymmetricMatrixSolver::dot_prod
static CUDA_HOS_DEV float dot_prod(const float(&x)[3], const float(&y)[3])
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:113
ClusterMomentsCalculator::RealSymmetricMatrixSolver
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:23
ClusterMomentsCalculator::ClusterMomentCalculationOptions::max_axis_angle
float max_axis_angle
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:311
ClusterMomentsCalculator::RealSymmetricMatrixSolver::f
float f
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:110
ClusterMomentsCalculator::CMCOptionsHolder::m_options_dev
CaloRecGPU::Helpers::CUDA_object< ClusterMomentCalculationOptions > m_options_dev
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:321
ClusterMomentsCalculator::ClusterMomentCalculationOptions
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:306
ClusterMomentsCalculator::RealSymmetricMatrixSolver::b
float b
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
y
#define y
ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_eigenvectors
CUDA_HOS_DEV void get_eigenvectors(float(&res)[3][3], const float e_1, const float e_2, const float e_3, const float epsilon=s_typical_epsilon) const
Calculate the eigenvectors of the matrix, using the (possibly unscaled) eigenvalues e_1,...
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:204
ClusterMomentsCalculator::RealSymmetricMatrixSolver::get_eigenvalues
CUDA_HOS_DEV void get_eigenvalues(float &e_1, float &e_2, float &e_3) const
Calculate shifted and scaled eigenvalues of the matrix, in ascending value.
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:55
ClusterMomentsCalculator::RealSymmetricMatrixSolver::shift
float shift
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
CUDAFriendlyClasses.h
ClusterMomentsCalculator::ClusterMomentCalculationOptions::min_LAr_quality
float min_LAr_quality
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:310
CaloRecGPU::ConstantDataHolder
Definition: DataHolders.h:19
ClusterMomentsCalculator::RealSymmetricMatrixSolver::scale
float scale
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
extractSporadic.q
list q
Definition: extractSporadic.py:98
ClusterMomentsCalculator::RealSymmetricMatrixSolver::cross_prod
static CUDA_HOS_DEV void cross_prod(float(&res)[3], const float(&x)[3], const float(&y)[3])
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:103
ClusterMomentsCalculator::RealSymmetricMatrixSolver::c
float c
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
ClusterMomentsCalculator::register_kernels
void register_kernels(IGPUKernelSizeOptimizer &optimizer)
ClusterMomentsCalculator::ClusterMomentCalculationOptions::use_two_gaussian_noise
bool use_two_gaussian_noise
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:308
ClusterMomentsCalculator::RealSymmetricMatrixSolver::cross_prod
static CUDA_HOS_DEV void cross_prod(float(&res)[3], const float a1, const float a2, const float a3, const float b1, const float b2, const float b3)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:96
IGPUKernelSizeOptimizer
Interface for GPU kernel size optimization (allowing adjustment of kernel sizes to the properties of ...
Definition: IGPUKernelSizeOptimizer.h:29
ClusterMomentsCalculator::RealSymmetricMatrixSolver::s_typical_epsilon
static constexpr float s_typical_epsilon
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:199
DataHolders.h
ClusterMomentsCalculator::RealSymmetricMatrixSolver::d
float d
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:24
fitman.rho
rho
Definition: fitman.py:532
ClusterMomentsCalculator::CMCOptionsHolder::m_options
CaloRecGPU::Helpers::CPU_object< ClusterMomentCalculationOptions > m_options
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:319
fitman.k
k
Definition: fitman.py:528
ClusterMomentsCalculator::CMCOptionsHolder
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:318
ClusterMomentsCalculator::RealSymmetricMatrixSolver::RealSymmetricMatrixSolver
CUDA_HOS_DEV RealSymmetricMatrixSolver(const float a_orig, const float b_orig, const float c_orig, const float d_orig, const float e_orig, const float f_orig)
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:31
ClusterMomentsCalculator::ClusterMomentCalculationOptions::skip_invalid_clusters
bool skip_invalid_clusters
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:309
ClusterMomentsCalculator::calculateClusterPropertiesAndMoments
void calculateClusterPropertiesAndMoments(CaloRecGPU::EventDataHolder &holder, const CaloRecGPU::ConstantDataHolder &instance_data, const CMCOptionsHolder &options, const IGPUKernelSizeOptimizer &optimizer, const bool synchronize=false, CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder stream={}, const bool defer_instead_of_oversize=false)
ClusterMomentsCalculator::ClusterMomentCalculationOptions::eta_inner_wheel
float eta_inner_wheel
Definition: GPUClusterInfoAndMomentsCalculatorImpl.h:312