ATLAS Offline Software
Clustering.h
Go to the documentation of this file.
1 
15 #ifndef CLUSTERINGHELPER_H
16 #define CLUSTERINGHELPER_H
17 
18 #include "TMath.h"
19 
20 #include <algorithm>
21 #include <iostream>
22 #include <cmath>
23 #include <memory>
24 #include <stdexcept>
25 #include <utility>
26 #include <vector>
27 
28 namespace HGTD {
29 
30 template <typename T> class Cluster {
31 
32 public:
33  Cluster();
34 
35  Cluster(const std::vector<double> &v, const std::vector<double> &v_sigma);
36 
37  Cluster(const std::vector<double> &v, const std::vector<double> &v_sigma,
38  const T &entry);
39 
46  void addEntry(const T &entry);
47 
56  void addEntryVector(const std::vector<T> &entry_vector);
57 
61  std::vector<T> getEntries() const;
62 
72  void setClusterValue(const std::vector<double> &v,
73  const std::vector<double> &v_sigma);
74 
78  std::vector<double> getValues() const;
79 
83  std::vector<double> getSigmas() const;
84 
88  int getNEntries() const;
89 
93  bool mergeStatus() const;
94 
95  void setMergeStatus(bool status);
96 
97  int getMergeIteration() const;
98 
99  void setMergeIteration(int iteration);
100 
101  bool containsUnknowns() const;
102 
103  void setUnknownStatus(bool status);
104 
105 private:
106  int m_debug_level = 0; // TODO set this dynamically
107  std::vector<T> m_entries { }; // Stores raw objects that are part of this Cluster
108  double m_combined_value { };
109  double m_combined_sigma { };
110  std::vector<double> m_combined_value_vector { };
111  std::vector<double> m_combined_sigma_vector { };
112  bool m_was_merged = false;
114  bool m_contains_unknowns = false;
115 };
116 
118 
120 
122 
123 template <typename T> class ClusterCollection {
124 public:
125  void addCluster(Cluster<T> vx);
126  void doClustering(ClusterAlgo algo);
127 
128 private:
129  double getDistanceBetweenClusters(const Cluster<T> &a, const Cluster<T> &b);
130 
139 
141 
142  std::pair<Cluster<T>, int> largestClusterInfo();
143 
144 public:
147  std::vector<Cluster<T>> getClusters() const;
148  int getNClusters() const;
156  void updateDistanceCut(double cut_value);
157  void setDebugLevel(int debug_level) { m_debug_level = debug_level; }
158 
159 private:
160  int m_debug_level = 0;
161  double m_distance_cut = 3.0;
162  std::vector<Cluster<T>> m_clusters;
163 };
164 
170 
171 template <class T>
173  : m_combined_value(-999.), m_combined_sigma(-999.) {}
174 
175 template <typename T>
177  const std::vector<double> &v, const std::vector<double> &v_sigma)
178  : m_combined_value_vector(v), m_combined_sigma_vector(v_sigma) {
179 
180  for (const auto &s : v_sigma) {
181  if (s < 0) {
182  m_contains_unknowns = true;
183  }
184  }
185 }
186 
187 template <typename T>
189  const std::vector<double> &v, const std::vector<double> &v_sigma,
190  const T &entry)
191  : m_combined_value_vector(v), m_combined_sigma_vector(v_sigma) {
192 
193  m_entries.push_back(entry);
194 
195  for (const auto &s : v_sigma) {
196  if (s < 0) {
197  m_contains_unknowns = true;
198  }
199  }
200 }
201 
202 template <class T> int Cluster<T>::getNEntries() const {
203  return m_entries.size();
204 }
205 
206 template <class T> void Cluster<T>::addEntry(const T &entry) {
207  m_entries.push_back(entry);
208 }
209 
210 template <class T> std::vector<T> Cluster<T>::getEntries() const {
211  return m_entries;
212 }
213 
214 template <class T>
215 void Cluster<T>::addEntryVector(const std::vector<T> &entry_vector) {
216  if (not entry_vector.empty()) {
217  m_entries.insert(m_entries.end(), entry_vector.begin(), entry_vector.end());
218  }
219 }
220 
221 template <class T>
222 void Cluster<T>::setClusterValue(const std::vector<double> &v,
223  const std::vector<double> &v_sigma) {
224  m_combined_value_vector = v;
225  m_combined_sigma_vector = v_sigma;
226 }
227 
228 template <class T> std::vector<double> Cluster<T>::getValues() const {
229  // return a warning when this value is a default value
230  if (m_combined_value_vector.size() == 0) {
231  if (m_debug_level > 0) {
232  std::cout
233  << "Cluster::getTime: ATTENTION, combi values are not initialized!"
234  << std::endl;
235  }
236  }
237  return m_combined_value_vector;
238 }
239 
240 template <class T> std::vector<double> Cluster<T>::getSigmas() const {
241  return m_combined_sigma_vector;
242 }
243 
244 template <class T> void Cluster<T>::setMergeStatus(bool status) {
245  m_was_merged = status;
246 }
247 
248 template <class T> bool Cluster<T>::mergeStatus() const { return m_was_merged; }
249 
250 template <class T> int Cluster<T>::getMergeIteration() const {
251  return m_merge_iteration;
252 }
253 
254 template <class T> void Cluster<T>::setMergeIteration(int iteration) {
255  m_merge_iteration = iteration;
256 }
257 
258 template <class T> bool Cluster<T>::containsUnknowns() const {
259  return m_contains_unknowns;
260 }
261 
262 template <class T> void Cluster<T>::setUnknownStatus(bool status) {
263  m_contains_unknowns = status;
264 }
265 
268 
269 template <class T>
271  m_distance_cut = cut_value;
272 }
273 
274 template <typename T> void ClusterCollection<T>::addCluster(Cluster<T> vx) {
275  m_clusters.push_back(vx);
276 }
277 
278 template <typename T>
280  const Cluster<T> &b) {
281  std::vector<double> a_values = a.getValues();
282  std::vector<double> b_values = b.getValues();
283  std::vector<double> a_sigmas = a.getSigmas();
284  std::vector<double> b_sigmas = b.getSigmas();
285 
286  std::vector<double> distances(a_values.size());
287  for (size_t i = 0; i < a_values.size(); i++) {
288  double distance_i = 0;
289  if (a_sigmas.at(i) >= 0.0 && b_sigmas.at(i) >= 0.0) {
290  distance_i = std::abs(a_values.at(i) - b_values.at(i)) /
291  std::hypot(a_sigmas.at(i), b_sigmas.at(i));
292  }
293  distances.at(i) = distance_i;
294  }
295  double distance2 = 0.;
296  for (double d : distances) {
297  distance2 += d * d;
298  }
299  return std::sqrt(distance2);
300 }
301 
302 template <class T>
304  const Cluster<T> &b) {
305  Cluster<T> merged_cluster;
306  merged_cluster.addEntryVector(a.getEntries());
307  merged_cluster.addEntryVector(b.getEntries());
308 
309  std::vector<double> a_values = a.getValues();
310  std::vector<double> b_values = b.getValues();
311  std::vector<double> a_sigmas = a.getSigmas();
312  std::vector<double> b_sigmas = b.getSigmas();
313 
314  std::vector<double> new_cluster_values(a_values.size());
315  std::vector<double> new_cluster_sigmas(a_values.size());
316 
317  for (size_t i = 0; i < a_values.size(); i++) {
318  double value1 = a_values.at(i);
319  double value2 = b_values.at(i);
320  double var1 = std::pow(a_sigmas.at(i), 2.0);
321  double var2 = std::pow(b_sigmas.at(i), 2.0);
322  double new_cluster_value =
323  (value1 / var1 + value2 / var2) / (1.0 / var1 + 1.0 / var2);
324  double new_cluster_sigma = std::sqrt(var1 * var2 / (var1 + var2));
325  new_cluster_values.at(i) = new_cluster_value;
326  new_cluster_sigmas.at(i) = new_cluster_sigma;
327  }
328  int new_merge_iteration = a.getMergeIteration() + b.getMergeIteration() + 1;
329  merged_cluster.setClusterValue(new_cluster_values, new_cluster_sigmas);
330  merged_cluster.setMergeStatus(true);
331  merged_cluster.setMergeIteration(new_merge_iteration);
332  return merged_cluster;
333 }
334 
335 template <class T>
337  const Cluster<T> &b) {
338  Cluster<T> merged_cluster;
339  merged_cluster.addEntryVector(a.getEntries());
340  merged_cluster.addEntryVector(b.getEntries());
341 
342  std::vector<double> a_values = a.getValues();
343  std::vector<double> b_values = b.getValues();
344  std::vector<double> a_sigmas = a.getSigmas();
345  std::vector<double> b_sigmas = b.getSigmas();
346 
347  std::vector<double> new_cluster_values(a_values.size());
348  std::vector<double> new_cluster_sigmas(a_values.size());
349 
350  for (size_t i = 0; i < a_values.size(); i++) {
351  double value1 = a_values.at(i);
352  double value2 = b_values.at(i);
353  double sigma1 = a_sigmas.at(i);
354  double sigma2 = b_sigmas.at(i);
355  double new_cluster_value = (value1 + value2) / 2.;
356  double new_cluster_sigma = std::hypot(sigma1, sigma2);
357  new_cluster_values.at(i) = new_cluster_value;
358  new_cluster_sigmas.at(i) = new_cluster_sigma;
359  }
360  int new_merge_iteration = a.getMergeIteration() + b.getMergeIteration() + 1;
361  merged_cluster.setClusterValue(new_cluster_values, new_cluster_sigmas);
362  merged_cluster.setMergeStatus(true);
363  merged_cluster.setMergeIteration(new_merge_iteration);
364  return merged_cluster;
365 }
366 
367 template <typename T>
369  if (algo == ClusterAlgo::Eager) {
370  for (const auto &clust : m_clusters) {
371  if (clust.containsUnknowns()) {
372  throw std::invalid_argument(
373  "[ClusterCollection::doClustering] ERROR "
374  "- eager clustering does not allow for unknown values");
375  }
376  }
377  }
378 
379  // TODO case where I have 2 vertices in my collection
380  if (m_debug_level > 0) {
381  std::cout << "ClusterCollection::doTimeClustering" << std::endl;
382  }
383  double distance = 1.e30; // initial distance value, "far away"
384 
385  while (m_clusters.size() > 1) {
386  int i0 = 0;
387  int j0 = 0;
388  if (m_debug_level > 0) {
389  std::cout << "using " << m_clusters.size() << " vertices" << std::endl;
390  }
391  // find the two vertices that are closest to each other
392  distance = getDistanceBetweenClusters(m_clusters.at(0), m_clusters.at(1));
393  for (size_t i = 0; i < m_clusters.size(); i++) {
394  for (size_t j = i + 1; j < m_clusters.size(); j++) {
395 
396  if (algo == ClusterAlgo::Simultaneous ||
398 
399  if (m_clusters.at(i).mergeStatus() or m_clusters.at(j).mergeStatus()) {
400  continue;
401  }
402  }
403 
404  double current_distance =
405  getDistanceBetweenClusters(m_clusters.at(i), m_clusters.at(j));
406  if (current_distance <= distance) {
407  distance = current_distance;
408  i0 = i;
409  j0 = j;
410  }
411  } // loop over j
412  } // loop over i
413  if (m_debug_level > 0) {
414  std::cout << "using vertex " << i0 << " and " << j0 << std::endl;
415  }
416  // now the closest two vertices are found and will be fused if cut passes
417  if (distance < m_distance_cut && i0 != j0) {
418 
419  Cluster<T> new_cluster { };
420 
421  if (algo == ClusterAlgo::SimultaneousMean) {
422  new_cluster = mergeClustersMean(m_clusters.at(i0), m_clusters.at(j0));
423  } else {
424  new_cluster = mergeClusters(m_clusters.at(i0), m_clusters.at(j0));
425  }
426 
427  if (m_debug_level > 0) {
428  std::cout << "starting to erase" << std::endl;
429  }
430  m_clusters.erase(m_clusters.begin() + j0);
431  if (i0 < j0) {
432  m_clusters.erase(m_clusters.begin() + i0);
433  } else {
434  m_clusters.erase(m_clusters.begin() + (i0 - 1));
435  }
436  if (m_debug_level > 0) {
437  std::cout << "erase done" << std::endl;
438  }
439  m_clusters.push_back(new_cluster);
440  if (m_debug_level > 0) {
441  std::cout << "new cluster stored" << std::endl;
442  }
443  } else {
444  if (algo == ClusterAlgo::Eager) {
445  break;
446  }
447  // if there is a cluster that was merged
448  if (std::find_if(m_clusters.begin(), m_clusters.end(),
449  [](const Cluster<T> &c) { return c.mergeStatus(); }) !=
450  m_clusters.end()) {
451  // reset each status to false
452  std::for_each(m_clusters.begin(), m_clusters.end(), [](Cluster<T> &c) {
453  c.setMergeStatus(false);
454  });
455  } else {
456  // if not, this is the end of the clustering
457  break;
458  }
459  }
460  } // while loop
461 }
462 
463 template <typename T>
464 std::pair<Cluster<T>, int> ClusterCollection<T>::largestClusterInfo() {
465 
466  int max_n_hits = 0;
467  int count = 0;
468  Cluster<T> max_n_vertex { };
469 
470  for (const auto& vx : m_clusters) {
471  int current_n = vx.getNEntries();
472 
473  if (current_n > max_n_hits) {
474  max_n_hits = current_n;
475  max_n_vertex = vx;
476  count = 1;
477  }
478  else if (current_n == max_n_hits) {
479  count++;
480  }
481  }
482 
483  return std::make_pair(max_n_vertex, count);
484 }
485 
487 
488  const auto [max_cluster, nMaxClusters] = largestClusterInfo();
489  // If two or more clusters have the same maximum number of entries, I can't
490  // decide, so return default
491  if (nMaxClusters > 1) {
492  return Cluster<T> { };
493  }
494  return max_cluster;
495 }
496 
497 template <typename T> int ClusterCollection<T>::getMaxClusterSize_Info() {
498  // find the vertex with a maximum amount of hits clustered in it
499  // and return thisn umber
500  const auto [max_cluster, nMaxClusters] = largestClusterInfo();
501 
502  return max_cluster.getNEntries();
503 }
504 
505 template <typename T>
506 std::vector<Cluster<T>> ClusterCollection<T>::getClusters() const {
507  return m_clusters;
508 }
509 
510 template <typename T> int ClusterCollection<T>::getNClusters() const {
511  return static_cast<int>(m_clusters.size());
512 }
513 
514 } // namespace HGTD
515 
516 #endif // CLUSTERINGHELPER_H
HGTD::Cluster::setMergeStatus
void setMergeStatus(bool status)
Definition: Clustering.h:244
python.SystemOfUnits.s
int s
Definition: SystemOfUnits.py:131
HGTD::ClusterAlgo::Eager
@ Eager
HGTD::Cluster::m_contains_unknowns
bool m_contains_unknowns
Definition: Clustering.h:114
HGTD::ClusterCollection::getDistanceBetweenClusters
double getDistanceBetweenClusters(const Cluster< T > &a, const Cluster< T > &b)
Definition: Clustering.h:279
HGTD::Cluster::m_combined_sigma_vector
std::vector< double > m_combined_sigma_vector
Definition: Clustering.h:111
HGTD::ClusterCollection::getMaxClusterSize_Info
int getMaxClusterSize_Info()
Definition: Clustering.h:497
HGTD::Cluster::getEntries
std::vector< T > getEntries() const
Return the objects that are part of the Cluster.
Definition: Clustering.h:210
hist_file_dump.d
d
Definition: hist_file_dump.py:137
HGTD::Cluster::getValues
std::vector< double > getValues() const
Return the N-dimensional value of the Cluster.
Definition: Clustering.h:228
HGTD::Cluster::Cluster
Cluster()
Definition: Clustering.h:172
conifer::pow
constexpr int pow(int x)
Definition: conifer.h:20
HGTD::ClusterCollection::largestClusterInfo
std::pair< Cluster< T >, int > largestClusterInfo()
Definition: Clustering.h:464
HGTD::Cluster::m_debug_level
int m_debug_level
Definition: Clustering.h:106
HGTD::Cluster::m_was_merged
bool m_was_merged
Definition: Clustering.h:112
HGTD::Cluster
Definition: Clustering.h:30
HGTD::Cluster::setMergeIteration
void setMergeIteration(int iteration)
Definition: Clustering.h:254
HGTD::ClusterAlgo::SimultaneousMean
@ SimultaneousMean
HGTD::Cluster::m_combined_value_vector
std::vector< double > m_combined_value_vector
Definition: Clustering.h:110
HGTD::ClusterCollection::m_debug_level
int m_debug_level
Definition: Clustering.h:160
HGTD::Cluster::m_combined_sigma
double m_combined_sigma
Definition: Clustering.h:109
HGTD::Cluster::addEntryVector
void addEntryVector(const std::vector< T > &entry_vector)
This can be used to combine the subsets stored in two vertices all in once while merging.
Definition: Clustering.h:215
HGTD::Cluster::m_entries
std::vector< T > m_entries
Definition: Clustering.h:107
HGTD::ClusterAlgo
ClusterAlgo
Definition: Clustering.h:119
HGTD::ClusterCollection::mergeClusters
Cluster< T > mergeClusters(const Cluster< T > &a, const Cluster< T > &b)
Creates a new Cluster object that is a fusion of the two clusters given in the arguments.
Definition: Clustering.h:303
HGTD::Cluster::m_merge_iteration
int m_merge_iteration
Definition: Clustering.h:113
HGTD
Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration.
Definition: Clustering.h:28
lumiFormat.i
int i
Definition: lumiFormat.py:92
HGTD::ClusterCollection::addCluster
void addCluster(Cluster< T > vx)
Definition: Clustering.h:274
HGTD::Cluster::getSigmas
std::vector< double > getSigmas() const
Return the N-dimensional resolution of the Cluster.
Definition: Clustering.h:240
HGTD::ClusterCollection::updateDistanceCut
void updateDistanceCut(double cut_value)
Set the distance cut.
Definition: Clustering.h:270
HGTD::ClusterCollection::getNClusters
int getNClusters() const
Definition: Clustering.h:510
HGTD::Cluster::getMergeIteration
int getMergeIteration() const
Definition: Clustering.h:250
HGTD::Cluster::containsUnknowns
bool containsUnknowns() const
Definition: Clustering.h:258
GetAllXsec.entry
list entry
Definition: GetAllXsec.py:132
HGTD::ClusterCollection::m_clusters
std::vector< Cluster< T > > m_clusters
Definition: Clustering.h:162
HGTD::Cluster::addEntry
void addEntry(const T &entry)
Add an object of type T to the Cluster.
Definition: Clustering.h:206
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
HGTD::Cluster::mergeStatus
bool mergeStatus() const
Return true if the Cluster is the result of a merge.
Definition: Clustering.h:248
HGTD::Cluster::m_combined_value
double m_combined_value
Definition: Clustering.h:108
HGTD::ClusterCollection::mergeClustersMean
Cluster< T > mergeClustersMean(const Cluster< T > &a, const Cluster< T > &b)
Definition: Clustering.h:336
HGTD::ClusterCollection::getClusters
std::vector< Cluster< T > > getClusters() const
Definition: Clustering.h:506
HGTD::ClusterCollection::getMaxEntriesCluster
Cluster< T > getMaxEntriesCluster()
Definition: Clustering.h:486
python.PyAthena.v
v
Definition: PyAthena.py:157
a
TList * a
Definition: liststreamerinfos.cxx:10
HGTD::ClusterAlgo::Simultaneous
@ Simultaneous
HGTD::Cluster::setUnknownStatus
void setUnknownStatus(bool status)
Definition: Clustering.h:262
HGTD::ClusterCollection
Definition: Clustering.h:123
HGTD::ClusterCollection::setDebugLevel
void setDebugLevel(int debug_level)
Definition: Clustering.h:157
HGTD::ClusterCollection::doClustering
void doClustering(ClusterAlgo algo)
Definition: Clustering.h:368
merge.status
status
Definition: merge.py:17
Amg::distance2
float distance2(const Amg::Vector3D &p1, const Amg::Vector3D &p2)
calculates the squared distance between two point in 3D space
Definition: GeoPrimitivesHelpers.h:48
HGTD::ClusterCollection::m_distance_cut
double m_distance_cut
Definition: Clustering.h:161
Amg::distance
float distance(const Amg::Vector3D &p1, const Amg::Vector3D &p2)
calculates the distance between two point in 3D space
Definition: GeoPrimitivesHelpers.h:54
HGTD::Cluster::getNEntries
int getNEntries() const
Return the number of objects stored in the Cluster.
Definition: Clustering.h:202
python.compressB64.c
def c
Definition: compressB64.py:93
HGTD::Cluster::setClusterValue
void setClusterValue(const std::vector< double > &v, const std::vector< double > &v_sigma)
The value of a vertex has to be set manually.
Definition: Clustering.h:222