ATLAS Offline Software
Loading...
Searching...
No Matches
Clustering.h
Go to the documentation of this file.
1
14
15#ifndef CLUSTERINGHELPER_H
16#define CLUSTERINGHELPER_H
17
18#include "TMath.h"
19
20#include <algorithm>
21#include <iostream>
22#include <cmath>
23#include <memory>
24#include <stdexcept>
25#include <utility>
26#include <vector>
27
28namespace HGTD {
29
30template <typename T> class Cluster {
31
32public:
33 Cluster();
34
35 Cluster(const std::vector<double> &v, const std::vector<double> &v_sigma);
36
37 Cluster(const std::vector<double> &v, const std::vector<double> &v_sigma,
38 const T &entry);
39
46 void addEntry(const T &entry);
47
56 void addEntryVector(const std::vector<T> &entry_vector);
57
61 const std::vector<T>& getEntries() const;
62
72 void setClusterValue(const std::vector<double> &v,
73 const std::vector<double> &v_sigma);
74
78 std::vector<double> getValues() const;
79
83 const std::vector<double>& getSigmas() const;
84
88 int getNEntries() const;
89
93 bool mergeStatus() const;
94
95 void setMergeStatus(bool status);
96
97 int getMergeIteration() const;
98
99 void setMergeIteration(int iteration);
100
101 bool containsUnknowns() const;
102
103 void setUnknownStatus(bool status);
104
105private:
106 int m_debug_level = 0; // TODO set this dynamically
107 std::vector<T> m_entries { }; // Stores raw objects that are part of this Cluster
110 std::vector<double> m_combined_value_vector { };
111 std::vector<double> m_combined_sigma_vector { };
112 bool m_was_merged = false;
115};
116
118
120
122
123template <typename T> class ClusterCollection {
124public:
125 void addCluster(const Cluster<T> & vx);
126 void doClustering(ClusterAlgo algo);
127
128private:
129 double getDistanceBetweenClusters(const Cluster<T> &a, const Cluster<T> &b);
130
139
141
142 std::pair<Cluster<T>, int> largestClusterInfo();
143
144public:
147 const std::vector<Cluster<T>>& getClusters() const;
148 int getNClusters() const;
156 void updateDistanceCut(double cut_value);
157 void setDebugLevel(int debug_level) { m_debug_level = debug_level; }
158
159private:
161 double m_distance_cut = 3.0;
162 std::vector<Cluster<T>> m_clusters;
163};
164
170
171template <class T>
174
175template <typename T>
177 const std::vector<double> &v, const std::vector<double> &v_sigma)
179
180 for (const auto &s : v_sigma) {
181 if (s < 0) {
182 m_contains_unknowns = true;
183 }
184 }
185}
186
187template <typename T>
189 const std::vector<double> &v, const std::vector<double> &v_sigma,
190 const T &entry)
192
193 m_entries.push_back(entry);
194
195 for (const auto &s : v_sigma) {
196 if (s < 0) {
197 m_contains_unknowns = true;
198 }
199 }
200}
201
202template <class T> int Cluster<T>::getNEntries() const {
203 return m_entries.size();
204}
205
206template <class T> void Cluster<T>::addEntry(const T &entry) {
207 m_entries.push_back(entry);
208}
209
210template <class T> const std::vector<T>& Cluster<T>::getEntries() const {
211 return m_entries;
212}
213
214template <class T>
215void Cluster<T>::addEntryVector(const std::vector<T> &entry_vector) {
216 if (not entry_vector.empty()) {
217 m_entries.insert(m_entries.end(), entry_vector.begin(), entry_vector.end());
218 }
219}
220
221template <class T>
222void Cluster<T>::setClusterValue(const std::vector<double> &v,
223 const std::vector<double> &v_sigma) {
225 m_combined_sigma_vector = v_sigma;
226}
227
228template <class T> std::vector<double> Cluster<T>::getValues() const {
229 // return a warning when this value is a default value
230 if (m_combined_value_vector.size() == 0) {
231 if (m_debug_level > 0) {
232 std::cout
233 << "Cluster::getTime: ATTENTION, combi values are not initialized!"
234 << std::endl;
235 }
236 }
238}
239
240template <class T> const std::vector<double>& Cluster<T>::getSigmas() const {
242}
243
244template <class T> void Cluster<T>::setMergeStatus(bool status) {
245 m_was_merged = status;
246}
247
248template <class T> bool Cluster<T>::mergeStatus() const { return m_was_merged; }
249
250template <class T> int Cluster<T>::getMergeIteration() const {
251 return m_merge_iteration;
252}
253
254template <class T> void Cluster<T>::setMergeIteration(int iteration) {
255 m_merge_iteration = iteration;
256}
257
258template <class T> bool Cluster<T>::containsUnknowns() const {
259 return m_contains_unknowns;
260}
261
262template <class T> void Cluster<T>::setUnknownStatus(bool status) {
263 m_contains_unknowns = status;
264}
265
268
269template <class T>
271 m_distance_cut = cut_value;
272}
273
274template <typename T> void ClusterCollection<T>::addCluster(const Cluster<T> & vx) {
275 m_clusters.push_back(vx);
276}
277
278template <typename T>
280 const Cluster<T> &b) {
281 std::vector<double> a_values = a.getValues();
282 std::vector<double> b_values = b.getValues();
283 std::vector<double> a_sigmas = a.getSigmas();
284 std::vector<double> b_sigmas = b.getSigmas();
285
286 std::vector<double> distances(a_values.size());
287 for (size_t i = 0; i < a_values.size(); i++) {
288 double distance_i = 0;
289 if (a_sigmas.at(i) >= 0.0 && b_sigmas.at(i) >= 0.0) {
290 distance_i = std::abs(a_values.at(i) - b_values.at(i)) /
291 std::hypot(a_sigmas.at(i), b_sigmas.at(i));
292 }
293 distances.at(i) = distance_i;
294 }
295 double distance2 = 0.;
296 for (double d : distances) {
297 distance2 += d * d;
298 }
299 return std::sqrt(distance2);
300}
301
302template <class T>
304 const Cluster<T> &b) {
305 Cluster<T> merged_cluster;
306 merged_cluster.addEntryVector(a.getEntries());
307 merged_cluster.addEntryVector(b.getEntries());
308
309 std::vector<double> a_values = a.getValues();
310 std::vector<double> b_values = b.getValues();
311 std::vector<double> a_sigmas = a.getSigmas();
312 std::vector<double> b_sigmas = b.getSigmas();
313
314 std::vector<double> new_cluster_values(a_values.size());
315 std::vector<double> new_cluster_sigmas(a_values.size());
316
317 for (size_t i = 0; i < a_values.size(); i++) {
318 double value1 = a_values.at(i);
319 double value2 = b_values.at(i);
320 double var1 = std::pow(a_sigmas.at(i), 2.0);
321 double var2 = std::pow(b_sigmas.at(i), 2.0);
322 double new_cluster_value =
323 (value1 / var1 + value2 / var2) / (1.0 / var1 + 1.0 / var2);
324 double new_cluster_sigma = std::sqrt(var1 * var2 / (var1 + var2));
325 new_cluster_values.at(i) = new_cluster_value;
326 new_cluster_sigmas.at(i) = new_cluster_sigma;
327 }
328 int new_merge_iteration = a.getMergeIteration() + b.getMergeIteration() + 1;
329 merged_cluster.setClusterValue(new_cluster_values, new_cluster_sigmas);
330 merged_cluster.setMergeStatus(true);
331 merged_cluster.setMergeIteration(new_merge_iteration);
332 return merged_cluster;
333}
334
335template <class T>
337 const Cluster<T> &b) {
338 Cluster<T> merged_cluster;
339 merged_cluster.addEntryVector(a.getEntries());
340 merged_cluster.addEntryVector(b.getEntries());
341
342 std::vector<double> a_values = a.getValues();
343 std::vector<double> b_values = b.getValues();
344 std::vector<double> a_sigmas = a.getSigmas();
345 std::vector<double> b_sigmas = b.getSigmas();
346
347 std::vector<double> new_cluster_values(a_values.size());
348 std::vector<double> new_cluster_sigmas(a_values.size());
349
350 for (size_t i = 0; i < a_values.size(); i++) {
351 double value1 = a_values.at(i);
352 double value2 = b_values.at(i);
353 double sigma1 = a_sigmas.at(i);
354 double sigma2 = b_sigmas.at(i);
355 double new_cluster_value = (value1 + value2) / 2.;
356 double new_cluster_sigma = std::hypot(sigma1, sigma2);
357 new_cluster_values.at(i) = new_cluster_value;
358 new_cluster_sigmas.at(i) = new_cluster_sigma;
359 }
360 int new_merge_iteration = a.getMergeIteration() + b.getMergeIteration() + 1;
361 merged_cluster.setClusterValue(new_cluster_values, new_cluster_sigmas);
362 merged_cluster.setMergeStatus(true);
363 merged_cluster.setMergeIteration(new_merge_iteration);
364 return merged_cluster;
365}
366
367template <typename T>
369 if (algo == ClusterAlgo::Eager) {
370 for (const auto &clust : m_clusters) {
371 if (clust.containsUnknowns()) {
372 throw std::invalid_argument(
373 "[ClusterCollection::doClustering] ERROR "
374 "- eager clustering does not allow for unknown values");
375 }
376 }
377 }
378
379 // TODO case where I have 2 vertices in my collection
380 if (m_debug_level > 0) {
381 std::cout << "ClusterCollection::doTimeClustering" << std::endl;
382 }
383 double distance = 1.e30; // initial distance value, "far away"
384
385 while (m_clusters.size() > 1) {
386 int i0 = 0;
387 int j0 = 0;
388 if (m_debug_level > 0) {
389 std::cout << "using " << m_clusters.size() << " vertices" << std::endl;
390 }
391 // find the two vertices that are closest to each other
392 distance = getDistanceBetweenClusters(m_clusters.at(0), m_clusters.at(1));
393 for (size_t i = 0; i < m_clusters.size(); i++) {
394 for (size_t j = i + 1; j < m_clusters.size(); j++) {
395
396 if (algo == ClusterAlgo::Simultaneous ||
398
399 if (m_clusters.at(i).mergeStatus() or m_clusters.at(j).mergeStatus()) {
400 continue;
401 }
402 }
403
404 double current_distance =
406 if (current_distance <= distance) {
407 distance = current_distance;
408 i0 = i;
409 j0 = j;
410 }
411 } // loop over j
412 } // loop over i
413 if (m_debug_level > 0) {
414 std::cout << "using vertex " << i0 << " and " << j0 << std::endl;
415 }
416 // now the closest two vertices are found and will be fused if cut passes
417 if (distance < m_distance_cut && i0 != j0) {
418
419 Cluster<T> new_cluster { };
420
421 if (algo == ClusterAlgo::SimultaneousMean) {
422 new_cluster = mergeClustersMean(m_clusters.at(i0), m_clusters.at(j0));
423 } else {
424 new_cluster = mergeClusters(m_clusters.at(i0), m_clusters.at(j0));
425 }
426
427 if (m_debug_level > 0) {
428 std::cout << "starting to erase" << std::endl;
429 }
430 m_clusters.erase(m_clusters.begin() + j0);
431 if (i0 < j0) {
432 m_clusters.erase(m_clusters.begin() + i0);
433 } else {
434 m_clusters.erase(m_clusters.begin() + (i0 - 1));
435 }
436 if (m_debug_level > 0) {
437 std::cout << "erase done" << std::endl;
438 }
439 m_clusters.push_back(new_cluster);
440 if (m_debug_level > 0) {
441 std::cout << "new cluster stored" << std::endl;
442 }
443 } else {
444 if (algo == ClusterAlgo::Eager) {
445 break;
446 }
447 // if there is a cluster that was merged
448 if (std::find_if(m_clusters.begin(), m_clusters.end(),
449 [](const Cluster<T> &c) { return c.mergeStatus(); }) !=
450 m_clusters.end()) {
451 // reset each status to false
452 std::for_each(m_clusters.begin(), m_clusters.end(), [](Cluster<T> &c) {
453 c.setMergeStatus(false);
454 });
455 } else {
456 // if not, this is the end of the clustering
457 break;
458 }
459 }
460 } // while loop
461}
462
463template <typename T>
464std::pair<Cluster<T>, int> ClusterCollection<T>::largestClusterInfo() {
465
466 int max_n_hits = 0;
467 int count = 0;
468 Cluster<T> max_n_vertex { };
469
470 for (const auto& vx : m_clusters) {
471 int current_n = vx.getNEntries();
472
473 if (current_n > max_n_hits) {
474 max_n_hits = current_n;
475 max_n_vertex = vx;
476 count = 1;
477 }
478 else if (current_n == max_n_hits) {
479 count++;
480 }
481 }
482
483 return std::make_pair(max_n_vertex, count);
484}
485
487
488 const auto [max_cluster, nMaxClusters] = largestClusterInfo();
489 // If two or more clusters have the same maximum number of entries, I can't
490 // decide, so return default
491 if (nMaxClusters > 1) {
492 return Cluster<T> { };
493 }
494 return max_cluster;
495}
496
498 // find the vertex with a maximum amount of hits clustered in it
499 // and return thisn umber
500 const auto [max_cluster, nMaxClusters] = largestClusterInfo();
501
502 return max_cluster.getNEntries();
503}
504
505template <typename T>
506const std::vector<Cluster<T>>& ClusterCollection<T>::getClusters() const {
507 return m_clusters;
508}
509
510template <typename T> int ClusterCollection<T>::getNClusters() const {
511 return static_cast<int>(m_clusters.size());
512}
513
514} // namespace HGTD
515
516#endif // CLUSTERINGHELPER_H
static Double_t a
Cluster< T > mergeClustersMean(const Cluster< T > &a, const Cluster< T > &b)
Definition Clustering.h:336
double getDistanceBetweenClusters(const Cluster< T > &a, const Cluster< T > &b)
Definition Clustering.h:279
std::vector< Cluster< T > > m_clusters
Definition Clustering.h:162
std::pair< Cluster< T >, int > largestClusterInfo()
Definition Clustering.h:464
void setDebugLevel(int debug_level)
Definition Clustering.h:157
Cluster< T > getMaxEntriesCluster()
Definition Clustering.h:486
void doClustering(ClusterAlgo algo)
Definition Clustering.h:368
void updateDistanceCut(double cut_value)
Set the distance cut.
Definition Clustering.h:270
Cluster< T > mergeClusters(const Cluster< T > &a, const Cluster< T > &b)
Creates a new Cluster object that is a fusion of the two clusters given in the arguments.
Definition Clustering.h:303
void addCluster(const Cluster< T > &vx)
Definition Clustering.h:274
const std::vector< Cluster< T > > & getClusters() const
Definition Clustering.h:506
std::vector< T > m_entries
Definition Clustering.h:107
void setMergeIteration(int iteration)
Definition Clustering.h:254
void addEntry(const T &entry)
Add an object of type T to the Cluster.
Definition Clustering.h:206
int getMergeIteration() const
Definition Clustering.h:250
void setClusterValue(const std::vector< double > &v, const std::vector< double > &v_sigma)
The value of a vertex has to be set manually.
Definition Clustering.h:222
std::vector< double > m_combined_sigma_vector
Definition Clustering.h:111
void addEntryVector(const std::vector< T > &entry_vector)
This can be used to combine the subsets stored in two vertices all in once while merging.
Definition Clustering.h:215
int m_merge_iteration
Definition Clustering.h:113
const std::vector< T > & getEntries() const
Return the objects that are part of the Cluster.
Definition Clustering.h:210
bool mergeStatus() const
Return true if the Cluster is the result of a merge.
Definition Clustering.h:248
double m_combined_value
Definition Clustering.h:108
void setMergeStatus(bool status)
Definition Clustering.h:244
std::vector< double > getValues() const
Return the N-dimensional value of the Cluster.
Definition Clustering.h:228
int getNEntries() const
Return the number of objects stored in the Cluster.
Definition Clustering.h:202
void setUnknownStatus(bool status)
Definition Clustering.h:262
bool containsUnknowns() const
Definition Clustering.h:258
const std::vector< double > & getSigmas() const
Return the N-dimensional resolution of the Cluster.
Definition Clustering.h:240
double m_combined_sigma
Definition Clustering.h:109
std::vector< double > m_combined_value_vector
Definition Clustering.h:110
bool m_contains_unknowns
Definition Clustering.h:114
int count(std::string s, const std::string &regx)
count how many occurances of a regx are in a string
Definition hcg.cxx:146
Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration.
ClusterAlgo
Definition Clustering.h:119