ATLAS Offline Software
DBScan.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2022 CERN for the benefit of the ATLAS collaboration
3 */
4 #ifndef TRIGTOOLS_TRIG_VSI_DBSCAN
5 #define TRIGTOOLS_TRIG_VSI_DBSCAN
6 
14 
15 #include <vector>
16 #include <functional>
17 #include <unordered_set>
18 #include <unordered_map>
19 
20 namespace TrigVSI {
21 
23 template<typename T>
24 class Cluster {
25  public:
26  Cluster(){ m_points.clear(); };
27  Cluster( const std::vector<T>& points ) : m_points( points ){};
28  Cluster( std::vector<T>&& points ) : m_points( std::move(points) ){};
29 
30  inline T getPoint(size_t ipt) const { return m_points[ipt]; };
31  inline size_t nPoints() const { return m_points.size(); };
32 
33  inline std::vector<T>& Points() { return m_points; };
34  inline const std::vector<T>& getPoints() const { return m_points; };
35 
36  protected :
37  std::vector<T> m_points;
38 };
39 
40 
42 template<typename pointType>
43 class DBScan {
44  public :
45  using RegionFunc = std::function<std::vector<pointType>(const pointType&, double)>;
46  DBScan( const std::unordered_set<pointType>&, RegionFunc);
47  DBScan();
48  ~DBScan(){};
49 
50  DBScan& operator= (DBScan&&) noexcept = default;
51  DBScan(DBScan&&) noexcept = default;
52 
53  inline std::vector<Cluster<pointType>> getClusters() const;
54  inline size_t nClusters() const { return m_clusters.size() + m_noisesCluster.size(); };
55  inline const Cluster<pointType>& getCluster(size_t) const;
56 
57  void clusterize(double, size_t);
58 
59  private :
61  std::unordered_map<pointType, bool> m_pointsVisited;
62  std::vector<Cluster<pointType>> m_clusters;
63  std::unordered_set<pointType> m_noises;
64  std::vector<Cluster<pointType>> m_noisesCluster;
65 };
66 
67 
68 //============================================================================
75 
82 template<typename pointType>
83 DBScan<pointType>::DBScan( const std::unordered_set<pointType>& set, RegionFunc regionQuery):
84 m_regionQuery(regionQuery)
85 {
86  m_pointsVisited.clear();
87  for (const auto& point : set) {
88  m_pointsVisited.emplace(point, false);
89  }
90 }
91 
92 
96 template<typename pointType>
98 {
99 
100 }
101 
102 
106 template<typename pointType>
107 std::vector<Cluster<pointType>> DBScan<pointType>::getClusters() const
108 {
109  auto tmp = m_clusters;
110  const auto& tmp_noise = m_noisesCluster;
111  tmp.insert( tmp.end(), tmp_noise.begin(), tmp_noise.end() );
112  return tmp;
113 }
114 
115 
127 template<typename pointType>
129 {
130  const Cluster<pointType>& cls_tmp = ( icls < m_clusters.size() )? m_clusters[icls] : m_noisesCluster[icls-m_clusters.size()];
131  return cls_tmp;
132 }
133 
134 
141 template<typename pointType>
142 void DBScan<pointType>::clusterize( double eps, size_t minN )
143 {
144  m_clusters.clear();
145  m_noisesCluster.clear();
146  for (auto& pair : m_pointsVisited) {
147  const pointType& point = pair.first;
148 
149  if ( pair.second ) continue; // Continue when the point is visited
150  pair.second = true; // Set the point as visited
151  std::vector<pointType> neighbor_points = m_regionQuery(point, eps);
152  if ( neighbor_points.size() < minN ) {
153  m_noises.emplace(point);
154  continue;
155  }
156 
157  m_clusters.emplace_back( std::vector<pointType>{point} );
158  Cluster<pointType>& cls = *(--m_clusters.end());
159 
160  for (size_t i = 0; i < neighbor_points.size(); i++) {
161  const auto& q = neighbor_points[i];
162 
163  // Check if q is labeled as noise
164  // If so, remove q from noise lists and add q to cluster
165  auto q_itr_noise = m_noises.find(q);
166  if ( q_itr_noise != m_noises.end() ) {
167  m_noises.erase(q_itr_noise);
168  cls.Points().emplace_back(q);
169  }
170 
171  // Check if q had been visited before
172  // If visited, q is already labeled as a noise or added to other cluster so skip it
173  // If not visited, label q as visited and add it to cluster
174  auto q_itr = m_pointsVisited.find(q);
175  if ( q_itr == m_pointsVisited.end() ) continue;
176  if ( q_itr->second ) continue;
177  q_itr->second = true;
178  cls.Points().emplace_back(q);
179 
180  std::vector<pointType> q_neighbor_points = m_regionQuery(q, eps);
181  if ( q_neighbor_points.size() >= minN ) neighbor_points.insert( neighbor_points.end(), q_neighbor_points.begin(), q_neighbor_points.end() );
182  }
183 
184  }
185 
186  // Convert noises to one point clusters
187  for (const auto& q : m_noises) {
188  m_noisesCluster.emplace_back( std::vector<pointType>{q} );
189  }
190 }
191 
192 
193 } // end of namespace TrigVSI
194 
195 #endif
TrigVSI::Cluster::Points
std::vector< T > & Points()
Definition: DBScan.h:33
TrigVSI::DBScan::~DBScan
~DBScan()
Definition: DBScan.h:48
TrigVSI::DBScan::m_regionQuery
RegionFunc m_regionQuery
Definition: DBScan.h:60
TrigVSI::DBScan::m_noises
std::unordered_set< pointType > m_noises
Definition: DBScan.h:63
TrigVSI::DBScan::DBScan
DBScan(const std::unordered_set< pointType > &, RegionFunc)
Normal Constructor.
Definition: DBScan.h:83
TrigVSI::DBScan::getClusters
std::vector< Cluster< pointType > > getClusters() const
Retrun the list of clusters along with noise clusters.
Definition: DBScan.h:107
TrigVSI::Cluster::nPoints
size_t nPoints() const
Definition: DBScan.h:31
TrigVSI::DBScan
Class for operating DBSCAN clustering.
Definition: DBScan.h:43
TrigVSI::Cluster
Base class for clusters.
Definition: DBScan.h:24
TrigVSI::DBScan< int >::RegionFunc
std::function< std::vector< int >(const int &, double)> RegionFunc
Function type for region query function.
Definition: DBScan.h:45
CaloClusterListBadChannel.cls
cls
Definition: CaloClusterListBadChannel.py:8
TrigVSI::Cluster::Cluster
Cluster(const std::vector< T > &points)
Definition: DBScan.h:27
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:560
TrigVSI::DBScan::operator=
DBScan & operator=(DBScan &&) noexcept=default
Move assignment operator.
protected
#define protected
Definition: DetDescrConditionsDict_dict_fixes.cxx:14
TrigVSI::Cluster::getPoint
T getPoint(size_t ipt) const
Definition: DBScan.h:30
TrigVSI::DBScan::m_noisesCluster
std::vector< Cluster< pointType > > m_noisesCluster
Definition: DBScan.h:64
lumiFormat.i
int i
Definition: lumiFormat.py:85
TrigVSI::DBScan::m_clusters
std::vector< Cluster< pointType > > m_clusters
Definition: DBScan.h:62
vector
Definition: MultiHisto.h:13
TrigVSI::DBScan::DBScan
DBScan()
Default constructor.
Definition: DBScan.h:97
TrigVSI::DBScan::getCluster
const Cluster< pointType > & getCluster(size_t) const
Retrieve cluster.
Definition: DBScan.h:128
DeMoUpdate.tmp
string tmp
Definition: DeMoUpdate.py:1167
TrigVSI::Cluster::getPoints
const std::vector< T > & getPoints() const
Definition: DBScan.h:34
CxxUtils::set
constexpr std::enable_if_t< is_bitmask_v< E >, E & > set(E &lhs, E rhs)
Convenience function to set bits in a class enum bitmask.
Definition: bitmask.h:232
private
#define private
Definition: DetDescrConditionsDict_dict_fixes.cxx:13
HashTools.h
Hash functions for std::pair.
TrigVSI::Cluster::m_points
std::vector< T > m_points
Definition: DBScan.h:34
TrigVSI::DBScan::m_pointsVisited
std::unordered_map< pointType, bool > m_pointsVisited
Definition: DBScan.h:61
TrigVSI::DBScan::nClusters
size_t nClusters() const
Definition: DBScan.h:54
TrigVSI::Cluster::Cluster
Cluster(std::vector< T > &&points)
Definition: DBScan.h:28
TrigVSI::DBScan::clusterize
void clusterize(double, size_t)
Generate clusters with DBSCAN algorithim.
Definition: DBScan.h:142
extractSporadic.q
list q
Definition: extractSporadic.py:98
python.CaloScaleNoiseConfig.default
default
Definition: CaloScaleNoiseConfig.py:79
TrigVSI::Cluster::Cluster
Cluster()
Definition: DBScan.h:26
jobOptions.points
points
Definition: jobOptions.GenevaPy8_Zmumu.py:97
TrigVSI
Definition: TrigVrtSecInclusive.cxx:27