ATLAS Offline Software
GSFFindIndexOfMinimum.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
59 #ifndef GSFFindIndexOfMimimum_H
60 #define GSFFindIndexOfMimimum_H
61 #include "CxxUtils/inline_hints.h"
62 #include "CxxUtils/restrict.h"
63 #include "CxxUtils/vec.h"
64 #include "GaudiKernel/Kernel.h"
66 //
67 #include <algorithm>
68 #include <memory>
69 #include <numeric>
70 namespace findIdxOfMinDetail {
71 
72 // index of minimum scalar
74 int32_t scalarC(const float* distancesIn, int n) {
75  const float* array =
76  std::assume_aligned<GSFConstants::alignment>(distancesIn);
77  float minvalue = array[0];
78  int minIndex = 0;
79  for (int i = 0; i < n; ++i) {
80  const float value = array[i];
81  if (value < minvalue) {
82  minvalue = value;
83  minIndex = i;
84  }
85  }
86  return minIndex;
87 }
88 
89 // index of minimum STL
91 int32_t scalarSTL(const float* distancesIn, int n) {
92  const float* array =
93  std::assume_aligned<GSFConstants::alignment>(distancesIn);
94  return std::distance(array, std::min_element(array, array + n));
95 }
96 
97 // index of minimum vec with blend
99 int32_t vecAlwaysTrackIdx(const float* distancesIn, int n) {
100  using namespace CxxUtils;
101  const float* array =
102  std::assume_aligned<GSFConstants::alignment>(distancesIn);
103  const vec<int, 4> increment = {16, 16, 16, 16};
104 
105  vec<int, 4> indices1 = {0, 1, 2, 3};
106  vec<int, 4> indices2 = {4, 5, 6, 7};
107  vec<int, 4> indices3 = {8, 9, 10, 11};
108  vec<int, 4> indices4 = {12, 13, 14, 15};
109 
110  vec<int, 4> minIndices1 = indices1;
111  vec<int, 4> minIndices2 = indices2;
112  vec<int, 4> minIndices3 = indices3;
113  vec<int, 4> minIndices4 = indices4;
114 
115  vec<float, 4> minValues1;
116  vec<float, 4> minValues2;
117  vec<float, 4> minValues3;
118  vec<float, 4> minValues4;
119  vload(minValues1, array);
120  vload(minValues2, array + 4);
121  vload(minValues3, array + 8);
122  vload(minValues4, array + 12);
123 
124  vec<float, 4> values1;
125  vec<float, 4> values2;
126  vec<float, 4> values3;
127  vec<float, 4> values4;
128  for (int i = 16; i < n; i += 16) {
129  // 1
130  vload(values1, array + i); // 0-3
131  indices1 = indices1 + increment;
132  vec<int, 4> lt1 = values1 < minValues1;
133  vselect(minIndices1, indices1, minIndices1, lt1);
134  vmin(minValues1, values1, minValues1);
135  // 2
136  vload(values2, array + i + 4); // 4-7
137  indices2 = indices2 + increment;
138  vec<int, 4> lt2 = values2 < minValues2;
139  vselect(minIndices2, indices2, minIndices2, lt2);
140  vmin(minValues2, values2, minValues2);
141  // 3
142  vload(values3, array + i + 8); // 8-11
143  indices3 = indices3 + increment;
144  vec<int, 4> lt3 = values3 < minValues3;
145  vselect(minIndices3, indices3, minIndices3, lt3);
146  vmin(minValues3, values3, minValues3);
147  // 4
148  vload(values4, array + i + 12); // 12-15
149  indices4 = indices4 + increment;
150  vec<int, 4> lt4 = values4 < minValues4;
151  vselect(minIndices4, indices4, minIndices4, lt4);
152  vmin(minValues4, values4, minValues4);
153  }
154 
155  float minValues[16];
156  int minIndices[16];
157  vstore(minValues, minValues1);
158  vstore(minValues + 4, minValues2);
159  vstore(minValues + 8, minValues3);
160  vstore(minValues + 12, minValues4);
161  vstore(minIndices, minIndices1);
162  vstore(minIndices + 4, minIndices2);
163  vstore(minIndices + 8, minIndices3);
164  vstore(minIndices + 12, minIndices4);
165 
166  float minValue = minValues[0];
167  int32_t minIndex = minIndices[0];
168  for (size_t i = 1; i < 16; ++i) {
169  const float value = minValues[i];
170  const int32_t index = minIndices[i];
171  if (value < minValue) {
172  minValue = value;
173  minIndex = index;
174  } else if (value == minValue && index < minIndex) {
175  // we want to return the smallest index
176  // in case of 2 same values
177  minIndex = index;
178  }
179  }
180  return minIndex;
181 }
182 
183 // index of minimum vec quite fast for the "unordered"/"random" case
185 int32_t vecUpdateIdxOnNewMin(const float* distancesIn, int n) {
186  using namespace CxxUtils;
187  const float* array =
188  std::assume_aligned<GSFConstants::alignment>(distancesIn);
189 
190  int32_t idx = 0;
191  float min = distancesIn[0];
192  vec<float, 4> minValues;
193  vbroadcast(minValues, min);
194  vec<float, 4> values1;
195  vec<float, 4> values2;
196  vec<float, 4> values3;
197  vec<float, 4> values4;
198  for (int i = 0; i < n; i += 16) {
199  // 1
200  vload(values1, array + i); // 0-3
201  // 2
202  vload(values2, array + i + 4); // 4-7
203  // 3
204  vload(values3, array + i + 8); // 8-11
205  // 4
206  vload(values4, array + i + 12); // 12-15
207 
208  // compare 1 with 2 result is 1
209  vmin(values1, values1, values2);
210  // compare 3 with 4 result is 3
211  vmin(values3, values3, values4);
212  // compare 1 with 3 result is 1
213  vmin(values1, values1, values3);
214  //see if the new minimum is less than existing
215  vec<int, 4> newMinimumMask = values1 < minValues;
216  if (vany(newMinimumMask)) {
217  idx = i;
218  float minCandidates[4];
219  vstore(minCandidates, values1);
220  for (int j = 0; j < 4; ++j) {
221  if (minCandidates[j] < min) {
222  min = minCandidates[j];
223  }
224  }
225  vbroadcast(minValues, min);
226  }
227  }
228  //Do the final calculation scalar way
229  for (int i = idx; i < idx + 16; ++i) {
230  if (distancesIn[i] == min) {
231  return i;
232  }
233  }
234  return 0;
235 }
236 
237 template <typename T = float, int STRIDE = 16, int VEC_WIDTH = 4>
239 float vecFindMinimum(const T* distancesIn, int n) {
240  using namespace CxxUtils;
241  const T* array = std::assume_aligned<GSFConstants::alignment>(distancesIn);
242  constexpr int vectorCount = STRIDE / VEC_WIDTH;
243 
244  vec<T, VEC_WIDTH> minValues[vectorCount];
245 
246  for (int i = 0; i < vectorCount; i++) {
247  vload(minValues[i], array + (VEC_WIDTH * i));
248  }
249 
250  constexpr int totalStride = VEC_WIDTH * vectorCount;
251  vec<T, VEC_WIDTH> values[vectorCount];
252  for (int i = totalStride; i < n; i += totalStride) {
253  GAUDI_LOOP_UNROLL(4)
254  for (int j = 0; j < vectorCount; j++) {
255  vload(values[j], array + i + (VEC_WIDTH * j));
256  vmin(minValues[j], values[j], minValues[j]);
257  }
258  }
259 
260  T finalMinValues[VEC_WIDTH];
261  vstore(finalMinValues, std::reduce(std::begin(minValues),
262  std::end(minValues),
263  minValues[0],
264  [](auto a, auto b){ return a < b ? a : b; }));
265 
266  // Do the final calculation scalar way
267  return std::reduce(std::begin(finalMinValues),
268  std::end(finalMinValues),
269  finalMinValues[0],
270  [](auto a, auto b){ return a < b ? a : b; });
271 }
272 
273 template <typename T = float, int STRIDE = 16, int VEC_WIDTH = 4>
275 int32_t vecIdxOfValue(const T value, const T* distancesIn, int n) {
276  using namespace CxxUtils;
277  const T* array = std::assume_aligned<GSFConstants::alignment>(distancesIn);
278  constexpr int vectorCount = STRIDE / VEC_WIDTH;
279 
280  vec<T, VEC_WIDTH> values[vectorCount];
283  vec<int, VEC_WIDTH> eqs[vectorCount];
284 
285  for (int i = 0; i < n; i += STRIDE) {
286  GAUDI_LOOP_UNROLL(4)
287  for (int j = 0; j < vectorCount; j++) {
288  vload(values[j], array + i + (VEC_WIDTH * j));
289  eqs[j] = values[j] == target;
290  }
291 
292  // See if we have the value in any
293  // of the vectors
294  // If yes then use scalar code to locate it
295  if (vany(std::reduce(std::begin(eqs),
296  std::end(eqs),
297  eqs[0],
298  [](auto a, auto b){ return a || b; }))) {
299  for (int idx = i; idx < i + STRIDE; ++idx) {
300  if (distancesIn[idx] == value) {
301  return idx;
302  }
303  }
304  }
305  }
306  return -1;
307 }
308 
310 int32_t vecMinThenIdx(const float* distancesIn, int n) {
311  using namespace CxxUtils;
312  const float* array =
313  std::assume_aligned<GSFConstants::alignment>(distancesIn);
314  // Finding of minimum needs to loop over all elements
315  // But we can run the finding of index only inside a block
316  constexpr int blockSizePower2 = 8;
317  constexpr int blockSize = 2 << blockSizePower2;
318  // case for n less than blockSize
319  if (n <= blockSize) {
320  float min = vecFindMinimum(array, n);
321  return vecIdxOfValue(min, array, n);
322  }
323  int32_t idx = 0;
324  float min = array[0];
325  // We might have a remainder that we need to handle
326  const int remainder = n & (blockSize - 1);
327  for (int32_t i = 0; i < (n - remainder); i += blockSize) {
328  float mintmp = vecFindMinimum(array + i, blockSize);
329  if (mintmp < min) {
330  min = mintmp;
331  idx = i;
332  }
333  }
334  if (remainder != 0) {
335  int index = n - remainder;
336  float mintmp = vecFindMinimum(array + index, remainder);
337  // if the minimum is in this part
338  if (mintmp < min) {
339  min = mintmp;
340  return index + vecIdxOfValue(min, array + index, remainder);
341  }
342  }
343  //default return
344  return idx + vecIdxOfValue(min, array + idx, blockSize);
345 }
346 
347 } // namespace findIdxOfMinDetail
348 
349 namespace findIdxOfMinimum {
350 enum Impl {
354  C = 3,
355  STL = 4
356 };
357 
358 template <enum Impl I>
359 ATH_ALWAYS_INLINE int32_t impl(const float* distancesIn, int n) {
360  static_assert(I == VecUpdateIdxOnNewMin || I == VecAlwaysTrackIdx ||
361  I == VecMinThenIdx || I == C || I == STL,
362  "Not a valid implementation chosen");
363 
364  if constexpr (I == VecUpdateIdxOnNewMin) {
365  return findIdxOfMinDetail::vecUpdateIdxOnNewMin(distancesIn, n);
366  } else if constexpr (I == VecAlwaysTrackIdx) {
367  return findIdxOfMinDetail::vecAlwaysTrackIdx(distancesIn, n);
368  } else if constexpr (I == VecMinThenIdx) {
369  return findIdxOfMinDetail::vecMinThenIdx(distancesIn, n);
370  } else if constexpr (I == C) {
371  return findIdxOfMinDetail::scalarC(distancesIn, n);
372  } else if constexpr (I == STL) {
373  return findIdxOfMinDetail::scalarSTL(distancesIn, n);
374  }
375  else {
376  return 0; // Avoid cppchcheck warning.
377  }
378 }
379 } // namespace findIdxOfMinimum
380 
381 #endif
findIdxOfMinimum
Definition: GSFFindIndexOfMinimum.h:349
findIdxOfMinDetail::vecFindMinimum
ATH_ALWAYS_INLINE float vecFindMinimum(const T *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:239
inline_hints.h
findIdxOfMinimum::VecUpdateIdxOnNewMin
@ VecUpdateIdxOnNewMin
Definition: GSFFindIndexOfMinimum.h:351
index
Definition: index.py:1
findIdxOfMinDetail::scalarSTL
ATH_ALWAYS_INLINE int32_t scalarSTL(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:91
PlotCalibFromCool.begin
begin
Definition: PlotCalibFromCool.py:94
ATH_ALWAYS_INLINE
#define ATH_ALWAYS_INLINE
Definition: inline_hints.h:53
athena.value
value
Definition: athena.py:122
PlotCalibFromCool.vmin
vmin
Definition: PlotCalibFromCool.py:696
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:81
python.Bindings.values
values
Definition: Control/AthenaPython/python/Bindings.py:797
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
findIdxOfMinDetail
Definition: GSFFindIndexOfMinimum.h:70
lumiFormat.i
int i
Definition: lumiFormat.py:92
beamspotman.n
n
Definition: beamspotman.py:731
CxxUtils
Definition: aligned_vector.h:29
findIdxOfMinimum::Impl
Impl
Definition: GSFFindIndexOfMinimum.h:350
findIdxOfMinDetail::vecMinThenIdx
ATH_ALWAYS_INLINE int32_t vecMinThenIdx(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:310
min
#define min(a, b)
Definition: cfImp.cxx:40
lumiFormat.array
array
Definition: lumiFormat.py:98
restrict.h
Macro wrapping the nonstandard restrict keyword.
findIdxOfMinDetail::vecIdxOfValue
ATH_ALWAYS_INLINE int32_t vecIdxOfValue(const T value, const T *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:275
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
list of entries in a vector that are not in another
Definition: compareFlatTrees.cxx:44
findIdxOfMinimum::VecMinThenIdx
@ VecMinThenIdx
Definition: GSFFindIndexOfMinimum.h:353
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
CxxUtils::vselect
ATH_ALWAYS_INLINE void vselect(VEC &dst, const VEC &a, const VEC &b, const vec_mask_type_t< VEC > &mask)
Definition: vec.h:306
findIdxOfMinimum::impl
ATH_ALWAYS_INLINE int32_t impl(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:359
findIdxOfMinDetail::scalarC
ATH_ALWAYS_INLINE int32_t scalarC(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:74
findIdxOfMinDetail::vecUpdateIdxOnNewMin
ATH_ALWAYS_INLINE int32_t vecUpdateIdxOnNewMin(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:185
DeMoScan.index
string index
Definition: DeMoScan.py:362
a
TList * a
Definition: liststreamerinfos.cxx:10
vec.h
Vectorization helpers.
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
GsfConstants.h
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
COOLRates.target
target
Definition: COOLRates.py:1106
I
#define I(x, y, z)
Definition: MD5.cxx:116
findIdxOfMinDetail::vecAlwaysTrackIdx
ATH_ALWAYS_INLINE int32_t vecAlwaysTrackIdx(const float *distancesIn, int n)
Definition: GSFFindIndexOfMinimum.h:99
minValue
#define minValue(current, test)
Definition: CompoundLayerMaterialCreator.h:21
Amg::distance
float distance(const Amg::Vector3D &p1, const Amg::Vector3D &p2)
calculates the distance between two point in 3D space
Definition: GeoPrimitivesHelpers.h:54
findIdxOfMinimum::STL
@ STL
Definition: GSFFindIndexOfMinimum.h:355
findIdxOfMinimum::VecAlwaysTrackIdx
@ VecAlwaysTrackIdx
Definition: GSFFindIndexOfMinimum.h:352