ATLAS Offline Software
GsfFindIndexOfMinimum.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
45 #ifndef GSFFindIndexOfMimimum_H
46 #define GSFFindIndexOfMimimum_H
47 #include "CxxUtils/features.h"
48 #include "CxxUtils/inline_hints.h"
49 #include "CxxUtils/restrict.h"
50 #include "CxxUtils/vec.h"
51 //
52 #include <algorithm>
53 #include <memory>
54 #include <numeric>
55 #include <climits>
56 
57 namespace vAlgs{
63 
67 template <size_t ISA_WIDTH>
68 constexpr size_t alignmentForArray(){
69  return ISA_WIDTH / CHAR_BIT;
70 }
71 
77 template <size_t ISA_WIDTH, typename T>
78 constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec){
79  return NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT));
80 }
81 
85 template <size_t STRIDE>
86 constexpr int numPadded(const int n) {
87  // This always return a padded number dividable
88  // with STRIDE , eg if STRIDE = 16
89  // e.g ((33+15)&~15) = 48
90  constexpr size_t STRIDEMINUS1 = STRIDE - 1;
91  return ((n + STRIDEMINUS1) & ~STRIDEMINUS1);
92 }
93 
97 template <size_t ISA_WIDTH, typename T>
99 T vFindMinimum(const T* distancesIn, int n) {
100 
101  using namespace CxxUtils;
102  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
103  constexpr size_t VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
104  const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
105  using vec_t = vec<T, VEC_WIDTH>;
106  vec_t minValues1;
107  vec_t minValues2;
108  vec_t minValues3;
109  vec_t minValues4;
110  vload(minValues1, array);
111  vload(minValues2, array + VEC_WIDTH);
112  vload(minValues3, array + VEC_WIDTH * 2);
113  vload(minValues4, array + VEC_WIDTH * 3);
114  vec_t values1;
115  vec_t values2;
116  vec_t values3;
117  vec_t values4;
118  for (int i = 4 * VEC_WIDTH; i < n; i += 4 * VEC_WIDTH) {
119  // 1
120  vload(values1, array + i);
121  vmin(minValues1, values1, minValues1);
122  // 2
123  vload(values2, array + i + VEC_WIDTH);
124  vmin(minValues2, values2, minValues2);
125  // 3
126  vload(values3, array + i + 2 * VEC_WIDTH);
127  vmin(minValues3, values3, minValues3);
128  // 4
129  vload(values4, array + i + 3 * VEC_WIDTH);
130  vmin(minValues4, values4, minValues4);
131  }
132  // Compare //1 with //2
133  vmin(minValues1, minValues1, minValues2);
134  // compare //3 with //4
135  vmin(minValues3, minValues3, minValues4);
136  // Compare //1 with //3
137  vmin(minValues1, minValues1, minValues3);
138  // Do the final calculation scalar way
139  T finalMinValues[VEC_WIDTH];
140  vstore(finalMinValues, minValues1);
141 
142  // Do the final calculation scalar way
143  return std::reduce(std::begin(finalMinValues), std::end(finalMinValues),
144  finalMinValues[0],
145  [](T a, T b) { return a < b ? a : b; });
146 }
147 
152 template <size_t ISA_WIDTH, typename T>
154 int vIdxOfValue(const T value,
155  const T* distancesIn, int n) {
156  using namespace CxxUtils;
157 
158  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
159  constexpr int VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
160  const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
161  using vec_t = vec<T, VEC_WIDTH>;
162  using vec_mask = vec_mask_type_t<vec_t>;
163  vec_t values1;
164  vec_t values2;
165  vec_t values3;
166  vec_t values4;
167  vec_t target;
169  for (int i = 0; i < n; i += 4 * VEC_WIDTH) {
170  // 1
171  vload(values1, array + i);
172  vec_mask eq1 = values1 == target;
173  // 2
174  vload(values2, array + i + VEC_WIDTH);
175  vec_mask eq2 = values2 == target;
176  // 3
177  vload(values3, array + i + VEC_WIDTH * 2);
178  vec_mask eq3 = values3 == target;
179  // 4
180  vload(values4, array + i + VEC_WIDTH * 3);
181  vec_mask eq4 = values4 == target;
182 
183  vec_mask eq12 = eq1 || eq2;
184  vec_mask eq34 = eq3 || eq4;
185  vec_mask eqAny = eq12 || eq34;
186  if (vany(eqAny)) {
187  for (int idx = i; idx < i + 4 * VEC_WIDTH; ++idx) {
188  if (distancesIn[idx] == value) {
189  return idx;
190  }
191  }
192  }
193  }
194  return -1;
195 }
196 
203 template <int ISA_WIDTH, typename T>
205 int vIdxOfMin(const T* distancesIn, int n) {
206  using namespace CxxUtils;
207  const T* array = std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
208  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
209  //We process elements in blocks.
210  //When we find the minimum we also
211  //keep track in which block it was.
212  //The blocksize of 512 seemed to be a good enough
213  //compromise in tests.
214  constexpr int blockSize = 512;
215  // case for n less than blockSize
216  if (n <= blockSize) {
217  T min = vFindMinimum<ISA_WIDTH>(array, n);
218  return vIdxOfValue<ISA_WIDTH>(min, array, n);
219  }
220  int idx = 0;
221  T min = array[0];
222  // We might have a remainder, elements after an integral
223  // number of blockes that we need to handle
224  const int remainder = n & (blockSize - 1);
225  // Process elements up to the remainder in blocks
226  // For example for blockSize 512 and 1056 elements
227  // (if we opt for padding/multiple of 32)
228  // The loop will run two times and then we need
229  // to handle the 32 remaining elements.
230  for (int i = 0; i < (n - remainder); i += blockSize) {
231  T mintmp = vFindMinimum<ISA_WIDTH>(array + i, blockSize);
232  if (mintmp < min) {
233  min = mintmp;
234  idx = i;
235  }
236  }
237  //Process the remaining elements if any
238  if (remainder != 0) {
239  int index = n - remainder;
240  T mintmp = vFindMinimum<ISA_WIDTH>(array + index, remainder);
241  if (mintmp < min) {
242  min = mintmp;
243  return index + vIdxOfValue<ISA_WIDTH>(min, array + index, remainder);
244  }
245  }
246  // Return the idx of the minimum just looping over a single block
247  return idx + vIdxOfValue<ISA_WIDTH>(min, array + idx, blockSize);
248 }
249 
250 } // namespace vAlgs
251 
252 #endif
vAlgs::numPadded
constexpr int numPadded(const int n)
Given a number n returns a new n >= n that is padded to the required STRIDE.
Definition: GsfFindIndexOfMinimum.h:86
features.h
Some additional feature test macros.
inline_hints.h
vAlgs::strideOfNumSIMDVec
constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec)
returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific...
Definition: GsfFindIndexOfMinimum.h:78
index
Definition: index.py:1
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
PlotCalibFromCool.begin
begin
Definition: PlotCalibFromCool.py:94
ATH_ALWAYS_INLINE
#define ATH_ALWAYS_INLINE
Definition: inline_hints.h:53
athena.value
value
Definition: athena.py:124
PlotCalibFromCool.vmin
vmin
Definition: PlotCalibFromCool.py:696
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
CxxUtils::vec_mask_type_t
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition: vec.h:219
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:84
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:92
vAlgs
Definition: GsfFindIndexOfMinimum.h:57
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
lumiFormat.i
int i
Definition: lumiFormat.py:85
beamspotman.n
n
Definition: beamspotman.py:729
vAlgs::alignmentForArray
constexpr size_t alignmentForArray()
In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of...
Definition: GsfFindIndexOfMinimum.h:68
CxxUtils
Definition: aligned_vector.h:29
lumiFormat.array
array
Definition: lumiFormat.py:91
restrict.h
Macro wrapping the nonstandard restrict keyword.
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
Definition: compareFlatTrees.cxx:44
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:76
vAlgs::vIdxOfMin
ATH_ALWAYS_INLINE int vIdxOfMin(const T *distancesIn, int n)
Find the index of the minimum in the array of distances.
Definition: GsfFindIndexOfMinimum.h:205
a
TList * a
Definition: liststreamerinfos.cxx:10
vec.h
Vectorization helpers.
copySelective.target
string target
Definition: copySelective.py:36
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
vAlgs::vIdxOfValue
ATH_ALWAYS_INLINE int vIdxOfValue(const T value, const T *distancesIn, int n)
Find the index of an element in the array of distances processing four simd vectors at a time.
Definition: GsfFindIndexOfMinimum.h:154
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
vAlgs::vFindMinimum
ATH_ALWAYS_INLINE T vFindMinimum(const T *distancesIn, int n)
Find the minimum element in the array of distances processing four simd vectors at a time.
Definition: GsfFindIndexOfMinimum.h:99