ATLAS Offline Software
GSFFindIndexOfMinimum.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3 */
4 
51 #ifndef GSFFindIndexOfMimimum_H
52 #define GSFFindIndexOfMimimum_H
53 #include "CxxUtils/features.h"
54 #include "CxxUtils/inline_hints.h"
55 #include "CxxUtils/restrict.h"
56 #include "CxxUtils/vec.h"
57 #include "GaudiKernel/Kernel.h"
59 //
60 #include <algorithm>
61 #include <memory>
62 #include <numeric>
63 #include <climits>
64 
65 namespace vAlgs{
71 
75 template <size_t ISA_WIDTH>
76 constexpr size_t alignmentForArray(){
77  return ISA_WIDTH / CHAR_BIT;
78 }
79 
85 template <size_t ISA_WIDTH, typename T>
86 constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec){
87  return NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT));
88 }
89 
93 template <size_t STRIDE>
94 constexpr int numPadded(const int n) {
95  // This always return a padded number dividable
96  // with STRIDE , eg if STRIDE = 16
97  // e.g ((33+15)&~15) = 48
98  constexpr size_t STRIDEMINUS1 = STRIDE - 1;
99  return ((n + STRIDEMINUS1) & ~STRIDEMINUS1);
100 }
101 
105 template <size_t ISA_WIDTH, typename T>
107 T vFindMinimum(const T* distancesIn, int n) {
108 
109  using namespace CxxUtils;
110  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
111  constexpr size_t VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
112  const T* array =
113  std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
114  using vec_t = vec<T, VEC_WIDTH>;
115  vec_t minValues1;
116  vec_t minValues2;
117  vec_t minValues3;
118  vec_t minValues4;
119  vload(minValues1, array);
120  vload(minValues2, array + VEC_WIDTH);
121  vload(minValues3, array + VEC_WIDTH * 2);
122  vload(minValues4, array + VEC_WIDTH * 3);
123  vec_t values1;
124  vec_t values2;
125  vec_t values3;
126  vec_t values4;
127  for (int i = 4 * VEC_WIDTH; i < n; i += 4 * VEC_WIDTH) {
128  // 1
129  vload(values1, array + i);
130  vmin(minValues1, values1, minValues1);
131  // 2
132  vload(values2, array + i + VEC_WIDTH);
133  vmin(minValues2, values2, minValues2);
134  // 3
135  vload(values3, array + i + 2 * VEC_WIDTH);
136  vmin(minValues3, values3, minValues3);
137  // 4
138  vload(values4, array + i + 3 * VEC_WIDTH);
139  vmin(minValues4, values4, minValues4);
140  }
141  // Compare //1 with //2
142  vmin(minValues1, minValues1, minValues2);
143  // compare //3 with //4
144  vmin(minValues3, minValues3, minValues4);
145  // Compare //1 with //3
146  vmin(minValues1, minValues1, minValues3);
147  // Do the final calculation scalar way
148  T finalMinValues[VEC_WIDTH];
149  vstore(finalMinValues, minValues1);
150 
151  // Do the final calculation scalar way
152  return std::reduce(std::begin(finalMinValues), std::end(finalMinValues),
153  finalMinValues[0],
154  [](T a, T b) { return a < b ? a : b; });
155 }
156 
160 template <size_t ISA_WIDTH, typename T>
162 int vIdxOfValue(const T value,
163  const T* distancesIn, int n) {
164  using namespace CxxUtils;
165 
166  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
167  constexpr int VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
168  const T* array =
169  std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
170  using vec_t = vec<T, VEC_WIDTH>;
171  using vec_mask = vec_mask_type_t<vec_t>;
172  vec_t values1;
173  vec_t values2;
174  vec_t values3;
175  vec_t values4;
176  vec_t target;
178  for (int i = 0; i < n; i += 4 * VEC_WIDTH) {
179  // 1
180  vload(values1, array + i);
181  vec_mask eq1 = values1 == target;
182  // 2
183  vload(values2, array + i + VEC_WIDTH);
184  vec_mask eq2 = values2 == target;
185  // 3
186  vload(values3, array + i + VEC_WIDTH * 2);
187  vec_mask eq3 = values3 == target;
188  // 4
189  vload(values4, array + i + VEC_WIDTH * 3);
190  vec_mask eq4 = values4 == target;
191 
192  vec_mask eq12 = eq1 || eq2;
193  vec_mask eq34 = eq3 || eq4;
194  vec_mask eqAny = eq12 || eq34;
195  if (vany(eqAny)) {
196  for (int idx = i; idx < i + 4 * VEC_WIDTH; ++idx) {
197  if (distancesIn[idx] == value) {
198  return idx;
199  }
200  }
201  }
202  }
203  return -1;
204 }
205 
208 template <int ISA_WIDTH, typename T>
210 int vIdxOfMin(const T* distancesIn, int n) {
211  using namespace CxxUtils;
212  const T* array =
213  std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
214  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
215  //We process elements in blocks. When we find the minimum we also
216  //keep track in which block it was
217  constexpr int blockSize = 512;
218  // case for n less than blockSize
219  if (n <= blockSize) {
220  T min = vFindMinimum<ISA_WIDTH>(array, n);
221  return vIdxOfValue<ISA_WIDTH>(min, array, n);
222  }
223  int idx = 0;
224  T min = array[0];
225  // We might have a remainder that we need to handle
226  const int remainder = n & (blockSize - 1);
227  // process elements up to the remainder in blocks
228  for (int i = 0; i < (n - remainder); i += blockSize) {
229  T mintmp = vFindMinimum<ISA_WIDTH>(array + i, blockSize);
230  if (mintmp < min) {
231  min = mintmp;
232  idx = i;
233  }
234  }
235 
236  //Process the remaining elements if any
237  if (remainder != 0) {
238  int index = n - remainder;
239  T mintmp = vFindMinimum<ISA_WIDTH>(array + index, remainder);
240  // if the minimu is here
241  if (mintmp < min) {
242  min = mintmp;
243  return index + vIdxOfValue<ISA_WIDTH>(min, array + index, remainder);
244  }
245  }
246  // Return the idx of the minimum just looping over a single block
247  return idx + vIdxOfValue<ISA_WIDTH>(min, array + idx, blockSize);
248 }
249 
250 } // namespace vAlgs
251 
252 #endif
vAlgs::numPadded
constexpr int numPadded(const int n)
Given a number n returns a new n >= n that is padded to the required STRIDE.
Definition: GSFFindIndexOfMinimum.h:94
features.h
Some additional feature test macros.
inline_hints.h
vAlgs::strideOfNumSIMDVec
constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec)
returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific...
Definition: GSFFindIndexOfMinimum.h:86
index
Definition: index.py:1
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
PlotCalibFromCool.begin
begin
Definition: PlotCalibFromCool.py:94
ATH_ALWAYS_INLINE
#define ATH_ALWAYS_INLINE
Definition: inline_hints.h:53
athena.value
value
Definition: athena.py:124
PlotCalibFromCool.vmin
vmin
Definition: PlotCalibFromCool.py:696
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
CxxUtils::vec_mask_type_t
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition: vec.h:219
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:81
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
vAlgs
Definition: GSFFindIndexOfMinimum.h:65
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
lumiFormat.i
int i
Definition: lumiFormat.py:85
beamspotman.n
n
Definition: beamspotman.py:731
vAlgs::alignmentForArray
constexpr size_t alignmentForArray()
In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of...
Definition: GSFFindIndexOfMinimum.h:76
CxxUtils
Definition: aligned_vector.h:29
lumiFormat.array
array
Definition: lumiFormat.py:91
restrict.h
Macro wrapping the nonstandard restrict keyword.
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
list of entries in a vector that are not in another
Definition: compareFlatTrees.cxx:44
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
vAlgs::vIdxOfMin
ATH_ALWAYS_INLINE int vIdxOfMin(const T *distancesIn, int n)
Find the index of the minimum in the array of distances.
Definition: GSFFindIndexOfMinimum.h:210
a
TList * a
Definition: liststreamerinfos.cxx:10
vec.h
Vectorization helpers.
copySelective.target
string target
Definition: copySelective.py:37
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
vAlgs::vIdxOfValue
ATH_ALWAYS_INLINE int vIdxOfValue(const T value, const T *distancesIn, int n)
Find the index of an element in the array of distances processing four simd vectors at a time.
Definition: GSFFindIndexOfMinimum.h:162
GsfConstants.h
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
vAlgs::vFindMinimum
ATH_ALWAYS_INLINE T vFindMinimum(const T *distancesIn, int n)
Find the minimum element in the array of distances processing four simd vectors at a time.
Definition: GSFFindIndexOfMinimum.h:107