ATLAS Offline Software
Functions
vAlgs Namespace Reference

Functions

template<size_t ISA_WIDTH>
constexpr size_t alignmentForArray ()
 In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of interest/tested doing 4 simd vectors at a time seemed best. More...
 
template<size_t ISA_WIDTH, typename T >
constexpr size_t strideOfNumSIMDVec (size_t NumSIMDVec)
 returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific ISA For example for a 256 bit, a SIMD vector is 8 floats So 4 simd vectors correspond to 32 elements. More...
 
template<size_t STRIDE>
constexpr int numPadded (const int n)
 Given a number n returns a new n >= n that is padded to the required STRIDE. More...
 
template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINEvFindMinimum (const T *distancesIn, int n)
 Find the minimum element in the array of distances processing four simd vectors at a time. More...
 
template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vIdxOfValue (const T value, const T *distancesIn, int n)
 Find the index of an element in the array of distances processing four simd vectors at a time. More...
 
template<int ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vIdxOfMin (const T *distancesIn, int n)
 Find the index of the minimum in the array of distances. More...
 

Function Documentation

◆ alignmentForArray()

template<size_t ISA_WIDTH>
constexpr size_t vAlgs::alignmentForArray ( )
constexpr

In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of interest/tested doing 4 simd vectors at a time seemed best.

Alignment needed for arrays of elements when using an ISA of specific width e.g 32 for AVX2 16 for SSE4 etc

Definition at line 76 of file GSFFindIndexOfMinimum.h.

76  {
77  return ISA_WIDTH / CHAR_BIT;
78 }

◆ numPadded()

template<size_t STRIDE>
constexpr int vAlgs::numPadded ( const int  n)
constexpr

Given a number n returns a new n >= n that is padded to the required STRIDE.

Definition at line 94 of file GSFFindIndexOfMinimum.h.

94  {
95  // This always return a padded number dividable
96  // with STRIDE , eg if STRIDE = 16
97  // e.g ((33+15)&~15) = 48
98  constexpr size_t STRIDEMINUS1 = STRIDE - 1;
99  return ((n + STRIDEMINUS1) & ~STRIDEMINUS1);
100 }

◆ strideOfNumSIMDVec()

template<size_t ISA_WIDTH, typename T >
constexpr size_t vAlgs::strideOfNumSIMDVec ( size_t  NumSIMDVec)
constexpr

returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific ISA For example for a 256 bit, a SIMD vector is 8 floats So 4 simd vectors correspond to 32 elements.

Definition at line 86 of file GSFFindIndexOfMinimum.h.

86  {
87  return NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT));
88 }

◆ vFindMinimum()

template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE T vAlgs::vFindMinimum ( const T *  distancesIn,
int  n 
)

Find the minimum element in the array of distances processing four simd vectors at a time.

Definition at line 107 of file GSFFindIndexOfMinimum.h.

107  {
108 
109  using namespace CxxUtils;
110  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
111  constexpr size_t VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
112  const T* array =
113  std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
114  using vec_t = vec<T, VEC_WIDTH>;
115  vec_t minValues1;
116  vec_t minValues2;
117  vec_t minValues3;
118  vec_t minValues4;
119  vload(minValues1, array);
120  vload(minValues2, array + VEC_WIDTH);
121  vload(minValues3, array + VEC_WIDTH * 2);
122  vload(minValues4, array + VEC_WIDTH * 3);
123  vec_t values1;
124  vec_t values2;
125  vec_t values3;
126  vec_t values4;
127  for (int i = 4 * VEC_WIDTH; i < n; i += 4 * VEC_WIDTH) {
128  // 1
129  vload(values1, array + i);
130  vmin(minValues1, values1, minValues1);
131  // 2
132  vload(values2, array + i + VEC_WIDTH);
133  vmin(minValues2, values2, minValues2);
134  // 3
135  vload(values3, array + i + 2 * VEC_WIDTH);
136  vmin(minValues3, values3, minValues3);
137  // 4
138  vload(values4, array + i + 3 * VEC_WIDTH);
139  vmin(minValues4, values4, minValues4);
140  }
141  // Compare //1 with //2
142  vmin(minValues1, minValues1, minValues2);
143  // compare //3 with //4
144  vmin(minValues3, minValues3, minValues4);
145  // Compare //1 with //3
146  vmin(minValues1, minValues1, minValues3);
147  // Do the final calculation scalar way
148  T finalMinValues[VEC_WIDTH];
149  vstore(finalMinValues, minValues1);
150 
151  // Do the final calculation scalar way
152  return std::reduce(std::begin(finalMinValues), std::end(finalMinValues),
153  finalMinValues[0],
154  [](T a, T b) { return a < b ? a : b; });
155 }

◆ vIdxOfMin()

template<int ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vAlgs::vIdxOfMin ( const T *  distancesIn,
int  n 
)

Find the index of the minimum in the array of distances.

Definition at line 210 of file GSFFindIndexOfMinimum.h.

210  {
211  using namespace CxxUtils;
212  const T* array =
213  std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
214  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
215  //We process elements in blocks. When we find the minimum we also
216  //keep track in which block it was
217  constexpr int blockSize = 512;
218  // case for n less than blockSize
219  if (n <= blockSize) {
220  T min = vFindMinimum<ISA_WIDTH>(array, n);
221  return vIdxOfValue<ISA_WIDTH>(min, array, n);
222  }
223  int idx = 0;
224  T min = array[0];
225  // We might have a remainder that we need to handle
226  const int remainder = n & (blockSize - 1);
227  // process elements up to the remainder in blocks
228  for (int i = 0; i < (n - remainder); i += blockSize) {
229  T mintmp = vFindMinimum<ISA_WIDTH>(array + i, blockSize);
230  if (mintmp < min) {
231  min = mintmp;
232  idx = i;
233  }
234  }
235 
236  //Process the remaining elements if any
237  if (remainder != 0) {
238  int index = n - remainder;
239  T mintmp = vFindMinimum<ISA_WIDTH>(array + index, remainder);
240  // if the minimu is here
241  if (mintmp < min) {
242  min = mintmp;
243  return index + vIdxOfValue<ISA_WIDTH>(min, array + index, remainder);
244  }
245  }
246  // Return the idx of the minimum just looping over a single block
247  return idx + vIdxOfValue<ISA_WIDTH>(min, array + idx, blockSize);
248 }

◆ vIdxOfValue()

template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vAlgs::vIdxOfValue ( const value,
const T *  distancesIn,
int  n 
)

Find the index of an element in the array of distances processing four simd vectors at a time.

Definition at line 162 of file GSFFindIndexOfMinimum.h.

163  {
164  using namespace CxxUtils;
165 
166  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
167  constexpr int VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
168  const T* array =
169  std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
170  using vec_t = vec<T, VEC_WIDTH>;
171  using vec_mask = vec_mask_type_t<vec_t>;
172  vec_t values1;
173  vec_t values2;
174  vec_t values3;
175  vec_t values4;
176  vec_t target;
178  for (int i = 0; i < n; i += 4 * VEC_WIDTH) {
179  // 1
180  vload(values1, array + i);
181  vec_mask eq1 = values1 == target;
182  // 2
183  vload(values2, array + i + VEC_WIDTH);
184  vec_mask eq2 = values2 == target;
185  // 3
186  vload(values3, array + i + VEC_WIDTH * 2);
187  vec_mask eq3 = values3 == target;
188  // 4
189  vload(values4, array + i + VEC_WIDTH * 3);
190  vec_mask eq4 = values4 == target;
191 
192  vec_mask eq12 = eq1 || eq2;
193  vec_mask eq34 = eq3 || eq4;
194  vec_mask eqAny = eq12 || eq34;
195  if (vany(eqAny)) {
196  for (int idx = i; idx < i + 4 * VEC_WIDTH; ++idx) {
197  if (distancesIn[idx] == value) {
198  return idx;
199  }
200  }
201  }
202  }
203  return -1;
204 }
index
Definition: index.py:1
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
PlotCalibFromCool.begin
begin
Definition: PlotCalibFromCool.py:94
athena.value
value
Definition: athena.py:124
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
CxxUtils::vec_mask_type_t
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition: vec.h:219
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:81
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:93
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
lumiFormat.i
int i
Definition: lumiFormat.py:85
beamspotman.n
n
Definition: beamspotman.py:731
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::vmin
ATH_ALWAYS_INLINE void vmin(VEC &dst, const VEC &a, const VEC &b)
Definition: vec.h:324
lumiFormat.array
array
Definition: lumiFormat.py:91
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
list of entries in a vector that are not in another
Definition: compareFlatTrees.cxx:44
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
a
TList * a
Definition: liststreamerinfos.cxx:10
copySelective.target
string target
Definition: copySelective.py:37
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
TSU::T
unsigned long long T
Definition: L1TopoDataTypes.h:35