ATLAS Offline Software
Functions
vAlgs Namespace Reference

Functions

template<size_t ISA_WIDTH>
constexpr size_t alignmentForArray ()
 In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of interest/tested doing 4 simd vectors at a time seemed best. More...
 
template<size_t ISA_WIDTH, typename T >
constexpr size_t strideOfNumSIMDVec (size_t NumSIMDVec)
 returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific ISA For example for a 256 bit, a SIMD vector is 8 floats So 4 simd vectors correspond to 32 elements. More...
 
template<size_t STRIDE>
constexpr int numPadded (const int n)
 Given a number n returns a new n >= n that is padded to the required STRIDE. More...
 
template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINEvFindMinimum (const T *distancesIn, int n)
 Find the minimum element in the array of distances processing four simd vectors at a time. More...
 
template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vIdxOfValue (const T value, const T *distancesIn, int n)
 Find the index of an element in the array of distances processing four simd vectors at a time. More...
 
template<int ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vIdxOfMin (const T *distancesIn, int n)
 Find the index of the minimum in the array of distances. More...
 

Function Documentation

◆ alignmentForArray()

template<size_t ISA_WIDTH>
constexpr size_t vAlgs::alignmentForArray ( )
constexpr

In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of interest/tested doing 4 simd vectors at a time seemed best.

Alignment needed for arrays of elements when using an ISA of specific width e.g 32 for AVX2 16 for SSE4 etc

Definition at line 68 of file GsfFindIndexOfMinimum.h.

68  {
69  return ISA_WIDTH / CHAR_BIT;
70 }

◆ numPadded()

template<size_t STRIDE>
constexpr int vAlgs::numPadded ( const int  n)
constexpr

Given a number n returns a new n >= n that is padded to the required STRIDE.

Definition at line 86 of file GsfFindIndexOfMinimum.h.

86  {
87  // This always return a padded number dividable
88  // with STRIDE , eg if STRIDE = 16
89  // e.g ((33+15)&~15) = 48
90  constexpr size_t STRIDEMINUS1 = STRIDE - 1;
91  return ((n + STRIDEMINUS1) & ~STRIDEMINUS1);
92 }

◆ strideOfNumSIMDVec()

template<size_t ISA_WIDTH, typename T >
constexpr size_t vAlgs::strideOfNumSIMDVec ( size_t  NumSIMDVec)
constexpr

returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific ISA For example for a 256 bit, a SIMD vector is 8 floats So 4 simd vectors correspond to 32 elements.

Definition at line 78 of file GsfFindIndexOfMinimum.h.

78  {
79  return NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT));
80 }

◆ vFindMinimum()

template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE T vAlgs::vFindMinimum ( const T *  distancesIn,
int  n 
)

Find the minimum element in the array of distances processing four simd vectors at a time.

Definition at line 99 of file GsfFindIndexOfMinimum.h.

99  {
100 
101  using namespace CxxUtils;
102  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
103  constexpr size_t VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
104  const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
105  using vec_t = vec<T, VEC_WIDTH>;
106  vec_t minValues1;
107  vec_t minValues2;
108  vec_t minValues3;
109  vec_t minValues4;
110  vload(minValues1, array);
111  vload(minValues2, array + VEC_WIDTH);
112  vload(minValues3, array + VEC_WIDTH * 2);
113  vload(minValues4, array + VEC_WIDTH * 3);
114  vec_t values1;
115  vec_t values2;
116  vec_t values3;
117  vec_t values4;
118  for (int i = 4 * VEC_WIDTH; i < n; i += 4 * VEC_WIDTH) {
119  // 1
120  vload(values1, array + i);
121  vmin(minValues1, values1, minValues1);
122  // 2
123  vload(values2, array + i + VEC_WIDTH);
124  vmin(minValues2, values2, minValues2);
125  // 3
126  vload(values3, array + i + 2 * VEC_WIDTH);
127  vmin(minValues3, values3, minValues3);
128  // 4
129  vload(values4, array + i + 3 * VEC_WIDTH);
130  vmin(minValues4, values4, minValues4);
131  }
132  // Compare //1 with //2
133  vmin(minValues1, minValues1, minValues2);
134  // compare //3 with //4
135  vmin(minValues3, minValues3, minValues4);
136  // Compare //1 with //3
137  vmin(minValues1, minValues1, minValues3);
138  // Do the final calculation scalar way
139  T finalMinValues[VEC_WIDTH];
140  vstore(finalMinValues, minValues1);
141 
142  // Do the final calculation scalar way
143  return std::reduce(std::begin(finalMinValues), std::end(finalMinValues),
144  finalMinValues[0],
145  [](T a, T b) { return a < b ? a : b; });
146 }

◆ vIdxOfMin()

template<int ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vAlgs::vIdxOfMin ( const T *  distancesIn,
int  n 
)

Find the index of the minimum in the array of distances.

Inputs are a ptr to the data and the number of elements to examine. The input needs to be properly padded (see numpadded method)

Definition at line 205 of file GsfFindIndexOfMinimum.h.

205  {
206  using namespace CxxUtils;
207  const T* array = std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
208  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
209  //We process elements in blocks.
210  //When we find the minimum we also
211  //keep track in which block it was.
212  //The blocksize of 512 seemed to be a good enough
213  //compromise in tests.
214  constexpr int blockSize = 512;
215  // case for n less than blockSize
216  if (n <= blockSize) {
217  T min = vFindMinimum<ISA_WIDTH>(array, n);
218  return vIdxOfValue<ISA_WIDTH>(min, array, n);
219  }
220  int idx = 0;
221  T min = array[0];
222  // We might have a remainder, elements after an integral
223  // number of blockes that we need to handle
224  const int remainder = n & (blockSize - 1);
225  // Process elements up to the remainder in blocks
226  // For example for blockSize 512 and 1056 elements
227  // (if we opt for padding/multiple of 32)
228  // The loop will run two times and then we need
229  // to handle the 32 remaining elements.
230  for (int i = 0; i < (n - remainder); i += blockSize) {
231  T mintmp = vFindMinimum<ISA_WIDTH>(array + i, blockSize);
232  if (mintmp < min) {
233  min = mintmp;
234  idx = i;
235  }
236  }
237  //Process the remaining elements if any
238  if (remainder != 0) {
239  int index = n - remainder;
240  T mintmp = vFindMinimum<ISA_WIDTH>(array + index, remainder);
241  if (mintmp < min) {
242  min = mintmp;
243  return index + vIdxOfValue<ISA_WIDTH>(min, array + index, remainder);
244  }
245  }
246  // Return the idx of the minimum just looping over a single block
247  return idx + vIdxOfValue<ISA_WIDTH>(min, array + idx, blockSize);
248 }

◆ vIdxOfValue()

template<size_t ISA_WIDTH, typename T >
ATH_ALWAYS_INLINE int vAlgs::vIdxOfValue ( const value,
const T *  distancesIn,
int  n 
)

Find the index of an element in the array of distances processing four simd vectors at a time.

Returns -1 if the element is not found.

Definition at line 154 of file GsfFindIndexOfMinimum.h.

155  {
156  using namespace CxxUtils;
157 
158  static_assert(std::is_floating_point_v<T>, "T not a floating point type");
159  constexpr int VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
160  const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
161  using vec_t = vec<T, VEC_WIDTH>;
162  using vec_mask = vec_mask_type_t<vec_t>;
163  vec_t values1;
164  vec_t values2;
165  vec_t values3;
166  vec_t values4;
167  vec_t target;
169  for (int i = 0; i < n; i += 4 * VEC_WIDTH) {
170  // 1
171  vload(values1, array + i);
172  vec_mask eq1 = values1 == target;
173  // 2
174  vload(values2, array + i + VEC_WIDTH);
175  vec_mask eq2 = values2 == target;
176  // 3
177  vload(values3, array + i + VEC_WIDTH * 2);
178  vec_mask eq3 = values3 == target;
179  // 4
180  vload(values4, array + i + VEC_WIDTH * 3);
181  vec_mask eq4 = values4 == target;
182 
183  vec_mask eq12 = eq1 || eq2;
184  vec_mask eq34 = eq3 || eq4;
185  vec_mask eqAny = eq12 || eq34;
186  if (vany(eqAny)) {
187  for (int idx = i; idx < i + 4 * VEC_WIDTH; ++idx) {
188  if (distancesIn[idx] == value) {
189  return idx;
190  }
191  }
192  }
193  }
194  return -1;
195 }
index
Definition: index.py:1
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
PlotCalibFromCool.begin
begin
Definition: PlotCalibFromCool.py:94
athena.value
value
Definition: athena.py:124
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
CxxUtils::vec_mask_type_t
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition: vec.h:219
reduce
void reduce(HepMC::GenEvent *ge, std::vector< HepMC::GenParticlePtr > toremove)
Remove unwanted particles from the event, collapsing the graph structure consistently.
Definition: FixHepMC.cxx:84
mergePhysValFiles.end
end
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:92
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
lumiFormat.i
int i
Definition: lumiFormat.py:85
beamspotman.n
n
Definition: beamspotman.py:729
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::vmin
ATH_ALWAYS_INLINE void vmin(VEC &dst, const VEC &a, const VEC &b)
Definition: vec.h:324
lumiFormat.array
array
Definition: lumiFormat.py:91
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
remainder
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
Definition: compareFlatTrees.cxx:44
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:76
a
TList * a
Definition: liststreamerinfos.cxx:10
copySelective.target
string target
Definition: copySelective.py:36
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
LArNewCalib_DelayDump_OFC_Cali.idx
idx
Definition: LArNewCalib_DelayDump_OFC_Cali.py:69
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
TSU::T
unsigned long long T
Definition: L1TopoDataTypes.h:35