ATLAS Offline Software
vec.h
Go to the documentation of this file.
1 // This file's extension implies that it's C, but it's really -*- C++ -*-.
2 /*
3  * Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration.
4  */
128 #ifndef CXXUTILS_VEC_H
129 #define CXXUTILS_VEC_H
130 
131 #include "CxxUtils/features.h"
132 #include "CxxUtils/inline_hints.h"
133 #include <cstdlib>
134 #include <cstring>
135 #include <type_traits>
136 
137 
138 // Define @c WANT_VECTOR_FALLBACK prior to including this file to
139 // make the fallback class @c vec_fb visible, even if we support the
140 // built-in type.
141 // Intended for testing.
142 #ifndef WANT_VECTOR_FALLBACK
143 # define WANT_VECTOR_FALLBACK 0
144 #endif
145 
146 #if (!HAVE_VECTOR_SIZE_ATTRIBUTE) || WANT_VECTOR_FALLBACK!=0
147 #include "CxxUtils/vec_fb.h"
148 #endif // !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
149 
150 namespace CxxUtils {
151 
152 namespace vecDetail {
158 template <typename T, size_t N>
159 struct vec_typedef {
160  static_assert((N & (N - 1)) == 0, "N must be a power of 2.");
161  static_assert(std::is_arithmetic_v<T>, "T not an arithmetic type");
162 
163 #if HAVE_VECTOR_SIZE_ATTRIBUTE
164  using type __attribute__((vector_size(N * sizeof(T)))) = T;
165 #else
166  using type vec_fb<T, N>;
167 #endif
168 };
169 
/**
 * @brief Deduce the element type from a vectorized type.
 *
 * @c elt is declared but never defined: it exists only so that its return
 * type, the type of @c v[0] on a const @c VEC, can be inspected.
 * @c type1 is that raw result; @c type is the same with any reference and
 * cv-qualification removed.
 */
template <class VEC>
struct vec_type {
  static auto elt(const VEC& v) -> decltype(v[0]);
  using type1 = std::invoke_result_t<decltype(elt), const VEC&>;
  using type = std::remove_cv_t<std::remove_reference_t<type1>>;
};
179 
/**
 * @brief Deduce the type of the mask returned by relational operations,
 *        for a vectorized type.
 *
 * @c maskt is declared but never defined: only its return type, the type
 * of @c v1 < v2 for two const @c VEC operands, is used.  @c type1 is that
 * raw result; @c type is the same with any reference and cv-qualification
 * removed.
 */
template <class VEC>
struct vec_mask_type {
  static auto maskt(const VEC& v1, const VEC& v2) -> decltype(v1 < v2);
  typedef typename std::invoke_result<decltype(maskt), const VEC&, const VEC&>::type type1;
  typedef std::remove_cv_t<std::remove_reference_t<type1>> type;
};
190 
namespace bool_pack_helper {
// Carrier for a list of compile-time booleans.  It is only ever named as a
// template argument, so a declaration without a definition is sufficient.
template <bool...>
struct bool_pack;

// The pack with `true` appended equals the pack with `true` prepended only
// when every element is `true`; all_true<...>::value reports exactly that.
template <bool... Flags>
using all_true =
  std::is_same<bool_pack<Flags..., true>, bool_pack<true, Flags...>>;
} // namespace bool_pack_helper
200 
201 } // namespace vecDetail
202 
206 template <typename T, size_t N>
208 
212 template<class VEC>
214 
218 template<class VEC>
220 
224 template<class VEC>
226 constexpr size_t
228 {
229  typedef vec_type_t<VEC> ELT;
230  return sizeof(VEC) / sizeof(ELT);
231 }
232 
236 template<class VEC>
238 constexpr size_t
239 vec_size(const VEC&)
240 {
241  typedef vec_type_t<VEC> ELT;
242  return sizeof(VEC) / sizeof(ELT);
243 }
244 
248 template<typename VEC, typename T>
250 void
251 vbroadcast(VEC& v, T x)
252 {
253 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
254  constexpr size_t N = CxxUtils::vec_size<VEC>();
255  for (size_t i = 0; i < N; ++i) {
256  v[i] = x;
257  }
258 #else
259  // using - to avoid sign conversions.
260  v = x - VEC{ 0 };
261 #endif
262 }
263 
264 /*
265  * @brief load elements from memory address src (C-array)
266  * to a vectorized type dst.
267  * Uses memcpy to avoid alignment issues
268  */
269 template<typename VEC>
271 void
272 vload(VEC& dst, vec_type_t<VEC> const* src)
273 {
274 
275 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
276  std::memcpy(dst.m_arr, src, sizeof(VEC));
277 #else
278  std::memcpy(&dst, src, sizeof(VEC));
279 #endif
280 }
281 
282 /*
283  * @brief store elements from a vectorized type src to
284  * to a memory address dst (C-array).
285  * Uses memcpy to avoid alignment issues
286  */
287 template<typename VEC>
289 void
290 vstore(vec_type_t<VEC>* dst, const VEC& src)
291 {
292 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
293  std::memcpy(dst, src.m_arr, sizeof(VEC));
294 #else
295  std::memcpy(dst, &src, sizeof(VEC));
296 #endif
297 }
298 
299 /*
300  * @brief select elements based on a mask
301  * Fill dst according to
302  * dst[i] = mask[i] ? a[i] : b[i]
303  */
304 template <typename VEC>
306 void vselect(VEC& dst, const VEC& a, const VEC& b, const vec_mask_type_t<VEC>& mask) {
307 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
308  constexpr size_t N = vec_size<VEC>();
309  for (size_t i = 0; i < N; ++i) {
310  dst[i] = mask[i] ? a[i] : b[i];
311  }
312 #else
313  dst = mask ? a : b;
314 #endif
315 }
316 
317 /*
318  * @brief vectorized min.
319  * copies to @c dst[i] the min(a[i],b[i])
320  */
321 template<typename VEC>
323 void
324 vmin(VEC& dst, const VEC& a, const VEC& b)
325 {
326 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
327  constexpr size_t N = vec_size<VEC>();
328  for (size_t i = 0; i < N; ++i) {
329  dst[i] = a[i] < b[i] ? a[i] : b[i];
330  }
331 #else
332  dst = a < b ? a : b;
333 #endif
334 }
335 
336 /*
337  * @brief vectorized max.
338  * copies to @c dst[i] the max(a[i],b[i])
339  */
340 template<typename VEC>
342 void
343 vmax(VEC& dst, const VEC& a, const VEC& b)
344 {
345 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
346  constexpr size_t N = vec_size<VEC>();
347  for (size_t i = 0; i < N; ++i) {
348  dst[i] = a[i] > b[i] ? a[i] : b[i];
349  }
350 #else
351  dst = a > b ? a : b;
352 #endif
353 }
354 
355 /*
356  * @brief Returns true if at least
357  * one value in mask is true.
358  */
359 template<typename VEC>
361 bool
362 vany(const VEC& mask){
363  static_assert(std::is_integral<vec_type_t<VEC>>::value,
364  "vec elements must be of integral type. Aka vec must be "
365  "compatible with a mask");
366  VEC zero;
368 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
369  return std::memcmp(mask.m_arr, zero.m_arr, sizeof(VEC)) != 0;
370 #else
371  return std::memcmp(&mask, &zero, sizeof(VEC)) != 0;
372 #endif
373 }
374 
375 /*
376  * @brief Returns true if
377  * all values in mask are false
378  */
379 template<typename VEC>
381 bool
382 vnone(const VEC& mask){
383  static_assert(std::is_integral<vec_type_t<VEC>>::value,
384  "vec elements must be of integral type. Aka vec must be "
385  "compatible with a mask");
386  VEC zero;
388 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
389  return std::memcmp(mask.m_arr, zero.m_arr, sizeof(VEC)) == 0;
390 #else
391  return std::memcmp(&mask, &zero, sizeof(VEC)) == 0;
392 #endif
393 }
394 
395 /*
396  * @brief Returns true if
397  * all values in mask are true
398  */
399 template<typename VEC>
401 bool
402 vall(const VEC& mask){
403  static_assert(std::is_integral<vec_type_t<VEC>>::value,
404  "vec elements must be of integral type. Aka vec must be "
405  "compatible with a mask");
406  VEC alltrue;
407 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
408  // fallback compares to 0 when false
409  // and 1 when is true
410  vbroadcast(alltrue, vec_type_t<VEC>{1});
411  return std::memcmp(mask.m_arr, alltrue.m_arr, sizeof(VEC)) == 0;
412 #else
413  // For the gnu vector extensions
414  // Vectors are compared element-wise producing 0 when comparison is false
415  // and -1 (constant of the appropriate type where all bits are set) otherwise.
416  vbroadcast(alltrue, vec_type_t<VEC>{-1});
417  return std::memcmp(&mask, &alltrue, sizeof(VEC)) == 0;
418 #endif
419 }
420 
425 template<typename VEC1, typename VEC2>
427 void
428 vconvert(VEC1& dst, const VEC2& src)
429 {
430  static_assert((vec_size<VEC1>() == vec_size<VEC2>()),
431  "vconvert dst and src have different number of elements");
432 
433 #if !HAVE_CONVERT_VECTOR || WANT_VECTOR_FALLBACK
434  typedef vec_type_t<VEC1> ELT;
435  constexpr size_t N = vec_size<VEC1>();
436  for (size_t i = 0; i < N; ++i) {
437  dst[i] = static_cast<ELT>(src[i]);
438  }
439 #else
440  dst = __builtin_convertvector(src, VEC1);
441 #endif
442 }
443 
449 template<size_t... Indices, typename VEC, typename VEC1>
451 vpermute(VEC1& dst, const VEC& src)
452 {
453 
454  static_assert((sizeof...(Indices) == vec_size<VEC1>()),
455  "vpermute number of indices different than return vector size");
456  static_assert(std::is_same<vec_type_t<VEC>, vec_type_t<VEC1>>::value,
457  "vpermute type of input and output vector elements differ");
459  Indices >= 0 && Indices < vec_size<VEC>())...>::value,
460  "vpermute value of a mask index is outside the allowed range");
461 
462 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
463  dst = VEC1{ src[Indices]... };
464 #else
465  dst = __builtin_shufflevector(src, src, Indices...);
466 #endif
467 }
468 
474 template<size_t... Indices, typename VEC, typename VEC1>
476 vpermute2(VEC1& dst, const VEC& src1, const VEC& src2)
477 {
478  static_assert(
479  (sizeof...(Indices) == vec_size<VEC1>()),
480  "vpermute2 number of indices different than return vector size");
481  static_assert(std::is_same<vec_type_t<VEC>, vec_type_t<VEC1>>::value,
482  "vpermute2 type of input and output vector elements differ");
483  constexpr size_t N = vec_size<VEC>();
485  Indices >= 0 && Indices < 2 * N)...>::value,
486  "vpermute2 value of a mask index is outside the allowed range");
487 
488 #if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
489  VEC1 tmp;
490  size_t pos{0};
491  for (auto index: { Indices... }) {
492  if (index < N) {
493  tmp[pos] = src1[index];
494  } else {
495  tmp[pos] = src2[index - N];
496  }
497  ++pos;
498  }
499  dst = tmp;
500 #else
501  dst = __builtin_shufflevector(src1, src2, Indices...);
502 #endif
503 }
504 
505 } // namespace CxxUtils
506 
507 #endif // not CXXUTILS_VEC_H
features.h
Some additional feature test macros.
inline_hints.h
CxxUtils::vnone
ATH_ALWAYS_INLINE bool vnone(const VEC &mask)
Definition: vec.h:382
CxxUtils::vmax
ATH_ALWAYS_INLINE void vmax(VEC &dst, const VEC &a, const VEC &b)
Definition: vec.h:343
WriteCellNoiseToCool.src
src
Definition: WriteCellNoiseToCool.py:513
index
Definition: index.py:1
CxxUtils::vec_size
constexpr ATH_ALWAYS_INLINE size_t vec_size()
Return the number of elements in a vectorized type.
Definition: vec.h:227
CxxUtils::vecDetail::vec_type
Deduce the element type from a vectorized type.
Definition: vec.h:174
CxxUtils::vpermute
ATH_ALWAYS_INLINE void vpermute(VEC1 &dst, const VEC &src)
vpermute function.
Definition: vec.h:451
ATH_ALWAYS_INLINE
#define ATH_ALWAYS_INLINE
Definition: inline_hints.h:53
athena.value
value
Definition: athena.py:124
JetTiledMap::N
@ N
Definition: TiledEtaPhiMap.h:44
CxxUtils::vec_fb
Definition: vec_fb.h:31
CxxUtils::vstore
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition: vec.h:290
CxxUtils::vec_mask_type_t
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition: vec.h:219
x
#define x
CxxUtils::vall
ATH_ALWAYS_INLINE bool vall(const VEC &mask)
Definition: vec.h:402
CxxUtils::vconvert
ATH_ALWAYS_INLINE void vconvert(VEC1 &dst, const VEC2 &src)
performs dst is the result of a static cast of each element of src
Definition: vec.h:428
python.utils.AtlRunQueryLookup.mask
string mask
Definition: AtlRunQueryLookup.py:460
CxxUtils::vecDetail::vec_mask_type::maskt
static auto maskt(const VEC &v1, const VEC &v2) -> decltype(v1< v2)
CxxUtils::vec
typename vecDetail::vec_typedef< T, N >::type vec
Define a nice alias for the vectorized type.
Definition: vec.h:207
CxxUtils::vpermute2
ATH_ALWAYS_INLINE void vpermute2(VEC1 &dst, const VEC &src1, const VEC &src2)
vpermute2 function.
Definition: vec.h:476
CxxUtils::vecDetail::vec_typedef
check the type and the size of the vector.
Definition: vec.h:159
lumiFormat.i
int i
Definition: lumiFormat.py:85
CxxUtils::vecDetail::vec_mask_type::type
std::remove_cv_t< std::remove_reference_t< type1 > > type
Definition: vec.h:188
CxxUtils
Definition: aligned_vector.h:29
CxxUtils::vecDetail::vec_type::type
std::remove_cv_t< std::remove_reference_t< type1 > > type
Definition: vec.h:177
DeMoUpdate.tmp
string tmp
Definition: DeMoUpdate.py:1167
CxxUtils::vecDetail::vec_type::type1
std::invoke_result< decltype(elt), const VEC & >::type type1
Definition: vec.h:176
CxxUtils::vmin
ATH_ALWAYS_INLINE void vmin(VEC &dst, const VEC &a, const VEC &b)
Definition: vec.h:324
CxxUtils::vecDetail::vec_type::elt
static auto elt(const VEC &v) -> decltype(v[0])
CxxUtils::vload
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition: vec.h:272
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
CxxUtils::vselect
ATH_ALWAYS_INLINE void vselect(VEC &dst, const VEC &a, const VEC &b, const vec_mask_type_t< VEC > &mask)
Definition: vec.h:306
python.LumiBlobConversion.pos
pos
Definition: LumiBlobConversion.py:18
ReadCellNoiseFromCoolCompare.v2
v2
Definition: ReadCellNoiseFromCoolCompare.py:364
python.PyAthena.v
v
Definition: PyAthena.py:154
CxxUtils::vecDetail::vec_mask_type
Deduce the type of the mask returned by relational operations, for a vectorized type.
Definition: vec.h:185
DeMoScan.index
string index
Definition: DeMoScan.py:364
__attribute__
__attribute__((always_inline)) inline uint16_t TileCalibDrawerBase
Definition: TileCalibDrawerBase.h:190
a
TList * a
Definition: liststreamerinfos.cxx:10
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
CxxUtils::vec_type_t
typename vecDetail::vec_type< VEC >::type vec_type_t
Define a nice alias for the element type of a vectorized type.
Definition: vec.h:213
CxxUtils::vbroadcast
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition: vec.h:251
CxxUtils::vecDetail::bool_pack_helper::bool_pack
Definition: vec.h:196
CxxUtils::vany
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition: vec.h:362
vec_fb.h
Fallback vectorized class.
CxxUtils::vecDetail::bool_pack_helper::all_true
std::is_same< bool_pack< bs..., true >, bool_pack< true, bs... > > all_true
Definition: vec.h:198
CxxUtils::vecDetail::vec_mask_type::type1
std::invoke_result< decltype(maskt), const VEC &, const VEC & >::type type1
Definition: vec.h:187
zero
void zero(TH2 *h)
zero the contents of a 2d histogram
Definition: comparitor.cxx:435