// Extracted from Doxygen source listing: db/d65/vec_8h_source.html

// This file's extension implies that it's C, but it's really -*- C++ -*-.

/*

 * Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration.

 */

#ifndef CXXUTILS_VEC_H

#define CXXUTILS_VEC_H


#include "CxxUtils/features.h"

#include "CxxUtils/inline_hints.h"

#include <cstdlib>

#include <cstring>

#include <type_traits>


// Define @c WANT_VECTOR_FALLBACK prior to including this file to

// make the fallback class @c vec_fb visible, even if we support the

// built-in type.

// Intended for testing.

#ifndef WANT_VECTOR_FALLBACK

# define WANT_VECTOR_FALLBACK 0

#endif


#if (!HAVE_VECTOR_SIZE_ATTRIBUTE) || WANT_VECTOR_FALLBACK!=0

#include "CxxUtils/vec_fb.h"

#endif // !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK


namespace CxxUtils {


namespace vecDetail {

/**
 * @brief Select the concrete vector type for @c N elements of type @c T.
 *
 * If @c HAVE_VECTOR_SIZE_ATTRIBUTE is set, @c type is @c T carrying the
 * @c vector_size attribute with a total size of @c N*sizeof(T) bytes.
 * Otherwise @c type is the fallback class @c vec_fb<T, N>.
 *
 * @tparam T Element type; must be arithmetic.
 * @tparam N Number of elements; must be a non-zero power of 2.
 */
template <typename T, size_t N>
struct vec_typedef {
  // The bit trick alone also accepts N == 0, so zero is rejected explicitly.
  static_assert(N > 0 && (N & (N - 1)) == 0, "N must be a power of 2.");
  static_assert(std::is_arithmetic_v<T>, "T not an arithmetic type");

#if HAVE_VECTOR_SIZE_ATTRIBUTE
  using type __attribute__((vector_size(N * sizeof(T)))) = T;
#else
  using type = vec_fb<T, N>;
#endif
};


/**
 * @brief Deduce the element type of a vector-like type @c VEC.
 *
 * @c type1 is the exact type of the expression @c v[0] for a
 * @c const @c VEC& @c v; @c type is @c type1 with any reference and
 * cv-qualification removed.
 */
template <class VEC>
struct vec_type {
  // Declaration only: it is used purely inside decltype / invoke_result.
  static auto elt(const VEC& v) -> decltype(v[0]);

  using type1 = std::invoke_result_t<decltype(elt), const VEC&>;
  using type = std::remove_cv_t<std::remove_reference_t<type1>>;
};


/**
 * @brief Deduce the type produced by comparing two @c VEC objects.
 *
 * @c type1 is the exact type of the expression @c v1 < v2 for two
 * @c const @c VEC& operands; @c type is @c type1 with any reference and
 * cv-qualification removed.
 */
template <class VEC>
struct vec_mask_type {
  // Declaration only: it is used purely inside decltype / invoke_result.
  static auto maskt(const VEC& v1, const VEC& v2) -> decltype(v1 < v2);

  using type1 = std::invoke_result_t<decltype(maskt), const VEC&, const VEC&>;
  using type = std::remove_cv_t<std::remove_reference_t<type1>>;
};


namespace bool_pack_helper {

// Carrier for a compile-time list of bools.  Only declared: it is used
// solely as a type to compare against.
template <bool...>
struct bool_pack;

// Holds ::value == true only if every flag is true: appending a `true`
// and prepending a `true` give the same pack exactly when no flag is false.
template <bool... Flags>
using all_true =
  std::is_same<bool_pack<Flags..., true>, bool_pack<true, Flags...>>;

}  // namespace bool_pack_helper


}  // namespace vecDetail


/// @brief Vector of @c N elements of type @c T.
/// Resolves to @c vecDetail::vec_typedef<T,N>::type.
template <typename T, size_t N>

using vec = typename vecDetail::vec_typedef<T,N>::type;


/// @brief Element type of the vector type @c VEC.
/// Resolves to @c vecDetail::vec_type<VEC>::type.
template<class VEC>

using vec_type_t = typename vecDetail::vec_type<VEC>::type;


/// @brief Type resulting from comparing two objects of type @c VEC.
/// Resolves to @c vecDetail::vec_mask_type<VEC>::type.
template<class VEC>

using vec_mask_type_t = typename vecDetail::vec_mask_type<VEC>::type;


/**
 * @brief Number of elements in the vector type @c VEC.
 *
 * Computed as the size of @c VEC divided by the size of one element.
 */
template<class VEC>
ATH_ALWAYS_INLINE
constexpr size_t
vec_size()
{
  return sizeof(VEC) / sizeof(vec_type_t<VEC>);
}


/**
 * @brief Number of elements in a vector, deduced from an object.
 *
 * The argument is used only to deduce @c VEC; its value is never read.
 */
template<class VEC>
ATH_ALWAYS_INLINE
constexpr size_t
vec_size(const VEC&)
{
  // Same computation as the argument-less overload.
  return vec_size<VEC>();
}


/**
 * @brief Copy the scalar @c x into every element of @c v.
 *
 * @param v Vector to fill.
 * @param x Value assigned to each element.
 */
template<typename VEC, typename T>
ATH_ALWAYS_INLINE
void
vbroadcast(VEC& v, T x)
{
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  // Fallback: assign element by element.
  constexpr size_t nLanes = CxxUtils::vec_size<VEC>();
  for (size_t lane = 0; lane != nLanes; ++lane) {
    v[lane] = x;
  }
#else
  // Built-in vectors: scalar minus a zero vector broadcasts the scalar.
  // The subtraction form is used to avoid sign conversions.
  v = x - VEC{ 0 };
#endif
}


/**
 * @brief Load elements from the memory address @c src (a C array)
 *        into the vectorized type @c dst.
 *
 * Copies @c sizeof(VEC) bytes with @c memcpy to avoid alignment issues.
 *
 * @param dst Vector receiving the elements.
 * @param src Address of the first element to read.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
void
vload(VEC& dst, vec_type_t<VEC> const* src)
{
  // Pick the destination storage: the fallback class exposes its
  // elements through the m_arr member.
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  void* const target = dst.m_arr;
#else
  void* const target = &dst;
#endif
  std::memcpy(target, src, sizeof(VEC));
}


/**
 * @brief Store the elements of the vectorized type @c src
 *        to the memory address @c dst (a C array).
 *
 * Copies @c sizeof(VEC) bytes with @c memcpy to avoid alignment issues.
 *
 * @param dst Address of the first element to write.
 * @param src Vector providing the elements.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
void
vstore(vec_type_t<VEC>* dst, const VEC& src)
{
  // Pick the source storage: the fallback class exposes its
  // elements through the m_arr member.
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  const void* const source = src.m_arr;
#else
  const void* const source = &src;
#endif
  std::memcpy(dst, source, sizeof(VEC));
}


/**
 * @brief Select elements based on a mask.
 *
 * Fills @c dst according to
 * @c dst[i] = mask[i] ? a[i] : b[i]
 *
 * @param dst  Vector receiving the selected elements.
 * @param a    Elements taken where the mask is true.
 * @param b    Elements taken where the mask is false.
 * @param mask Per-element selector.
 */
template <typename VEC>
ATH_ALWAYS_INLINE
void
vselect(VEC& dst, const VEC& a, const VEC& b, const vec_mask_type_t<VEC>& mask)
{
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  constexpr size_t nLanes = vec_size<VEC>();
  for (size_t lane = 0; lane != nLanes; ++lane) {
    if (mask[lane]) {
      dst[lane] = a[lane];
    }
    else {
      dst[lane] = b[lane];
    }
  }
#else
  // Built-in vectors support the element-wise ternary directly.
  dst = mask ? a : b;
#endif
}


/**
 * @brief Vectorized min.
 *
 * Copies to @c dst[i] the value @c a[i] if @c a[i] < b[i],
 * and @c b[i] otherwise.
 *
 * @param dst Vector receiving the element-wise minimum.
 * @param a   First operand.
 * @param b   Second operand.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
void
vmin(VEC& dst, const VEC& a, const VEC& b)
{
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  constexpr size_t nLanes = vec_size<VEC>();
  for (size_t lane = 0; lane != nLanes; ++lane) {
    const auto lhs = a[lane];
    const auto rhs = b[lane];
    dst[lane] = lhs < rhs ? lhs : rhs;
  }
#else
  // Element-wise comparison produces the mask that drives the selection.
  dst = (a < b) ? a : b;
#endif
}


/**
 * @brief Vectorized max.
 *
 * Copies to @c dst[i] the value @c a[i] if @c a[i] > b[i],
 * and @c b[i] otherwise.
 *
 * @param dst Vector receiving the element-wise maximum.
 * @param a   First operand.
 * @param b   Second operand.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
void
vmax(VEC& dst, const VEC& a, const VEC& b)
{
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  constexpr size_t nLanes = vec_size<VEC>();
  for (size_t lane = 0; lane != nLanes; ++lane) {
    const auto lhs = a[lane];
    const auto rhs = b[lane];
    dst[lane] = lhs > rhs ? lhs : rhs;
  }
#else
  // Element-wise comparison produces the mask that drives the selection.
  dst = (a > b) ? a : b;
#endif
}


/**
 * @brief Returns true if at least one value in @c mask is true.
 *
 * Implemented as a byte-wise comparison of @c mask against an
 * all-zero vector of the same type.
 *
 * @param mask Vector with integral elements.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
bool
vany(const VEC& mask){
  using ELT = vec_type_t<VEC>;
  static_assert(std::is_integral<ELT>::value,
                "vec elements must be of integral type. Aka vec must be "
                "compatible with a mask");

  VEC allZero;
  vbroadcast(allZero, ELT{0});

#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  const int diff = std::memcmp(mask.m_arr, allZero.m_arr, sizeof(VEC));
#else
  const int diff = std::memcmp(&mask, &allZero, sizeof(VEC));
#endif
  // Any non-zero byte means some element is set.
  return diff != 0;
}


/**
 * @brief Returns true if all values in @c mask are false.
 *
 * Implemented as a byte-wise comparison of @c mask against an
 * all-zero vector of the same type.
 *
 * @param mask Vector with integral elements.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
bool
vnone(const VEC& mask){
  using ELT = vec_type_t<VEC>;
  static_assert(std::is_integral<ELT>::value,
                "vec elements must be of integral type. Aka vec must be "
                "compatible with a mask");

  VEC allZero;
  vbroadcast(allZero, ELT{0});

#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  const int diff = std::memcmp(mask.m_arr, allZero.m_arr, sizeof(VEC));
#else
  const int diff = std::memcmp(&mask, &allZero, sizeof(VEC));
#endif
  // Identical bytes means no element is set.
  return diff == 0;
}


/**
 * @brief Returns true if all values in @c mask are true.
 *
 * Implemented as a byte-wise comparison of @c mask against a vector
 * whose elements all hold the "true" value: 1 for the fallback class,
 * and -1 (all bits set) for the built-in vector types.
 *
 * @param mask Vector with integral elements.
 */
template<typename VEC>
ATH_ALWAYS_INLINE
bool
vall(const VEC& mask){
  using ELT = vec_type_t<VEC>;
  static_assert(std::is_integral<ELT>::value,
                "vec elements must be of integral type. Aka vec must be "
                "compatible with a mask");
  VEC alltrue;
#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  // fallback compares to 0 when false
  // and 1 when is true
  vbroadcast(alltrue, ELT{1});
  return std::memcmp(mask.m_arr, alltrue.m_arr, sizeof(VEC)) == 0;
#else
  // For the gnu vector extensions
  // Vectors are compared element-wise producing 0 when comparison is false
  // and -1 (constant of the appropriate type where all bits are set) otherwise.
  // static_cast rather than ELT{-1}: brace-initialization would be a
  // narrowing error for unsigned element types, which the static_assert
  // above accepts.
  vbroadcast(alltrue, static_cast<ELT>(-1));
  return std::memcmp(&mask, &alltrue, sizeof(VEC)) == 0;
#endif
}


/**
 * @brief Convert the elements of @c src to the element type of @c dst.
 *
 * Performs @c dst[i] = static_cast<element type of VEC1>(src[i]) for
 * every element, or the equivalent @c __builtin_convertvector call
 * when that builtin is available.  Both vector types must have the
 * same number of elements.
 *
 * @param dst Vector receiving the converted elements.
 * @param src Vector to convert.
 */
template<typename VEC1, typename VEC2>
ATH_ALWAYS_INLINE
void
vconvert(VEC1& dst, const VEC2& src)
{
  static_assert((vec_size<VEC1>() == vec_size<VEC2>()),
                "vconvert dst and src have different number of elements");

#if !HAVE_CONVERT_VECTOR || WANT_VECTOR_FALLBACK
  using DstElt = vec_type_t<VEC1>;
  constexpr size_t nLanes = vec_size<VEC1>();
  for (size_t lane = 0; lane != nLanes; ++lane) {
    dst[lane] = static_cast<DstElt>(src[lane]);
  }
#else
  dst = __builtin_convertvector(src, VEC1);
#endif
}


/**
 * @brief Fill @c dst with elements of @c src picked by compile-time indices.
 *
 * Element @c i of @c dst is set to @c src[Indices_i].  The number of
 * indices must equal the number of elements of @c VEC1, every index must
 * be smaller than the number of elements of @c VEC, and both vector
 * types must have the same element type.
 *
 * @param dst Vector receiving the permuted elements.
 * @param src Vector the elements are taken from.
 */
template<size_t... Indices, typename VEC, typename VEC1>
ATH_ALWAYS_INLINE void
vpermute(VEC1& dst, const VEC& src)
{
  static_assert((sizeof...(Indices) == vec_size<VEC1>()),
                "vpermute number of indices different than return vector size");
  static_assert(std::is_same<vec_type_t<VEC>, vec_type_t<VEC1>>::value,
                "vpermute type of input and output vector elements differ");
  // Indices are unsigned, so only the upper bound needs checking.
  static_assert(((Indices < vec_size<VEC>()) && ...),
                "vpermute value of a mask index is outside the allowed range");

#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  dst = VEC1{ src[Indices]... };
#else
  dst = __builtin_shufflevector(src, src, Indices...);
#endif
}


/**
 * @brief Fill @c dst with elements of @c src1 and @c src2 picked by
 *        compile-time indices.
 *
 * With @c N the number of elements of @c VEC, an index below @c N selects
 * @c src1[index] and an index in [N, 2N) selects @c src2[index - N].
 * The number of indices must equal the number of elements of @c VEC1,
 * and both vector types must have the same element type.
 *
 * @param dst  Vector receiving the selected elements.
 * @param src1 Source addressed by indices [0, N).
 * @param src2 Source addressed by indices [N, 2N).
 */
template<size_t... Indices, typename VEC, typename VEC1>
ATH_ALWAYS_INLINE void
vpermute2(VEC1& dst, const VEC& src1, const VEC& src2)
{
  static_assert(
      (sizeof...(Indices) == vec_size<VEC1>()),
      "vpermute2 number of indices different than return vector size");
  static_assert(std::is_same<vec_type_t<VEC>, vec_type_t<VEC1>>::value,
                "vpermute2 type of input and output vector elements differ");
  constexpr size_t N = vec_size<VEC>();
  // Indices are unsigned, so only the upper bound needs checking.
  static_assert(((Indices < 2 * N) && ...),
                "vpermute2 value of a mask index is outside the allowed range");

#if !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
  // Keep the indices in a plain array: iterating over a braced list would
  // implicitly use std::initializer_list, whose header is not included here.
  constexpr size_t indices[] = { Indices... };
  // Fill a temporary first; dst is written only after all reads are done.
  VEC1 tmp;
  for (size_t pos = 0; pos < sizeof...(Indices); ++pos) {
    const size_t index = indices[pos];
    if (index < N) {
      tmp[pos] = src1[index];
    } else {
      tmp[pos] = src2[index - N];
    }
  }
  dst = tmp;
#else
  dst = __builtin_shufflevector(src1, src2, Indices...);
#endif
}


} // namespace CxxUtils


#endif // not CXXUTILS_VEC_H