45 #ifndef GSFFindIndexOfMimimum_H 
   46 #define GSFFindIndexOfMimimum_H 
   // Returns ISA_WIDTH / CHAR_BIT, i.e. the width expressed in bytes when
   // ISA_WIDTH is given in bits.
   // NOTE(review): the signature line is missing from this excerpt; this is
   // presumably alignmentForArray<ISA_WIDTH>(), which the code further down
   // passes to std::assume_aligned -- confirm against the full header.
   67 template <
size_t ISA_WIDTH>
 
   69   return ISA_WIDTH / CHAR_BIT;
 
   // Returns NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT)): the count of
   // T-sized elements that fit in ISA_WIDTH bits, multiplied by NumSIMDVec.
   // NOTE(review): the function signature and the declaration of NumSIMDVec
   // are not visible in this excerpt -- confirm where NumSIMDVec comes from.
   77 template <
size_t ISA_WIDTH, 
typename T>
 
   79   return NumSIMDVec * (ISA_WIDTH / (
sizeof(T) * CHAR_BIT));
 
   // Rounds n up to the next multiple of STRIDE using a bit mask:
   //   (n + (STRIDE - 1)) & ~(STRIDE - 1)
   // The mask only yields a multiple of STRIDE when STRIDE is a power of two;
   // nothing visible in this excerpt enforces that.
   // NOTE(review): the signature line (and therefore the type of n) is
   // missing from this excerpt.
   85 template <
size_t STRIDE>
 
   // STRIDE - 1 is the low-bit mask that gets added and then cleared.
   90   constexpr 
size_t STRIDEMINUS1 = STRIDE - 1;
 
   91   return ((
n + STRIDEMINUS1) & ~STRIDEMINUS1);
 
   // Vectorised minimum over an array of floating-point values.
   // NOTE(review): the signature line is missing from this excerpt; the call
   // sites further down suggest this is vFindMinimum<ISA_WIDTH>(distancesIn, n)
   // returning T -- confirm against the full header.
   //
   // What the visible lines do:
   //  - reject non floating-point T at compile time;
   //  - compute VEC_WIDTH, the number of T elements per ISA_WIDTH bits;
   //  - annotate distancesIn as aligned via std::assume_aligned;
   //  - loop with i advancing by 4 * VEC_WIDTH, issuing four vmin calls that
   //    update four separate running minima (minValues1..4);
   //  - fold the four running minima into minValues1, store it to a scalar
   //    buffer, and reduce that buffer with a "smaller of two" lambda.
   // The loads of values1..4 and the initialisation of minValues1..4 are not
   // visible here.
   97 template <
size_t ISA_WIDTH, 
typename T>
 
  102   static_assert(std::is_floating_point_v<T>, 
"T not a floating point type");
 
  103   constexpr 
size_t VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
 
  104   const T* 
array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
 
  // The loop starts at 4 * VEC_WIDTH, presumably because the first four
  // vectors seed minValues1..4 in lines not shown -- confirm.
  // NOTE(review): 4 * VEC_WIDTH is a size_t expression that is converted to
  // int for the loop index.
  118   for (
int i = 4 * VEC_WIDTH; 
i < 
n; 
i += 4 * VEC_WIDTH) {
 
  // presumably vmin(out, a, b) writes the element-wise minimum of a and b
  // into out -- vmin is defined outside this excerpt.
  121     vmin(minValues1, values1, minValues1);
 
  124     vmin(minValues2, values2, minValues2);
 
  127     vmin(minValues3, values3, minValues3);
 
  130     vmin(minValues4, values4, minValues4);
 
  // Pairwise combination of the four accumulators; the result ends up in
  // minValues1.
  133   vmin(minValues1, minValues1, minValues2);
 
  135   vmin(minValues3, minValues3, minValues4);
 
  137   vmin(minValues1, minValues1, minValues3);
 
  // Spill the vector accumulator to a plain array of VEC_WIDTH scalars.
  139   T finalMinValues[VEC_WIDTH];
 
  140   vstore(finalMinValues, minValues1);
 
  // Binary "minimum" callable; the call that consumes it is on a line that
  // is missing from this excerpt.
  145                      [](T 
a, T 
b) { 
return a < 
b ? 
a : 
b; });
 
  // Vectorised search for the position of a given value in an array.
  // NOTE(review): the line carrying the function name, return type and first
  // parameter is missing from this excerpt; the call site further down
  // suggests vIdxOfValue<ISA_WIDTH>(value, distancesIn, n) -- confirm.
  //
  // What the visible lines do:
  //  - reject non floating-point T at compile time;
  //  - compute VEC_WIDTH, the number of T elements per ISA_WIDTH bits;
  //  - annotate distancesIn as aligned via std::assume_aligned;
  //  - walk the array in steps of 4 * VEC_WIDTH, comparing four vectors
  //    (values1..4) against target with == and combining the masks with ||;
  //  - run a scalar loop over the 4 * VEC_WIDTH indices of the current chunk.
  // The vector loads, the test on eqAny and the body of the scalar loop are
  // not visible here.
  152 template <
size_t ISA_WIDTH, 
typename T>
 
  155                 const T* distancesIn, 
int n) {
 
  158   static_assert(std::is_floating_point_v<T>, 
"T not a floating point type");
 
  159   constexpr 
int VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
 
  160   const T* 
array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
 
  169   for (
int i = 0; 
i < 
n; 
i += 4 * VEC_WIDTH) {
 
  // One equality mask per loaded vector.
  172     vec_mask eq1 = values1 == 
target;
 
  175     vec_mask eq2 = values2 == 
target;
 
  178     vec_mask eq3 = values3 == 
target;
 
  181     vec_mask eq4 = values4 == 
target;
 
  // Merge the four masks into one.
  183     vec_mask eq12 = eq1 || eq2;
 
  184     vec_mask eq34 = eq3 || eq4;
 
  185     vec_mask eqAny = eq12 || eq34;
 
  // Scalar pass over the current chunk [i, i + 4 * VEC_WIDTH); presumably
  // guarded by a test on eqAny in a line not shown -- confirm.
  187       for (
int idx = 
i; 
idx < 
i + 4 * VEC_WIDTH; ++
idx) {
 
  203 template <
int ISA_WIDTH, 
typename T>
 
  207   const T* 
array = std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
 
  208   static_assert(std::is_floating_point_v<T>, 
"T not a floating point type");
 
  214   constexpr 
int blockSize = 512;
 
  216   if (
n <= blockSize) {
 
  217     T 
min = vFindMinimum<ISA_WIDTH>(
array, 
n);
 
  218     return vIdxOfValue<ISA_WIDTH>(
min, 
array, 
n);
 
  231     T mintmp = vFindMinimum<ISA_WIDTH>(
array + 
i, blockSize);