51 #ifndef GSFFindIndexOfMimimum_H
52 #define GSFFindIndexOfMimimum_H
57 #include "GaudiKernel/Kernel.h"
// Fragment of a function template parameterised on ISA_WIDTH; the signature
// line is not part of this excerpt.
// NOTE(review): presumably this is alignmentForArray(), which other code in
// this header calls as alignmentForArray<ISA_WIDTH>() - confirm against the
// full file.
75 template <
size_t ISA_WIDTH>
// CHAR_BIT is the number of bits per byte, so for an ISA_WIDTH given in bits
// this yields the same width expressed in bytes.
77 return ISA_WIDTH / CHAR_BIT;
// Fragment of a function template over ISA_WIDTH and an element type T; the
// signature line is not part of this excerpt.
85 template <
size_t ISA_WIDTH,
typename T>
// sizeof(T) * CHAR_BIT is the size of one T in bits, so the inner quotient is
// how many T elements fit into ISA_WIDTH bits; the result scales that count
// by NumSIMDVec.
// NOTE(review): NumSIMDVec is declared outside the visible lines - presumably
// a function parameter; confirm.
87 return NumSIMDVec * (ISA_WIDTH / (
sizeof(T) * CHAR_BIT));
// Fragment of a function template parameterised on STRIDE; the signature line
// (and the declaration of n) is not part of this excerpt.
93 template <
size_t STRIDE>
98 constexpr
size_t STRIDEMINUS1 = STRIDE - 1;
// Adds STRIDE - 1 to n and then clears the bits selected by STRIDE - 1.
// When STRIDE is a power of two this rounds n up to the next multiple of
// STRIDE (n is unchanged if it already is a multiple); the mask trick does not
// round correctly for other STRIDE values.
// NOTE(review): nothing visible enforces a power-of-two STRIDE - confirm
// whether the callers guarantee it.
99 return ((
n + STRIDEMINUS1) & ~STRIDEMINUS1);
// Fragment of a function template over ISA_WIDTH and an element type T; the
// signature, the vector declarations and the loads are not part of this
// excerpt.
// NOTE(review): presumably this is vFindMinimum(), which is called elsewhere
// in this header as vFindMinimum<ISA_WIDTH>(array, n) - confirm.
105 template <
size_t ISA_WIDTH,
typename T>
// Only floating point element types are accepted.
110 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
// Number of T elements that fit into ISA_WIDTH bits.
111 constexpr
size_t VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
// Tells the compiler that distancesIn is aligned to
// alignmentForArray<ISA_WIDTH>(); behaviour is undefined if it is not.
113 std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
// The loop starts at 4 * VEC_WIDTH and advances by 4 * VEC_WIDTH elements per
// iteration.
// NOTE(review): presumably the first 4 * VEC_WIDTH elements are loaded into
// minValues1..4 before the loop (not visible here) - confirm.
// NOTE(review): i is an int while VEC_WIDTH is a size_t, so the initialiser
// and the increment convert between unsigned and signed; the sibling search
// routine in this header declares its VEC_WIDTH as int instead.
127 for (
int i = 4 * VEC_WIDTH;
i <
n;
i += 4 * VEC_WIDTH) {
// Four independent running minima, one per vector handled in an iteration.
// NOTE(review): vmin is a helper defined elsewhere; from the way minValues1
// is consumed below it appears to write the element-wise minimum of its last
// two arguments into its first - confirm.
130 vmin(minValues1, values1, minValues1);
133 vmin(minValues2, values2, minValues2);
136 vmin(minValues3, values3, minValues3);
139 vmin(minValues4, values4, minValues4);
// Pairwise combination of the four accumulators into minValues1.
// NOTE(review): presumably executed after the loop; its closing brace is not
// visible in this excerpt.
142 vmin(minValues1, minValues1, minValues2);
144 vmin(minValues3, minValues3, minValues4);
146 vmin(minValues1, minValues1, minValues3);
// Copy the combined vector into a plain array of VEC_WIDTH scalars.
148 T finalMinValues[VEC_WIDTH];
149 vstore(finalMinValues, minValues1);
// Binary operation returning the smaller of its two arguments (b when they
// compare equal). The call it is passed to is not visible here.
154 [](T
a, T
b) {
return a <
b ?
a :
b; });
// Fragment of a function template over ISA_WIDTH and an element type T; the
// first part of the signature, the vector declarations, the loads and the
// body of the inner loop are not part of this excerpt.
// NOTE(review): presumably this is vIdxOfValue(), which is called elsewhere in
// this header as vIdxOfValue<ISA_WIDTH>(min, array, n) - confirm.
160 template <
size_t ISA_WIDTH,
typename T>
163 const T* distancesIn,
int n) {
// Only floating point element types are accepted.
166 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
// Number of T elements that fit into ISA_WIDTH bits (held as int here).
167 constexpr
int VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
// Tells the compiler that distancesIn is aligned to
// alignmentForArray<ISA_WIDTH>(); behaviour is undefined if it is not.
169 std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
// Walk the input from index 0 in steps of 4 * VEC_WIDTH elements.
178 for (
int i = 0;
i <
n;
i += 4 * VEC_WIDTH) {
// Compare each of the four vectors of the current step against target.
// NOTE(review): values1..4, target and vec_mask are declared outside the
// visible lines.
181 vec_mask eq1 = values1 ==
target;
184 vec_mask eq2 = values2 ==
target;
187 vec_mask eq3 = values3 ==
target;
190 vec_mask eq4 = values4 ==
target;
// OR the four comparison masks together into eqAny.
192 vec_mask eq12 = eq1 || eq2;
193 vec_mask eq34 = eq3 || eq4;
194 vec_mask eqAny = eq12 || eq34;
// Scalar scan over the indices [i, i + 4 * VEC_WIDTH) of the current step.
// NOTE(review): presumably only entered when eqAny reports a match (the guard
// is not visible). The upper bound is not checked against n in the visible
// code, so this appears to rely on the array being padded to a multiple of
// 4 * VEC_WIDTH - confirm.
196 for (
int idx =
i;
idx <
i + 4 * VEC_WIDTH; ++
idx) {
208 template <
int ISA_WIDTH,
typename T>
213 std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
214 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
217 constexpr
int blockSize = 512;
219 if (
n <= blockSize) {
220 T
min = vFindMinimum<ISA_WIDTH>(
array,
n);
221 return vIdxOfValue<ISA_WIDTH>(
min,
array,
n);
229 T mintmp = vFindMinimum<ISA_WIDTH>(
array +
i, blockSize);