45 #ifndef GSFFindIndexOfMimimum_H
46 #define GSFFindIndexOfMimimum_H
// Returns ISA_WIDTH / CHAR_BIT, i.e. ISA_WIDTH divided by the number of bits
// in a byte. ISA_WIDTH therefore appears to be a vector width in bits and the
// result the same width in bytes.
// NOTE(review): the declaration line is not visible in this extract; this is
// presumably the alignmentForArray<ISA_WIDTH>() helper whose result is passed
// to std::assume_aligned further down -- confirm.
67 template <
size_t ISA_WIDTH>
69 return ISA_WIDTH / CHAR_BIT;
// Returns NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT)).
// The second factor is the number of elements of type T that fit into
// ISA_WIDTH bits, so the result is the element count covered by NumSIMDVec
// such vectors.
// NOTE(review): the declaration (function name and where NumSIMDVec comes
// from) is not visible in this extract -- confirm against the full file.
77 template <
size_t ISA_WIDTH,
typename T>
79 return NumSIMDVec * (ISA_WIDTH / (
sizeof(T) * CHAR_BIT));
// Computes (n + STRIDE - 1) & ~(STRIDE - 1), which rounds n up to the next
// multiple of STRIDE. The mask trick only produces a multiple of STRIDE when
// STRIDE is a power of two.
// NOTE(review): the declaration line (function name, type of n, return type)
// is not visible in this extract.
85 template <
size_t STRIDE>
90 constexpr
size_t STRIDEMINUS1 = STRIDE - 1;
91 return ((
n + STRIDEMINUS1) & ~STRIDEMINUS1);
// Vectorised search for the minimum of an array of floating point values,
// keeping four running-minimum vectors (minValues1..minValues4).
// NOTE(review): the signature, the vector loads and several other lines are
// not visible in this extract; this is presumably the
// vFindMinimum<ISA_WIDTH>(array, n) helper called further down -- confirm.
97 template <
size_t ISA_WIDTH,
typename T>
102 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
// Number of elements of type T per ISA_WIDTH-bit vector.
103 constexpr
size_t VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
// Tells the compiler that distancesIn is aligned to
// alignmentForArray<ISA_WIDTH>() bytes; passing a pointer that is not
// aligned this way is undefined behaviour.
104 const T*
array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
// The loop starts at 4 * VEC_WIDTH and advances by 4 * VEC_WIDTH elements per
// iteration; presumably the first four vectors were loaded into
// minValues1..minValues4 before the loop (not visible here).
// NOTE(review): i is an int while VEC_WIDTH is a size_t, so the initialiser
// and the increment convert from size_t to int.
118 for (
int i = 4 * VEC_WIDTH;
i <
n;
i += 4 * VEC_WIDTH) {
// Fold each values vector into its running minimum. vmin is assumed to write
// the element-wise minimum of its 2nd and 3rd arguments into the 1st --
// confirm against its definition.
121 vmin(minValues1, values1, minValues1);
124 vmin(minValues2, values2, minValues2);
127 vmin(minValues3, values3, minValues3);
130 vmin(minValues4, values4, minValues4);
// Pairwise combination of the four running minima: 1 <- (1,2), 3 <- (3,4),
// then 1 <- (1,3), leaving the overall result in minValues1.
133 vmin(minValues1, minValues1, minValues2);
135 vmin(minValues3, minValues3, minValues4);
137 vmin(minValues1, minValues1, minValues3);
// Copy the vector into a plain array of VEC_WIDTH scalars so the remaining
// lanes can be reduced with scalar code.
139 T finalMinValues[VEC_WIDTH];
140 vstore(finalMinValues, minValues1);
// Binary callable returning the smaller of its two arguments.
// NOTE(review): the call this lambda is passed to is not visible here.
145 [](T
a, T
b) {
return a <
b ?
a :
b; });
// Vectorised search for the position of a given value (target) in an array
// of floating point values.
// NOTE(review): the function name, the first parameter(s), the vector loads
// and the body of the inner loop are not visible in this extract; this is
// presumably the vIdxOfValue<ISA_WIDTH>(value, array, n) helper called
// further down -- confirm.
152 template <
size_t ISA_WIDTH,
typename T>
155 const T* distancesIn,
int n) {
158 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
// Number of elements of type T per ISA_WIDTH-bit vector (an int here).
159 constexpr
int VEC_WIDTH = ISA_WIDTH / (
sizeof(T) * CHAR_BIT);
// Tells the compiler that distancesIn is aligned to
// alignmentForArray<ISA_WIDTH>() bytes; passing a pointer that is not
// aligned this way is undefined behaviour.
160 const T*
array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
// Walk the array in chunks of 4 * VEC_WIDTH elements.
// NOTE(review): the step size suggests the array is expected to be padded to
// a multiple of 4 * VEC_WIDTH elements -- confirm with the callers.
169 for (
int i = 0;
i <
n;
i += 4 * VEC_WIDTH) {
// Element-wise comparison of each of the four vectors with target.
172 vec_mask eq1 = values1 ==
target;
175 vec_mask eq2 = values2 ==
target;
178 vec_mask eq3 = values3 ==
target;
181 vec_mask eq4 = values4 ==
target;
// OR the four comparison masks together into a single mask for the chunk.
183 vec_mask eq12 = eq1 || eq2;
184 vec_mask eq34 = eq3 || eq4;
185 vec_mask eqAny = eq12 || eq34;
// Scalar loop over the indices of the current chunk, [i, i + 4 * VEC_WIDTH).
// NOTE(review): the condition guarding this loop and its body are not
// visible; presumably it runs only when eqAny reports a match.
187 for (
int idx =
i;
idx <
i + 4 * VEC_WIDTH; ++
idx) {
// Driver that finds the index of the minimum of an array by combining
// vFindMinimum (value of the minimum) with vIdxOfValue (position of a value).
// NOTE(review): the function name, its parameters and most of the blocked
// path are not visible in this extract, and the body continues past the end
// of the visible lines.
// NOTE(review): ISA_WIDTH is declared as int here, whereas the other
// templates visible in this file declare it as size_t.
203 template <
int ISA_WIDTH,
typename T>
// Tells the compiler that distancesIn is aligned to
// vAlgs::alignmentForArray<ISA_WIDTH>() bytes; passing a pointer that is not
// aligned this way is undefined behaviour.
207 const T*
array = std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
208 static_assert(std::is_floating_point_v<T>,
"T not a floating point type");
// Inputs of at most blockSize elements are handled in a single pass.
214 constexpr
int blockSize = 512;
216 if (
n <= blockSize) {
// Small input: find the minimum value over all n elements, then look up the
// index at which that value occurs.
217 T
min = vFindMinimum<ISA_WIDTH>(
array,
n);
218 return vIdxOfValue<ISA_WIDTH>(
min,
array,
n);
// Larger input: minimum of the blockSize-element block starting at array + i.
// NOTE(review): the loop that defines i and the code consuming mintmp are not
// visible here.
231 T mintmp = vFindMinimum<ISA_WIDTH>(
array +
i, blockSize);