59 #ifndef GSFFindIndexOfMimimum_H
60 #define GSFFindIndexOfMimimum_H
64 #include "GaudiKernel/Kernel.h"
74 int32_t
scalarC(
const float* distancesIn,
int n) {
76 std::assume_aligned<GSFConstants::alignment>(distancesIn);
77 float minvalue =
array[0];
79 for (
int i = 0;
i <
n; ++
i) {
81 if (
value < minvalue) {
93 std::assume_aligned<GSFConstants::alignment>(distancesIn);
102 std::assume_aligned<GSFConstants::alignment>(distancesIn);
128 for (
int i = 16;
i <
n;
i += 16) {
131 indices1 = indices1 + increment;
133 vselect(minIndices1, indices1, minIndices1, lt1);
134 vmin(minValues1, values1, minValues1);
137 indices2 = indices2 + increment;
139 vselect(minIndices2, indices2, minIndices2, lt2);
140 vmin(minValues2, values2, minValues2);
143 indices3 = indices3 + increment;
145 vselect(minIndices3, indices3, minIndices3, lt3);
146 vmin(minValues3, values3, minValues3);
149 indices4 = indices4 + increment;
151 vselect(minIndices4, indices4, minIndices4, lt4);
152 vmin(minValues4, values4, minValues4);
// Spill the four vector accumulators into flat scratch arrays: accumulator k
// is written at offset 4*(k-1), with values and indices kept in matching slots.
157 vstore(minValues, minValues1);
158 vstore(minValues + 4, minValues2);
159 vstore(minValues + 8, minValues3);
160 vstore(minValues + 12, minValues4);
161 vstore(minIndices, minIndices1);
162 vstore(minIndices + 4, minIndices2);
163 vstore(minIndices + 8, minIndices3);
164 vstore(minIndices + 12, minIndices4);
// Scalar pass over the 16 spilled slots: slot 0 seeds the result, so the scan
// below starts at slot 1 and reads the (value, index) pair of each slot.
167 int32_t minIndex = minIndices[0];
168 for (
size_t i = 1;
i < 16; ++
i) {
169 const float value = minValues[
i];
170 const int32_t
index = minIndices[
i];
188 std::assume_aligned<GSFConstants::alignment>(distancesIn);
191 float min = distancesIn[0];
198 for (
int i = 0;
i <
n;
i += 16) {
209 vmin(values1, values1, values2);
211 vmin(values3, values3, values4);
213 vmin(values1, values1, values3);
// The four lanes of values1 are spilled and rescanned only when
// vany(newMinimumMask) is true (presumably: at least one lane beat the running
// minimum -- confirm against the definition of vany).
216 if (
vany(newMinimumMask)) {
218 float minCandidates[4];
219 vstore(minCandidates, values1);
// Fold the four spilled lanes into the running scalar minimum.
220 for (
int j = 0; j < 4; ++j) {
221 if (minCandidates[j] <
min) {
222 min = minCandidates[j];
// Exact float equality against `min`: in the visible code `min` is only ever
// copied from stored values, never computed, so == is presumably used to locate
// the element that produced it -- TODO confirm with the lines not shown here.
230 if (distancesIn[
i] ==
min) {
237 template <
typename T =
float,
int STRIDE = 16,
int VEC_WIDTH = 4>
241 const T*
array = std::assume_aligned<GSFConstants::alignment>(distancesIn);
242 constexpr
int vectorCount = STRIDE / VEC_WIDTH;
246 for (
int i = 0;
i < vectorCount;
i++) {
250 constexpr
int totalStride = VEC_WIDTH * vectorCount;
252 for (
int i = totalStride;
i <
n;
i += totalStride) {
254 for (
int j = 0; j < vectorCount; j++) {
260 T finalMinValues[VEC_WIDTH];
264 [](
auto a,
auto b){
return a <
b ?
a :
b; }));
270 [](
auto a,
auto b){
return a <
b ?
a :
b; });
273 template <
typename T =
float,
int STRIDE = 16,
int VEC_WIDTH = 4>
277 const T*
array = std::assume_aligned<GSFConstants::alignment>(distancesIn);
278 constexpr
int vectorCount = STRIDE / VEC_WIDTH;
285 for (
int i = 0;
i <
n;
i += STRIDE) {
287 for (
int j = 0; j < vectorCount; j++) {
298 [](
auto a,
auto b){
return a ||
b; }))) {
313 std::assume_aligned<GSFConstants::alignment>(distancesIn);
// NOTE(review): with blockSizePower2 = 8, `2 << blockSizePower2` evaluates to
// 512 (2^9), not 2^8 = 256. If blockSizePower2 is meant to be the exponent of
// the block size, `1 << blockSizePower2` would be the matching expression --
// confirm which block size is intended.
316 constexpr
int blockSizePower2 = 8;
317 constexpr
int blockSize = 2 << blockSizePower2;
// Inputs of at most blockSize elements take this branch.
319 if (
n <= blockSize) {
358 template <enum Impl I>
362 "Not a valid implementation chosen");
370 }
else if constexpr (
I == C) {
372 }
else if constexpr (
I ==
STL) {