ATLAS Offline Software
Loading...
Searching...
No Matches
GsfFindIndexOfMinimum.h
Go to the documentation of this file.
1/*
2 Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
3*/
4
44
45#ifndef GSFFindIndexOfMimimum_H
46#define GSFFindIndexOfMimimum_H
47#include "CxxUtils/features.h"
49#include "CxxUtils/restrict.h"
50#include "CxxUtils/vec.h"
51//
52#include <algorithm>
53#include <memory>
54#include <numeric>
55#include <climits>
56
57namespace vAlgs{
63
67template <size_t ISA_WIDTH>
68constexpr size_t alignmentForArray(){
69 return ISA_WIDTH / CHAR_BIT;
70}
71
77template <size_t ISA_WIDTH, typename T>
78constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec){
79 return NumSIMDVec * (ISA_WIDTH / (sizeof(T) * CHAR_BIT));
80}
81
85template <size_t STRIDE>
86constexpr int numPadded(const int n) {
87 // This always return a padded number dividable
88 // with STRIDE , eg if STRIDE = 16
89 // e.g ((33+15)&~15) = 48
90 constexpr size_t STRIDEMINUS1 = STRIDE - 1;
91 return ((n + STRIDEMINUS1) & ~STRIDEMINUS1);
92}
93
97template <size_t ISA_WIDTH, typename T>
99T vFindMinimum(const T* distancesIn, int n) {
100
101 using namespace CxxUtils;
102 static_assert(std::is_floating_point_v<T>, "T not a floating point type");
103 constexpr size_t VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
104 const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
105 using vec_t = vec<T, VEC_WIDTH>;
106 vec_t minValues1;
107 vec_t minValues2;
108 vec_t minValues3;
109 vec_t minValues4;
110 vload(minValues1, array);
111 vload(minValues2, array + VEC_WIDTH);
112 vload(minValues3, array + VEC_WIDTH * 2);
113 vload(minValues4, array + VEC_WIDTH * 3);
114 vec_t values1;
115 vec_t values2;
116 vec_t values3;
117 vec_t values4;
118 for (int i = 4 * VEC_WIDTH; i < n; i += 4 * VEC_WIDTH) {
119 // 1
120 vload(values1, array + i);
121 vmin(minValues1, values1, minValues1);
122 // 2
123 vload(values2, array + i + VEC_WIDTH);
124 vmin(minValues2, values2, minValues2);
125 // 3
126 vload(values3, array + i + 2 * VEC_WIDTH);
127 vmin(minValues3, values3, minValues3);
128 // 4
129 vload(values4, array + i + 3 * VEC_WIDTH);
130 vmin(minValues4, values4, minValues4);
131 }
132 // Compare //1 with //2
133 vmin(minValues1, minValues1, minValues2);
134 // compare //3 with //4
135 vmin(minValues3, minValues3, minValues4);
136 // Compare //1 with //3
137 vmin(minValues1, minValues1, minValues3);
138 // Do the final calculation scalar way
139 T finalMinValues[VEC_WIDTH];
140 vstore(finalMinValues, minValues1);
141
142 // Do the final calculation scalar way
143 return std::reduce(std::begin(finalMinValues), std::end(finalMinValues),
144 finalMinValues[0],
145 [](T a, T b) { return a < b ? a : b; });
146}
147
152template <size_t ISA_WIDTH, typename T>
154int vIdxOfValue(const T value,
155 const T* distancesIn, int n) {
156 using namespace CxxUtils;
157
158 static_assert(std::is_floating_point_v<T>, "T not a floating point type");
159 constexpr int VEC_WIDTH = ISA_WIDTH / (sizeof(T) * CHAR_BIT);
160 const T* array = std::assume_aligned<alignmentForArray<ISA_WIDTH>()>(distancesIn);
161 using vec_t = vec<T, VEC_WIDTH>;
162 using vec_mask = vec_mask_type_t<vec_t>;
163 vec_t values1;
164 vec_t values2;
165 vec_t values3;
166 vec_t values4;
167 vec_t target;
168 vbroadcast(target, value);
169 for (int i = 0; i < n; i += 4 * VEC_WIDTH) {
170 // 1
171 vload(values1, array + i);
172 vec_mask eq1 = values1 == target;
173 // 2
174 vload(values2, array + i + VEC_WIDTH);
175 vec_mask eq2 = values2 == target;
176 // 3
177 vload(values3, array + i + VEC_WIDTH * 2);
178 vec_mask eq3 = values3 == target;
179 // 4
180 vload(values4, array + i + VEC_WIDTH * 3);
181 vec_mask eq4 = values4 == target;
182
183 vec_mask eq12 = eq1 || eq2;
184 vec_mask eq34 = eq3 || eq4;
185 vec_mask eqAny = eq12 || eq34;
186 if (vany(eqAny)) {
187 for (int idx = i; idx < i + 4 * VEC_WIDTH; ++idx) {
188 if (distancesIn[idx] == value) {
189 return idx;
190 }
191 }
192 }
193 }
194 return -1;
195}
196
203template <int ISA_WIDTH, typename T>
205int vIdxOfMin(const T* distancesIn, int n) {
206 using namespace CxxUtils;
207 const T* array = std::assume_aligned<vAlgs::alignmentForArray<ISA_WIDTH>()>(distancesIn);
208 static_assert(std::is_floating_point_v<T>, "T not a floating point type");
209 //We process elements in blocks.
210 //When we find the minimum we also
211 //keep track in which block it was.
212 //The blocksize of 512 seemed to be a good enough
213 //compromise in tests.
214 constexpr int blockSize = 512;
215 // case for n less than blockSize
216 if (n <= blockSize) {
217 T min = vFindMinimum<ISA_WIDTH>(array, n);
218 return vIdxOfValue<ISA_WIDTH>(min, array, n);
219 }
220 int idx = 0;
221 T min = array[0];
222 // We might have a remainder, elements after an integral
223 // number of blockes that we need to handle
224 const int remainder = n & (blockSize - 1);
225 // Process elements up to the remainder in blocks
226 // For example for blockSize 512 and 1056 elements
227 // (if we opt for padding/multiple of 32)
228 // The loop will run two times and then we need
229 // to handle the 32 remaining elements.
230 for (int i = 0; i < (n - remainder); i += blockSize) {
231 T mintmp = vFindMinimum<ISA_WIDTH>(array + i, blockSize);
232 if (mintmp < min) {
233 min = mintmp;
234 idx = i;
235 }
236 }
237 //Process the remaining elements if any
238 if (remainder != 0) {
239 int index = n - remainder;
240 T mintmp = vFindMinimum<ISA_WIDTH>(array + index, remainder);
241 if (mintmp < min) {
242 min = mintmp;
243 return index + vIdxOfValue<ISA_WIDTH>(min, array + index, remainder);
244 }
245 }
246 // Return the idx of the minimum just looping over a single block
247 return idx + vIdxOfValue<ISA_WIDTH>(min, array + idx, blockSize);
248}
249
250} // namespace vAlgs
251
252#endif
std::vector< size_t > vec
static Double_t a
#define min(a, b)
Definition cfImp.cxx:40
std::vector< std::string > remainder(const std::vector< std::string > &v1, const std::vector< std::string > &v2)
Some additional feature test macros.
#define ATH_ALWAYS_INLINE
ATH_ALWAYS_INLINE bool vany(const VEC &mask)
Definition vec.h:362
typename vecDetail::vec_mask_type< VEC >::type vec_mask_type_t
Define a nice alias for the mask type for a vectorized type.
Definition vec.h:219
ATH_ALWAYS_INLINE void vstore(vec_type_t< VEC > *dst, const VEC &src)
Definition vec.h:290
ATH_ALWAYS_INLINE void vmin(VEC &dst, const VEC &a, const VEC &b)
Definition vec.h:324
ATH_ALWAYS_INLINE void vbroadcast(VEC &v, T x)
Copy a scalar to each element of a vectorized type.
Definition vec.h:251
ATH_ALWAYS_INLINE void vload(VEC &dst, vec_type_t< VEC > const *src)
Definition vec.h:272
Definition index.py:1
ATH_ALWAYS_INLINE int vIdxOfMin(const T *distancesIn, int n)
Find the index of the minimum in the array of distances.
ATH_ALWAYS_INLINE T vFindMinimum(const T *distancesIn, int n)
Find the minimum element in the array of distances processing four simd vectors at a time.
constexpr size_t alignmentForArray()
In the following ISA_WIDTH is the ISA width in bits e.g 128 for SSE 256 for AVX2 etc For the cases of...
ATH_ALWAYS_INLINE int vIdxOfValue(const T value, const T *distancesIn, int n)
Find the index of an element in the array of distances processing four simd vectors at a time.
constexpr int numPadded(const int n)
Given a number n returns a new n >= n that is padded to the required STRIDE.
constexpr size_t strideOfNumSIMDVec(size_t NumSIMDVec)
returns the STRIDE in units of elements covered by NumSIMDVec simd vectors of type T for the specific...
Macro wrapping the nonstandard restrict keyword.
Vectorization helpers.