26 #ifdef __clang__ // trapping-math is incompatible with multiversioning with clang 20.1.5
27 #pragma float_control(except, off)
30 #if HAVE_FUNCTION_MULTIVERSIONING
32 int vIdxOfMin(
const float* distancesIn,
int n) {
33 return vAlgs::vIdxOfMin<256>(distancesIn,
n);
38 return vAlgs::vIdxOfMin<128>(distancesIn,
n);
46 constexpr
size_t STRIDEForKL = vAlgs::strideOfNumSIMDVec<256,float>(4);
65 const double meanDifference = componentI.mean - componentJ.mean;
66 const double inverCovSum = componentI.invCov + componentJ.invCov;
67 const double term1 = componentI.invCov * componentJ.cov;
68 const double term2 = componentJ.invCov * componentI.cov;
69 const double term3 = meanDifference * inverCovSum * meanDifference;
70 return term1 + term2 + term3;
84 const double sumWeight = updated.weight + removed.weight;
86 const double invSumWeight = 1. / sumWeight;
87 const double weightI_IJ = updated.weight * invSumWeight;
88 const double weightJ_IJ = removed.weight * invSumWeight;
89 const double meanDiff = (updated.mean - removed.mean);
91 const double sumMean = weightI_IJ * updated.mean + weightJ_IJ * removed.mean;
93 const double sumVariance = weightI_IJ * updated.cov +
94 weightJ_IJ * removed.cov +
95 weightI_IJ * weightJ_IJ * meanDiff * meanDiff;
96 updated.mean = sumMean;
97 updated.cov = sumVariance;
98 updated.invCov = 1. / sumVariance;
99 updated.weight = sumWeight;
103 numDistances(
const int n,
float* distancesIn)
105 const int npadded = vAlgs::numPadded<STRIDEForKL>(
n);
188 struct triangularToIJ
199 inline triangularToIJ
203 throw std::out_of_range(
"KLGaussianMixtureReduction.cxx::convert : idx is negative");
205 int i = std::floor((std::sqrt(1 + 8 *
idx) + 1) / 2);
206 int j =
idx - (
i - 1) *
i / 2;
217 calculateAllDistances(
const Component1D* componentsIn,
221 const Component1D* components = std::assume_aligned<GSFConstants::alignment>(componentsIn);
222 float* distances = std::assume_aligned<GSFConstants::alignment>(distancesIn);
223 for (
int i = 1;
i <
n; ++
i) {
224 const int indexConst = (
i - 1) *
i / 2;
226 for (
int j = 0; j <
i; ++j) {
228 distances[indexConst + j] = symmetricKL(componentI, componentJ);
251 float* distances = std::assume_aligned<GSFConstants::alignment>(distancesIn);
252 Component1D* components = std::assume_aligned<GSFConstants::alignment>(componentsIn);
253 int* mergingIndex = std::assume_aligned<GSFConstants::alignment>(mergingIndexIn);
257 const int last = (
n - 1);
258 const int indexOffsetJ = (minFrom - 1) * minFrom / 2;
259 const int indexOffsetLast = (last - 1) * last / 2;
261 if (minFrom != last) {
263 for (
int i = 0;
i < minFrom; ++
i) {
264 std::swap(distances[indexOffsetJ +
i], distances[indexOffsetLast +
i]);
267 for (
int i = minFrom + 1;
i < last; ++
i) {
268 const int index = (
i-1) *
i/2 + minFrom;
272 std::swap(components[minFrom], components[last]);
273 std::swap(mergingIndex[minFrom], mergingIndex[last]);
280 const int indexConst = (minTo - 1) * minTo/2;
284 for (
int i = 0;
i < minTo; ++
i) {
286 const int index = indexConst +
i;
287 distances[
index] = symmetricKL(componentI, componentJ);
290 for (
int i = minTo + 1;
i < last; ++
i) {
292 const int index = (
i- 1) *
i/2 + minTo;
293 distances[
index] = symmetricKL(componentI, componentJ);
307 const int n = componentsIn.size();
308 int nn =
n * (
n - 1) / 2;
309 int nnpadded = vAlgs::numPadded<STRIDEForKL>(nn);
314 calculateAllDistances(components.
buffer(), distances.
buffer(),
n);
317 std::iota(mergingIndex.
begin(), mergingIndex.
end(), 0);
321 int numberOfComponentsLeft =
n;
323 while (numberOfComponentsLeft > reducedSize) {
326 const triangularToIJ conversion =
convert(minIndex);
327 int minTo = conversion.I;
328 int minFrom = conversion.J;
330 if (mergingIndex[minTo] < mergingIndex[minFrom]) {
334 const int miniToreturn = mergingIndex[minTo];
335 const int minjToreturn = mergingIndex[minFrom];
336 result.push_back({ miniToreturn, minjToreturn });
338 combine(components[minTo], components[minFrom]);
340 numberOfComponentsLeft = updateDistances(components.
buffer(),
345 numberOfComponentsLeft);
348 nn = (numberOfComponentsLeft - 1) * numberOfComponentsLeft / 2;
349 nnpadded = numDistances(nn, distances.
buffer());