666#define CALORECGPU_CMC_EXPAND(...) __VA_ARGS__
669#define CALORECGPU_CMC_LOAD(NAME, NEEDED, PREVNEEDED, VARS, INIT) \
672 using AssumedList = TypeList<CALORECGPU_CMC_EXPAND NEEDED >; \
673 using AssumedPreviousList = TypeList<CALORECGPU_CMC_EXPAND PREVNEEDED >; \
674 CALORECGPU_CMC_EXPAND VARS \
675 template <class Final> __device__ NAME(const Final & f, Parameters p, const int idx) { CALORECGPU_CMC_EXPAND INIT } \
680#define CALORECGPU_CMC_LOAD_SIMPLE_CELL_INFO(NAME, VARNAME, PROPNAME) \
681 CALORECGPU_CMC_LOAD(NAME, \
684 (std::decay_t<decltype(std::declval<CaloRecGPU::CellInfoArr>().PROPNAME[0])> VARNAME;), \
685 (VARNAME = p.cell_info_arr->PROPNAME[idx];) \
688#define CALORECGPU_CMC_LOAD_SIMPLE_GEOMETRY_INFO(NAME, VARNAME, PROPNAME) \
689 CALORECGPU_CMC_LOAD(NAME, \
692 (std::decay_t<decltype(std::declval<CaloRecGPU::GeometryArr>().PROPNAME[0])> VARNAME;), \
693 (VARNAME = p.geometry->PROPNAME[f.hash_ID];) \
696#define CALORECGPU_CMC_LOAD_SIMPLE_CLUSTER_INFO(NAME, VARNAME, PROPNAME) \
697 CALORECGPU_CMC_LOAD(NAME, \
700 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterInfoArr>().PROPNAME[0])> VARNAME;), \
701 (VARNAME = p.clusters_arr->PROPNAME[idx];) \
704#define CALORECGPU_CMC_LOAD_SIMPLE_MOMENT_INFO(NAME, VARNAME, PROPNAME) \
705 CALORECGPU_CMC_LOAD(NAME, \
708 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterInfoArr>().moments.PROPNAME[0])> VARNAME;), \
709 (VARNAME = p.clusters_arr->moments.PROPNAME[idx];) \
714#define CALORECGPU_CMC_LOAD_SIMPLE_PER_SAMPLING_MOMENT_INFO(NAME, VARNAME, PROPNAME) \
715 CALORECGPU_CMC_LOAD(NAME, \
718 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterInfoArr>().moments.PROPNAME[0][0])> VARNAME;), \
719 (VARNAME = p.clusters_arr->moments.PROPNAME[f.sampling][idx];) \
722#define CALORECGPU_CMC_LOAD_SIMPLE_TEMPORARY_INFO(NAME, VARNAME, PROPNAME) \
723 CALORECGPU_CMC_LOAD(NAME, \
726 (std::decay_t<decltype(CMCTemporaries::PROPNAME(std::declval<CaloRecGPU::ClusterInfoArr *>(),0))> VARNAME;), \
727 (VARNAME = CMCTemporaries::PROPNAME(p.clusters_arr, idx);) \
732#define CALORECGPU_CMC_LOAD_SIMPLE_PER_SAMPLING_TEMPORARY_INFO(NAME, VARNAME, PROPNAME) \
733 CALORECGPU_CMC_LOAD(NAME, \
736 (std::decay_t<decltype(CMCTemporaries::PROPNAME(std::declval<CaloRecGPU::ClusterInfoArr *>(),0,0))> VARNAME;), \
737 (VARNAME = CMCTemporaries::PROPNAME(p.clusters_arr, f.sampling, idx);) \
745 (SamplingFromMomentIndex,
749 (sampling = p.moments_index;)
757 (hash_ID = p.cell_info_arr->get_hash_ID(idx, p.assume_complete_cells);)
765 (sampling = p.geometry->sampling(f.hash_ID);)
786 (abs_energy = fabsf(f.energy);)
791 (CellEnergy, CellAbsEnergy),
793 (
float moments_energy;),
794 (moments_energy = (p.opts->use_abs_energy || f.energy > 0.f) ? f.abs_energy : 0.f;)
801 (CellQualityProvenance,
805 (qp =
p.cell_info_arr->qualityProvenance[
idx];)
813 (is_tile =
p.geometry->is_tile(
f.hash_ID);)
818 (CellIsTile, CellQualityProvenance),
821 (is_bad =
p.cell_info_arr->is_bad_general(
f.is_tile,
f.qp,
false);)
826 (CellHashID, CellIsTile, CellGain, CellEnergy),
829 (
noise = (
f.is_tile &&
p.opts->use_two_gaussian_noise ?
830 p.noise_arr->get_double_gaussian_noise(
f.hash_ID,
f.gain,
f.energy) :
831 p.noise_arr->get_noise(
f.hash_ID,
f.gain) );)
836 (CellIsBad, CellIsTile, CellQualityProvenance),
838 (
bool LArQ_cell_check;),
839 (LArQ_cell_check = !
f.is_bad && !
f.is_tile && ((
f.qp.provenance() & 0x2800U) == 0x2000U);)
844 (CellIsBad, CellIsTile, CellQualityProvenance),
846 (
bool TileQ_cell_check;),
847 (TileQ_cell_check = !
f.is_bad &&
f.is_tile &&
f.qp.tile_qual1() != 0xFFU &&
f.qp.tile_qual2() != 0xFFU;)
852 (CellTimeMomentsCheck,
853 (CellIsTile, CellQualityProvenance, CellSampling),
855 (
bool time_moments_check;),
856 (time_moments_check = ( (
f.is_tile && (
f.qp.provenance() & 0x8080U)) ||
857 (!
f.is_tile && (
f.qp.provenance() & 0x2000U)) ) &&
858 f.sampling != CaloSampling::PreSamplerB &&
f.sampling != CaloSampling::PreSamplerE; )
871 (
float weighted_energy;),
872 (weighted_energy =
f.moments_energy *
f.weight;)
876 (SquareWeightedEnergy,
880 (square_w_E =
f.weighted_energy *
f.weighted_energy;)
884 (WeightedEnergyOverVolume,
888 (w_E_over_V = (
f.volume > 0.f ?
f.weighted_energy /
f.volume : 1.f);)
892 (WeightedCellPositionNormalization,
894 (CellX, CellY, CellZ),
898 (
const float r_dir_base = rnorm3df(
f.x,
f.y,
f.z);
899 r_dir = isinf(r_dir_base) ? 0.f : r_dir_base;
900 w_E_r_dir =
f.weighted_energy *
f.r_dir;
905 (WeightedEnergyOrNegative,
908 (
float weighted_energy_or_negative;),
909 (weighted_energy_or_negative = (
p.opts->use_abs_energy ? fabsf(
f.energy) :
f.
energy) *
f.
weight;)
913 (SquareWeightedEnergyOrNegative,
914 (WeightedEnergyOrNegative),
916 (
float square_w_E_or_neg;),
917 (square_w_E_or_neg =
f.weighted_energy_or_negative *
f.weighted_energy_or_negative;)
921 (WeightedNonMomentsEnergy,
925 (normE =
f.weight *
f.energy;)
929 (SquaredWeightedNonMomentsEnergy,
930 (WeightedNonMomentsEnergy),
932 (
float squared_normE;),
933 (squared_normE =
f.normE *
f.normE;)
958 (CenterX, CenterY, CenterZ),
959 (CellX, CellY, CellZ),
961 (
dx =
f.x -
f.center_x;
962 dy =
f.y -
f.center_y;
963 dz =
f.z -
f.center_z;
969 (Deltas, ShowerAxisX, ShowerAxisY, ShowerAxisZ),
977 (Deltas, ShowerAxisX, ShowerAxisY, ShowerAxisZ),
995 (
float rev_sum_energies;),
996 (rev_sum_energies = 1.0f / (
f.sum_energies > 0.f ?
f.sum_energies : 1.f);)
1005 (EnergyDensityNormalization,
1006 (EnergyDensityNormalizationBase, EnergyDensityNormalizationCorr),
1008 (
float energy_density_norm;),
1009 (energy_density_norm =
f.energy_density_norm_base +
f.energy_density_norm_corr;)
1013 (ReverseEnergyDensityNormalization,
1014 (EnergyDensityNormalization),
1016 (
float rev_energy_density_norm;),
1017 (rev_energy_density_norm = 1.0f / (
f.energy_density_norm > 0.f ?
f.energy_density_norm : 1.f);)
1023 (ClusterCellWithMaxEnergy,
1024 (ClusterMaxCellEnergyAndCell),
1027 (max_E_cell = (
f.max_E_and_cell & 0x7FFFFFFFU) - 1;)
1031 (ClusterMaxCellEnergy,
1032 (ClusterMaxCellEnergyAndCell),
1035 (max_E = __uint_as_float(
f.max_E_and_cell >> 32U);)
1041 (ClusterCellWithSecondMaxEnergy,
1042 (ClusterSecondMaxCellEnergyAndCell),
1044 (
int second_max_E_cell;),
1045 (second_max_E_cell = (
f.second_max_E_and_cell & 0x7FFFFFFFU) - 1;)
1049 (ClusterSecondMaxCellEnergy,
1050 (ClusterSecondMaxCellEnergyAndCell),
1052 (
float second_max_E;),
1053 (second_max_E = __uint_as_float(
f.second_max_E_and_cell >> 32U);)
1060 (SumAbsEnergyNonMoments,
1061 (SumAbsEnergyNonMomentsBase, SumAbsEnergyNonMomentsCorr),
1063 (
float abs_energy_non_moments;),
1064 (abs_energy_non_moments =
f.abs_energy_non_moments_base +
f.abs_energy_non_moments_corr;)
1068 (ReverseSumAbsEnergyNonMoments,
1069 (SumAbsEnergyNonMoments),
1071 (
float rev_abs_energy_non_moments;),
1072 (rev_abs_energy_non_moments = 1.f / (
f.abs_energy_non_moments != 0.f ?
f.abs_energy_non_moments : 1.f);)
1082 (ReverseClusterEnergy,
1085 (
float rev_cluster_energy;),
1086 (rev_cluster_energy = (
f.cluster_energy != 0.f ? 1.f /
f.cluster_energy : 1.f);)
1096 (TimeNormalizationBase, TimeNormalizationCorr),
1099 (time_norm =
f.time_norm_base +
f.time_norm_corr;)
1111 (ClusterMaxAndSecondMaxCell,
1112 (ClusterMaxAndSecondMaxCellTogether),
1114 (
int max_cell, second_max_cell;),
1115 ( max_cell =
f.stored_max_and_second_max >> 32U;
1116 second_max_cell =
f.stored_max_and_second_max;
1123 (ReverseAbsoluteEnergyPerSample,
1124 (AbsoluteEnergyPerSample),
1126 (
float rev_sampling_normalization;),
1127 (rev_sampling_normalization = 1.0f / (
f.sampling_normalization != 0.f ?
f.sampling_normalization : 1.0f);)
1141 (SeedCellGeometryPhi,
1144 (
float seed_cell_phi_coordinate;),
1146 p.cell_info_arr->get_hash_ID(
f.seed_cell,
p.assume_complete_cells) : -1);
1147 seed_cell_phi_coordinate = (seed_cell_hash_ID >= 0 ?
p.geometry->phi[seed_cell_hash_ID] : -999);
1161 using BeforeLoading = TypeList<>;
1166 __device__
static void before(Parameters p,
1173 using CellLoading = TypeList<>;
1176 using ClusterLoading = TypeList<>;
1182 __device__
static void per_cell(Parameters p,
1189 using AfterLoading = TypeList<>;
1194 __device__
static void after(Parameters p,
1201#define CALORECGPU_CMC_MOMENT_CALC(NAME, BEFORELOAD, BEFOREEXEC, CELLLOAD, CLUSTERLOAD, CELLEXEC, AFTERLOAD, AFTEREXEC) \
1204 using BeforeLoading = TypeList<CALORECGPU_CMC_EXPAND BEFORELOAD>; \
1205 template <class T> __device__ static void before(Parameters p, \
1207 const int cluster) \
1208 { CALORECGPU_CMC_EXPAND BEFOREEXEC } \
1209 using CellLoading = TypeList<CALORECGPU_CMC_EXPAND CELLLOAD>; \
1210 using ClusterLoading = TypeList<CALORECGPU_CMC_EXPAND CLUSTERLOAD>; \
1211 template <class T> __device__ static void per_cell(Parameters p, \
1214 const int cluster) \
1215 { CALORECGPU_CMC_EXPAND CELLEXEC } \
1216 using AfterLoading = TypeList<CALORECGPU_CMC_EXPAND AFTERLOAD>; \
1217 template <class T> __device__ static void after(Parameters p, \
1219 const int cluster) \
1220 { CALORECGPU_CMC_EXPAND AFTEREXEC } \
1233 (ClusterEnergyEtaAndEt,
1235 (p.clusters_arr->clusterEnergy[cluster] = 0.f;
1236 CMCTemporaries::clusterEnergyAux(p.clusters_arr, cluster) = 0.f;
1237 p.clusters_arr->clusterEta[cluster] = 0.f;
1238 CMCTemporaries::clusterEtaAux(p.clusters_arr, cluster) = 0.f;),
1239 (ToLoad::CellEnergy, ToLoad::CellAbsEnergy, ToLoad::CellEta),
1241 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->clusterEnergy[cluster]),
1242 CMCTemporaries::clusterEnergyAux_ptr(p.clusters_arr, cluster),
1244 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->clusterEta[cluster]),
1245 CMCTemporaries::clusterEtaAux_ptr(p.clusters_arr, cluster),
1248 (ToLoad::SumAbsEnergyNonMoments, ToLoad::ReverseSumAbsEnergyNonMoments),
1249 (
const float temp_E_main = p.clusters_arr->clusterEnergy[cluster];
1250 const float temp_E_corr = CMCTemporaries::clusterEnergyAux(p.clusters_arr, cluster);
1252 const float temp_eta_main = p.clusters_arr->clusterEta[cluster];
1253 const float temp_eta_corr = CMCTemporaries::clusterEtaAux(p.clusters_arr, cluster);
1255 const float temp_exp_1 = expf(temp_eta_main *
data.rev_abs_energy_non_moments);
1256 const float temp_exp_2 = expf(temp_eta_corr *
data.rev_abs_energy_non_moments);
1258 const float temp_exp_mult = temp_exp_1 * temp_exp_2;
1261 temp_E_corr, temp_exp_mult);
1262 const float temp_inv_denominator = 1.f/fmaf(temp_exp_mult, temp_exp_mult, 1.f);
1264 const float temp_ET = temp_numerator * temp_inv_denominator;
1266 const float temp_energy = temp_E_main + temp_E_corr;
1268 p.clusters_arr->clusterEnergy[cluster] = temp_energy;
1270 const float temp_eta = (temp_eta_main + temp_eta_corr) *
data.rev_abs_energy_non_moments;
1272 p.clusters_arr->clusterEta[cluster] = temp_eta * (
data.abs_energy_non_moments != 0.f);
1276 p.clusters_arr->clusterEt[cluster] = temp_ET * (
data.abs_energy_non_moments != 0.f);
1283 (p.clusters_arr->clusterPhi[cluster] = 0.f;
1284 CMCTemporaries::clusterPhiAux(p.clusters_arr, cluster) = 0.f;
1286 (ToLoad::CellAbsEnergy, ToLoad::CellPhi),
1287 (ToLoad::SeedCellPhi),
1289 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->clusterPhi[cluster]),
1290 CMCTemporaries::clusterPhiAux_ptr(p.clusters_arr, cluster),
1291 phi_real *
data.abs_energy *
data.weight);
1293 (ToLoad::SumAbsEnergyNonMoments, ToLoad::ReverseSumAbsEnergyNonMoments),
1294 (
const float old_phi = p.clusters_arr->clusterPhi[cluster] + CMCTemporaries::clusterPhiAux(p.clusters_arr, cluster);
1296 data.rev_abs_energy_non_moments, 0.f) *
1297 (
data.abs_energy_non_moments != 0.f);
1308 (p.clusters_arr->moments.avgLArQ[cluster] = 0.f;
1309 CMCTemporaries::avgLArQAux(p.clusters_arr, cluster) = 0.f;
1311 (ToLoad::CellQualityProvenance, ToLoad::CellLArQCheck),
1312 (ToLoad::SquareWeightedEnergyOrNegative),
1313 (
if (
data.LArQ_cell_check)
1315 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.avgLArQ[cluster]),
1316 CMCTemporaries::avgLArQAux_ptr(p.clusters_arr, cluster),
1317 data.square_w_E_or_neg * data.qp.quality());
1321 (
const float norm_LAr = CMCTemporaries::averageLArQNorm(p.clusters_arr, cluster) + CMCTemporaries::averageLArQNormAux(p.clusters_arr, cluster);
1322 const float rev_norm_LAr = 1.0f / (norm_LAr > 0.f ? norm_LAr : 1.0f);
1323 const float new_LArQ = p.clusters_arr->moments.avgLArQ[cluster] + CMCTemporaries::avgLArQAux(p.clusters_arr, cluster);
1324 p.clusters_arr->moments.avgLArQ[cluster] = new_LArQ * rev_norm_LAr;
1331 (p.clusters_arr->moments.avgTileQ[cluster] = 0.f;
1332 CMCTemporaries::avgTileQAux(p.clusters_arr, cluster) = 0.f;
1334 (ToLoad::CellQualityProvenance, ToLoad::CellTileQCheck),
1335 (ToLoad::SquareWeightedEnergyOrNegative),
1336 (
if (
data.TileQ_cell_check)
1338 const float max_quality = max((unsigned int) data.qp.tile_qual1(), (unsigned int) data.qp.tile_qual2());
1340 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.avgTileQ[cluster]),
1341 CMCTemporaries::avgTileQAux_ptr(p.clusters_arr, cluster),
1342 data.square_w_E_or_neg * max_quality);
1346 (
const float norm_Tile = CMCTemporaries::averageTileQNorm(p.clusters_arr, cluster) + CMCTemporaries::averageTileQNormAux(p.clusters_arr, cluster);
1347 const float rev_norm_Tile = 1.0f / (norm_Tile > 0.f ? norm_Tile : 1.0f);
1348 const float new_TileQ = p.clusters_arr->moments.avgTileQ[cluster] + CMCTemporaries::avgTileQAux(p.clusters_arr, cluster);
1349 p.clusters_arr->moments.avgTileQ[cluster] = new_TileQ * rev_norm_Tile;
1356 (p.clusters_arr->moments.badCellsCorrE[cluster] = 0.f;
1357 CMCTemporaries::badCellsCorrEAux(p.clusters_arr, cluster) = 0.f;
1359 (ToLoad::CellIsBad, ToLoad::CellEnergy),
1360 (ToLoad::WeightedEnergyOrNegative),
1363 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.badCellsCorrE[cluster]),
1364 CMCTemporaries::badCellsCorrEAux_ptr(p.clusters_arr, cluster),
1365 data.weighted_energy_or_negative);
1369 (p.clusters_arr->moments.badCellsCorrE[cluster] += CMCTemporaries::badCellsCorrEAux(p.clusters_arr, cluster);
1376 (p.clusters_arr->moments.badLArQFrac[cluster] = 0.f;
1377 CMCTemporaries::badLArQFracAux(p.clusters_arr, cluster) = 0.f;
1379 (ToLoad::CellQualityProvenance, ToLoad::CellLArQCheck),
1380 (ToLoad::WeightedEnergyOrNegative),
1381 (
if (
data.LArQ_cell_check &&
data.qp.quality() > p.opts->min_LAr_quality)
1383 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.badLArQFrac[cluster]),
1384 CMCTemporaries::badLArQFracAux_ptr(p.clusters_arr, cluster),
1385 data.weighted_energy_or_negative);
1388 (ToLoad::ReverseClusterEnergy),
1389 (
const float new_badLArQFrac = p.clusters_arr->moments.badLArQFrac[cluster] + CMCTemporaries::badLArQFracAux(p.clusters_arr, cluster);
1390 p.clusters_arr->moments.badLArQFrac[cluster] = new_badLArQFrac *
data.rev_cluster_energy;
1401 (ToLoad::MaxSignificanceAndSampling),
1402 (
const float max_sig = __uint_as_float(
data.max_sig_and_samp >> 32);
1403 p.clusters_arr->moments.cellSignificance[cluster] = max_sig * (
data.max_sig_and_samp & 1 ? 1.f : -1.f);
1414 (ToLoad::MaxSignificanceAndSampling),
1415 (
const int max_samp = (
data.max_sig_and_samp & 0xFFFFFFFEU) >> 1;
1416 p.clusters_arr->moments.cellSigSampling[cluster] = max_samp;
1424 (p.clusters_arr->moments.centerX[cluster] = 0.f;
1425 CMCTemporaries::centerXAux(p.clusters_arr, cluster) = 0.f;
1428 (ToLoad::WeightedEnergy),
1429 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.centerX[cluster]),
1430 CMCTemporaries::centerXAux_ptr(p.clusters_arr, cluster),
1433 (ToLoad::ReverseSumEnergies),
1434 (
const float new_value = p.clusters_arr->moments.centerX[cluster] + CMCTemporaries::centerXAux(p.clusters_arr, cluster);
1435 p.clusters_arr->moments.centerX[cluster] = new_value *
data.rev_sum_energies;
1441 (p.clusters_arr->moments.centerY[cluster] = 0.f;
1442 CMCTemporaries::centerYAux(p.clusters_arr, cluster) = 0.f;
1445 (ToLoad::WeightedEnergy),
1446 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.centerY[cluster]),
1447 CMCTemporaries::centerYAux_ptr(p.clusters_arr, cluster),
1450 (ToLoad::ReverseSumEnergies),
1451 (
const float new_value = p.clusters_arr->moments.centerY[cluster] + CMCTemporaries::centerYAux(p.clusters_arr, cluster);
1452 p.clusters_arr->moments.centerY[cluster] = new_value *
data.rev_sum_energies;
1458 (p.clusters_arr->moments.centerZ[cluster] = 0.f;
1459 CMCTemporaries::centerZAux(p.clusters_arr, cluster) = 0.f;
1462 (ToLoad::WeightedEnergy),
1463 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.centerZ[cluster]),
1464 CMCTemporaries::centerZAux_ptr(p.clusters_arr, cluster),
1467 (ToLoad::ReverseSumEnergies),
1468 (
const float new_value = p.clusters_arr->moments.centerZ[cluster] + CMCTemporaries::centerZAux(p.clusters_arr, cluster);
1469 p.clusters_arr->moments.centerZ[cluster] = new_value *
data.rev_sum_energies;
1476 (p.clusters_arr->moments.engFracEM[cluster] = 0.f;
1477 CMCTemporaries::engFracEMAux(p.clusters_arr, cluster) = 0.f;),
1478 (ToLoad::CellSampling),
1479 (ToLoad::WeightedEnergy),
1480 (
if (
data.sampling == CaloSampling::EMB1 ||
1481 data.sampling == CaloSampling::EMB2 ||
1482 data.sampling == CaloSampling::EMB3 ||
1483 data.sampling == CaloSampling::EME1 ||
1484 data.sampling == CaloSampling::EME2 ||
1485 data.sampling == CaloSampling::EME3 ||
1486 data.sampling == CaloSampling::FCAL0 )
1488 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.engFracEM[cluster]),
1489 CMCTemporaries::engFracEMAux_ptr(p.clusters_arr, cluster),
1490 data.weighted_energy);
1493 (ToLoad::ReverseSumEnergies),
1494 (
const float new_engFracEM = p.clusters_arr->moments.engFracEM[cluster] + CMCTemporaries::engFracEMAux(p.clusters_arr, cluster);
1495 p.clusters_arr->moments.engFracEM[cluster] = new_engFracEM *
data.rev_sum_energies;
1502 (p.clusters_arr->moments.engBadCells[cluster] = 0.f;
1503 CMCTemporaries::engBadCellsAux(p.clusters_arr, cluster) = 0.f;),
1504 (ToLoad::CellIsBad),
1505 (ToLoad::WeightedEnergyOrNegative),
1508 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.engBadCells[cluster]),
1509 CMCTemporaries::engBadCellsAux_ptr(p.clusters_arr, cluster),
1510 data.weighted_energy_or_negative);
1514 (p.clusters_arr->moments.engBadCells[cluster] += CMCTemporaries::engBadCellsAux(p.clusters_arr, cluster);)
1524 (ToLoad::ReverseSumEnergies, ToLoad::ClusterMaxCellEnergy),
1525 (p.clusters_arr->moments.engFracMax[cluster] =
data.max_E *
data.rev_sum_energies;)
1529 (EngPosAndEngFracCore,
1531 (
const float sum_energies = p.clusters_arr->moments.engPos[cluster] + CMCTemporaries::engPosAux(p.clusters_arr, cluster);
1532 p.clusters_arr->moments.engPos[cluster] = sum_energies;
1533 p.clusters_arr->moments.engFracCore[cluster] *= (sum_energies != 0.f ? 1.0f / sum_energies : 0.f);
1545 (p.clusters_arr->moments.firstEngDens[cluster] = 0.f;
1546 CMCTemporaries::firstEngDensAux(p.clusters_arr, cluster) = 0.f;),
1547 (ToLoad::CellVolume),
1548 (ToLoad::WeightedEnergy, ToLoad::WeightedEnergyOverVolume),
1551 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.firstEngDens[cluster]),
1552 CMCTemporaries::firstEngDensAux_ptr(p.clusters_arr, cluster),
1553 data.weighted_energy * data.w_E_over_V);
1556 (ToLoad::ReverseEnergyDensityNormalization),
1557 (
const float new_firstEngDens = p.clusters_arr->moments.firstEngDens[cluster] + CMCTemporaries::firstEngDensAux(p.clusters_arr, cluster);
1558 p.clusters_arr->moments.firstEngDens[cluster] = new_firstEngDens *
data.rev_energy_density_norm;
1565 (p.clusters_arr->moments.firstEta[cluster] = 0.f;
1566 CMCTemporaries::firstEtaAux(p.clusters_arr, cluster) = 0.f;
1569 (ToLoad::WeightedEnergy),
1570 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.firstEta[cluster]),
1571 CMCTemporaries::firstEtaAux_ptr(p.clusters_arr, cluster),
1574 (ToLoad::ReverseSumEnergies),
1575 (
const float new_firstEta = p.clusters_arr->moments.firstEta[cluster] + CMCTemporaries::firstEtaAux(p.clusters_arr, cluster);
1576 p.clusters_arr->moments.firstEta[cluster] = new_firstEta *
data.rev_sum_energies;
1583 (p.clusters_arr->moments.firstPhi[cluster] = 0.f;
1584 CMCTemporaries::firstPhiAux(p.clusters_arr, cluster) = 0.f;
1587 (ToLoad::SeedCellPhi, ToLoad::WeightedEnergy),
1589 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.firstPhi[cluster]),
1590 CMCTemporaries::firstPhiAux_ptr(p.clusters_arr, cluster),
1591 data.weighted_energy * phi_real);
1593 (ToLoad::ReverseSumEnergies),
1594 (
const float new_firstPhi = p.clusters_arr->moments.firstPhi[cluster] + CMCTemporaries::firstPhiAux(p.clusters_arr, cluster);
1602 (p.clusters_arr->moments.lateral[cluster] = 0.f;
1603 CMCTemporaries::lateralAux(p.clusters_arr, cluster) = 0.f;
1606 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::R),
1607 (
if (cell !=
data.max_cell && cell !=
data.second_max_cell)
1609 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.lateral[cluster]),
1610 CMCTemporaries::lateralAux_ptr(p.clusters_arr, cluster),
1611 data.weighted_energy * data.r * data.r);
1615 (
const float new_lateral = p.clusters_arr->moments.lateral[cluster] + CMCTemporaries::lateralAux(p.clusters_arr, cluster);
1616 const float new_norm = CMCTemporaries::lateralNormalization(p.clusters_arr, cluster) +
1617 CMCTemporaries::lateralNormalizationAux(p.clusters_arr, cluster);
1618 p.clusters_arr->moments.lateral[cluster] = new_lateral / (new_norm != 0.f ? new_norm : 1.f);
1625 (p.clusters_arr->moments.longitudinal[cluster] = 0.f;
1626 CMCTemporaries::longitudinalAux(p.clusters_arr, cluster) = 0.f;
1629 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::Lambda),
1630 (
if (cell !=
data.max_cell && cell !=
data.second_max_cell)
1632 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.longitudinal[cluster]),
1633 CMCTemporaries::longitudinalAux_ptr(p.clusters_arr, cluster),
1634 data.weighted_energy * data.lambda * data.lambda);
1638 (
const float new_longitudinal = p.clusters_arr->moments.longitudinal[cluster] + CMCTemporaries::longitudinalAux(p.clusters_arr, cluster);
1639 const float new_norm = CMCTemporaries::longitudinalNormalization(p.clusters_arr, cluster) +
1640 CMCTemporaries::longitudinalNormalizationAux(p.clusters_arr, cluster);
1641 p.clusters_arr->moments.longitudinal[cluster] = new_longitudinal / (new_norm != 0.f ? new_norm : 1.f);
1652 (ToLoad::SumEnergies),
1653 (
const float mx = CMCTemporaries::mX(p.clusters_arr, cluster) + CMCTemporaries::mXAux(p.clusters_arr, cluster);
1654 const float my = CMCTemporaries::mY(p.clusters_arr, cluster) + CMCTemporaries::mYAux(p.clusters_arr, cluster);
1655 const float mz = CMCTemporaries::mZ(p.clusters_arr, cluster) + CMCTemporaries::mZAux(p.clusters_arr, cluster);
1657 const float v_1 = mx * mx;
1658 const float v_2 = my * my;
1659 const float v_3 = mz * mz;
1660 const float v_4 =
data.sum_energies *
data.sum_energies;
1662 const float c_1 = fmaf(mx, mx, -v_1);
1663 const float c_2 = fmaf(my, my, -v_2);
1664 const float c_3 = fmaf(mz, mz, -v_3);
1665 const float c_4 = fmaf(
data.sum_energies,
data.sum_energies, -v_4);
1669 p.clusters_arr->moments.mass[cluster] = sqrtf(fabsf(sq_mass)) * ((sq_mass > 0.f) - (sq_mass < 0.f));
1676 (p.clusters_arr->moments.nBadCells[cluster] = 0;),
1677 (ToLoad::CellIsBad),
1681 atomicAdd(&(p.clusters_arr->moments.nBadCells[cluster]), 1);
1690 (p.clusters_arr->moments.nBadCellsCorr[cluster] = 0;),
1691 (ToLoad::CellIsBad, ToLoad::CellEnergy),
1695 atomicAdd(&(p.clusters_arr->moments.nBadCellsCorr[cluster]), 1);
1703 (NExtraCellSampling,
1705 (p.clusters_arr->moments.nExtraCellSampling[cluster] = 0;),
1706 (ToLoad::CellSampling, ToLoad::CellEta),
1708 (
if (
data.sampling == CaloSampling::EME2 && fabsf(
data.eta) > p.opts->eta_inner_wheel)
1710 atomicAdd(&(p.clusters_arr->moments.nExtraCellSampling[cluster]), 1);
1720 (p.clusters_arr->moments.PTD[cluster] = 0.f;
1721 CMCTemporaries::PTDAux(p.clusters_arr, cluster) = 0.f;
1724 (ToLoad::SquareWeightedEnergy),
1725 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.PTD[cluster]),
1726 CMCTemporaries::PTDAux_ptr(p.clusters_arr, cluster),
1741 (ToLoad::SumEnergies),
1742 (
const float new_PTD = p.clusters_arr->moments.PTD[cluster] + CMCTemporaries::PTDAux(p.clusters_arr, cluster);
1743 p.clusters_arr->moments.PTD[cluster] = 1.0f / ((
data.sum_energies > 0.f ?
data.sum_energies : 1.f) * rsqrtf(new_PTD));
1751 (p.clusters_arr->moments.secondEngDens[cluster] = 0.f;
1752 CMCTemporaries::secondEngDensAux(p.clusters_arr, cluster) = 0.f;
1754 (ToLoad::CellVolume),
1755 (ToLoad::WeightedEnergy, ToLoad::WeightedEnergyOverVolume),
1758 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.secondEngDens[cluster]),
1759 CMCTemporaries::secondEngDensAux_ptr(p.clusters_arr, cluster),
1760 data.weighted_energy * data.w_E_over_V * data.w_E_over_V);
1763 (ToLoad::ReverseEnergyDensityNormalization),
1764 (
const float new_secondEngDens = p.clusters_arr->moments.secondEngDens[cluster] + CMCTemporaries::secondEngDensAux(p.clusters_arr, cluster);
1765 p.clusters_arr->moments.secondEngDens[cluster] = new_secondEngDens *
data.rev_energy_density_norm;)
1771 (p.clusters_arr->moments.secondLambda[cluster] = 0.f;
1772 CMCTemporaries::secondLambdaAux(p.clusters_arr, cluster) = 0.f;
1775 (ToLoad::WeightedEnergy, ToLoad::Lambda),
1776 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.secondLambda[cluster]),
1777 CMCTemporaries::secondLambdaAux_ptr(p.clusters_arr, cluster),
1780 (ToLoad::ReverseSumEnergies),
1781 (
const float new_secondLambda = p.clusters_arr->moments.secondLambda[cluster] + CMCTemporaries::secondLambdaAux(p.clusters_arr, cluster);
1782 p.clusters_arr->moments.secondLambda[cluster] = new_secondLambda *
data.rev_sum_energies;
1789 (p.clusters_arr->moments.secondR[cluster] = 0.f;
1790 CMCTemporaries::secondRAux(p.clusters_arr, cluster) = 0.f;
1793 (ToLoad::WeightedEnergy, ToLoad::R),
1794 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.secondR[cluster]),
1795 CMCTemporaries::secondRAux_ptr(p.clusters_arr, cluster),
1798 (ToLoad::ReverseSumEnergies),
1799 (
const float new_secondR = p.clusters_arr->moments.secondR[cluster] + CMCTemporaries::secondRAux(p.clusters_arr, cluster);
1800 p.clusters_arr->moments.secondR[cluster] = new_secondR *
data.rev_sum_energies;
1808 (p.clusters_arr->moments.significance[cluster] = 0.f;
1809 CMCTemporaries::significanceAux(p.clusters_arr, cluster) = 0.f;
1811 (ToLoad::CellNoise),
1813 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.significance[cluster]),
1814 CMCTemporaries::significanceAux_ptr(p.clusters_arr, cluster),
1817 (ToLoad::ClusterEnergy),
1818 (
const float prev_v = p.clusters_arr->moments.significance[cluster] + CMCTemporaries::significanceAux(p.clusters_arr, cluster);
1819 p.clusters_arr->moments.significance[cluster] = (prev_v > 0.f ?
data.cluster_energy * rsqrtf(prev_v) : 0.f);)
1825 (p.clusters_arr->moments.time[cluster] = 0.f;
1826 CMCTemporaries::timeAux(p.clusters_arr, cluster) = 0.f;
1827 p.clusters_arr->moments.secondTime[cluster] = 0.f;
1828 CMCTemporaries::secondTimeAux(p.clusters_arr, cluster) = 0.f;
1830 (ToLoad::CellTime, ToLoad::CellTimeMomentsCheck),
1831 (ToLoad::SquaredWeightedNonMomentsEnergy),
1832 (
if (
data.time_moments_check)
1834 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.time[cluster]),
1835 CMCTemporaries::timeAux_ptr(p.clusters_arr, cluster),
1836 data.time * data.squared_normE);
1837 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(p.clusters_arr->moments.secondTime[cluster]),
1838 CMCTemporaries::secondTimeAux_ptr(p.clusters_arr, cluster),
1839 data.time * data.time * data.squared_normE);
1841 (ToLoad::TimeNormalization),
1842 (
if (
data.time_norm != 0.f)
1844 const float real_norm = 1.0f / data.time_norm;
1845 const float time = (p.clusters_arr->moments.time[cluster] + CMCTemporaries::timeAux(p.clusters_arr, cluster))
1847 const float second_sum = p.clusters_arr->moments.secondTime[cluster] + CMCTemporaries::secondTimeAux(p.clusters_arr, cluster);
1848 p.clusters_arr->moments.time[cluster] = time;
1849 p.clusters_arr->moments.secondTime[cluster] = CaloRecGPU::Helpers::product_sum_cornea_harrison_tang(second_sum, real_norm, -time, time);
1853 p.clusters_arr->moments.time[cluster] = 0.f;
1854 p.clusters_arr->moments.secondTime[cluster] = 0.f;
1863 template <
int num,
int delta = 0>
1865 (EnergyPerSampleSeveral,
1866 (ToLoad::SamplingFromMomentIndex),
1867 (
const int offset =
data.sampling * num + delta;
1868 for (
int i = 0; i < num; ++i)
1870 p.clusters_arr->moments.energyPerSample[offset + i]
1872 CMCTemporaries::energyPerSampleAux(p.clusters_arr, offset + i, cluster) = 0.f;
1875 (ToLoad::CellEnergy, ToLoad::CellSampling),
1877 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(
p.clusters_arr->moments.energyPerSample[
data.sampling][cluster]),
1878 CMCTemporaries::energyPerSampleAux_ptr(
p.clusters_arr,
data.sampling, cluster),
1881 (ToLoad::SamplingFromMomentIndex),
1883 for (
int i = 0;
i <
num; ++
i)
1885 p.clusters_arr->moments.energyPerSample[
offset +
i]
1886 [cluster] += CMCTemporaries::energyPerSampleAux(
p.clusters_arr, offset + i, cluster);
1891 using EnergyPerSample = EnergyPerSampleSeveral<1, 0>;
1894 template <
int num,
int delta = 0>
1896 (EtaPerSampleSeveral,
1897 (ToLoad::SamplingFromMomentIndex),
1898 (
const int offset =
data.sampling * num + delta;
1899 for (
int i = 0;
i <
num; ++
i)
1901 p.clusters_arr->moments.etaPerSample[
offset +
i]
1903 CMCTemporaries::etaPerSampleAux(
p.clusters_arr, offset + i, cluster) = 0.f;
1906 (ToLoad::CellSampling, ToLoad::CellAbsEnergy, ToLoad::CellEta),
1908 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(
p.clusters_arr->moments.etaPerSample[
data.sampling][cluster]),
1909 CMCTemporaries::etaPerSampleAux_ptr(
p.clusters_arr,
data.sampling, cluster),
1912 (ToLoad::SamplingFromMomentIndex),
1914 for (
int i = 0;
i <
num; ++
i)
1917 const float normalization = CMCTemporaries::absoluteEnergyPerSample(
p.clusters_arr, idx, cluster) +
1918 CMCTemporaries::absoluteEnergyPerSampleAux(
p.clusters_arr, idx, cluster);
1919 const float rev_normalization = 1.0f / (normalization != 0.f ? normalization : 1.0f);
1920 const float new_eta =
p.clusters_arr->moments.etaPerSample[
idx][cluster] + CMCTemporaries::etaPerSampleAux(
p.clusters_arr, idx, cluster);
1921 p.clusters_arr->moments.etaPerSample[
idx][cluster] = new_eta * rev_normalization;
1926 using EtaPerSample = EtaPerSampleSeveral<1, 0>;
1928 template <
int num,
int delta = 0>
1930 (NCellSamplingSeveral,
1931 (ToLoad::SamplingFromMomentIndex),
1932 (
const int offset =
data.sampling * num + delta;
1933 for (
int i = 0;
i <
num; ++
i)
1935 p.clusters_arr->moments.nCellSampling[
offset +
i]
1939 (ToLoad::CellSampling),
1941 (atomicAdd(&(
p.clusters_arr->moments.nCellSampling[
data.sampling][cluster]), 1);),
1946 using NCellSampling = NCellSamplingSeveral<1, 0>;
1948 template <
int num,
int delta = 0>
1950 (PhiPerSampleSeveral,
1951 (ToLoad::SamplingFromMomentIndex),
1952 (
const int offset =
data.sampling * num + delta;
1953 for (
int i = 0;
i <
num; ++
i)
1955 p.clusters_arr->moments.phiPerSample[
offset +
i]
1957 CMCTemporaries::phiPerSampleAux(
p.clusters_arr, offset + i, cluster) = 0.f;
1960 (ToLoad::CellSampling, ToLoad::CellAbsEnergy, ToLoad::CellPhi),
1961 (ToLoad::SeedCellPhi),
1963 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(&(
p.clusters_arr->moments.phiPerSample[
data.sampling][cluster]),
1964 CMCTemporaries::phiPerSampleAux_ptr(
p.clusters_arr,
data.sampling, cluster),
1965 data.abs_energy *
data.weight * phi_real);
1967 (ToLoad::SamplingFromMomentIndex),
1969 for (
int i = 0;
i <
num; ++
i)
1972 const float normalization = CMCTemporaries::absoluteEnergyPerSample(
p.clusters_arr, idx, cluster) +
1973 CMCTemporaries::absoluteEnergyPerSampleAux(
p.clusters_arr, idx, cluster);
1974 const float rev_normalization = 1.0f / (normalization != 0.f ? normalization : 1.0f);
1975 const float new_phi =
p.clusters_arr->moments.phiPerSample[
idx][cluster] + CMCTemporaries::phiPerSampleAux(
p.clusters_arr, idx, cluster);
1981 using PhiPerSample = PhiPerSampleSeveral<1, 0>;
1989 (AverageLArQNormalization,
1991 (CMCTemporaries::averageLArQNorm(
p.clusters_arr, cluster) = 0.f;
1992 CMCTemporaries::averageLArQNormAux(
p.clusters_arr, cluster) = 0.f;),
1993 (ToLoad::CellLArQCheck),
1994 (ToLoad::SquareWeightedEnergyOrNegative),
1995 (
if (
data.LArQ_cell_check)
1997 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::averageLArQNorm_ptr(p.clusters_arr, cluster),
1998 CMCTemporaries::averageLArQNormAux_ptr(p.clusters_arr, cluster),
1999 data.square_w_E_or_neg);
2007 (AverageTileQNormalization,
2009 (CMCTemporaries::averageTileQNorm(
p.clusters_arr, cluster) = 0.f;
2010 CMCTemporaries::averageTileQNormAux(
p.clusters_arr, cluster) = 0.f;),
2011 (ToLoad::CellQualityProvenance, ToLoad::CellTileQCheck),
2012 (ToLoad::SquareWeightedEnergyOrNegative),
2013 (
if (
data.TileQ_cell_check)
2015 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::averageTileQNorm_ptr(p.clusters_arr, cluster),
2016 CMCTemporaries::averageTileQNormAux_ptr(p.clusters_arr, cluster),
2017 data.square_w_E_or_neg);
2025 (EnergyDensityNormalization,
2027 (CMCTemporaries::energyDensityNormalization(
p.clusters_arr, cluster) = 0.f;
2028 CMCTemporaries::energyDensityNormalizationAux(
p.clusters_arr, cluster) = 0.f;
2030 (ToLoad::CellVolume),
2031 (ToLoad::WeightedEnergy),
2034 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::energyDensityNormalization_ptr(p.clusters_arr, cluster),
2035 CMCTemporaries::energyDensityNormalizationAux_ptr(p.clusters_arr, cluster),
2036 data.weighted_energy);
2044 (FirstAndSecondMaxEnergyAndCell,
2046 (CMCTemporaries::maxCellEnergyAndCell(
p.clusters_arr, cluster) = 0ULL;
2047 CMCTemporaries::secondMaxCellEnergyAndCell(
p.clusters_arr, cluster) = 0ULL;
2050 (ToLoad::WeightedEnergy),
2051 (
if (
data.weighted_energy > 0)
2053 unsigned long long int energy_and_cell = __float_as_uint(data.weighted_energy);
2055 energy_and_cell = (energy_and_cell << 32) | (cell + 1);
2056 const unsigned long long int old_enc = atomicMax(&(CMCTemporaries::maxCellEnergyAndCell(p.clusters_arr, cluster)), energy_and_cell);
2057 atomicMax(&(CMCTemporaries::secondMaxCellEnergyAndCell(p.clusters_arr, cluster)), min(old_enc, energy_and_cell));
2066 (LateralNormalization,
2068 (CMCTemporaries::lateralNormalization(
p.clusters_arr, cluster) = 0.f;
2069 CMCTemporaries::lateralNormalizationAux(
p.clusters_arr, cluster) = 0.f;
2072 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::R),
2073 (
const float real_r = (cell !=
data.max_cell && cell !=
data.second_max_cell) ?
2076 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::lateralNormalization_ptr(
p.clusters_arr, cluster),
2077 CMCTemporaries::lateralNormalizationAux_ptr(
p.clusters_arr, cluster),
2078 data.weighted_energy * real_r * real_r);
2085 (LongitudinalNormalization,
2087 (CMCTemporaries::longitudinalNormalization(
p.clusters_arr, cluster) = 0.f;
2088 CMCTemporaries::longitudinalNormalizationAux(
p.clusters_arr, cluster) = 0.f;
2091 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::Lambda),
2092 (
const float real_lambda = (cell !=
data.max_cell && cell !=
data.second_max_cell) ?
2093 data.lambda :
max(
data.lambda,
p.opts->min_l_longitudinal);
2095 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::longitudinalNormalization_ptr(
p.clusters_arr, cluster),
2096 CMCTemporaries::longitudinalNormalizationAux_ptr(
p.clusters_arr, cluster),
2097 data.weighted_energy * real_lambda * real_lambda);
2106 (CMCTemporaries::matrix00(
p.clusters_arr, cluster) = 0.f;
2107 CMCTemporaries::matrix00Aux(
p.clusters_arr, cluster) = 0.f;
2110 (ToLoad::CenterX, ToLoad::SquareWeightedEnergy),
2111 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix00_ptr(
p.clusters_arr, cluster),
2112 CMCTemporaries::matrix00Aux_ptr(
p.clusters_arr, cluster),
2121 (CMCTemporaries::matrix10(
p.clusters_arr, cluster) = 0.f;
2122 CMCTemporaries::matrix10Aux(
p.clusters_arr, cluster) = 0.f;
2124 (ToLoad::CellX, ToLoad::CellY),
2125 (ToLoad::CenterX, ToLoad::CenterY, ToLoad::SquareWeightedEnergy),
2126 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix10_ptr(
p.clusters_arr, cluster),
2127 CMCTemporaries::matrix10Aux_ptr(
p.clusters_arr, cluster),
2136 (CMCTemporaries::matrix20(
p.clusters_arr, cluster) = 0.f;
2137 CMCTemporaries::matrix20Aux(
p.clusters_arr, cluster) = 0.f;
2139 (ToLoad::CellX, ToLoad::CellZ),
2140 (ToLoad::CenterX, ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2141 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix20_ptr(
p.clusters_arr, cluster),
2142 CMCTemporaries::matrix20Aux_ptr(
p.clusters_arr, cluster),
2151 (CMCTemporaries::matrix11(
p.clusters_arr, cluster) = 0.f;
2152 CMCTemporaries::matrix11Aux(
p.clusters_arr, cluster) = 0.f;
2155 (ToLoad::CenterY, ToLoad::SquareWeightedEnergy),
2156 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix11_ptr(
p.clusters_arr, cluster),
2157 CMCTemporaries::matrix11Aux_ptr(
p.clusters_arr, cluster),
2166 (CMCTemporaries::matrix21(
p.clusters_arr, cluster) = 0.f;
2167 CMCTemporaries::matrix21Aux(
p.clusters_arr, cluster) = 0.f;
2169 (ToLoad::CellY, ToLoad::CellZ),
2170 (ToLoad::CenterY, ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2171 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix21_ptr(
p.clusters_arr, cluster),
2172 CMCTemporaries::matrix21Aux_ptr(
p.clusters_arr, cluster),
2181 (CMCTemporaries::matrix22(
p.clusters_arr, cluster) = 0.f;
2182 CMCTemporaries::matrix22Aux(
p.clusters_arr, cluster) = 0.f;
2185 (ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2186 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::matrix22_ptr(
p.clusters_arr, cluster),
2187 CMCTemporaries::matrix22Aux_ptr(
p.clusters_arr, cluster),
2195 (MaxAndSecondMaxCells,
2201 (ToLoad::ClusterCellWithMaxEnergy, ToLoad::ClusterCellWithSecondMaxEnergy),
2202 (
unsigned long long to_store =
data.max_E_cell;
2203 to_store = (to_store << 32u) |
static_cast<unsigned int>(
data.second_max_E_cell);
2204 CMCTemporaries::maxAndSecondMaxCells(
p.clusters_arr, cluster) = to_store;
2209 (MaxSignificanceAndSampling,
2211 (CMCTemporaries::maxSignificanceAndSampling(
p.clusters_arr, cluster) = 0ULL;),
2212 (ToLoad::CellSampling, ToLoad::CellNoise),
2213 (ToLoad::WeightedEnergyOrNegative),
2214 (
const float max_sig =
data.noise > 0.f ?
data.weighted_energy_or_negative /
data.noise : 0.f;
2215 unsigned long long int max_S_and_S = __float_as_uint(fabsf(max_sig));
2216 max_S_and_S = (max_S_and_S << 32) | (static_cast<unsigned long long int>(
data.sampling) << 1) | (max_sig > 0.f);
2217 atomicMax(&(CMCTemporaries::maxSignificanceAndSampling(
p.clusters_arr, cluster)), max_S_and_S);),
2225 (CMCTemporaries::mX(
p.clusters_arr, cluster) = 0.f;
2226 CMCTemporaries::mXAux(
p.clusters_arr, cluster) = 0.f;),
2228 (ToLoad::WeightedCellPositionNormalization),
2229 (
const float mx =
data.w_E_r_dir *
data.x;
2230 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::mX_ptr(
p.clusters_arr, cluster),
2231 CMCTemporaries::mXAux_ptr(
p.clusters_arr, cluster),
2240 (CMCTemporaries::mY(
p.clusters_arr, cluster) = 0.f;
2241 CMCTemporaries::mYAux(
p.clusters_arr, cluster) = 0.f;),
2243 (ToLoad::WeightedCellPositionNormalization),
2244 (
const float my =
data.w_E_r_dir *
data.y;
2245 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::mY_ptr(
p.clusters_arr, cluster),
2246 CMCTemporaries::mYAux_ptr(
p.clusters_arr, cluster),
2255 (CMCTemporaries::mZ(
p.clusters_arr, cluster) = 0.f;
2256 CMCTemporaries::mZAux(
p.clusters_arr, cluster) = 0.f;),
2258 (ToLoad::WeightedCellPositionNormalization),
2259 (
const float mz =
data.w_E_r_dir *
data.z;
2260 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::mZ_ptr(
p.clusters_arr, cluster),
2261 CMCTemporaries::mZAux_ptr(
p.clusters_arr, cluster),
2270 (NumPositiveEnergyCells,
2272 (CMCTemporaries::numPositiveEnergyCells(
p.clusters_arr, cluster) = 0;),
2274 (ToLoad::WeightedEnergyOrNegative),
2275 (
if (
data.weighted_energy_or_negative > 0)
2277 atomicAdd(&(CMCTemporaries::numPositiveEnergyCells(p.clusters_arr, cluster)), 1);
2286 (ToLoad::SeedCellGeometryPhi),
2287 (CMCTemporaries::seedCellPhi(
p.clusters_arr, cluster) =
data.seed_cell_phi_coordinate;),
2296 (SumAbsEnergyNonMoments,
2298 (CMCTemporaries::sumAbsEnergyNonMoments(
p.clusters_arr, cluster) = 0.f;
2299 CMCTemporaries::sumAbsEnergyNonMomentsAux(
p.clusters_arr, cluster) = 0.f;
2301 (ToLoad::CellAbsEnergy),
2303 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::sumAbsEnergyNonMoments_ptr(
p.clusters_arr, cluster),
2304 CMCTemporaries::sumAbsEnergyNonMomentsAux_ptr(
p.clusters_arr, cluster),
2314 (CMCTemporaries::sumSquareEnergies(
p.clusters_arr, cluster) = 0.f;
2315 CMCTemporaries::sumSquareEnergiesAux(
p.clusters_arr, cluster) = 0.f;
2318 (ToLoad::SquareWeightedEnergy),
2319 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::sumSquareEnergies_ptr(
p.clusters_arr, cluster),
2320 CMCTemporaries::sumSquareEnergiesAux_ptr(
p.clusters_arr, cluster),
2330 (CMCTemporaries::timeNormalization(
p.clusters_arr, cluster) = 0.f;
2331 CMCTemporaries::timeNormalizationAux(
p.clusters_arr, cluster) = 0.f;
2333 (ToLoad::CellTimeMomentsCheck),
2334 (ToLoad::SquaredWeightedNonMomentsEnergy),
2335 (
if (
data.time_moments_check)
2337 CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::timeNormalization_ptr(p.clusters_arr, cluster),
2338 CMCTemporaries::timeNormalizationAux_ptr(p.clusters_arr, cluster),
2339 data.squared_normE);
2351 template <
int num,
int delta = 0>
2353 (AbsoluteEnergyPerSampleSeveral,
2354 (ToLoad::SamplingFromMomentIndex),
2355 (
const int offset =
data.sampling * num + delta;
2356 for (
int i = 0;
i <
num; ++
i)
2358 CMCTemporaries::absoluteEnergyPerSample(
p.clusters_arr, offset + i, cluster)
2360 CMCTemporaries::absoluteEnergyPerSampleAux(
p.clusters_arr, offset + i, cluster) = 0.f;
2363 (ToLoad::CellSampling, ToLoad::CellAbsEnergy),
2365 (CaloRecGPU::Helpers::device_kahan_babushka_neumaier(CMCTemporaries::absoluteEnergyPerSample_ptr(
p.clusters_arr,
data.sampling, cluster),
2366 CMCTemporaries::absoluteEnergyPerSampleAux_ptr(
p.clusters_arr,
data.sampling, cluster),
2373 using AbsoluteEnergyPerSample = AbsoluteEnergyPerSampleSeveral<1, 0>;
2375 template <
int num,
int delta = 0>
2377 (MaxEnergyAndCellPerSampleSeveral,
2378 (ToLoad::SamplingFromMomentIndex),
2379 (
const int offset =
data.sampling * num + delta;
2380 for (
int i = 0;
i <
num; ++
i)
2382 CMCTemporaries::maxEnergyAndCellPerSample(
p.clusters_arr, offset + i, cluster)
2386 (ToLoad::CellSampling, ToLoad::CellEnergy),
2388 (
const unsigned int energy_pattern = __float_as_uint(
data.energy *
data.weight);
2390 E_and_cell = (E_and_cell << 32) | cell;
2391 atomicMax(&(CMCTemporaries::maxEnergyAndCellPerSample(
p.clusters_arr,
data.sampling, cluster)), E_and_cell);
2397 using MaxEnergyAndCellPerSample = MaxEnergyAndCellPerSampleSeveral<1, 0>;
2399 template <
int num,
int delta = 0>
2401 (MaxECellPerSampleSeveral,
2402 (ToLoad::SamplingFromMomentIndex),
2405 constexpr unsigned long long int comparison = (total_ordering_zero << 32) | 0xFFFFFFFFU;
2407 for (
int i = 0;
i <
num; ++
i)
2409 const unsigned long long max_energy_and_cell = CMCTemporaries::maxEnergyAndCellPerSample(
p.clusters_arr, offset + i, cluster);
2410 const int cell = (max_energy_and_cell > comparison ? ((
int) (max_energy_and_cell & 0x7FFFFFFFU)) : -1);
2411 CMCTemporaries::maxECellPerSample(
p.clusters_arr, offset + i, cluster) =
cell;
2421 using MaxECellPerSample = MaxECellPerSampleSeveral<1, 0>;