7 #ifndef CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATORHELPER_CUDA_H
8 #define CALORECGPU_GPUCLUSTERINFOANDMOMENTSCALCULATORHELPER_CUDA_H
15 #include "CaloGeoHelpers/CaloSampling.h"
19 #include <type_traits>
30 using nested_type = T[nested_size];
33 _Pragma(
"nv_diag_suppress 177")
35 #define CALORECGPU_EXPAND(...) __VA_ARGS__
37 #define CALORECGPU_CONCAT_HELPER_INNER(A, ...) A ## __VA_ARGS__
38 #define CALORECGPU_CONCAT_HELPER(A, B) CALORECGPU_CONCAT_HELPER_INNER(A, B)
41 #define CMC_TEMPARR_1_DECLARE(TEMPNAME, TYPE) \
42 template <class PtrLike> __host__ __device__ const TYPE * TEMPNAME (const PtrLike & arr); \
43 template <class PtrLike> __host__ __device__ TYPE * TEMPNAME (PtrLike & arr); \
44 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx); \
45 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx); \
46 struct to_end_with_semicolon
48 #define CMC_TEMPARR_1_SPLIT_DECLARE(TEMPNAME, TYPE) \
49 CMC_TEMPARR_1_DECLARE(CALORECGPU_CONCAT_HELPER(TEMPNAME, _1), TYPE); \
50 CMC_TEMPARR_1_DECLARE(CALORECGPU_CONCAT_HELPER(TEMPNAME, _2), TYPE); \
51 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx); \
52 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx); \
53 struct to_end_with_semicolon
55 #define CMC_TEMPARR_2_DECLARE(TEMPNAME, TYPE) \
56 template <class PtrLike> __host__ __device__ const nested_type<TYPE> * TEMPNAME (const PtrLike & arr); \
57 template <class PtrLike> __host__ __device__ nested_type<TYPE> * TEMPNAME (PtrLike & arr); \
58 template <class PtrLike> __host__ __device__ const nested_type<TYPE> & TEMPNAME (const PtrLike & arr, const int idx); \
59 template <class PtrLike> __host__ __device__ nested_type<TYPE> & TEMPNAME (PtrLike & arr, const int idx); \
60 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx, const int jdx); \
61 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx, const int jdx); \
62 struct to_end_with_semicolon
64 #define CMC_TEMPARR_2_SPLIT_DECLARE(TEMPNAME, TYPE) \
65 CMC_TEMPARR_2_DECLARE(CALORECGPU_CONCAT_HELPER(TEMPNAME, _1), TYPE); \
66 CMC_TEMPARR_2_DECLARE(CALORECGPU_CONCAT_HELPER(TEMPNAME, _2), TYPE); \
67 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx, const int jdx); \
68 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx, const int jdx); \
69 struct to_end_with_semicolon
72 #define CMC_TEMPARR_1(TEMPNAME, BASEVAR, TYPE) \
73 template <class PtrLike> __host__ __device__ const TYPE * TEMPNAME (const PtrLike & arr) \
75 constexpr auto misalignment = offsetof(CaloRecGPU::ClusterMomentsArr, BASEVAR) % alignof(TYPE); \
76 const char * aligned = ((const char *) arr->BASEVAR) + (alignof(TYPE) - misalignment) * (misalignment > 0); \
77 return (const TYPE *) (aligned); \
79 template <class PtrLike> __host__ __device__ TYPE * TEMPNAME (PtrLike & arr) \
81 constexpr auto misalignment = offsetof(CaloRecGPU::ClusterMomentsArr, BASEVAR) % alignof(TYPE); \
82 char * aligned = ((char *) arr->BASEVAR) + (alignof(TYPE) - misalignment) * (misalignment > 0); \
83 return (TYPE *) (aligned); \
85 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx) \
87 return TEMPNAME (arr) [idx]; \
89 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx) \
91 return TEMPNAME (arr) [idx]; \
92 } struct to_end_with_semicolon
94 #define CMC_TEMPARR_1_SPLIT(TEMPNAME, BASEVAR1, BASEVAR2, TYPE) \
95 CMC_TEMPARR_1(CALORECGPU_CONCAT_HELPER(TEMPNAME, _1), BASEVAR1, TYPE); \
96 CMC_TEMPARR_1(CALORECGPU_CONCAT_HELPER(TEMPNAME, _2), BASEVAR2, TYPE); \
97 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx) \
99 if (idx >= split_size) \
101 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _2)(arr, idx - split_size); \
105 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _1)(arr, idx); \
108 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx) \
110 if (idx >= split_size) \
112 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _2)(arr, idx - split_size); \
116 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _1)(arr, idx); \
118 } struct to_end_with_semicolon
120 #define CMC_TEMPARR_2(TEMPNAME, BASEVAR, TYPE) \
121 template <class PtrLike> __host__ __device__ const nested_type<TYPE> * TEMPNAME (const PtrLike & arr) \
123 constexpr auto misalignment = offsetof(CaloRecGPU::ClusterMomentsArr, BASEVAR) % alignof(nested_type<TYPE>); \
124 const char * aligned = ((const char *) arr->BASEVAR) + (alignof(nested_type<TYPE>) - misalignment) * (misalignment > 0); \
125 return (const nested_type<TYPE> *) (aligned); \
127 template <class PtrLike> __host__ __device__ nested_type<TYPE> * TEMPNAME (PtrLike & arr) \
129 constexpr auto misalignment = offsetof(CaloRecGPU::ClusterMomentsArr, BASEVAR) % alignof(nested_type<TYPE>); \
130 char * aligned = ((char *) arr->BASEVAR) + (alignof(nested_type<TYPE>) - misalignment) * (misalignment > 0); \
131 return (nested_type<TYPE> *) (aligned); \
133 template <class PtrLike> __host__ __device__ const nested_type<TYPE> & TEMPNAME (const PtrLike & arr, const int idx) \
135 return TEMPNAME (arr) [idx]; \
137 template <class PtrLike> __host__ __device__ nested_type<TYPE> & TEMPNAME (PtrLike & arr, const int idx) \
139 return TEMPNAME (arr) [idx]; \
141 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx, const int jdx) \
143 return TEMPNAME (arr) [idx] [jdx]; \
145 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx, const int jdx) \
147 return TEMPNAME (arr) [idx] [jdx]; \
148 } struct to_end_with_semicolon
150 #define CMC_TEMPARR_2_SPLIT(TEMPNAME, BASEVAR1, BASEVAR2, TYPE) \
151 CMC_TEMPARR_2(CALORECGPU_CONCAT_HELPER(TEMPNAME, _1), BASEVAR1, TYPE); \
152 CMC_TEMPARR_2(CALORECGPU_CONCAT_HELPER(TEMPNAME, _2), BASEVAR2, TYPE); \
153 template <class PtrLike> __host__ __device__ const TYPE & TEMPNAME (const PtrLike & arr, const int idx, const int jdx) \
155 if (jdx >= split_size) \
157 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _2)(arr, idx, jdx - split_size); \
161 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _1)(arr, idx, jdx); \
164 template <class PtrLike> __host__ __device__ TYPE & TEMPNAME (PtrLike & arr, const int idx, const int jdx) \
166 if (jdx >= split_size) \
168 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _2)(arr, idx, jdx - split_size); \
172 return CALORECGPU_CONCAT_HELPER(TEMPNAME, _1)(arr, idx, jdx); \
174 } struct to_end_with_semicolon
477 CMC_TEMPARR_2 ( numberNonEmptySamplings, maxPhiPerSample,
int);
478 CMC_TEMPARR_2 ( maxMomentsEnergyPerSample, maxEtaPerSample,
unsigned int);
481 CMC_TEMPARR_2 ( absoluteEnergyPerSample, maxEPerSample,
float);
482 CMC_TEMPARR_2 ( absoluteEnergyPerSampleAux, energyPerSample,
float);
499 CMC_TEMPARR_1 ( energyDensityNormalizationAux, longitudinal,
float);
500 CMC_TEMPARR_1 ( sumAbsEnergyNonMoments, engCalibFracHad,
float);
501 CMC_TEMPARR_1 ( sumAbsEnergyNonMomentsAux, nExtraCellSampling,
float);
508 CMC_TEMPARR_1_SPLIT( maxCellEnergyAndCell, engCalibDeadTileG3, engCalibDeadTile0,
unsigned long long);
509 CMC_TEMPARR_1_SPLIT( secondMaxCellEnergyAndCell, engCalibDeadHEC0, engCalibDeadEME0,
unsigned long long);
510 CMC_TEMPARR_1_SPLIT( maxAndSecondMaxCells, engCalibDeadHEC0, engCalibDeadEME0,
unsigned long long);
521 CMC_TEMPARR_1 ( numPositiveEnergyCells, engCalibFracRest,
int);
523 CMC_TEMPARR_1 ( sumSquareEnergiesAux, engCalibDeadEMB0,
float);
542 CMC_TEMPARR_1_SPLIT( maxSignificanceAndSampling, engCalibDeadTileG3, engCalibDeadTile0,
unsigned long long);
547 CMC_TEMPARR_2_SPLIT( maxEnergyAndCellPerSample, maxEtaPerSample, maxPhiPerSample,
unsigned long long);
554 CMC_TEMPARR_1 ( lateralNormalizationAux, engCalibDeadL,
float);
555 CMC_TEMPARR_1 ( longitudinalNormalization, engCalibDeadM,
float);
556 CMC_TEMPARR_1 ( longitudinalNormalizationAux, engCalibDeadT,
float);
561 _Pragma(
"nv_diag_default 177")
565 __device__
void add_with_corr(
float * sum_arr,
float * corr_arr,
const int idx,
const float v)
567 const float old_sum = atomicAdd(sum_arr +
idx,
v);
568 const float new_sum = old_sum +
v;
569 if (fabsf(old_sum) >= fabsf(
v))
571 atomicAdd(corr_arr +
idx, (old_sum - new_sum) +
v);
575 atomicAdd(corr_arr +
idx, (
v - new_sum) + old_sum);
580 template <
class ... Ts>
583 static constexpr
unsigned int number =
static_cast<unsigned int>(
sizeof...(Ts));
586 template <
class ... Ts,
class T>
587 constexpr __host__ __device__
auto operator| (
const TypeList<Ts...> &
list,
const TypeList<T> &)
589 if constexpr ((
false || ... || std::is_same_v<T, Ts>))
595 return TypeList<Ts...,
T> {};
599 template <
class ... As,
class ... Bs>
600 constexpr __host__ __device__
auto operator | (
const TypeList<As...> &
list,
const TypeList<Bs...> &)
602 return (
list | ... | TypeList<Bs> {});
605 template <
class ... As,
class ... Bs>
606 constexpr __host__ __device__
auto operator + (
const TypeList<As...> &
list,
const TypeList<Bs...> &)
608 return TypeList<As..., Bs...> {};
614 static constexpr
bool value =
false;
617 template <
class ... Ts>
618 struct is_type_list<TypeList<Ts...>>
620 static constexpr
bool value =
true;
638 struct OnePassLoadHelper
640 static_assert(is_type_list_v<T>);
646 template <
class ... Ts,
class T>
647 constexpr __host__ __device__
auto operator + (
const OnePassLoadHelper<TypeList<Ts...>> &,
const TypeList<T> &)
649 constexpr
auto combined = TypeList<Ts...> {} | TypeList<T> {};
651 return OnePassLoadHelper<std::decay_t<decltype(combined)>> {};
655 template <
class T,
class NewLoad>
656 constexpr __host__ __device__
auto operator | (
const OnePassLoadHelper<T> &
c,
const TypeList<NewLoad> &
n)
658 auto combine =
c |
typename NewLoad::AssumedList{};
664 template <
class T,
class ... Ts>
665 constexpr __host__ __device__
auto operator | (
const OnePassLoadHelper<T> &
c,
const TypeList<Ts...> &)
667 return (
c | ... | TypeList<Ts> {});
671 using ClusterPassLoadHelper = OnePassLoadHelper<T>;
673 template <
class T1,
class T2>
674 struct CellAndClusterPassLoadHelper
676 static_assert(is_type_list_v<T1> && is_type_list_v<T2>);
678 using ToLoadCells =
T1;
679 using ToLoadClusters =
T2;
685 template <
class T1,
class T2,
class NewCellLoad>
686 constexpr __host__ __device__
auto operator | (
const CellAndClusterPassLoadHelper<T1, T2> &
c,
const TypeList<NewCellLoad> &
n)
688 auto combine_cell = OnePassLoadHelper<T1> {} |
n;
690 return CellAndClusterPassLoadHelper<
typename std::decay_t<decltype(combine_cell)>::ToLoad,
T2> {};
693 template <
class T1,
class T2,
class ... Ts>
694 constexpr __host__ __device__
auto operator | (
const CellAndClusterPassLoadHelper<T1, T2> &
c,
const TypeList<Ts...> &)
696 return (
c | ... | TypeList<Ts> {});
699 template <
class T1,
class T2,
class NewClusterLoad>
700 constexpr __host__ __device__
auto operator ^ (
const CellAndClusterPassLoadHelper<T1, T2> &
c,
const TypeList<NewClusterLoad> &
n)
702 auto combine_dependencies = (
c |
typename NewClusterLoad::AssumedPreviousList{}) ^
typename NewClusterLoad::AssumedList{};
704 using DepType = std::decay_t<decltype(combine_dependencies)>;
706 auto combine_clusters =
typename DepType::ToLoadClusters{} |
n;
708 return CellAndClusterPassLoadHelper<
typename DepType::ToLoadCells, std::decay_t<decltype(combine_clusters)>> {};
711 template <
class T1,
class T2,
class ... Ts>
712 constexpr __host__ __device__
auto operator ^ (
const CellAndClusterPassLoadHelper<T1, T2> &
c,
const TypeList<Ts...> &)
714 return (
c ^ ... ^ TypeList<Ts> {});
718 struct ClusterLoader;
720 template <
class ... Ts>
721 struct ClusterLoader<ClusterPassLoadHelper<TypeList<Ts...>>> : Ts...
730 struct CellClusterLoader;
736 __host__ __device__ WeightCarrier(
const float w):
weight(
w)
741 template <
class ... CellLoad,
class ... ClusterLoad>
742 struct CellClusterLoader<CellAndClusterPassLoadHelper<TypeList<CellLoad...>, TypeList<ClusterLoad...>>>: WeightCarrier, CellLoad..., ClusterLoad...
744 __host__ __device__ CellClusterLoader(
Parameters p,
const int cell,
const int cluster,
const float weight):
745 WeightCarrier(
weight), CellLoad(*this,
p,
cell)..., ClusterLoad(*this,
p, cluster)...
750 template <
class ... Moments>
751 __host__ __device__
void one_cluster_before_pass(
const TypeList<Moments...> &,
const int cluster,
Parameters p)
753 constexpr
auto to_load = (ClusterPassLoadHelper<TypeList<>> {} | ... |
typename Moments::BeforeLoading{});
755 ClusterLoader<std::decay_t<decltype(to_load)>>
data{
p, cluster};
757 (Moments::before(
p,
data, cluster), ...);
760 template <
class ... Moments>
761 __host__ __device__
void one_cluster_after_pass(
const TypeList<Moments...> &,
const int cluster,
Parameters p)
763 constexpr
auto to_load = (ClusterPassLoadHelper<TypeList<>> {} | ... |
typename Moments::AfterLoading{});
765 ClusterLoader<std::decay_t<decltype(to_load)>>
data{
p, cluster};
767 (Moments::after(
p,
data, cluster), ...);
770 template <
class ... Moments>
771 __host__ __device__
void one_cell_pass(
const TypeList<Moments ...> &,
const int cell,
const int cluster,
const float weight,
Parameters p)
773 constexpr
auto to_load = ((CellAndClusterPassLoadHelper<TypeList<>, TypeList<>> {} | ... |
typename Moments::CellLoading{}) ^ ... ^
typename Moments::ClusterLoading{});
775 CellClusterLoader<std::decay_t<decltype(to_load)>>
data{
p,
cell, cluster,
weight};
777 (Moments::per_cell(
p,
data,
cell, cluster), ...);
784 template <
class ... ThisStepMomentLists,
class ... NextStepMomentLists>
785 __host__ __device__
void do_cluster_pass(
const TypeList<ThisStepMomentLists...> &,
const TypeList<NextStepMomentLists...> &,
const int cluster,
Parameters p)
787 auto helper_1 = [&](
const auto &
list,
const int count)
789 if (
p.moments_index ==
count)
791 one_cluster_after_pass(
list, cluster,
p);
794 auto helper_2 = [&](
const auto &
list,
const int count)
796 if (
p.moments_index ==
count)
798 one_cluster_before_pass(
list, cluster,
p);
804 (helper_1(ThisStepMomentLists{},
count++), ...);
808 (helper_2(NextStepMomentLists{},
count++), ...);
811 template <
class... MomentLists>
812 __host__ __device__
void do_cell_pass(
const TypeList<MomentLists...> &,
const int cell,
const int cluster,
const float weight,
Parameters p)
816 if (
p.moments_index ==
count)
833 #define CALORECGPU_CMC_LOAD(NAME, NEEDED, PREVNEEDED, VARS, INIT) \
836 using AssumedList = TypeList<CALORECGPU_EXPAND NEEDED >; \
837 using AssumedPreviousList = TypeList<CALORECGPU_EXPAND PREVNEEDED >; \
838 CALORECGPU_EXPAND VARS \
839 template <class Final> __device__ NAME(const Final & f, Parameters p, const int idx) { CALORECGPU_EXPAND INIT } \
844 #define CALORECGPU_CMC_LOAD_SIMPLE_CELL_INFO(NAME, VARNAME, PROPNAME) \
845 CALORECGPU_CMC_LOAD(NAME, \
848 (std::decay_t<decltype(std::declval<CaloRecGPU::CellInfoArr>().PROPNAME[0])> VARNAME;), \
849 (VARNAME = p.cell_info_arr->PROPNAME[idx];) \
852 #define CALORECGPU_CMC_LOAD_SIMPLE_GEOMETRY_INFO(NAME, VARNAME, PROPNAME) \
853 CALORECGPU_CMC_LOAD(NAME, \
856 (std::decay_t<decltype(std::declval<CaloRecGPU::GeometryArr>().PROPNAME[0])> VARNAME;), \
857 (VARNAME = p.geometry->PROPNAME[idx];) \
860 #define CALORECGPU_CMC_LOAD_SIMPLE_CLUSTER_INFO(NAME, VARNAME, PROPNAME) \
861 CALORECGPU_CMC_LOAD(NAME, \
864 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterInfoArr>().PROPNAME[0])> VARNAME;), \
865 (VARNAME = p.clusters_arr->PROPNAME[idx];) \
868 #define CALORECGPU_CMC_LOAD_SIMPLE_MOMENT_INFO(NAME, VARNAME, PROPNAME) \
869 CALORECGPU_CMC_LOAD(NAME, \
872 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterMomentsArr>().PROPNAME[0])> VARNAME;), \
873 (VARNAME = p.moments_arr->PROPNAME[idx];) \
878 #define CALORECGPU_CMC_LOAD_SIMPLE_PER_SAMPLING_MOMENT_INFO(NAME, VARNAME, PROPNAME) \
879 CALORECGPU_CMC_LOAD(NAME, \
882 (std::decay_t<decltype(std::declval<CaloRecGPU::ClusterMomentsArr>().PROPNAME[0][0])> VARNAME;), \
883 (VARNAME = p.moments_arr->PROPNAME[f.sampling][idx];) \
886 #define CALORECGPU_CMC_LOAD_SIMPLE_TEMPORARY_INFO(NAME, VARNAME, PROPNAME) \
887 CALORECGPU_CMC_LOAD(NAME, \
890 (std::decay_t<decltype(CMCTemporaries::PROPNAME(std::declval<CaloRecGPU::ClusterMomentsArr *>(),0))> VARNAME;), \
891 (VARNAME = CMCTemporaries::PROPNAME(p.moments_arr, idx);) \
896 #define CALORECGPU_CMC_LOAD_SIMPLE_PER_SAMPLING_TEMPORARY_INFO(NAME, VARNAME, PROPNAME) \
897 CALORECGPU_CMC_LOAD(NAME, \
900 (std::decay_t<decltype(CMCTemporaries::PROPNAME(std::declval<CaloRecGPU::ClusterMomentsArr*>(),0,0))> VARNAME;), \
901 (VARNAME = CMCTemporaries::PROPNAME(p.moments_arr, f.sampling, idx);) \
909 (SamplingFromMomentIndex,
913 (sampling =
p.moments_index;)
921 (sampling =
p.geometry->sampling(
idx);)
942 (abs_energy = fabsf(
f.energy);)
947 (CellEnergy, CellAbsEnergy),
949 (
float moments_energy;),
950 (moments_energy = (
p.opts->use_abs_energy ||
f.energy > 0.f) ?
f.abs_energy : 0.f;)
957 (CellQualityProvenance,
961 (qp =
p.cell_info_arr->qualityProvenance[
idx];)
969 (is_tile =
p.geometry->is_tile(
idx);)
974 (CellIsTile, CellQualityProvenance),
977 (is_bad =
p.cell_info_arr->is_bad(
f.is_tile,
f.qp,
false);)
982 (CellIsTile, CellGain, CellEnergy),
985 (
noise = (
f.is_tile &&
p.opts->use_two_gaussian_noise ?
986 p.noise_arr->get_double_gaussian_noise(
idx,
f.gain,
f.energy) :
987 p.noise_arr->get_noise(
idx,
f.gain) );)
992 (CellIsBad, CellIsTile, CellQualityProvenance),
994 (
bool LArQ_cell_check;),
995 (LArQ_cell_check = !
f.is_bad && !
f.is_tile && ((
f.qp.provenance() & 0x2800U) == 0x2000U);)
1000 (CellIsBad, CellIsTile, CellQualityProvenance),
1002 (
bool TileQ_cell_check;),
1003 (TileQ_cell_check = !
f.is_bad &&
f.is_tile &&
f.qp.tile_qual1() != 0xFFU &&
f.qp.tile_qual2() != 0xFFU;)
1008 (CellTimeMomentsCheck,
1009 (CellIsTile, CellQualityProvenance, CellSampling),
1011 (
bool time_moments_check;),
1012 (time_moments_check = ( (
f.is_tile && (
f.qp.provenance() & 0x8080U)) ||
1013 (!
f.is_tile && (
f.qp.provenance() & 0x2000U)) ) &&
1026 (CellMomentsEnergy),
1027 (
float weighted_energy;),
1028 (weighted_energy =
f.moments_energy *
f.weight;)
1032 (SquareWeightedEnergy,
1035 (
float square_w_E;),
1036 (square_w_E =
f.weighted_energy *
f.weighted_energy;)
1040 (WeightedEnergyOverVolume,
1043 (
float w_E_over_V;),
1044 (w_E_over_V = (
f.volume > 0.f ?
f.weighted_energy /
f.volume : 1.f);)
1048 (WeightedCellPositionNormalization,
1050 (CellX, CellY, CellZ),
1054 (
const float r_dir_base = rnorm3df(
f.x,
f.y,
f.z);
1055 r_dir = isinf(r_dir_base) ? 0.
f : r_dir_base;
1056 w_E_r_dir =
f.weighted_energy *
f.r_dir;
1061 (WeightedEnergyOrNegative,
1063 (CellEnergy, CellAbsEnergy),
1064 (
float weighted_energy_or_negative;),
1065 (weighted_energy_or_negative = (
p.opts->use_abs_energy ? fabsf(
f.energy) :
f.energy) *
f.weight;)
1069 (SquareWeightedEnergyOrNegative,
1070 (WeightedEnergyOrNegative),
1072 (
float square_w_E_or_neg;),
1073 (square_w_E_or_neg =
f.weighted_energy_or_negative *
f.weighted_energy_or_negative;)
1077 (WeightedNonMomentsEnergy,
1081 (normE =
f.weight *
f.energy;)
1085 (SquaredWeightedNonMomentsEnergy,
1086 (WeightedNonMomentsEnergy),
1088 (
float squared_normE;),
1089 (squared_normE =
f.normE *
f.normE;)
1114 (CenterX, CenterY, CenterZ),
1115 (CellX, CellY, CellZ),
1116 (
float dx,
dy, dz;),
1117 (
dx =
f.x -
f.center_x;
1118 dy =
f.y -
f.center_y;
1119 dz =
f.z -
f.center_z;
1125 (Deltas, ShowerAxisX, ShowerAxisY, ShowerAxisZ),
1133 (Deltas, ShowerAxisX, ShowerAxisY, ShowerAxisZ),
1148 (ReverseSumEnergies,
1151 (
float rev_sum_energies;),
1152 (rev_sum_energies = 1.0
f / (
f.sum_energies > 0.f ?
f.sum_energies : 1.f);)
1161 (EnergyDensityNormalization,
1162 (EnergyDensityNormalizationBase, EnergyDensityNormalizationCorr),
1164 (
float energy_density_norm;),
1165 (energy_density_norm =
f.energy_density_norm_base +
f.energy_density_norm_corr;)
1169 (ReverseEnergyDensityNormalization,
1170 (EnergyDensityNormalization),
1172 (
float rev_energy_density_norm;),
1173 (rev_energy_density_norm = 1.0
f / (
f.energy_density_norm > 0.f ?
f.energy_density_norm : 1.f);)
1179 (ClusterCellWithMaxEnergy,
1180 (ClusterMaxCellEnergyAndCell),
1183 (max_E_cell = (
f.max_E_and_cell & 0x7FFFFFFFU) - 1;)
1187 (ClusterMaxCellEnergy,
1188 (ClusterMaxCellEnergyAndCell),
1191 (max_E = __uint_as_float(
f.max_E_and_cell >> 32U);)
1197 (ClusterCellWithSecondMaxEnergy,
1198 (ClusterSecondMaxCellEnergyAndCell),
1200 (
int second_max_E_cell;),
1201 (second_max_E_cell = (
f.second_max_E_and_cell & 0x7FFFFFFFU) - 1;)
1205 (ClusterSecondMaxCellEnergy,
1206 (ClusterSecondMaxCellEnergyAndCell),
1208 (
float second_max_E;),
1209 (second_max_E = __uint_as_float(
f.second_max_E_and_cell >> 32U);)
1216 (SumAbsEnergyNonMoments,
1217 (SumAbsEnergyNonMomentsBase, SumAbsEnergyNonMomentsCorr),
1219 (
float abs_energy_non_moments;),
1220 (abs_energy_non_moments =
f.abs_energy_non_moments_base +
f.abs_energy_non_moments_corr;)
1224 (ReverseSumAbsEnergyNonMoments,
1225 (SumAbsEnergyNonMoments),
1227 (
float rev_abs_energy_non_moments;),
1228 (rev_abs_energy_non_moments = 1.
f / (
f.abs_energy_non_moments != 0.f ?
f.abs_energy_non_moments : 1.f);)
1238 (ReverseClusterEnergy,
1241 (
float rev_cluster_energy;),
1242 (rev_cluster_energy = (
f.cluster_energy != 0.f ? 1.f /
f.cluster_energy : 1.f);)
1252 (TimeNormalizationBase, TimeNormalizationCorr),
1255 (time_norm =
f.time_norm_base +
f.time_norm_corr;)
1267 (ClusterMaxAndSecondMaxCell,
1268 (ClusterMaxAndSecondMaxCellTogether),
1270 (
int max_cell, second_max_cell;),
1271 ( max_cell =
f.stored_max_and_second_max >> 32U;
1272 second_max_cell =
f.stored_max_and_second_max;
1279 (ReverseAbsoluteEnergyPerSample,
1280 (AbsoluteEnergyPerSample),
1282 (
float rev_sampling_normalization;),
1283 (rev_sampling_normalization = 1.0
f / (
f.sampling_normalization != 0.f ?
f.sampling_normalization : 1.0f);)
1297 (SeedCellGeometryPhi,
1300 (
float seed_cell_phi_coordinate;),
1302 p.geometry->phi[
f.seed_cell] : -999);
1316 using BeforeLoading = TypeList<>;
1328 using CellLoading = TypeList<>;
1331 using ClusterLoading = TypeList<>;
1344 using AfterLoading = TypeList<>;
1356 #define CALORECGPU_CMC_MOMENT_CALC(NAME, BEFORELOAD, BEFOREEXEC, CELLLOAD, CLUSTERLOAD, CELLEXEC, AFTERLOAD, AFTEREXEC) \
1359 using BeforeLoading = TypeList<CALORECGPU_EXPAND BEFORELOAD>; \
1360 template <class T> __device__ static void before(Parameters p, \
1362 const int cluster) \
1363 { CALORECGPU_EXPAND BEFOREEXEC } \
1364 using CellLoading = TypeList<CALORECGPU_EXPAND CELLLOAD>; \
1365 using ClusterLoading = TypeList<CALORECGPU_EXPAND CLUSTERLOAD>; \
1366 template <class T> __device__ static void per_cell(Parameters p, \
1369 const int cluster) \
1370 { CALORECGPU_EXPAND CELLEXEC } \
1371 using AfterLoading = TypeList<CALORECGPU_EXPAND AFTERLOAD>; \
1372 template <class T> __device__ static void after(Parameters p, \
1374 const int cluster) \
1375 { CALORECGPU_EXPAND AFTEREXEC } \
1388 (ClusterEnergyEtaAndEt,
1390 (
p.clusters_arr->clusterEnergy[cluster] = 0.f;
1391 CMCTemporaries::clusterEnergyAux(
p.moments_arr, cluster) = 0.f;
1392 p.clusters_arr->clusterEta[cluster] = 0.f;
1393 CMCTemporaries::clusterEtaAux(
p.moments_arr, cluster) = 0.f;),
1394 (ToLoad::CellEnergy, ToLoad::CellAbsEnergy, ToLoad::CellEta),
1396 (add_with_corr(
p.clusters_arr->clusterEnergy,
1397 CMCTemporaries::clusterEnergyAux(
p.moments_arr),
1400 add_with_corr(
p.clusters_arr->clusterEta,
1401 CMCTemporaries::clusterEtaAux(
p.moments_arr),
1405 (ToLoad::SumAbsEnergyNonMoments, ToLoad::ReverseSumAbsEnergyNonMoments),
1406 (
const float temp_energy =
p.clusters_arr->clusterEnergy[cluster] + CMCTemporaries::clusterEnergyAux(
p.moments_arr, cluster);
1408 p.clusters_arr->clusterEnergy[cluster] = temp_energy;
1410 const float temp_eta = (
p.clusters_arr->clusterEta[cluster] +
1411 CMCTemporaries::clusterEtaAux(
p.moments_arr, cluster)) *
data.rev_abs_energy_non_moments;
1413 p.clusters_arr->clusterEta[cluster] = temp_eta * (
data.abs_energy_non_moments != 0.f);
1416 const float temp_ET = temp_energy / coshf(fabsf(temp_eta));
1418 p.clusters_arr->clusterEt[cluster] = temp_ET * (
data.abs_energy_non_moments != 0.f);
1425 (
p.clusters_arr->clusterPhi[cluster] = 0.f;
1426 CMCTemporaries::clusterPhiAux(
p.moments_arr, cluster) = 0.f;
1428 (ToLoad::CellAbsEnergy, ToLoad::CellPhi),
1429 (ToLoad::SeedCellPhi),
1430 (
const float phi_real = CaloRecGPU::Helpers::regularize_angle(
data.phi,
data.phi_0);
1431 add_with_corr(
p.clusters_arr->clusterPhi,
1432 CMCTemporaries::clusterPhiAux(
p.moments_arr),
1434 phi_real *
data.abs_energy *
data.weight);
1436 (ToLoad::SumAbsEnergyNonMoments, ToLoad::ReverseSumAbsEnergyNonMoments),
1437 (
const float old_phi =
p.clusters_arr->clusterPhi[cluster] + CMCTemporaries::clusterPhiAux(
p.moments_arr, cluster);
1438 p.clusters_arr->clusterPhi[cluster] = CaloRecGPU::Helpers::regularize_angle(old_phi *
1439 data.rev_abs_energy_non_moments, 0.f) *
1440 (
data.abs_energy_non_moments != 0.f);
1451 (
p.moments_arr->avgLArQ[cluster] = 0.f;
1452 CMCTemporaries::avgLArQAux(
p.moments_arr, cluster) = 0.f;
1454 (ToLoad::CellQualityProvenance, ToLoad::CellLArQCheck),
1455 (ToLoad::SquareWeightedEnergyOrNegative),
1456 (
if (
data.LArQ_cell_check)
1458 add_with_corr(p.moments_arr->avgLArQ,
1459 CMCTemporaries::avgLArQAux(p.moments_arr),
1461 data.square_w_E_or_neg * data.qp.quality());
1465 (
const float norm_LAr = CMCTemporaries::averageLArQNorm(
p.moments_arr, cluster) + CMCTemporaries::averageLArQNormAux(
p.moments_arr, cluster);
1466 const float rev_norm_LAr = 1.0f / (norm_LAr > 0.f ? norm_LAr : 1.0f);
1467 const float new_LArQ =
p.moments_arr->avgLArQ[cluster] + CMCTemporaries::avgLArQAux(
p.moments_arr, cluster);
1468 p.moments_arr->avgLArQ[cluster] = new_LArQ * rev_norm_LAr;
1475 (
p.moments_arr->avgTileQ[cluster] = 0.f;
1476 CMCTemporaries::avgTileQAux(
p.moments_arr, cluster) = 0.f;
1478 (ToLoad::CellQualityProvenance, ToLoad::CellTileQCheck),
1479 (ToLoad::SquareWeightedEnergyOrNegative),
1480 (
if (
data.TileQ_cell_check)
1482 const float max_quality = max((unsigned int) data.qp.tile_qual1(), (unsigned int) data.qp.tile_qual2());
1484 add_with_corr(p.moments_arr->avgTileQ,
1485 CMCTemporaries::avgTileQAux(p.moments_arr),
1487 data.square_w_E_or_neg * max_quality);
1491 (
const float norm_Tile = CMCTemporaries::averageTileQNorm(
p.moments_arr, cluster) + CMCTemporaries::averageTileQNormAux(
p.moments_arr, cluster);
1492 const float rev_norm_Tile = 1.0f / (norm_Tile > 0.f ? norm_Tile : 1.0f);
1493 const float new_TileQ =
p.moments_arr->avgTileQ[cluster] + CMCTemporaries::avgTileQAux(
p.moments_arr, cluster);
1494 p.moments_arr->avgTileQ[cluster] = new_TileQ * rev_norm_Tile;
1501 (
p.moments_arr->badCellsCorrE[cluster] = 0.f;
1502 CMCTemporaries::badCellsCorrEAux(
p.moments_arr, cluster) = 0.f;
1504 (ToLoad::CellIsBad, ToLoad::CellEnergy),
1505 (ToLoad::WeightedEnergyOrNegative),
1508 add_with_corr(p.moments_arr->badCellsCorrE,
1509 CMCTemporaries::badCellsCorrEAux(p.moments_arr),
1511 data.weighted_energy_or_negative);
1515 (
p.moments_arr->badCellsCorrE[cluster] += CMCTemporaries::badCellsCorrEAux(
p.moments_arr, cluster);
1522 (
p.moments_arr->badLArQFrac[cluster] = 0.f;
1523 CMCTemporaries::badLArQFracAux(
p.moments_arr, cluster) = 0.f;
1525 (ToLoad::CellQualityProvenance, ToLoad::CellLArQCheck),
1526 (ToLoad::WeightedEnergyOrNegative),
1527 (
if (
data.LArQ_cell_check &&
data.qp.quality() >
p.opts->min_LAr_quality)
1529 add_with_corr(p.moments_arr->badLArQFrac,
1530 CMCTemporaries::badLArQFracAux(p.moments_arr),
1532 data.weighted_energy_or_negative);
1535 (ToLoad::ReverseClusterEnergy),
1536 (
const float new_badLArQFrac =
p.moments_arr->badLArQFrac[cluster] + CMCTemporaries::badLArQFracAux(
p.moments_arr, cluster);
1537 p.moments_arr->badLArQFrac[cluster] = new_badLArQFrac *
data.rev_cluster_energy;
1548 (ToLoad::MaxSignificanceAndSampling),
1549 (
const float max_sig = __uint_as_float(
data.max_sig_and_samp >> 32);
1550 p.moments_arr->cellSignificance[cluster] = max_sig * (
data.max_sig_and_samp & 1 ? 1.f : -1.f);
1561 (ToLoad::MaxSignificanceAndSampling),
1562 (
const int max_samp = (
data.max_sig_and_samp & 0xFFFFFFFEU) >> 1;
1563 p.moments_arr->cellSigSampling[cluster] = max_samp;
1571 (
p.moments_arr->centerX[cluster] = 0.f;
1572 CMCTemporaries::centerXAux(
p.moments_arr, cluster) = 0.f;
1575 (ToLoad::WeightedEnergy),
1576 (add_with_corr(
p.moments_arr->centerX,
1577 CMCTemporaries::centerXAux(
p.moments_arr),
1581 (ToLoad::ReverseSumEnergies),
1582 (
const float new_value =
p.moments_arr->centerX[cluster] + CMCTemporaries::centerXAux(
p.moments_arr, cluster);
1583 p.moments_arr->centerX[cluster] = new_value *
data.rev_sum_energies;
1589 (
p.moments_arr->centerY[cluster] = 0.f;
1590 CMCTemporaries::centerYAux(
p.moments_arr, cluster) = 0.f;
1593 (ToLoad::WeightedEnergy),
1594 (add_with_corr(
p.moments_arr->centerY,
1595 CMCTemporaries::centerYAux(
p.moments_arr),
1599 (ToLoad::ReverseSumEnergies),
1600 (
const float new_value =
p.moments_arr->centerY[cluster] + CMCTemporaries::centerYAux(
p.moments_arr, cluster);
1601 p.moments_arr->centerY[cluster] = new_value *
data.rev_sum_energies;
1607 (
p.moments_arr->centerZ[cluster] = 0.f;
1608 CMCTemporaries::centerZAux(
p.moments_arr, cluster) = 0.f;
1611 (ToLoad::WeightedEnergy),
1612 (add_with_corr(
p.moments_arr->centerZ,
1613 CMCTemporaries::centerZAux(
p.moments_arr),
1617 (ToLoad::ReverseSumEnergies),
1618 (
const float new_value =
p.moments_arr->centerZ[cluster] + CMCTemporaries::centerZAux(
p.moments_arr, cluster);
1619 p.moments_arr->centerZ[cluster] = new_value *
data.rev_sum_energies;
1626 (
p.moments_arr->engFracEM[cluster] = 0.f;
1627 CMCTemporaries::engFracEMAux(
p.moments_arr, cluster) = 0.f;),
1628 (ToLoad::CellSampling),
1629 (ToLoad::WeightedEnergy),
1638 add_with_corr(p.moments_arr->engFracEM,
1639 CMCTemporaries::engFracEMAux(p.moments_arr),
1641 data.weighted_energy);
1644 (ToLoad::ReverseSumEnergies),
1645 (
const float new_engFracEM =
p.moments_arr->engFracEM[cluster] + CMCTemporaries::engFracEMAux(
p.moments_arr, cluster);
1646 p.moments_arr->engFracEM[cluster] = new_engFracEM *
data.rev_sum_energies;
1653 (
p.moments_arr->engBadCells[cluster] = 0.f;
1654 CMCTemporaries::engBadCellsAux(
p.moments_arr, cluster) = 0.f;),
1655 (ToLoad::CellIsBad),
1656 (ToLoad::WeightedEnergyOrNegative),
1659 add_with_corr(p.moments_arr->engBadCells,
1660 CMCTemporaries::engBadCellsAux(p.moments_arr),
1662 data.weighted_energy_or_negative);
1666 (
p.moments_arr->engBadCells[cluster] += CMCTemporaries::engBadCellsAux(
p.moments_arr, cluster);)
1676 (ToLoad::ReverseSumEnergies, ToLoad::ClusterMaxCellEnergy),
1677 (
p.moments_arr->engFracMax[cluster] =
data.max_E *
data.rev_sum_energies;)
1681 (EngPosAndEngFracCore,
1683 (
const float sum_energies =
p.moments_arr->engPos[cluster] + CMCTemporaries::engPosAux(
p.moments_arr, cluster);
1684 p.moments_arr->engPos[cluster] = sum_energies;
1685 p.moments_arr->engFracCore[cluster] *= (sum_energies != 0.f ? 1.0f / sum_energies : 0.f);
1697 (
p.moments_arr->firstEngDens[cluster] = 0.f;
1698 CMCTemporaries::firstEngDensAux(
p.moments_arr, cluster) = 0.f;),
1699 (ToLoad::CellVolume),
1700 (ToLoad::WeightedEnergy, ToLoad::WeightedEnergyOverVolume),
1703 add_with_corr(p.moments_arr->firstEngDens,
1704 CMCTemporaries::firstEngDensAux(p.moments_arr),
1706 data.weighted_energy * data.w_E_over_V);
1709 (ToLoad::ReverseEnergyDensityNormalization),
1710 (
const float new_firstEngDens =
p.moments_arr->firstEngDens[cluster] + CMCTemporaries::firstEngDensAux(
p.moments_arr, cluster);
1711 p.moments_arr->firstEngDens[cluster] = new_firstEngDens *
data.rev_energy_density_norm;
1718 (
p.moments_arr->firstEta[cluster] = 0.f;
1719 CMCTemporaries::firstEtaAux(
p.moments_arr, cluster) = 0.f;
1722 (ToLoad::WeightedEnergy),
1723 (add_with_corr(
p.moments_arr->firstEta,
1724 CMCTemporaries::firstEtaAux(
p.moments_arr),
1728 (ToLoad::ReverseSumEnergies),
1729 (
const float new_firstEta =
p.moments_arr->firstEta[cluster] + CMCTemporaries::firstEtaAux(
p.moments_arr, cluster);
1730 p.moments_arr->firstEta[cluster] = new_firstEta *
data.rev_sum_energies;
1737 (
p.moments_arr->firstPhi[cluster] = 0.f;
1738 CMCTemporaries::firstPhiAux(
p.moments_arr, cluster) = 0.f;
1741 (ToLoad::SeedCellPhi, ToLoad::WeightedEnergy),
1742 (
const float phi_real = CaloRecGPU::Helpers::regularize_angle(
data.phi,
data.phi_0);
1743 add_with_corr(
p.moments_arr->firstPhi,
1744 CMCTemporaries::firstPhiAux(
p.moments_arr),
1746 data.weighted_energy * phi_real);
1748 (ToLoad::ReverseSumEnergies),
1749 (
const float new_firstPhi =
p.moments_arr->firstPhi[cluster] + CMCTemporaries::firstPhiAux(
p.moments_arr, cluster);
1750 p.moments_arr->firstPhi[cluster] = CaloRecGPU::Helpers::regularize_angle(new_firstPhi *
data.rev_sum_energies);
1757 (
p.moments_arr->lateral[cluster] = 0.f;
1758 CMCTemporaries::lateralAux(
p.moments_arr, cluster) = 0.f;
1761 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::R),
1764 add_with_corr(p.moments_arr->lateral,
1765 CMCTemporaries::lateralAux(p.moments_arr),
1767 data.weighted_energy * data.r * data.r);
1771 (
const float new_lateral =
p.moments_arr->lateral[cluster] + CMCTemporaries::lateralAux(
p.moments_arr, cluster);
1772 const float new_norm = CMCTemporaries::lateralNormalization(
p.moments_arr, cluster) +
1773 CMCTemporaries::lateralNormalizationAux(
p.moments_arr, cluster);
1774 p.moments_arr->lateral[cluster] = new_lateral / (new_norm != 0.f ? new_norm : 1.f);
1781 (
p.moments_arr->longitudinal[cluster] = 0.f;
1782 CMCTemporaries::longitudinalAux(
p.moments_arr, cluster) = 0.f;
1785 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell,
ToLoad::Lambda),
1788 add_with_corr(p.moments_arr->longitudinal,
1789 CMCTemporaries::longitudinalAux(p.moments_arr),
1791 data.weighted_energy * data.lambda * data.lambda);
1795 (
const float new_longitudinal =
p.moments_arr->longitudinal[cluster] + CMCTemporaries::longitudinalAux(
p.moments_arr, cluster);
1796 const float new_norm = CMCTemporaries::longitudinalNormalization(
p.moments_arr, cluster) +
1797 CMCTemporaries::longitudinalNormalizationAux(
p.moments_arr, cluster);
1798 p.moments_arr->longitudinal[cluster] = new_longitudinal / (new_norm != 0.f ? new_norm : 1.f);
1809 (ToLoad::SumEnergies),
1810 (
const float mx = CMCTemporaries::mX(
p.moments_arr, cluster) + CMCTemporaries::mXAux(
p.moments_arr, cluster);
1811 const float my = CMCTemporaries::mY(
p.moments_arr, cluster) + CMCTemporaries::mYAux(
p.moments_arr, cluster);
1812 const float mz = CMCTemporaries::mZ(
p.moments_arr, cluster) + CMCTemporaries::mZAux(
p.moments_arr, cluster);
1814 const float v_1 =
mx *
mx;
1815 const float v_2 =
my *
my;
1816 const float v_3 =
mz *
mz;
1817 const float v_4 =
data.sum_energies *
data.sum_energies;
1819 const float c_1 = fmaf(
mx,
mx, -v_1);
1820 const float c_2 = fmaf(
my,
my, -v_2);
1821 const float c_3 = fmaf(
mz,
mz, -v_3);
1822 const float c_4 = fmaf(
data.sum_energies,
data.sum_energies, -v_4);
1826 p.moments_arr->mass[cluster] = sqrtf(fabsf(sq_mass)) * ((sq_mass > 0.f) - (sq_mass < 0.
f));
1833 (
p.moments_arr->nBadCells[cluster] = 0;),
1834 (ToLoad::CellIsBad),
1838 atomicAdd(&(p.moments_arr->nBadCells[cluster]), 1);
1847 (
p.moments_arr->nBadCellsCorr[cluster] = 0;),
1848 (ToLoad::CellIsBad, ToLoad::CellEnergy),
1852 atomicAdd(&(p.moments_arr->nBadCellsCorr[cluster]), 1);
1860 (NExtraCellSampling,
1862 (
p.moments_arr->nExtraCellSampling[cluster] = 0;),
1863 (ToLoad::CellSampling, ToLoad::CellEta),
1867 atomicAdd(&(p.moments_arr->nExtraCellSampling[cluster]), 1);
1877 (
p.moments_arr->numCells[cluster] = 0;),
1880 (atomicAdd(&(
p.moments_arr->numCells[cluster]), 1);),
1888 (
p.moments_arr->PTD[cluster] = 0.f;
1889 CMCTemporaries::PTDAux(
p.moments_arr, cluster) = 0.f;
1892 (ToLoad::SquareWeightedEnergy),
1893 (add_with_corr(
p.moments_arr->PTD,
1894 CMCTemporaries::PTDAux(
p.moments_arr),
1910 (ToLoad::SumEnergies),
1911 (
const float new_PTD =
p.moments_arr->PTD[cluster] + CMCTemporaries::PTDAux(
p.moments_arr, cluster);
1912 p.moments_arr->PTD[cluster] = 1.0f / ((
data.sum_energies > 0.f ?
data.sum_energies : 1.f) * rsqrtf(new_PTD));
1920 (
p.moments_arr->secondEngDens[cluster] = 0.f;
1921 CMCTemporaries::secondEngDensAux(
p.moments_arr, cluster) = 0.f;
1923 (ToLoad::CellVolume),
1924 (ToLoad::WeightedEnergy, ToLoad::WeightedEnergyOverVolume),
1927 add_with_corr(p.moments_arr->secondEngDens,
1928 CMCTemporaries::secondEngDensAux(p.moments_arr),
1930 data.weighted_energy * data.w_E_over_V * data.w_E_over_V);
1933 (ToLoad::ReverseEnergyDensityNormalization),
1934 (
const float new_secondEngDens =
p.moments_arr->secondEngDens[cluster] + CMCTemporaries::secondEngDensAux(
p.moments_arr, cluster);
1935 p.moments_arr->secondEngDens[cluster] = new_secondEngDens *
data.rev_energy_density_norm;)
1941 (
p.moments_arr->secondLambda[cluster] = 0.f;
1942 CMCTemporaries::secondLambdaAux(
p.moments_arr, cluster) = 0.f;
1946 (add_with_corr(
p.moments_arr->secondLambda,
1947 CMCTemporaries::secondLambdaAux(
p.moments_arr),
1951 (ToLoad::ReverseSumEnergies),
1952 (
const float new_secondLambda =
p.moments_arr->secondLambda[cluster] + CMCTemporaries::secondLambdaAux(
p.moments_arr, cluster);
1953 p.moments_arr->secondLambda[cluster] = new_secondLambda *
data.rev_sum_energies;
1960 (
p.moments_arr->secondR[cluster] = 0.f;
1961 CMCTemporaries::secondRAux(
p.moments_arr, cluster) = 0.f;
1964 (ToLoad::WeightedEnergy, ToLoad::R),
1965 (add_with_corr(
p.moments_arr->secondR,
1966 CMCTemporaries::secondRAux(
p.moments_arr),
1970 (ToLoad::ReverseSumEnergies),
1971 (
const float new_secondR =
p.moments_arr->secondR[cluster] + CMCTemporaries::secondRAux(
p.moments_arr, cluster);
1972 p.moments_arr->secondR[cluster] = new_secondR *
data.rev_sum_energies;
1980 (
p.moments_arr->significance[cluster] = 0.f;
1981 CMCTemporaries::significanceAux(
p.moments_arr, cluster) = 0.f;
1983 (ToLoad::CellNoise),
1985 (add_with_corr(
p.moments_arr->significance,
1986 CMCTemporaries::significanceAux(
p.moments_arr),
1990 (ToLoad::ClusterEnergy),
1991 (
const float prev_v =
p.moments_arr->significance[cluster] + CMCTemporaries::significanceAux(
p.moments_arr, cluster);
1992 p.moments_arr->significance[cluster] = (prev_v > 0.f ?
data.cluster_energy * rsqrtf(prev_v) : 0.
f);)
1998 (
p.moments_arr->time[cluster] = 0.f;
1999 CMCTemporaries::timeAux(
p.moments_arr, cluster) = 0.f;
2000 p.moments_arr->secondTime[cluster] = 0.f;
2001 CMCTemporaries::secondTimeAux(
p.moments_arr, cluster) = 0.f;
2003 (ToLoad::CellTime, ToLoad::CellTimeMomentsCheck),
2004 (ToLoad::SquaredWeightedNonMomentsEnergy),
2005 (
if (
data.time_moments_check)
2007 add_with_corr(p.moments_arr->time,
2008 CMCTemporaries::timeAux(p.moments_arr),
2010 data.time * data.squared_normE);
2011 add_with_corr(p.moments_arr->secondTime,
2012 CMCTemporaries::secondTimeAux(p.moments_arr),
2014 data.time * data.time * data.squared_normE);
2016 (ToLoad::TimeNormalization),
2017 (
if (
data.time_norm != 0.f)
2019 const float real_norm = 1.0f / data.time_norm;
2020 const float time = (p.moments_arr->time[cluster] + CMCTemporaries::timeAux(p.moments_arr, cluster))
2022 const float second_sum = p.moments_arr->secondTime[cluster] + CMCTemporaries::secondTimeAux(p.moments_arr, cluster);
2023 p.moments_arr->time[cluster] = time;
2024 p.moments_arr->secondTime[cluster] = ClusterMomentsCalculator::product_sum_cornea_harrison_tang(second_sum, real_norm, -time, time);
2028 p.moments_arr->time[cluster] = 0.f;
2029 p.moments_arr->secondTime[cluster] = 0.f;
2038 template <
int num,
int delta = 0>
2040 (EnergyPerSampleSeveral,
2041 (ToLoad::SamplingFromMomentIndex),
2043 for (
int i = 0;
i <
num; ++
i)
2045 p.moments_arr->energyPerSample[
offset +
i]
2047 CMCTemporaries::energyPerSampleAux(
p.moments_arr,
offset +
i, cluster) = 0.f;
2050 (ToLoad::CellEnergy, ToLoad::CellSampling),
2052 (add_with_corr(
p.moments_arr->energyPerSample[
data.sampling],
2053 CMCTemporaries::energyPerSampleAux(
p.moments_arr,
data.sampling),
2057 (ToLoad::SamplingFromMomentIndex),
2059 for (
int i = 0;
i <
num; ++
i)
2061 p.moments_arr->energyPerSample[
offset +
i]
2062 [cluster] += CMCTemporaries::energyPerSampleAux(
p.moments_arr,
offset +
i, cluster);
2067 using EnergyPerSample = EnergyPerSampleSeveral<1, 0>;
2070 template <
int num,
int delta = 0>
2072 (EtaPerSampleSeveral,
2073 (ToLoad::SamplingFromMomentIndex),
2075 for (
int i = 0;
i <
num; ++
i)
2077 p.moments_arr->etaPerSample[
offset +
i]
2079 CMCTemporaries::etaPerSampleAux(
p.moments_arr,
offset +
i, cluster) = 0.f;
2082 (ToLoad::CellSampling, ToLoad::CellAbsEnergy, ToLoad::CellEta),
2084 (add_with_corr(
p.moments_arr->etaPerSample[
data.sampling],
2085 CMCTemporaries::etaPerSampleAux(
p.moments_arr,
data.sampling),
2089 (ToLoad::SamplingFromMomentIndex),
2091 for (
int i = 0;
i <
num; ++
i)
2094 const float normalization = CMCTemporaries::absoluteEnergyPerSample(
p.moments_arr,
idx, cluster) +
2095 CMCTemporaries::absoluteEnergyPerSampleAux(
p.moments_arr,
idx, cluster);
2096 const float rev_normalization = 1.0f / (normalization != 0.f ? normalization : 1.0f);
2097 const float new_eta =
p.moments_arr->etaPerSample[
idx][cluster] + CMCTemporaries::etaPerSampleAux(
p.moments_arr,
idx, cluster);
2098 p.moments_arr->etaPerSample[
idx][cluster] = new_eta * rev_normalization;
2103 using EtaPerSample = EtaPerSampleSeveral<1, 0>;
2105 template <
int num,
int delta = 0>
2107 (NCellSamplingSeveral,
2108 (ToLoad::SamplingFromMomentIndex),
2110 for (
int i = 0;
i <
num; ++
i)
2112 p.moments_arr->nCellSampling[
offset +
i]
2116 (ToLoad::CellSampling),
2118 (atomicAdd(&(
p.moments_arr->nCellSampling[
data.sampling][cluster]), 1);),
2123 using NCellSampling = NCellSamplingSeveral<1, 0>;
2125 template <
int num,
int delta = 0>
2127 (PhiPerSampleSeveral,
2128 (ToLoad::SamplingFromMomentIndex),
2130 for (
int i = 0;
i <
num; ++
i)
2132 p.moments_arr->phiPerSample[
offset +
i]
2134 CMCTemporaries::phiPerSampleAux(
p.moments_arr,
offset +
i, cluster) = 0.f;
2137 (ToLoad::CellSampling, ToLoad::CellAbsEnergy, ToLoad::CellPhi),
2138 (ToLoad::SeedCellPhi),
2139 (
const float phi_real = CaloRecGPU::Helpers::regularize_angle(
data.phi,
data.phi_0);
2140 add_with_corr(
p.moments_arr->phiPerSample[
data.sampling],
2141 CMCTemporaries::phiPerSampleAux(
p.moments_arr,
data.sampling),
2143 data.abs_energy *
data.weight * phi_real);
2145 (ToLoad::SamplingFromMomentIndex),
2147 for (
int i = 0;
i <
num; ++
i)
2150 const float normalization = CMCTemporaries::absoluteEnergyPerSample(
p.moments_arr,
idx, cluster) +
2151 CMCTemporaries::absoluteEnergyPerSampleAux(
p.moments_arr,
idx, cluster);
2152 const float rev_normalization = 1.0f / (normalization != 0.f ? normalization : 1.0f);
2153 const float new_phi =
p.moments_arr->phiPerSample[
idx][cluster] + CMCTemporaries::phiPerSampleAux(
p.moments_arr,
idx, cluster);
2154 p.moments_arr->phiPerSample[
idx][cluster] = CaloRecGPU::Helpers::regularize_angle(new_phi * rev_normalization, 0.
f);
2159 using PhiPerSample = PhiPerSampleSeveral<1, 0>;
2167 (AverageLArQNormalization,
2169 (CMCTemporaries::averageLArQNorm(
p.moments_arr, cluster) = 0.f;
2170 CMCTemporaries::averageLArQNormAux(
p.moments_arr, cluster) = 0.f;),
2171 (ToLoad::CellLArQCheck),
2172 (ToLoad::SquareWeightedEnergyOrNegative),
2173 (
if (
data.LArQ_cell_check)
2175 add_with_corr(CMCTemporaries::averageLArQNorm(p.moments_arr),
2176 CMCTemporaries::averageLArQNormAux(p.moments_arr),
2178 data.square_w_E_or_neg);
2186 (AverageTileQNormalization,
2188 (CMCTemporaries::averageTileQNorm(
p.moments_arr, cluster) = 0.f;
2189 CMCTemporaries::averageTileQNormAux(
p.moments_arr, cluster) = 0.f;),
2190 (ToLoad::CellQualityProvenance, ToLoad::CellTileQCheck),
2191 (ToLoad::SquareWeightedEnergyOrNegative),
2192 (
if (
data.TileQ_cell_check)
2194 add_with_corr(CMCTemporaries::averageTileQNorm(p.moments_arr),
2195 CMCTemporaries::averageTileQNormAux(p.moments_arr),
2197 data.square_w_E_or_neg);
2205 (EnergyDensityNormalization,
2207 (CMCTemporaries::energyDensityNormalization(
p.moments_arr, cluster) = 0.f;
2208 CMCTemporaries::energyDensityNormalizationAux(
p.moments_arr, cluster) = 0.f;
2210 (ToLoad::CellVolume),
2211 (ToLoad::WeightedEnergy),
2214 add_with_corr(CMCTemporaries::energyDensityNormalization(p.moments_arr),
2215 CMCTemporaries::energyDensityNormalizationAux(p.moments_arr),
2217 data.weighted_energy);
2225 (FirstAndSecondMaxEnergyAndCell,
2227 (CMCTemporaries::maxCellEnergyAndCell(
p.moments_arr, cluster) = 0ULL;
2228 CMCTemporaries::secondMaxCellEnergyAndCell(
p.moments_arr, cluster) = 0ULL;
2231 (ToLoad::WeightedEnergy),
2232 (
if (
data.weighted_energy > 0)
2234 unsigned long long int energy_and_cell = __float_as_uint(data.weighted_energy);
2236 energy_and_cell = (energy_and_cell << 32) | (cell + 1);
2237 const unsigned long long int old_enc = atomicMax(&(CMCTemporaries::maxCellEnergyAndCell(p.moments_arr, cluster)), energy_and_cell);
2238 atomicMax(&(CMCTemporaries::secondMaxCellEnergyAndCell(p.moments_arr, cluster)), min(old_enc, energy_and_cell));
2247 (LateralNormalization,
2249 (CMCTemporaries::lateralNormalization(
p.moments_arr, cluster) = 0.f;
2250 CMCTemporaries::lateralNormalizationAux(
p.moments_arr, cluster) = 0.f;
2253 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell, ToLoad::R),
2254 (
const float real_r = (
cell !=
data.max_cell &&
cell !=
data.second_max_cell) ?
2257 add_with_corr(CMCTemporaries::lateralNormalization(
p.moments_arr),
2258 CMCTemporaries::lateralNormalizationAux(
p.moments_arr),
2260 data.weighted_energy * real_r * real_r);
2267 (LongitudinalNormalization,
2269 (CMCTemporaries::longitudinalNormalization(
p.moments_arr, cluster) = 0.f;
2270 CMCTemporaries::longitudinalNormalizationAux(
p.moments_arr, cluster) = 0.f;
2273 (ToLoad::WeightedEnergy, ToLoad::ClusterMaxAndSecondMaxCell,
ToLoad::Lambda),
2274 (
const float real_lambda = (
cell !=
data.max_cell &&
cell !=
data.second_max_cell) ?
2275 data.lambda :
max(
data.lambda,
p.opts->min_l_longitudinal);
2277 add_with_corr(CMCTemporaries::longitudinalNormalization(
p.moments_arr),
2278 CMCTemporaries::longitudinalNormalizationAux(
p.moments_arr),
2280 data.weighted_energy * real_lambda * real_lambda);
2289 (CMCTemporaries::matrix00(
p.moments_arr, cluster) = 0.f;
2290 CMCTemporaries::matrix00Aux(
p.moments_arr, cluster) = 0.f;
2293 (ToLoad::CenterX, ToLoad::SquareWeightedEnergy),
2294 (add_with_corr(CMCTemporaries::matrix00(
p.moments_arr),
2295 CMCTemporaries::matrix00Aux(
p.moments_arr),
2305 (CMCTemporaries::matrix10(
p.moments_arr, cluster) = 0.f;
2306 CMCTemporaries::matrix10Aux(
p.moments_arr, cluster) = 0.f;
2308 (ToLoad::CellX, ToLoad::CellY),
2309 (ToLoad::CenterX, ToLoad::CenterY, ToLoad::SquareWeightedEnergy),
2310 (add_with_corr(CMCTemporaries::matrix10(
p.moments_arr),
2311 CMCTemporaries::matrix10Aux(
p.moments_arr),
2321 (CMCTemporaries::matrix20(
p.moments_arr, cluster) = 0.f;
2322 CMCTemporaries::matrix20Aux(
p.moments_arr, cluster) = 0.f;
2324 (ToLoad::CellX, ToLoad::CellZ),
2325 (ToLoad::CenterX, ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2326 (add_with_corr(CMCTemporaries::matrix20(
p.moments_arr),
2327 CMCTemporaries::matrix20Aux(
p.moments_arr),
2337 (CMCTemporaries::matrix11(
p.moments_arr, cluster) = 0.f;
2338 CMCTemporaries::matrix11Aux(
p.moments_arr, cluster) = 0.f;
2341 (ToLoad::CenterY, ToLoad::SquareWeightedEnergy),
2342 (add_with_corr(CMCTemporaries::matrix11(
p.moments_arr),
2343 CMCTemporaries::matrix11Aux(
p.moments_arr),
2353 (CMCTemporaries::matrix21(
p.moments_arr, cluster) = 0.f;
2354 CMCTemporaries::matrix21Aux(
p.moments_arr, cluster) = 0.f;
2356 (ToLoad::CellY, ToLoad::CellZ),
2357 (ToLoad::CenterY, ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2358 (add_with_corr(CMCTemporaries::matrix21(
p.moments_arr),
2359 CMCTemporaries::matrix21Aux(
p.moments_arr),
2369 (CMCTemporaries::matrix22(
p.moments_arr, cluster) = 0.f;
2370 CMCTemporaries::matrix22Aux(
p.moments_arr, cluster) = 0.f;
2373 (ToLoad::CenterZ, ToLoad::SquareWeightedEnergy),
2374 (add_with_corr(CMCTemporaries::matrix22(
p.moments_arr),
2375 CMCTemporaries::matrix22Aux(
p.moments_arr),
2384 (MaxAndSecondMaxCells,
2390 (ToLoad::ClusterCellWithMaxEnergy, ToLoad::ClusterCellWithSecondMaxEnergy),
2391 (
unsigned long long to_store =
data.max_E_cell;
2392 to_store = (to_store << 32
u) |
static_cast<unsigned int>(
data.second_max_E_cell);
2393 CMCTemporaries::maxAndSecondMaxCells(
p.moments_arr, cluster) = to_store;
2398 (MaxSignificanceAndSampling,
2400 (CMCTemporaries::maxSignificanceAndSampling(
p.moments_arr, cluster) = 0ULL;),
2401 (ToLoad::CellSampling, ToLoad::CellNoise),
2402 (ToLoad::WeightedEnergyOrNegative),
2403 (
const float max_sig =
data.noise > 0.f ?
data.weighted_energy_or_negative /
data.noise : 0.f;
2404 unsigned long long int max_S_and_S = __float_as_uint(fabsf(max_sig));
2405 max_S_and_S = (max_S_and_S << 32) | (((
unsigned long long int)
data.sampling << 1)) | (max_sig > 0.f);
2406 atomicMax(&(CMCTemporaries::maxSignificanceAndSampling(
p.moments_arr, cluster)), max_S_and_S);),
2414 (CMCTemporaries::mX(
p.moments_arr, cluster) = 0.f;
2415 CMCTemporaries::mXAux(
p.moments_arr, cluster) = 0.f;),
2417 (ToLoad::WeightedCellPositionNormalization),
2419 add_with_corr(CMCTemporaries::mX(
p.moments_arr),
2420 CMCTemporaries::mXAux(
p.moments_arr),
2430 (CMCTemporaries::mY(
p.moments_arr, cluster) = 0.f;
2431 CMCTemporaries::mYAux(
p.moments_arr, cluster) = 0.f;),
2433 (ToLoad::WeightedCellPositionNormalization),
2435 add_with_corr(CMCTemporaries::mY(
p.moments_arr),
2436 CMCTemporaries::mYAux(
p.moments_arr),
2446 (CMCTemporaries::mZ(
p.moments_arr, cluster) = 0.f;
2447 CMCTemporaries::mZAux(
p.moments_arr, cluster) = 0.f;),
2449 (ToLoad::WeightedCellPositionNormalization),
2451 add_with_corr(CMCTemporaries::mZ(
p.moments_arr),
2452 CMCTemporaries::mZAux(
p.moments_arr),
2462 (NumPositiveEnergyCells,
2464 (CMCTemporaries::numPositiveEnergyCells(
p.moments_arr, cluster) = 0;),
2466 (ToLoad::WeightedEnergyOrNegative),
2467 (
if (
data.weighted_energy_or_negative > 0)
2469 atomicAdd(&(CMCTemporaries::numPositiveEnergyCells(p.moments_arr, cluster)), 1);
2478 (ToLoad::SeedCellGeometryPhi),
2479 (CMCTemporaries::seedCellPhi(
p.moments_arr, cluster) =
data.seed_cell_phi_coordinate;),
2488 (SumAbsEnergyNonMoments,
2490 (CMCTemporaries::sumAbsEnergyNonMoments(
p.moments_arr, cluster) = 0.f;
2491 CMCTemporaries::sumAbsEnergyNonMomentsAux(
p.moments_arr, cluster) = 0.f;
2493 (ToLoad::CellAbsEnergy),
2495 (add_with_corr(CMCTemporaries::sumAbsEnergyNonMoments(
p.moments_arr),
2496 CMCTemporaries::sumAbsEnergyNonMomentsAux(
p.moments_arr),
2507 (CMCTemporaries::sumSquareEnergies(
p.moments_arr, cluster) = 0.f;
2508 CMCTemporaries::sumSquareEnergiesAux(
p.moments_arr, cluster) = 0.f;
2511 (ToLoad::SquareWeightedEnergy),
2512 (add_with_corr(CMCTemporaries::sumSquareEnergies(
p.moments_arr),
2513 CMCTemporaries::sumSquareEnergiesAux(
p.moments_arr),
2524 (CMCTemporaries::timeNormalization(
p.moments_arr, cluster) = 0.f;
2525 CMCTemporaries::timeNormalizationAux(
p.moments_arr, cluster) = 0.f;
2527 (ToLoad::CellTimeMomentsCheck),
2528 (ToLoad::SquaredWeightedNonMomentsEnergy),
2529 (
if (
data.time_moments_check)
2531 add_with_corr(CMCTemporaries::timeNormalization(p.moments_arr),
2532 CMCTemporaries::timeNormalizationAux(p.moments_arr),
2534 data.squared_normE);
2546 template <
int num,
int delta = 0>
2548 (AbsoluteEnergyPerSampleSeveral,
2549 (ToLoad::SamplingFromMomentIndex),
2551 for (
int i = 0;
i <
num; ++
i)
2553 CMCTemporaries::absoluteEnergyPerSample(
p.moments_arr,
offset +
i, cluster)
2555 CMCTemporaries::absoluteEnergyPerSampleAux(
p.moments_arr,
offset +
i, cluster) = 0.f;
2558 (ToLoad::CellSampling, ToLoad::CellAbsEnergy),
2560 (add_with_corr(CMCTemporaries::absoluteEnergyPerSample(
p.moments_arr,
data.sampling),
2561 CMCTemporaries::absoluteEnergyPerSampleAux(
p.moments_arr,
data.sampling),
2569 using AbsoluteEnergyPerSample = AbsoluteEnergyPerSampleSeveral<1, 0>;
2571 template <
int num,
int delta = 0>
2573 (MaxEnergyAndCellPerSampleSeveral,
2574 (ToLoad::SamplingFromMomentIndex),
2576 for (
int i = 0;
i <
num; ++
i)
2578 CMCTemporaries::maxEnergyAndCellPerSample(
p.moments_arr,
offset +
i, cluster)
2582 (ToLoad::CellSampling, ToLoad::CellEnergy),
2584 (
const unsigned int energy_pattern = __float_as_uint(
data.energy *
data.weight);
2586 E_and_cell = (E_and_cell << 32) |
cell;
2587 atomicMax(&(CMCTemporaries::maxEnergyAndCellPerSample(
p.moments_arr,
data.sampling, cluster)), E_and_cell);
2593 using MaxEnergyAndCellPerSample = MaxEnergyAndCellPerSampleSeveral<1, 0>;
2595 template <
int num,
int delta = 0>
2597 (MaxECellPerSampleSeveral,
2598 (ToLoad::SamplingFromMomentIndex),
2601 constexpr
unsigned long long int comparison = (total_ordering_zero << 32) | 0xFFFFFFFFU;
2603 for (
int i = 0;
i <
num; ++
i)
2605 const unsigned long long max_energy_and_cell = CMCTemporaries::maxEnergyAndCellPerSample(
p.moments_arr,
offset +
i, cluster);
2606 const int cell = (max_energy_and_cell > comparison ? ((
int) (max_energy_and_cell & 0x7FFFFFFFU)) : -1);
2607 CMCTemporaries::maxECellPerSample(
p.moments_arr,
offset +
i, cluster) =
cell;
2617 using MaxECellPerSample = MaxECellPerSampleSeveral<1, 0>;