ATLAS Offline Software
Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2025 CERN for the benefit of the ATLAS collaboration
3 //
4 // Dear emacs, this is -*- c++ -*-
5 //
6 
7 #ifndef CALORECGPU_HELPERS_H
8 #define CALORECGPU_HELPERS_H
9 
10 #include <utility>
11 #include <type_traits>
12 #include <cstring>
13 //For memcpy, of all things...
14 #include <string>
15 #include <cstdio>
16 #include <iostream>
17 #include <thread>
18 #include <mutex>
19 #include <shared_mutex>
20 #include <memory>
21 #include <vector>
22 #include <climits>
23 #include <new>
24 #include <cmath>
25 
26 #if __cpp_lib_math_constants
27  #include <numbers>
28 #endif
29 //This is the best way to have pi,
30 //but we provide a more manual alternative.
31 //Of course, there's also M_PI,
32 //but we wanted to ensure the type matched
33 //to prevent any GPU-based casting shenanigans.
34 
35 namespace CaloRecGPU
36 {
37 
38 #ifndef CUDA_AVAILABLE
39 
40  #ifdef __CUDA_ARCH__
41  #define CUDA_AVAILABLE 1
42  #elif __CUDA__
43  #define CUDA_AVAILABLE 1
44  #elif __CUDACC__
45  #define CUDA_AVAILABLE 1
46  #else
47  #define CUDA_AVAILABLE 0
48  #endif
49 
50 #endif
51 
52 #if CUDA_AVAILABLE
53 
54 #define CUDA_HOS_DEV __host__ __device__
55 
56 
62  CUDA_HOS_DEV inline void CUDA_gpu_assert(cudaError_t code, const char * file, int line, bool abort = true)
63  {
64  if (code != cudaSuccess)
65  {
66  printf("CUDA error: %s (%s %d)\n", cudaGetErrorString(code), file, line);
67  if (abort)
68  {
69 #ifdef __CUDA_ARCH__
70  asm("trap;");
71 #else
72  exit(code);
73 #endif
74  }
75  }
76  }
77 
82 #define CUDA_ERRCHECK(...) CUDA_ERRCHECK_HELPER(__VA_ARGS__, true)
83 
84 #define CUDA_ERRCHECK_HELPER(ans, ...) do { ::CaloRecGPU::CUDA_gpu_assert((ans), __FILE__, __LINE__, CUDA_ERRCHECK_GET_FIRST(__VA_ARGS__, true) ); } while(0)
85 #define CUDA_ERRCHECK_GET_FIRST(x, ...) x
86 
87 
88 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 350
89  #if CUDART_VERSION >= 12000
90  #define CUDA_CAN_USE_TAIL_LAUNCH 1
91  #else
92  #define CUDA_CAN_USE_TAIL_LAUNCH 0
93  #endif
94 #elif defined(__CUDA_ARCH__)
95  #error "CUDA compute capability at least 3.5 is needed so we can have dynamic parallelism!"
96 #endif
97 
98 
99 #else
100 
101 #define CUDA_HOS_DEV
102 #define CUDA_ERRCHECK(...)
103 
104 #endif
105 
106  namespace CUDA_Helpers
107  {
108 
110  {
111  void * ptr = nullptr;
112 
113  template <class T = const void>
114  constexpr operator T * () const
115  {
116  return (T *) ptr;
117  }
118 
119  constexpr operator bool() const
120  {
121  return ptr != nullptr;
122  }
123 
124  template <class T>
126  {
127  }
128 
129  CUDAStreamPtrHolder() = default;
130  };
131  //Can't do much more than this
132  //since cudaStream_t is a typedef...
133  //Though not typesafe, it is still
134  //semantically more safe than a naked void *...
135 
139  void * allocate(const size_t num);
140 
144  void deallocate(void * address);
145 
149  void * allocate_pinned(const size_t num);
150 
155 
156 
160  void GPU_to_CPU(void * dest, const void * const source, const size_t num);
161 
165  void CPU_to_GPU(void * dest, const void * const source, const size_t num);
166 
170  void GPU_to_GPU(void * dest, const void * const source, const size_t num);
171 
172 
176  void GPU_to_CPU_async(void * dest, const void * const source, const size_t num, CUDAStreamPtrHolder stream = {});
177 
181  void CPU_to_GPU_async(void * dest, const void * const source, const size_t num, CUDAStreamPtrHolder stream = {});
182 
186  void GPU_to_GPU_async(void * dest, const void * const source, const size_t num, CUDAStreamPtrHolder stream = {});
187 
192 
196  void optimize_block_and_grid_size(void * func, int & block_size, int & grid_size, const int dynamic_memory = 0, const int block_size_limit = 0);
197 
201  void optimize_block_and_grid_size_for_cooperative_launch(void * func, int & block_size, int & grid_size, const int dynamic_memory = 0, const int block_size_limit = 0);
202 
204 
206 
207  std::string GPU_name();
208  }
209 
210  namespace Helpers
211  {
212 
214  template <class T1, class T2>
215  inline constexpr auto int_ceil_div(const T1 num, const T2 denom)
216  {
217  return num / denom + (num % denom != 0);
218  }
219 
221  template <class T1, class T2>
222  inline constexpr auto int_floor_div(const T1 num, const T2 denom)
223  {
224  return num / denom;
225  }
226 
228  template <class Base = float, class Exp = int>
229  inline constexpr Base compile_time_pow2(const Exp exp)
230  {
231  Base ret = 1;
232  if (exp < 0)
233  {
234  for (Exp i = 0; i < -exp; ++i)
235  {
236  ret /= Base(2);
237  }
238  }
239  else
240  {
241  for (Exp i = 0; i < exp; ++i)
242  {
243  ret *= Base(2);
244  }
245  }
246  return ret;
247  }
248  //Though we could possibly bit-hack stuff due to IEEE-754 reliance elsewhere,
249  //it's not valid and type-safe C++...
250  //Since it's compile-time, this being a trifle slower is meaningless.
251 
252 
255  template <class T>
256  inline constexpr unsigned char Pearson_hash(const T number)
257  {
258  constexpr unsigned char initial_value = 42;
259  //The answer.
260 
261  constexpr unsigned char c_mult = 7;
262  constexpr unsigned char c_add = 1;
263  //For our "look up table": table[i] = c_mult * i + c_add
264  //For an appropriate choice of constants (such as this),
265  //this will be bijective (modulo 255), as required.
266 
267  unsigned char ret = initial_value;
268 
269  for (unsigned int i = 0; i < sizeof(T); i += sizeof(unsigned char))
270  {
271  const unsigned char to_hash = number >> (i * CHAR_BIT);
272  const unsigned char operand = ret ^ to_hash;
273  ret = c_mult * operand + c_add;
274  }
275 
276  return ret;
277  }
278 
279 
282  template <class T>
283  inline constexpr unsigned short Pearson_hash_16_bit(const T number)
284  {
285  constexpr unsigned short initial_value = 42754;
286  //The answer and the standard.
287 
288  constexpr unsigned short c_mult = 7;
289  constexpr unsigned short c_add = 1;
290  //For our "look up table": table[i] = c_mult * i + c_add
291  //For an appropriate choice of constants (such as this),
292  //this will be bijective (modulo 255), as required.
293 
294  unsigned short ret = initial_value;
295 
296  for (unsigned int i = 0; i < sizeof(T); i += sizeof(unsigned short))
297  {
298  const unsigned short to_hash = number >> (i * CHAR_BIT);
299  const unsigned short operand = ret ^ to_hash;
300  ret = c_mult * operand + c_add;
301  }
302 
303  return ret;
304  }
305 
306 
308  namespace Constants
309  {
310 #ifdef __cpp_lib_math_constants
311  template <class T>
312  inline constexpr T pi = std::numbers::pi_v<T>;
313 
314  template <class T>
315  inline constexpr T sqrt2 = std::numbers::sqrt2_v<T>;
316 #else
317  template <class T>
318  inline constexpr T pi = T(3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624L);
319 
320  template <class T>
321  inline constexpr T sqrt2 = T(1.4142135623730950488016887242096980785696718753769480731766797379907324784621070388503875343276415727350138462309122970249248360558507372126441214970999358314132226659275055927557999505011527820605714701095599716059702745345968620147285174186408891986095523292304843087143214508397626036279952514079896872533965463318088296406206152583523950547457502877599617298355752203375318570113543746034084988471603868999706990048150305440277903164542478230684929369186215805784631115966687130130156185689872372352885092649L);
322 #endif
323 
324  template <class T>
325  inline constexpr T inv_sqrt2 = T(0.70710678118654752440084436210484903928483593768847403658833986899536623923105351942519376716382078636750692311545614851246241802792536860632206074854996791570661133296375279637789997525057639103028573505477998580298513726729843100736425870932044459930477616461524215435716072541988130181399762570399484362669827316590441482031030762917619752737287514387998086491778761016876592850567718730170424942358019344998534950240751527201389515822712391153424646845931079028923155579833435650650780928449361861764425463243L);
326  //Why is this not in the C++ constants?!
327 
328  }
329 
330  CUDA_HOS_DEV static inline
331  float erf_inv_wrapper (const float x)
332  {
333  using namespace std;
334 #ifdef __CUDA_ARCH__
335  return erfinvf(x);
336 #else
337  //Copied directly from ROOT...
338 
339  int kMaxit = 50;
340  float kEps = 1e-14f;
341  float kConst = 0.8862269254527579f; // sqrt(pi)/2.0
342 
343  if (abs(x) <= kEps)
344  {
345  return kConst * x;
346  }
347 
348  // Newton iterations
349  float erfi, derfi, y0, y1, dy0, dy1;
350  if (fabsf(x) < 1.0f)
351  {
352  erfi = kConst * fabsf(x);
353  y0 = erff(0.9f * erfi);
354  derfi = 0.1f * erfi;
355  for (int iter = 0; iter < kMaxit; iter++)
356  {
357  y1 = 1.f - erfc(erfi);
358  dy1 = fabsf(x) - y1;
359  if (fabsf(dy1) < kEps)
360  {
361  if (x < 0)
362  {
363  return -erfi;
364  }
365  else
366  {
367  return erfi;
368  }
369  }
370  dy0 = y1 - y0;
371  derfi *= dy1 / dy0;
372  y0 = y1;
373  erfi += derfi;
374  if (fabsf(derfi / erfi) < kEps)
375  {
376  if (x < 0.f)
377  {
378  return -erfi;
379  }
380  else
381  {
382  return erfi;
383  }
384  }
385  }
386  }
387  return 0; //did not converge
388 #endif
389  }
390 
391  //Food for thought: any sort of proper argument reduction here?
392  //(E. g. Cody-Waite or Payne-Hanek algorithm?)
393 
394  CUDA_HOS_DEV static inline
395  float regularize_angle(const float b, const float a = 0.f)
396  //a. k. a. proxim in Athena code.
397  {
398  using namespace std;
399  constexpr float pi = Helpers::Constants::pi<float>;
400  constexpr float two_pi = 2 * pi;
401  const float ret = remainderf(b, two_pi);
402  return ret + ((ret < a - pi) - (ret > a + pi)) * two_pi;
403  }
404 
405  CUDA_HOS_DEV static inline
406  double regularize_angle(const double b, const double a = 0.)
407  //a. k. a. proxim in Athena code.
408  {
409  using namespace std;
410  constexpr double pi = Helpers::Constants::pi<double>;
411  constexpr double two_pi = 2 * pi;
412  const double ret = remainderf(b, two_pi);
413  return ret + ((ret < a - pi) - (ret > a + pi)) * two_pi;
414  }
415 
416  template <class T>
417  CUDA_HOS_DEV static inline
418  T angular_difference(const T x, const T y)
419  {
420  return regularize_angle(regularize_angle(x) - regularize_angle(y));
421  }
422 
423  CUDA_HOS_DEV static inline
424  float eta_from_coordinates(const float x, const float y, const float z)
425  {
426  using namespace std;
427 
428  if (x != 0 || y != 0)
429  {
430 #ifdef __CUDA_ARCH__
431  const float m = norm3df(x, y, z);
432 #else
433  const float m = hypot(x, y, z);
434 #endif
435  return 0.5f * logf((m + z) / (m - z));
436  }
437  else
438  {
439  constexpr float s_etaMax = 22756.0f;
440  return z + ((z > 0) - (z < 0)) * s_etaMax;
441  }
442  }
443 
444  CUDA_HOS_DEV static inline
445  double eta_from_coordinates(const double x, const double y, const double z)
446  {
447  using namespace std;
448  if (x != 0 || y != 0)
449  {
450 #ifdef __CUDA_ARCH__
451  const float m = norm3d(x, y, z);
452 #else
453  const float m = hypot(x, y, z);
454 #endif
455  return 0.5 * log((m + z) / (m - z));
456  }
457  else
458  {
459  constexpr double s_etaMax = 22756.0;
460  return z + ((z > 0) - (z < 0)) * s_etaMax;
461  }
462  }
463 
465  namespace MemoryContext
466  {
467  struct CPU
468  {
469  constexpr static char const * name = "CPU";
470  };
471  struct CUDAGPU
472  {
473  constexpr static char const * name = "CUDA GPU";
474  };
476  {
477  constexpr static char const * name = "CUDA Pinned CPU";
478  };
479  }
480 
482  template <class T, class indexer>
484  {
485  private:
486  template <class C, class dummy = void> struct unary_helper;
487 
488  template <class dummy> struct unary_helper<MemoryContext::CPU, dummy>
489  {
490  static inline T * allocate(const indexer size)
491  {
492  return new T[size];
493  }
494 
495  static inline void deallocate(T *& arr)
496  {
497  delete[] arr;
498  }
499 
500  };
501 
502  template <class dummy> struct unary_helper<MemoryContext::CUDAGPU, dummy>
503  {
504  static inline T * allocate(const indexer size)
505  {
506  return static_cast<T *>(CUDA_Helpers::allocate(sizeof(T) * size));
507  }
508 
509  static inline void deallocate(T *& arr)
510  {
512  }
513  };
514 
515 
516  template <class dummy> struct unary_helper<MemoryContext::CUDAPinnedCPU, dummy>
517  {
518  static inline T * allocate(const indexer size)
519  {
520  return static_cast<T *>(CUDA_Helpers::allocate_pinned(sizeof(T) * size));
521  }
522 
523  static inline void deallocate(T *& arr)
524  {
526  }
527  };
528 
529  template <class C1, class C2, class dummy = void> struct copy_helper;
530 
531  template <class dummy> struct copy_helper<MemoryContext::CPU, MemoryContext::CPU, dummy>
532  {
533  static inline void copy (T * dest, const T * const source, const indexer sz)
534  {
535  std::memcpy(dest, source, sizeof(T) * sz);
536  }
537  };
538 
539  template <class dummy> struct copy_helper<MemoryContext::CPU, MemoryContext::CUDAGPU, dummy>
540  {
541  static inline void copy (T * dest, const T * const source, const indexer sz)
542  {
543  CUDA_Helpers::GPU_to_CPU(dest, source, sizeof(T) * sz);
544  }
545  };
546 
547  template <class dummy> struct copy_helper<MemoryContext::CUDAGPU, MemoryContext::CUDAGPU, dummy>
548  {
549  static inline void copy (T * dest, const T * const source, const indexer sz)
550  {
551  CUDA_Helpers::GPU_to_GPU(dest, source, sizeof(T) * sz);
552  }
553  };
554 
555  template <class dummy> struct copy_helper<MemoryContext::CUDAGPU, MemoryContext::CPU, dummy>
556  {
557  static inline void copy (T * dest, const T * const source, const indexer sz)
558  {
559  CUDA_Helpers::CPU_to_GPU(dest, source, sizeof(T) * sz);
560  }
561  };
562 
563  template <class dummy> struct copy_helper<MemoryContext::CUDAPinnedCPU, MemoryContext::CPU, dummy>
564  {
565  static inline void copy (T * dest, const T * const source, const indexer sz)
566  {
567  std::memcpy(dest, source, sizeof(T) * sz);
568  }
569  };
570 
571  template <class dummy> struct copy_helper<MemoryContext::CPU, MemoryContext::CUDAPinnedCPU, dummy>
572  {
573  static inline void copy (T * dest, const T * const source, const indexer sz)
574  {
575  std::memcpy(dest, source, sizeof(T) * sz);
576  }
577  };
578 
579  template <class dummy> struct copy_helper<MemoryContext::CUDAPinnedCPU, MemoryContext::CUDAPinnedCPU, dummy>
580  {
581  static inline void copy (T * dest, const T * const source, const indexer sz)
582  {
583  std::memcpy(dest, source, sizeof(T) * sz);
584  }
585  };
586 
587  template <class dummy> struct copy_helper<MemoryContext::CUDAPinnedCPU, MemoryContext::CUDAGPU, dummy>
588  {
589  static inline void copy (T * dest, const T * const source, const indexer sz)
590  {
591  CUDA_Helpers::GPU_to_CPU(dest, source, sizeof(T) * sz);
592  }
593  };
594 
595  template <class dummy> struct copy_helper<MemoryContext::CUDAGPU, MemoryContext::CUDAPinnedCPU, dummy>
596  {
597  static inline void copy (T * dest, const T * const source, const indexer sz)
598  {
599  CUDA_Helpers::CPU_to_GPU(dest, source, sizeof(T) * sz);
600  }
601  };
602 
603 
604  template <class C1, class C2, class dummy = void> struct move_helper;
605 
606  template <class C1, class C2, class dummy> struct move_helper
607  {
608  inline static void move(T *& dest, T *& source, const indexer sz)
609  {
613  }
614  };
615 
616  template <class C, class dummy> struct move_helper<C, C, dummy>
617  {
618  inline static void move(T *& dest, T *& source, const indexer)
619  {
620  dest = source;
621  source = nullptr;
622  }
623  };
624 
625  public:
627  template <class Context> static inline T * allocate(const indexer size)
628  {
629  T * ret = nullptr;
630  if (size > 0)
631  {
633  }
634 #if CALORECGPU_HELPERS_DEBUG
635  std::cerr << "ALLOCATED " << size << " x " << sizeof(T) << " in " << Context::name << ": " << ret << std::endl;
636 #endif
637  return ret;
638  }
639 
641  template <class Context> static inline void deallocate(T *& arr)
642  {
643  if (arr == nullptr)
644  //This check is to ensure the code behaves on non-CUDA enabled platforms
645  //where some destructors might still be called with nullptr.
646  {
647  return;
648  }
650 #if CALORECGPU_HELPERS_DEBUG
651  std::cerr << "DEALLOCATED in " << Context::name << ": " << arr << std::endl;
652 #endif
653  arr = nullptr;
654  }
655 
656 
658  template <class DestContext, class SourceContext>
659  static inline void copy(T * dest, const T * const source, const indexer sz)
660  {
661  if (sz > 0 && source != nullptr)
662  {
664  }
665 #if CALORECGPU_HELPERS_DEBUG
666  std::cerr << "COPIED " << sz << " from " << SourceContext::name << " to " << DestContext::name << ": " << source << " to " << dest << std::endl;
667 #endif
668  }
669 
670 
676  template <class DestContext, class SourceContext>
677  static inline void move(T *& dest, T *& source, const indexer sz)
678  {
679 #if CALORECGPU_HELPERS_DEBUG
680  std::cerr << "MOVED " << sz << " from " << SourceContext::name << " to " << DestContext::name << ": " << source << " to " << dest;
681 #endif
682  if (sz > 0 && source != nullptr)
683  {
685  }
686  else
687  {
688  dest = nullptr;
689  deallocate<SourceContext>(source);
690  }
691 #if CALORECGPU_HELPERS_DEBUG
692  std::cerr << " | " << source << " to " << dest << std::endl;
693 #endif
694  }
695 
696  };
697 
704  template <class T, class indexer, class Context, bool hold_arrays = true>
706 
707  template <class T, class indexer, class Context>
708  class SimpleContainer<T, indexer, Context, true>
709  {
710  static_assert(std::is_trivially_copyable<T>::value, "SimpleContainer only works with a trivially copyable type.");
711  T * m_array;
712  indexer m_size;
713 
714  template <class a, class b, class c, bool d> friend class SimpleContainer;
715 
717 
718  public:
719 
720  CUDA_HOS_DEV inline indexer size() const
721  {
722  return m_size;
723  }
724 
725  CUDA_HOS_DEV inline T & operator[] (const indexer i)
726  {
727  return m_array[i];
728  }
729 
730  CUDA_HOS_DEV inline const T & operator[] (const indexer i) const
731  {
732  return m_array[i];
733  }
734 
735  inline void clear()
736  {
737  Manager::deallocate(m_array);
738  m_size = 0;
739  }
740 
741  inline void resize(const indexer new_size)
742  {
743  if (new_size == 0)
744  {
745  clear();
746  }
747  else if (new_size != m_size)
748  {
749  T * temp = m_array;
750  m_array = Manager::template allocate<Context>(new_size);
751  Manager::template copy<Context, Context>(m_array, temp, (m_size < new_size ? m_size : new_size));
752  Manager::template deallocate<Context>(temp);
753  m_size = new_size;
754  }
755  }
756 
757  SimpleContainer() : m_array(nullptr), m_size(0)
758  {
759  }
760 
761  SimpleContainer(const indexer sz) : m_size(sz)
762  {
763  m_array = Manager::template allocate<Context>(sz);
764  }
765 
769  SimpleContainer(T * other_array, const indexer sz) : m_size(sz)
770  {
771  m_array = Manager::template allocate<Context>(sz);
772  Manager::template copy<Context, Context>(m_array, other_array, sz);
773  }
774 
775  SimpleContainer(const SimpleContainer & other) : m_size(other.m_size)
776  {
777  m_array = Manager::template allocate<Context>(m_size);
778  Manager::template copy<Context, Context>(m_array, other.m_array, m_size);
779  }
780 
782  {
783  m_array = nullptr;
784  Manager::template move<Context, Context>(m_array, other.m_array, m_size);
785  other.m_size = 0;
786  }
787 
788  template <class other_indexer, class other_context, bool other_hold>
790  m_size(other.m_size)
791  {
792 
793  m_array = Manager::template allocate<Context>(m_size);
794  Manager::template copy<Context, other_context>(m_array, other.m_array, m_size);
795  }
796 
797  template <class other_indexer, class other_context>
799  m_size(other.m_size)
800  {
801  m_array = nullptr;
802  Manager::template move<Context, other_context>(m_array, other.m_array, m_size);
803  other.m_size = 0;
804  }
805 
807  {
808  if (this == &other)
809  {
810  return (*this);
811  }
812  else
813  {
814  resize(other.size());
815  Manager::template copy<Context, Context>(m_array, other.m_array, m_size);
816  return (*this);
817  }
818  }
819 
821  {
822  if (this == &other)
823  {
824  return (*this);
825  }
826  else
827  {
828  clear();
829  Manager::template move<Context, Context>(m_array, other.m_array, other.size());
830  m_size = other.m_size;
831  other.m_size = 0;
832  return (*this);
833  }
834  }
835 
836 
837  template <class other_indexer, class other_context, bool other_hold>
839  {
840  resize(other.m_size);
841  Manager::template copy<Context, other_context>(m_array, other.m_array, m_size);
842  return (*this);
843  }
844 
845  template <class other_indexer, class other_context>
847  {
848  clear();
849  Manager::template move<Context, other_context>(m_array, other.m_array, other.m_size);
850  m_size = other.m_size;
851  other.m_size = 0;
852  return (*this);
853  }
854 
856  {
857  Manager::template deallocate<Context>(m_array);
858  m_size = 0;
859  }
860 
861  CUDA_HOS_DEV operator const T * () const
862  {
863  return m_array;
864  }
865 
866  CUDA_HOS_DEV operator T * ()
867  {
868  return m_array;
869  }
870 
871  template <class stream, class str = std::basic_string<typename stream::char_type> >
872  void textual_output(stream & s, const str & separator = " ") const
873  {
875  {
876  s << m_size << separator;
877  for (indexer i = 0; i < m_size - 1; ++i)
878  {
879  s << m_array[i] << separator;
880  }
881  s << m_array[m_size - 1];
882  }
883  else
884  {
886  other.textual_output(s, separator);
887  }
888  }
889 
890  template <class stream>
892  {
894  {
895  indexer new_size;
896  s >> new_size >> std::ws;
897  if (s.fail())
898  {
899  //Throw errors, perhaps? Don't know if we can/should use exceptions...
900  std::cerr << "FAILED READING " << this << "!" << std::endl;
901  new_size = 0;
902  }
903  resize(new_size);
904  for (indexer i = 0; i < m_size - 1; ++i)
905  {
906  s >> m_array[i];
907  s >> std::ws;
908  }
909  s >> m_array[m_size - 1];
910  }
911  else
912  {
914  other.textual_input(s);
915  (*this) = other;
916  }
917  }
918 
919  template <class stream>
920  void binary_output(stream & s) const
921  {
923  {
924  s.write((char *) &m_size, sizeof(indexer));
925  for (indexer i = 0; i < m_size; ++i)
926  {
927  s.write((char *) (m_array + i), sizeof(T));
928  }
929  }
930  else
931  {
933  other.binary_output(s);
934  }
935  }
936 
937  template <class stream>
939  {
941  {
942  indexer new_size;
943  s.read((char *) &new_size, sizeof(indexer));
944  if (s.fail())
945  {
946  //Throw errors, perhaps? Don't know if we can/should use exceptions...
947  std::cerr << "FAILED READING " << this << "!" << std::endl;
948  new_size = 0;
949  }
950  resize(new_size);
951  for (indexer i = 0; i < m_size; ++i)
952  {
953  s.read((char *) (m_array + i), sizeof(T));
954  }
955  }
956  else
957  {
959  other.binary_input(s);
960  (*this) = other;
961  }
962  }
963 
964  };
965 
966  template <class T, class indexer, class Context>
967  class SimpleContainer<T, indexer, Context, false>
968  {
969  static_assert(std::is_trivially_copyable<T>::value, "SimpleContainer only works with a trivially copyable type.");
970  T * m_array;
971  indexer m_size;
972 
974 
975  template <class a, class b, class c, bool d> friend class SimpleContainer;
976 
977  public:
978 
979  CUDA_HOS_DEV inline indexer size() const
980  {
981  return m_size;
982  }
983 
984  CUDA_HOS_DEV inline T & operator[] (const indexer i)
985  {
986  return m_array[i];
987  }
988 
989  CUDA_HOS_DEV inline const T & operator[] (const indexer i) const
990  {
991  return m_array[i];
992  }
993 
994  // cppcheck-suppress uninitMemberVar
995  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
996  CUDA_HOS_DEV SimpleContainer() : m_array(nullptr), m_size(0)
997  {
998  }
999 
1003  // cppcheck-suppress uninitMemberVar
1004  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1005  CUDA_HOS_DEV SimpleContainer(T * other_array, const indexer sz) : m_array(other_array), m_size(sz)
1006  {
1007  }
1008 
1009  template <class other_indexer, bool other_hold>
1010  // cppcheck-suppress uninitMemberVar
1011  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1013  m_size(other.m_size),
1014  m_array(other.m_array)
1015  {
1016  }
1017 
1018  // cppcheck-suppress operatorEqVarError
1019  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1021  {
1022  if (this == &other)
1023  {
1024  return (*this);
1025  }
1026  else
1027  {
1028  m_array = other.m_array;
1029  m_size = other.m_size;
1030  return (*this);
1031  }
1032  }
1033 
1034  template <class other_indexer, bool other_hold>
1035  // cppcheck-suppress operatorEqVarError
1036  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1038  {
1039  m_size = other.m_size;
1040  m_array = other.m_array;
1041  return (*this);
1042  }
1043 
1044  CUDA_HOS_DEV operator const T * () const
1045  {
1046  return m_array;
1047  }
1048 
1049  CUDA_HOS_DEV operator T * ()
1050  {
1051  return m_array;
1052  }
1053  };
1054 
1056  template <class T, class indexer = unsigned int>
1058 
1060  template <class T, class indexer = unsigned int>
1062 
1064  template <class T, class indexer = unsigned int>
1066 
1073  template <class T, class Context, bool hold_object = true>
1075 
1076  template <class T, class Context>
1077  class SimpleHolder<T, Context, true>
1078  {
1079  static_assert(std::is_trivially_copyable<T>::value, "SimpleHolder only works with a trivially copyable type.");
1080 
1081  using indexer = unsigned int;
1082 
1084 
1086 
1087  template <class a, class b, bool c> friend class SimpleHolder;
1088 
1089  public:
1090 
1091  CUDA_HOS_DEV const T & operator *() const
1092  {
1093  return *m_object;
1094  }
1095 
1097  {
1098  return *m_object;
1099  }
1100 
1101  CUDA_HOS_DEV const T * operator ->() const
1102  {
1103  return m_object;
1104  }
1105 
1106  CUDA_HOS_DEV T * operator ->()
1107  {
1108  return m_object;
1109  }
1110 
1111  CUDA_HOS_DEV inline bool valid() const
1112  {
1113  return m_object != nullptr;
1114  }
1115 
1116  inline void clear()
1117  {
1118  Manager::template deallocate<Context>(m_object);
1119  }
1120 
1121  inline void allocate()
1122  {
1123  if (m_object == nullptr)
1124  {
1125  m_object = Manager::template allocate<Context>(1);
1126  }
1127  }
1128 
1129  SimpleHolder(): m_object(nullptr)
1130  {
1131  }
1132 
1133  SimpleHolder(const bool really_allocate)
1134  {
1135  if (really_allocate)
1136  {
1137  m_object = Manager::template allocate<Context>(1);
1138  }
1139  else
1140  {
1141  m_object = nullptr;
1142  }
1143  }
1144 
1149  explicit SimpleHolder(X * other_p)
1150  {
1151  m_object = Manager::template allocate<Context>(1);
1152  Manager::template copy<Context, Context>(m_object, other_p, 1);
1153  }
1154 
1159  SimpleHolder(const X & other_v) : SimpleHolder(&other_v)
1160  {
1161 
1162 
1163  }
1164 
1166  {
1167  if (other.valid())
1168  {
1169  m_object = Manager::template allocate<Context>(1);
1170  Manager::template copy<Context, Context>(m_object, other.m_object, other.valid());
1171  }
1172  else
1173  {
1174  m_object = nullptr;
1175  }
1176  }
1177 
1178  template < class X, class other_context, bool other_hold,
1181  {
1182  if (other.valid())
1183  {
1184  m_object = Manager::template allocate<Context>(1);
1185  Manager::template copy<Context, other_context>(m_object, other.m_object, other.valid());
1186  }
1187  else
1188  {
1189  m_object = nullptr;
1190  }
1191  }
1192 
1194  {
1195  m_object = nullptr;
1196  Manager::template move<Context, Context>(m_object, other.m_object, other.valid());
1197  }
1198 
1199  template < class X, class other_context,
1202  {
1203  m_object = nullptr;
1204  Manager::template move<Context, other_context>(m_object, other.m_object, other.valid());
1205  }
1206 
1207  // cppcheck-suppress operatorEqVarError
1208  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1210  {
1211  if (!valid() && other.valid())
1212  {
1213  allocate();
1214  }
1215  if (&other != this)
1216  {
1217  Manager::template copy<Context, Context>(m_object, other.m_object, other.valid());
1218  }
1219  return (*this);
1220  }
1221 
1222  template < class X, class other_context, bool other_hold,
1224  // cppcheck-suppress operatorEqVarError
1225  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1227  {
1228  if (!valid() && other.valid())
1229  {
1230  allocate();
1231  }
1232  Manager::template copy<Context, other_context>(m_object, other.m_object, other.valid());
1233  return (*this);
1234  }
1235 
1236  // cppcheck-suppress operatorEqVarError
1237  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1239  {
1240  if (&other != this)
1241  {
1242  clear();
1243  Manager::template move<Context, Context>(m_object, other.m_object, other.valid());
1244  }
1245  return (*this);
1246  }
1247 
1248  template < class X, class other_context,
1250  // cppcheck-suppress operatorEqVarError
1251  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1253  {
1254  clear();
1255  Manager::template move<Context, other_context>(m_object, other.m_object, other.valid());
1256  return (*this);
1257  }
1258 
1260  {
1261  Manager::template deallocate<Context>(m_object);
1262  }
1263 
1265  CUDA_HOS_DEV operator const X * () const
1266  {
1267  return m_object;
1268  }
1269 
1271  CUDA_HOS_DEV operator X * ()
1272  {
1273  return m_object;
1274  }
1275 
1276  template <class stream, class str = std::basic_string<typename stream::char_type> >
1277  void textual_output(stream & s, const str & separator = " ") const
1278  {
1280  {
1281  if (m_object == nullptr)
1282  {
1283  s << 0;
1284  }
1285  else
1286  {
1287  s << 1 << separator << (*m_object);
1288  }
1289  }
1290  else
1291  {
1293  other.textual_output(s, separator);
1294  }
1295  }
1296 
1297  template <class stream>
1299  {
1301  {
1302  bool is_valid;
1303  s >> is_valid >> std::ws;
1304  if (s.fail())
1305  {
1306  //Throw errors, perhaps? Don't know if we can/should use exceptions...
1307  std::cerr << "FAILED READING " << this << "!" << std::endl;
1308  is_valid = false;
1309  }
1310  if (is_valid)
1311  {
1312  allocate();
1313  s >> (*m_object);
1314  }
1315  else
1316  {
1317  clear();
1318  }
1319  }
1320  else
1321  {
1323  other.textual_input(s);
1324  (*this) = other;
1325  }
1326  }
1327 
1328  template <class stream>
1329  void binary_output(stream & s) const
1330  {
1331  if (m_object == nullptr)
1332  {
1333  return;
1334  }
1336  {
1337  s.write(reinterpret_cast<char *>( m_object), sizeof(T));
1338  }
1339  else
1340  {
1342  other.binary_output(s);
1343  }
1344  }
1345 
1346  template <class stream>
1348  {
1350  {
1351  allocate();
1352  s.read(reinterpret_cast<char *> (m_object), sizeof(T));
1353  }
1354  else
1355  {
1357  other.binary_input(s);
1358  (*this) = other;
1359  }
1360  }
1361 
1362  };
1363 
1364  template <class T, class Context>
1365  class SimpleHolder<T, Context, false>
1366  {
1367  static_assert(std::is_trivially_copyable<T>::value, "SimpleHolder only works with a trivially copyable type.");
1368 
1369  using indexer = unsigned int;
1370 
1372 
1374 
1375  template <class a, class b, bool c> friend class SimpleHolder;
1376 
1377  public:
1378 
1379  CUDA_HOS_DEV const T & operator *() const
1380  {
1381  return *m_object;
1382  }
1383 
1385  {
1386  return *m_object;
1387  }
1388 
1389  CUDA_HOS_DEV const T * operator ->() const
1390  {
1391  return m_object;
1392  }
1393 
1394  CUDA_HOS_DEV T * operator ->()
1395  {
1396  return m_object;
1397  }
1398 
1399  CUDA_HOS_DEV inline bool valid() const
1400  {
1401  return m_object != nullptr;
1402  }
1403 
1404  // cppcheck-suppress uninitMemberVar
1405  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1406  CUDA_HOS_DEV SimpleHolder() : m_object(nullptr)
1407  {
1408  }
1409 
1414  // cppcheck-suppress uninitMemberVar
1415  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1417  {
1418  m_object = other_p;
1419  }
1420 
1421  template < class X, bool other_hold,
1423  // cppcheck-suppress uninitMemberVar
1424  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1426  {
1427  m_object = other.m_object;
1428  }
1429 
1430  template < class X, bool other_hold,
1432  // cppcheck-suppress operatorEqVarError
1433  //Try to suppress the uninitialized member thing that is probably being thrown off by the CUDA_HOS_DEV macro...
1435  {
1436  m_object = other.m_object;
1437  return (*this);
1438  }
1439 
1441  CUDA_HOS_DEV operator const X * () const
1442  {
1443  return m_object;
1444  }
1445 
1447  CUDA_HOS_DEV operator X * ()
1448  {
1449  return m_object;
1450  }
1451  };
1452 
1454  template <class T>
1456 
1458  template <class T>
1460 
1462  template <class T>
1464 
1466  template <class T>
1468 
1478  template <class T>
1480  {
1481  private:
1482  std::vector< std::unique_ptr<T> > m_held;
1483  std::vector< typename std::thread::id > m_thread_equivs;
1484  //For a sufficiently small number of threads
1485  //(not much more than 100 or so?)
1486  //it's faster to have linear search+insert
1487  //than any other addressing mode
1488  //(e. g. unordered_map)
1489  //We could still consider a more sophisticated solution...
1490 
1491  //Simple alternative: some sort of stack for non-assigned objects,
1492  //pushing and popping instead of linear searching.
1493  //(But with constant memory -> no (de)allocations.)
1494 
1495  mutable std::shared_mutex m_mutex;
1496 
1498  {
1499  std::unique_lock<std::shared_mutex> lock(m_mutex);
1500  m_held.emplace_back(std::make_unique<T>());
1501  m_thread_equivs.emplace_back(std::this_thread::get_id());
1502  return *(m_held.back());
1503  }
1504 
1505  public:
1506  T & get_one()
1507  {
1508  {
1509  std::shared_lock<std::shared_mutex> lock(m_mutex);
1510  std::thread::id this_id = std::this_thread::get_id();
1511  const std::thread::id invalid_id{};
1512  for (size_t i = 0; i < m_thread_equivs.size(); ++i)
1513  {
1514  if (m_thread_equivs[i] == invalid_id)
1515  {
1516  m_thread_equivs[i] = this_id;
1517  return *(m_held[i]);
1518  }
1519  }
1520  }
1521  return add_one_and_return();
1522  }
1523 
1525  T & get_for_thread() const
1526  {
1527  std::shared_lock<std::shared_mutex> lock(m_mutex);
1528  std::thread::id this_id = std::this_thread::get_id();
1529  for (size_t i = 0; i < m_thread_equivs.size(); ++i)
1530  {
1531  if (m_thread_equivs[i] == this_id)
1532  {
1533  return *(m_held[i]);
1534  }
1535  }
1536  //Here would be a good place for an unreachable.
1537  //C++23?
1538  return *(m_held.back());
1539  }
1540 
1542  {
1543  std::unique_lock<std::shared_mutex> lock(m_mutex);
1544  std::thread::id this_id = std::this_thread::get_id();
1545  const std::thread::id invalid_id{};
1546  for (size_t i = 0; i < m_thread_equivs.size(); ++i)
1547  {
1548  if (m_thread_equivs[i] == this_id)
1549  {
1550  m_thread_equivs[i] = invalid_id;
1551  }
1552  }
1553  }
1554 
1555  void resize(const size_t new_size)
1556  {
1557  std::unique_lock<std::shared_mutex> lock(m_mutex);
1558  if (new_size < m_held.size())
1559  {
1560  m_held.resize(new_size);
1561  m_thread_equivs.resize(new_size);
1562  }
1563  else if (new_size > m_held.size())
1564  {
1565  const size_t to_add = new_size - m_held.size();
1566  const std::thread::id invalid_id{};
1567  for (size_t i = 0; i < to_add; ++i)
1568  {
1569  m_held.emplace_back(std::make_unique<T>());
1570  m_thread_equivs.emplace_back(invalid_id);
1571  }
1572  }
1573  }
1574 
1575  template <class F, class ... Args>
1576  void operate_on_all(F && f, Args && ... args)
1577  {
1578  std::unique_lock<std::shared_mutex> lock(m_mutex);
1579  for (std::unique_ptr<T> & obj : m_held)
1580  {
1581  f(*obj, std::forward<Args>(args)...);
1582  }
1583  }
1584 
1585  size_t held_size() const
1586  {
1587  std::shared_lock<std::shared_mutex> lock(m_mutex);
1588  return m_held.size();
1589  }
1590 
1591  size_t available_size() const
1592  {
1593  std::shared_lock<std::shared_mutex> lock(m_mutex);
1594  size_t count = 0;
1595  const std::thread::id invalid_id{};
1596  for (const auto & id : m_thread_equivs)
1597  {
1598  if (id == invalid_id)
1599  {
1600  ++count;
1601  }
1602  }
1603  return count;
1604  }
1605 
1606  size_t filled_size() const
1607  {
1608  std::shared_lock<std::shared_mutex> lock(m_mutex);
1609  size_t count = 0;
1610  const std::thread::id invalid_id{};
1611  for (const auto & id : m_thread_equivs)
1612  {
1613  if (id == invalid_id)
1614  {
1615  ++count;
1616  }
1617  }
1618  return m_held.size() - count;
1619  }
1620  };
1621 
1624  template <class T>
1626  {
1627  private:
1629  T * m_held;
1630  public:
1632  m_sth(s), m_held(nullptr)
1633  {
1634  }
1635  T & get_one()
1636  {
1637  if (m_held == nullptr)
1638  {
1639  m_held = &(m_sth.get_one());
1640  }
1641  return *m_held;
1642  }
1644  {
1645  if (m_held != nullptr)
1646  {
1647  m_sth.release_one();
1648  m_held = nullptr;
1649  }
1650  }
1652  {
1653  if (m_held != nullptr)
1654  {
1655  m_sth.release_one();
1656  }
1657  }
1660  {
1661  get_one();
1662  ptr = m_held;
1663  }
1664  };
1665 
1670  template <class T>
1672  {
1673  private:
1674 
1675  alignas(T) char m_buf[sizeof(T)];
1676  T * m_object = nullptr;
1677 
1678  public:
1679 
1680  maybe_allocate(const bool allocate, const T & t)
1681  {
1682  if (allocate)
1683  {
1684  m_object = new (m_buf) T(t);
1685  }
1686  }
1687 
1688  maybe_allocate(const bool allocate, T && t)
1689  {
1690  if (allocate)
1691  {
1692  m_object = new (m_buf) T(t);
1693  }
1694  }
1695 
1696  template <class ... Args>
1697  maybe_allocate(const bool allocate, Args && ... args)
1698  {
1699  if (allocate)
1700  {
1701  m_object = new (m_buf) T(std::forward<Args>(args)...);
1702  }
1703  }
1704 
1706  {
1707  }
1708 
1709 
1711  {
1712  }
1713 
1715  {
1716  if (&other != this)
1717  {
1718  if (m_object != nullptr)
1719  {
1720  (*m_object) = other.get();
1721  }
1722  else
1723  {
1724  m_object = new (m_buf) T(other.get());
1725  }
1726  }
1727  return (*this);
1728  }
1729 
1730 
1732  {
1733  if (&other != this)
1734  {
1735  if (m_object != nullptr)
1736  {
1737  (*m_object) = other.get();
1738  }
1739  else
1740  {
1741  m_object = new (m_buf) T(other.get());
1742  }
1743  }
1744  return (*this);
1745  }
1746 
1748  {
1749  if (m_object != nullptr)
1750  {
1751  m_object->~T();
1752  }
1753  }
1754 
1755  bool valid() const
1756  {
1757  return m_object != nullptr;
1758  }
1759 
1760  T && get() &&
1761  {
1762  return *m_object;
1763  }
1764 
1765  T & get() &
1766  {
1767  return *m_object;
1768  }
1769 
1770  const T & get() const &
1771  {
1772  return *m_object;
1773  }
1774 
1775  const T * operator ->() const
1776  {
1777  return m_object;
1778  }
1779 
1781  {
1782  return m_object;
1783  }
1784 
1785  operator T & ()
1786  {
1787  return *m_object;
1788  }
1789 
1790  operator T && () &&
1791  {
1792  return *m_object;
1793  }
1794 
1795  operator const T & () const
1796  {
1797  return *m_object;
1798  }
1799  };
1800  }
1801 
1802 }
1803 
1804 #endif // CALORECGPU_HELPERS_H
CaloRecGPU::Helpers::maybe_allocate::maybe_allocate
maybe_allocate(maybe_allocate &&other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1710
AllowedVariables::e
e
Definition: AsgElectronSelectorTool.cxx:37
CaloRecGPU::CUDA_Helpers::CPU_to_GPU_async
void CPU_to_GPU_async(void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
Copies num bytes from source in CPU memory to dest in GPU memory, asynchronously.
temp
Definition: JetEventDict.h:21
CaloRecGPU::Helpers::maybe_allocate::maybe_allocate
maybe_allocate(const bool allocate, T &&t)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1688
CaloRecGPU::Helpers::maybe_allocate::operator=
maybe_allocate & operator=(const maybe_allocate &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1714
CaloRecGPU::Helpers::MemoryManagement
! Handles allocation of a type T, using indexer as the integer type to indicate sizes.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:484
createLinkingScheme.iter
iter
Definition: createLinkingScheme.py:62
CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size
void optimize_block_and_grid_size(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size according to cudaOccupancyMaxPotentialBlockSize.
CaloRecGPU::CUDA_Helpers::supports_cooperative_launches
bool supports_cooperative_launches()
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAGPU, MemoryContext::CPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:557
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CPU, MemoryContext::CUDAPinnedCPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:573
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(SimpleHolder &&other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1193
CaloRecGPU::CUDA_Helpers::GPU_to_CPU_async
void GPU_to_CPU_async(void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
Copies num bytes from source in GPU memory to dest in CPU memory, asynchronously.
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(const SimpleContainer< T, other_indexer, other_context, other_hold > &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:789
fitman.sz
sz
Definition: fitman.py:527
CaloRecGPU::Helpers::MemoryManagement::move_helper
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:604
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CUDAPinnedCPU, dummy >::allocate
static T * allocate(const indexer size)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:518
CaloRecGPU::Helpers::Constants::pi
constexpr T pi
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:318
CaloRecGPU::Helpers::MemoryManagement::unary_helper
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:486
CaloRecGPU::CUDA_Helpers::optimize_block_and_grid_size_for_cooperative_launch
void optimize_block_and_grid_size_for_cooperative_launch(void *func, int &block_size, int &grid_size, const int dynamic_memory=0, const int block_size_limit=0)
Optimizes block and grid size for a cooperative launch.
python.CaloAddPedShiftConfig.args
args
Definition: CaloAddPedShiftConfig.py:47
CaloRecGPU::Helpers::MemoryManagement::deallocate
static void deallocate(T *&arr)
!
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:641
CaloRecGPU::Helpers::int_floor_div
constexpr auto int_floor_div(const T1 num, const T2 denom)
Returns the floor of num/denom, with proper rounding.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:222
CaloRecGPU::CUDA_Helpers::CPU_to_GPU
void CPU_to_GPU(void *dest, const void *const source, const size_t num)
Copies num bytes from source in CPU memory to dest in GPU memory.
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::valid
CUDA_HOS_DEV bool valid() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1111
CaloRecGPU::Helpers::maybe_allocate::maybe_allocate
maybe_allocate(const maybe_allocate &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1705
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::binary_output
void binary_output(stream &s) const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1329
CaloRecGPU::Helpers::int_ceil_div
constexpr auto int_ceil_div(const T1 num, const T2 denom)
Returns the ceiling of num/denom, with proper rounding.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:215
xAOD::short
short
Definition: Vertex_v1.cxx:165
CaloRecGPU::Helpers::separate_thread_holder::resize
void resize(const size_t new_size)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1555
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::textual_output
void textual_output(stream &s, const str &separator=" ") const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:872
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(const bool really_allocate)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1133
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::~SimpleContainer
~SimpleContainer()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:855
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::clear
void clear()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1116
CaloRecGPU::Helpers::separate_thread_holder::add_one_and_return
T & add_one_and_return()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1497
xAOD::char
char
Definition: TrigDecision_v1.cxx:38
CaloRecGPU::Helpers::MemoryManagement::allocate
static T * allocate(const indexer size)
!
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:627
DMTest::C
C_v1 C
Definition: C.h:26
CaloRecGPU::Helpers::SimpleHolder
Holds one objects of type \T in memory context Context.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1074
CaloRecGPU::Helpers::Pearson_hash_16_bit
constexpr unsigned short Pearson_hash_16_bit(const T number)
Calculates a 16-bit Pearson hash from @ number.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:283
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CUDAGPU, dummy >::allocate
static T * allocate(const indexer size)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:504
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::allocate
void allocate()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1121
CaloRecGPU::Helpers::separate_thread_accessor::separate_thread_accessor
separate_thread_accessor(separate_thread_holder< T > &s)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1631
taskman.template
dictionary template
Definition: taskman.py:316
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::valid
CUDA_HOS_DEV bool valid() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1399
CaloRecGPU::CUDA_Helpers::GPU_synchronize
void GPU_synchronize(CUDAStreamPtrHolder stream={})
Synchronizes the stream.
CaloRecGPU::CUDA_Helpers::deallocate_pinned
void deallocate_pinned(void *address)
Deallocates address in CPU pinned memory.
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::indexer
unsigned int indexer
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1369
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(const X &other_v)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1159
CaloRecGPU::Helpers::separate_thread_holder::m_held
std::vector< std::unique_ptr< T > > m_held
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1482
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(SimpleContainer &&other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:781
CaloRecGPU::Helpers::maybe_allocate::operator->
const T * operator->() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1775
CaloRecGPU::Helpers::separate_thread_holder::operate_on_all
void operate_on_all(F &&f, Args &&... args)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1576
CaloRecGPU::Helpers::MemoryContext::CUDAGPU
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:472
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::SimpleContainer
CUDA_HOS_DEV SimpleContainer(T *other_array, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1005
athena.value
value
Definition: athena.py:124
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::textual_input
void textual_input(stream &s)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:891
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(SimpleContainer< T, other_indexer, other_context, true > &&other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:798
perfmonmt-printer.dest
dest
Definition: perfmonmt-printer.py:189
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CPU, dummy >::deallocate
static void deallocate(T *&arr)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:495
python.RatesEmulationExample.lock
lock
Definition: RatesEmulationExample.py:148
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CUDAPinnedCPU, dummy >::deallocate
static void deallocate(T *&arr)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:523
columnar::operator=
AccessorTemplate & operator=(AccessorTemplate &&that)
Definition: VectorColumn.h:88
read_hist_ntuple.t
t
Definition: read_hist_ntuple.py:5
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CPU, dummy >::allocate
static T * allocate(const indexer size)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:490
dbg::ptr
void * ptr(T *p)
Definition: SGImplSvc.cxx:74
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:560
CaloRecGPU::Helpers::MemoryManagement::unary_helper< MemoryContext::CUDAGPU, dummy >::deallocate
static void deallocate(T *&arr)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:509
Args
Definition: test_lwtnn_fastgraph.cxx:12
CaloRecGPU::Helpers::maybe_allocate::maybe_allocate
maybe_allocate(const bool allocate, Args &&... args)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1697
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1129
drawFromPickle.exp
exp
Definition: drawFromPickle.py:36
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:761
x
#define x
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAGPU, MemoryContext::CUDAGPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:549
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(SimpleHolder< X, other_context, true > &&other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1201
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::indexer
unsigned int indexer
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1081
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::binary_input
void binary_input(stream &s)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:938
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::binary_output
void binary_output(stream &s) const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:920
dq_defect_bulk_create_defects.line
line
Definition: dq_defect_bulk_create_defects.py:27
AthenaPoolTestWrite.stream
string stream
Definition: AthenaPoolTestWrite.py:12
python.CaloAddPedShiftConfig.type
type
Definition: CaloAddPedShiftConfig.py:42
XMLtoHeader.count
count
Definition: XMLtoHeader.py:84
makeTRTBarrelCans.y1
tuple y1
Definition: makeTRTBarrelCans.py:15
Monitored::X
@ X
Definition: HistogramFillerUtils.h:24
CaloRecGPU::Helpers::MemoryContext::CUDAPinnedCPU
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:476
CaloRecGPU::CUDA_Helpers::deallocate
void deallocate(void *address)
Deallocates address in GPU memory.
CaloRecGPU::Helpers::MemoryManagement::move_helper< C, C, dummy >::move
static void move(T *&dest, T *&source, const indexer)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:618
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::textual_input
void textual_input(stream &s)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1298
CaloRecGPU::Helpers::Pearson_hash
constexpr unsigned char Pearson_hash(const T number)
Calculates a Pearson hash from @ number.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:256
CaloRecGPU::Helpers::MemoryContext::CUDAGPU::name
constexpr static char const * name
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:473
python.setupRTTAlg.size
int size
Definition: setupRTTAlg.py:39
calibdata.valid
list valid
Definition: calibdata.py:44
CaloRecGPU::Helpers::separate_thread_holder::filled_size
size_t filled_size() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1606
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(X *other_p)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1149
histSizes.code
code
Definition: histSizes.py:129
InDetDD::operator*
SiLocalPosition operator*(const SiLocalPosition &position, const double factor)
Definition: SiLocalPosition.cxx:98
python.utils.AtlRunQueryDQUtils.p
p
Definition: AtlRunQueryDQUtils.py:209
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CPU, MemoryContext::CUDAGPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:541
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:757
CaloRecGPU::Helpers::separate_thread_holder::m_mutex
std::shared_mutex m_mutex
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1495
CUDA_HOS_DEV
#define CUDA_HOS_DEV
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:101
CaloRecGPU::CUDA_Helpers::GPU_name
std::string GPU_name()
CaloRecGPU::Helpers::MemoryContext::CUDAPinnedCPU::name
constexpr static char const * name
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:477
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::size
CUDA_HOS_DEV indexer size() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:720
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::textual_output
void textual_output(stream &s, const str &separator=" ") const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1277
lumiFormat.i
int i
Definition: lumiFormat.py:85
z
#define z
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::~SimpleHolder
~SimpleHolder()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1259
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::binary_input
void binary_input(stream &s)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1347
CaloRecGPU::Helpers::separate_thread_holder::m_thread_equivs
std::vector< typename std::thread::id > m_thread_equivs
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1483
Base
CaloRecGPU::CUDA_Helpers::GPU_to_GPU
void GPU_to_GPU(void *dest, const void *const source, const size_t num)
Copies num bytes from source to dest, both in GPU memory.
CalibDbCompareRT.dummy
dummy
Definition: CalibDbCompareRT.py:59
CaloRecGPU::Helpers::MemoryContext::CPU::name
constexpr static char const * name
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:469
file
TFile * file
Definition: tile_monitor.h:29
CaloRecGPU::CUDA_Helpers::GPU_to_CPU
void GPU_to_CPU(void *dest, const void *const source, const size_t num)
Copies num bytse from source in GPU memory to dest in CPU memory.
CaloRecGPU::Helpers::maybe_allocate::valid
bool valid() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1755
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::clear
void clear()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:735
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(const SimpleHolder< X, other_context, other_hold > &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1180
CaloRecGPU::CUDA_Helpers::supports_dynamic_parallelism
bool supports_dynamic_parallelism()
hist_file_dump.f
f
Definition: hist_file_dump.py:140
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::SimpleHolder
CUDA_HOS_DEV SimpleHolder()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1406
CaloRecGPU::Helpers::separate_thread_holder::get_one
T & get_one()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1506
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::m_object
T * m_object
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1083
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::SimpleContainer
CUDA_HOS_DEV SimpleContainer(const SimpleContainer< T, other_indexer, Context, other_hold > &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1012
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAPinnedCPU, MemoryContext::CUDAPinnedCPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:581
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::m_array
T * m_array
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:710
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::m_object
T * m_object
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1371
CaloRecGPU::Helpers::SimpleContainer
Holds a run-time amount of objects of type \T, measuring sizes with indexer, in memory context Contex...
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:705
calibdata.exit
exit
Definition: calibdata.py:235
compute_lumi.denom
denom
Definition: compute_lumi.py:76
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CPU, MemoryContext::CPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:533
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(T *other_array, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:769
CaloRecGPU::Helpers::MemoryContext::CPU
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:468
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAPinnedCPU, MemoryContext::CUDAGPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:589
trigbs_pickEvents.num
num
Definition: trigbs_pickEvents.py:76
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::size
CUDA_HOS_DEV indexer size() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:979
MuonR4::SegmentFit::ParamDefs::y0
@ y0
CaloRecGPU::CUDA_Helpers::allocate
void * allocate(const size_t num)
Allocates and returns the address of num bytes from GPU memory.
CaloRecGPU::Helpers::maybe_allocate::m_object
T * m_object
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1676
CaloRecGPU::CUDA_Helpers::allocate_pinned
void * allocate_pinned(const size_t num)
Allocates and returns the address of num bytes from CPU pinned memory.
CaloRecGPU::Helpers::Constants::sqrt2
constexpr T sqrt2
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:321
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAPinnedCPU, MemoryContext::CPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:565
id
SG::auxid_t id
Definition: Control/AthContainers/Root/debug.cxx:239
python.selection.number
number
Definition: selection.py:20
name
std::string name
Definition: Control/AthContainers/Root/debug.cxx:240
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:76
CaloRecGPU::Helpers::separate_thread_holder
Manages objects of type T in a thread-safe way, ensuring that there's an object available for each se...
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1480
CaloRecGPU::Helpers::separate_thread_holder::get_for_thread
T & get_for_thread() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1525
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::m_array
T * m_array
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:969
columnar::operator[]
ObjectId< CI, CM > operator[](std::size_t) const noexcept
Definition: ObjectRange.h:169
CaloRecGPU::Helpers::maybe_allocate::get
const T & get() const &
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1770
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::SimpleContainer
CUDA_HOS_DEV SimpleContainer()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:996
CaloRecGPU::Helpers::MemoryManagement::move
static void move(T *&dest, T *&source, const indexer sz)
!
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:677
CaloRecGPU::Helpers::separate_thread_accessor::separate_thread_accessor
separate_thread_accessor(separate_thread_holder< T > &s, T *&ptr)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1658
CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:110
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::m_size
indexer m_size
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:712
RTTAlgmain.address
address
Definition: RTTAlgmain.py:55
CaloRecGPU::CUDA_Helpers::GPU_to_GPU_async
void GPU_to_GPU_async(void *dest, const void *const source, const size_t num, CUDAStreamPtrHolder stream={})
Copies num bytes from source to dest, both in GPU memory, asynchronously.
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::SimpleHolder
CUDA_HOS_DEV SimpleHolder(const SimpleHolder< X, Context, other_hold > &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1425
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::SimpleContainer
SimpleContainer(const SimpleContainer &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:775
CaloRecGPU::Helpers::SimpleHolder< T, Context, true >::SimpleHolder
SimpleHolder(const SimpleHolder &other)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1165
CaloRecGPU::Helpers::MemoryManagement::copy
static void copy(T *dest, const T *const source, const indexer sz)
!
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:659
CaloRecGPU::Helpers::maybe_allocate::maybe_allocate
maybe_allocate(const bool allocate, const T &t)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1680
CaloRecGPU::Helpers::maybe_allocate
Possibly holds an object in its internal buffer.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1672
a
TList * a
Definition: liststreamerinfos.cxx:10
InDetDD::other
@ other
Definition: InDetDD_Defs.h:16
VKalVrtAthena::varHolder_detail::clear
void clear(T &var)
Definition: NtupleVars.h:48
y
#define y
python.CaloAddPedShiftConfig.int
int
Definition: CaloAddPedShiftConfig.py:45
CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder::CUDAStreamPtrHolder
CUDAStreamPtrHolder()=default
CaloRecGPU::Helpers::separate_thread_holder::release_one
void release_one()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1541
CaloRecGPU::Helpers::maybe_allocate::m_buf
char m_buf[sizeof(T)]
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1675
F
#define F(x, y, z)
Definition: MD5.cxx:112
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, false >::m_size
indexer m_size
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:971
python.CaloCondTools.log
log
Definition: CaloCondTools.py:20
ExpressionParsing::ast::operand
boost::variant< nil, double, unsigned int, bool, std::string, boost::recursive_wrapper< unaryexpr_ >, boost::recursive_wrapper< expression > > operand
Definition: ParsingInternals.h:43
CaloRecGPU::Helpers::compile_time_pow2
constexpr Base compile_time_pow2(const Exp exp)
Returns 2 to the power of exp.
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:229
CaloRecGPU::Helpers::MemoryManagement::copy_helper
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:529
python.SystemOfUnits.s
float s
Definition: SystemOfUnits.py:147
CaloRecGPU::Helpers::MemoryManagement::copy_helper< MemoryContext::CUDAGPU, MemoryContext::CUDAPinnedCPU, dummy >::copy
static void copy(T *dest, const T *const source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:597
CaloRecGPU::Helpers::separate_thread_accessor::release_one
void release_one()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1643
copySelective.source
string source
Definition: copySelective.py:31
str
Definition: BTagTrackIpAccessor.cxx:11
calibdata.copy
bool copy
Definition: calibdata.py:26
CaloRecGPU::Helpers::MemoryManagement::move_helper::move
static void move(T *&dest, T *&source, const indexer sz)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:608
CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder::CUDAStreamPtrHolder
CUDAStreamPtrHolder(T *p)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:125
CaloRecGPU::CUDA_Helpers::CUDAStreamPtrHolder::ptr
void * ptr
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:111
CaloRecGPU::Helpers::Constants::inv_sqrt2
constexpr T inv_sqrt2
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:325
CaloRecGPU::Helpers::separate_thread_accessor::m_held
T * m_held
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1629
python.PyAthena.obj
obj
Definition: PyAthena.py:132
CaloRecGPU
Definition: BaseDefinitions.h:11
CaloRecGPU::Helpers::separate_thread_holder::available_size
size_t available_size() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1591
xAOD::bool
setBGCode setTAP setLVL2ErrorBits bool
Definition: TrigDecision_v1.cxx:60
CaloRecGPU::Helpers::separate_thread_accessor::m_sth
separate_thread_holder< T > & m_sth
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1628
CaloRecGPU::Helpers::separate_thread_accessor::get_one
T & get_one()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1635
CaloRecGPU::Helpers::separate_thread_accessor::~separate_thread_accessor
~separate_thread_accessor()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1651
TSU::T
unsigned long long T
Definition: L1TopoDataTypes.h:35
python.SystemOfUnits.m
float m
Definition: SystemOfUnits.py:106
CaloRecGPU::Helpers::SimpleHolder< T, Context, false >::SimpleHolder
CUDA_HOS_DEV SimpleHolder(X *other_p)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1416
CaloRecGPU::Helpers::maybe_allocate::get
T && get() &&
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1760
CaloRecGPU::Helpers::separate_thread_accessor
!
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1626
CaloRecGPU::Helpers::maybe_allocate::~maybe_allocate
~maybe_allocate()
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1747
python.SystemOfUnits.L
float L
Definition: SystemOfUnits.py:92
CaloRecGPU::Helpers::SimpleContainer< T, indexer, Context, true >::resize
void resize(const indexer new_size)
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:741
CaloRecGPU::Helpers::separate_thread_holder::held_size
size_t held_size() const
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1585
CaloRecGPU::Helpers::maybe_allocate::get
T & get() &
Definition: Calorimeter/CaloRecGPU/CaloRecGPU/Helpers.h:1765