ATLAS Offline Software
IterateUntilCondition Namespace Reference

Classes

struct  BasicStorage
struct  Holder
 Condition, Before, After and Funcs must all be functor classes.
struct  Storage

Functions

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void cooperative_kernel_impl (const Holder< Condition, Before, After, Funcs... > &, Args &&... args)
template<class HolderLike, class ... Args>
__global__ void cooperative_kernel (Args ... args)
__device__ bool try_lock_mutex (Storage *store)
__device__ void unlock_mutex (Storage *store)
__device__ void disable_mutex (Storage *store)
__device__ bool check_if_participating (Storage *store)
template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void normal_kernel_impl (const Holder< Condition, Before, After, Funcs... > &, Storage *store, Args &&... args)
template<class HolderLike, class ... Args>
__global__ void normal_kernel (Storage *store, Args ... args)
template<class Condition, class Before, class After, class ... Funcs>
auto make_holder (Condition c, Before b, After a, Funcs ... fs)
 Must pass functors!

Function Documentation

◆ check_if_participating()

__device__ bool IterateUntilCondition::check_if_participating ( Storage * store)
inline

Definition at line 117 of file IterateUntilCondition.h.

{
    const bool locked = try_lock_mutex(store);

    unsigned int old_count = Storage::NumMaxBlocks;

    //While the poll is still open, register this block:
    //take the next participation slot and record it
    //under the physical block index.
    if (atomicOr(&store->poll_closed, 0U) == 0)
    {
        old_count = atomicAdd(&store->count, 1);
        store->block_indices[blockIdx.x] = old_count;
        unlock_mutex(store);
    }
    else
    {
        if (locked)
        {
            unlock_mutex(store);
        }
        return false;
    }

    //If the poll was closed concurrently with our registration,
    //re-assert the closed flag under the mutex before continuing.
    if (atomicOr(&store->poll_closed, 0U))
    {
        try_lock_mutex(store);
        atomicOr(&store->poll_closed, 1U);
        //disable_mutex(store);
        unlock_mutex(store);
    }

    //Only the first NumMaxBlocks registered blocks participate.
    return (old_count < Storage::NumMaxBlocks);
}
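The layout of Storage is not documented on this page; judging only from how its fields are used here, it plausibly resembles the following sketch. Array extents, exact types and the constants' values are assumptions, not the actual header:

//Reconstructed from usage on this page; an assumption, not the real struct.
struct Storage
{
    static constexpr unsigned int NumMaxBlocks = 1024;    //cap on participating blocks (value assumed)
    static constexpr unsigned int MaxTotalBlocks = 65536; //cap on launched blocks (hypothetical)

    unsigned int mutex_ticket;                  //next ticket handed out by try_lock_mutex
    unsigned int mutex_check;                   //ticket currently served; bit 31 disables the mutex
    unsigned int poll_closed;                   //non-zero once the participation poll has closed
    unsigned int count;                         //how many blocks registered while the poll was open
    unsigned int block_indices[MaxTotalBlocks]; //logical block index, keyed by blockIdx.x
    unsigned int wait_flags[NumMaxBlocks];      //per-logical-block flags for the software barrier
};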

◆ cooperative_kernel()

template<class HolderLike, class ... Args>
__global__ void IterateUntilCondition::cooperative_kernel ( Args ... args)

Definition at line 65 of file IterateUntilCondition.h.

{
    cooperative_kernel_impl(HolderLike{}, args...);
}

◆ cooperative_kernel_impl()

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void IterateUntilCondition::cooperative_kernel_impl ( const Holder< Condition, Before, After, Funcs... > & ,
Args &&... args )

Definition at line 43 of file IterateUntilCondition.h.

{
    cooperative_groups::grid_group grid = cooperative_groups::this_grid();

    Condition checker;

    //Runs once, before the first condition check.
    Before{}(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);

    while (!checker(gridDim.x, blockIdx.x, std::forward<Args>(args)...))
    {
        auto helper = [&](auto func)
        {
            func(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);
            //Grid-wide synchronization after each step;
            //this is what requires the cooperative launch.
            grid.sync();
        };

        //Run each of the Funcs in order, synchronizing in between.
        (helper(Funcs{}), ...);
    }

    //Runs once, after the condition has become true.
    After{}(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);
}
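Because cooperative_kernel_impl calls grid.sync(), the kernel has to be started through cudaLaunchCooperativeKernel rather than the usual <<<...>>> syntax. A minimal, self-contained launch sketch with trivial placeholder functors (all names hypothetical; see make_holder() below for the full functor contract):

#include <cuda_runtime.h>
#include "IterateUntilCondition.h"

//Trivial placeholder functors: the condition is immediately true,
//so the kernel performs no iterations at all.
struct AlwaysDone
{
    __device__ bool operator()(unsigned int, unsigned int) const { return true; }
};

struct DoNothing
{
    __device__ void operator()(unsigned int, unsigned int, AlwaysDone &) const {}
};

using TrivialHolder = decltype(IterateUntilCondition::make_holder(AlwaysDone{}, DoNothing{}, DoNothing{}));

int main()
{
    //grid.sync() is only valid for cooperative launches, and the whole
    //grid must be resident on the device for the launch to succeed.
    cudaLaunchCooperativeKernel(reinterpret_cast<void *>(&IterateUntilCondition::cooperative_kernel<TrivialHolder>),
                                dim3(16), dim3(128), nullptr, 0, nullptr);
    return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}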

◆ disable_mutex()

__device__ void IterateUntilCondition::disable_mutex ( Storage * store)
inline

Definition at line 112 of file IterateUntilCondition.h.

{
    //Set bit 31 of the check value: all pending and future
    //try_lock_mutex calls will then report failure.
    atomicOr(&store->mutex_check, 0x80000000U);
}

◆ make_holder()

template<class Condition, class Before, class After, class ... Funcs>
auto IterateUntilCondition::make_holder ( Condition c,
Before b,
After a,
Funcs ... fs )

Must pass functors!

Each functor receives two unsigned ints, the grid size and the logical block index (for simplicity, only 1D grids are handled), a reference to a mutable Condition (except for Condition itself), and any arguments passed to the returned holder's execute. The functors must not rely on the actual block index, only on the logical block index they are given; thread indices within the block keep their usual meaning. Condition must return a bool, with true meaning the iterations have finished; all the others return void. Condition may be (locally) stateful, since the same local instance is used throughout the iterations, while the others must be stateless (Funcs in particular are default-constructed anew every iteration). A sketch of this contract follows the definition below.

Definition at line 318 of file IterateUntilCondition.h.

{
    return Holder<Condition, Before, After, Funcs...> {};
}
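As an illustration of this contract, the following sketch (all names hypothetical) runs ten iterations over a kernel taking a single float * argument:

//Hypothetical functors; names and logic are illustrative only.
struct RunTenIterations
{
    unsigned int iterations = 0;
    //Condition: may keep local state, since the same instance
    //is reused throughout the iterations.
    __device__ bool operator()(unsigned int /*grid_size*/, unsigned int /*block_index*/, float * /*data*/)
    {
        return iterations++ >= 10;
    }
};

struct ZeroData
{
    //Before: runs once, ahead of the first condition check.
    __device__ void operator()(unsigned int /*grid_size*/, unsigned int block_index, RunTenIterations & /*checker*/, float * data)
    {
        //Use the logical block index that is passed in, never blockIdx.x.
        data[block_index * blockDim.x + threadIdx.x] = 0.f;
    }
};

struct AddOne
{
    //Func: one step of the iteration, default-constructed anew each
    //time, so it must be stateless.
    __device__ void operator()(unsigned int /*grid_size*/, unsigned int block_index, RunTenIterations & /*checker*/, float * data)
    {
        data[block_index * blockDim.x + threadIdx.x] += 1.f;
    }
};

struct DoNothingAfter
{
    //After: runs once, when the condition has become true.
    __device__ void operator()(unsigned int, unsigned int, RunTenIterations &, float *) const {}
};

//The returned Holder is an empty tag type: the arguments are used
//only for template argument deduction and are discarded.
using TenIterationsHolder = decltype(IterateUntilCondition::make_holder(RunTenIterations{}, ZeroData{}, DoNothingAfter{}, AddOne{}));

Since the holder carries no state, it can be default-constructed inside the kernels, which is exactly what cooperative_kernel and normal_kernel do with HolderLike{}.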

◆ normal_kernel()

template<class HolderLike, class ... Args>
__global__ void IterateUntilCondition::normal_kernel ( Storage * store,
Args ... args )

Definition at line 247 of file IterateUntilCondition.h.

{
    normal_kernel_impl(HolderLike{}, store, args...);
}

◆ normal_kernel_impl()

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void IterateUntilCondition::normal_kernel_impl ( const Holder< Condition, Before, After, Funcs... > & ,
Storage * store,
Args &&... args )

Definition at line 155 of file IterateUntilCondition.h.

{
    __shared__ bool is_participating;

    const bool is_reference_thread = (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);

    //One thread per block polls for a participation slot.
    if (is_reference_thread)
    {
        is_participating = check_if_participating(store);
    }

    __syncthreads();

    const unsigned int this_block_index = store->block_indices[blockIdx.x];
    const unsigned int total_blocks = min(store->count, Storage::NumMaxBlocks);

    if (is_participating)
    {
        const bool is_reference_block = (this_block_index == 0);

        const unsigned int this_thread_index = threadIdx.z * blockDim.y * blockDim.x +
                                               threadIdx.y * blockDim.x +
                                               threadIdx.x;

        const unsigned int num_threads_per_block = blockDim.x * blockDim.y * blockDim.z;

        Condition checker;

        Before{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);

        while (!checker(total_blocks, this_block_index, std::forward<Args>(args)...))
        {
            auto helper = [&](auto func)
            {
                func(total_blocks, this_block_index, checker, std::forward<Args>(args)...);

                //Note: this early return skips the software barrier
                //below, leaving it as dead code for now.
                return;

                //Technically, for the foreseeable future,
                //this could be simply the if,
                //as the maximum number of concurrent blocks
                //in all devices is smaller than 1024...
                if (is_reference_block)
                {
                    //The reference block waits for every other block
                    //to raise its flag, then clears all the flags.
                    for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
                    {
                        while (store->wait_flags[block_to_check] == 0);
                        //When porting to non-CUDA, this may need to be some form of atomic load.
                    }

                    __syncthreads();

                    for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
                    {
                        atomicAnd(&(store->wait_flags[block_to_check]), 0U);
                    }
                }
                else
                {
                    __syncthreads();

                    if (is_reference_thread)
                    {
                        //Raise this block's flag and wait for the
                        //reference block to clear it again.
                        atomicOr(&(store->wait_flags[this_block_index]), 1U);

                        while (store->wait_flags[this_block_index] != 0);
                        //When porting to non-CUDA, this may need to be some form of atomic load.
                    }

                    __syncthreads();
                }
            };

            (helper(Funcs{}), ...);
        }

        After{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
    }

#if CALORECGPU_ITERATE_UNTIL_CONDITION_DEBUG
    if (is_reference_thread)
    {
        printf("%d | %d | %u %u \n", blockIdx.x, static_cast<int>(is_participating), total_blocks, this_block_index);
    }
#endif
}
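In contrast to the cooperative path, normal_kernel is launched with the ordinary <<<...>>> syntax and may be oversubscribed: any block that registers after the first Storage::NumMaxBlocks simply skips the work. A launch sketch, reusing the hypothetical TrivialHolder from the cooperative_kernel() example above and assuming the Storage has to start zero-initialized:

//TrivialHolder as in the cooperative_kernel() sketch above (hypothetical).
void launch_normal(IterateUntilCondition::Storage * device_store)
{
    //Zeroing the Storage opens the participation poll and puts the
    //ticket mutex into its initial (ticket 0 served) state.
    cudaMemset(device_store, 0, sizeof(IterateUntilCondition::Storage));

    //Oversubscription is fine: non-participating blocks return early.
    IterateUntilCondition::normal_kernel<TrivialHolder><<<2048, 128>>>(device_store);
    cudaDeviceSynchronize();
}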

◆ try_lock_mutex()

__device__ bool IterateUntilCondition::try_lock_mutex ( Storage * store)
inline

Definition at line 86 of file IterateUntilCondition.h.

{
    const unsigned int ticket = atomicAdd(&store->mutex_ticket, 1U);

    unsigned int last_check = 0;

    bool was_once_valid = false;

    int count = 0;

    //Spin until our ticket is served or the mutex is disabled (bit 31 set).
    do
    {
        last_check = atomicOr(&store->mutex_check, 0U);
        was_once_valid = !(last_check & 0x80000000U);
        ++count;
    }
    while (last_check < ticket && !(last_check & 0x80000000U));

    //Report failure if the mutex was ever disabled
    //or if we had to poll too many times.
    return was_once_valid && count < 10000;
}

◆ unlock_mutex()

__device__ void IterateUntilCondition::unlock_mutex ( Storage * store)
inline

Definition at line 107 of file IterateUntilCondition.h.

{
    //Serve the next ticket.
    atomicAdd(&store->mutex_check, 1U);
}
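
Together with disable_mutex(), these functions form a small ticket lock: try_lock_mutex takes a ticket and spins until that ticket is served, reporting failure if the lock was disabled (bit 31 of mutex_check) or if it had to poll 10000 times or more, and unlock_mutex serves the next ticket. An illustrative device-side sketch of the pattern used in check_if_participating(), with hypothetical arguments:

//Illustrative only: guard a non-atomic multi-field update with the ticket lock.
__device__ void guarded_update(IterateUntilCondition::Storage * store,
                               unsigned int * value_a, unsigned int * value_b)
{
    if (IterateUntilCondition::try_lock_mutex(store))
    {
        //Exclusive section: both fields are updated consistently.
        *value_a += 1;
        *value_b += 1;
        IterateUntilCondition::unlock_mutex(store);
    }
    //On failure (lock disabled or polled too long), the caller must
    //fall back to some other strategy rather than touch the data.
}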