ATLAS Offline Software
IterateUntilCondition Namespace Reference

Classes

struct  BasicStorage
struct  Holder
 Condition, Before, After and Funcs must all be functor classes. More...
struct  Storage

Functions

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void cooperative_kernel_impl (const Holder< Condition, Before, After, Funcs... > &, Args &&... args)
template<class HolderLike, class ... Args>
__global__ void cooperative_kernel (Args ... args)
__device__ bool try_lock_mutex (Storage *store)
__device__ void unlock_mutex (Storage *store)
__device__ void disable_mutex (Storage *store)
__device__ bool check_if_participating (Storage *store)
template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void normal_kernel_impl (const Holder< Condition, Before, After, Funcs... > &, Storage *store, Args &&... args)
template<class HolderLike, class ... Args>
__global__ void normal_kernel (Storage *store, Args ... args)
template<class Condition, class Before, class After, class ... Funcs>
auto make_holder (Condition c, Before b, After a, Funcs ... fs)
 Must pass functors!

Function Documentation

◆ check_if_participating()

__device__ bool IterateUntilCondition::check_if_participating ( Storage * store)
inline

Definition at line 118 of file IterateUntilCondition.h.

119 {
120 const bool locked = try_lock_mutex(store);
121
122 unsigned int old_count = Storage::NumMaxBlocks;
123
124 volatile unsigned int * poll_closed_ptr = static_cast<volatile unsigned int *>(&store->poll_closed);
125
126 if (*poll_closed_ptr == 0)
127 {
128 old_count = atomicAdd(&store->count, 1);
129 store->block_indices[blockIdx.x] = old_count;
130 unlock_mutex(store);
131 }
132 else
133 {
134 if (locked)
135 {
136 unlock_mutex(store);
137 }
138 return false;
139 }
140
141 if (*poll_closed_ptr == 0)
142 {
143 try_lock_mutex(store);
144 *poll_closed_ptr = 1;
145 disable_mutex(store);
146 unlock_mutex(store);
147 }
148
149 return (old_count < Storage::NumMaxBlocks);
150 }

◆ cooperative_kernel()

template<class HolderLike, class ... Args>
__global__ void IterateUntilCondition::cooperative_kernel ( Args ... args)

Definition at line 65 of file IterateUntilCondition.h.

66 {
67 cooperative_kernel_impl(HolderLike{}, args...);
68 }
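Because cooperative_kernel_impl (documented below) synchronizes across the whole grid with cooperative_groups::grid_group::sync(), this kernel must be started through a cooperative launch rather than the usual <<<...>>> syntax, and the grid must be small enough to be co-resident on the device. A host-side sketch, assuming a holder type MyHolder and a device buffer d_data (hypothetical names, not part of IterateUntilCondition.h):

//Indicative cooperative launch; requires <cuda_runtime.h> and IterateUntilCondition.h, error checking omitted.
float * d_data = nullptr;                    //assumed to point to valid device memory
void * kernel_args[] = { &d_data };
const dim3 grid(64), block(256);             //grid must not exceed the device's co-resident block capacity

cudaLaunchCooperativeKernel(reinterpret_cast<const void *>(
                                &IterateUntilCondition::cooperative_kernel<MyHolder, float *>),
                            grid, block, kernel_args, 0, 0);
cudaDeviceSynchronize();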

◆ cooperative_kernel_impl()

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void IterateUntilCondition::cooperative_kernel_impl ( const Holder< Condition, Before, After, Funcs... > & ,
Args &&... args )

Definition at line 43 of file IterateUntilCondition.h.

44 {
45 cooperative_groups::grid_group grid = cooperative_groups::this_grid();
46
47 Condition checker;
48
49 Before{}(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);
50
51 while (!checker(gridDim.x, blockIdx.x, std::forward<Args>(args)...))
52 {
53 auto helper = [&](auto func)
54 {
55 func(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);
56 grid.sync();
57 };
58
59 (helper(Funcs{}), ...);
60 }
61 After{}(gridDim.x, blockIdx.x, checker, std::forward<Args>(args)...);
62 }

◆ disable_mutex()

__device__ void IterateUntilCondition::disable_mutex ( Storage * store)
inline

Definition at line 112 of file IterateUntilCondition.h.

113 {
114 volatile unsigned int * ptr = static_cast<volatile unsigned int *>(&store->mutex_check);
115 *ptr = *ptr | 0x80000000U;
116 }

◆ make_holder()

template<class Condition, class Before, class After, class ... Funcs>
auto IterateUntilCondition::make_holder ( Condition c,
Before b,
After a,
Funcs ... fs )

Must pass functors!

They will receive two unsigned ints with the (logical) grid size and block index (for simplicity, we only handle 1D grids), a reference to a mutable Condition (except for Condition itself), and any arguments passed when the returned holder is executed (the Args forwarded by the kernels documented above). The functors should not use the actual hardware block indices, only the block index they are given, while the thread indices inside the block are respected. Condition must return a boolean, with true meaning we have reached the end of the iterations; all the other functors must return void. Condition may be (locally) stateful, since the same local instance is used throughout the iterations, while the others must be stateless (Funcs in particular are default-constructed anew on every iteration). See the sketch after the definition listing below for an illustration of the expected signatures.

Definition at line 335 of file IterateUntilCondition.h.

336 {
337 return Holder<Condition, Before, After, Funcs...> {};
338 }
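As an illustration of the interface described above, here is a minimal sketch using hypothetical functors (StopAfterN, DoNothing and ScaleStep) operating on a hypothetical device buffer of 1024 floats; none of these names are part of IterateUntilCondition.h, and the launch line is only indicative.

//Hypothetical Condition: stops after a fixed number of iterations.
struct StopAfterN
{
  unsigned int iterations_done = 0;
  __device__ bool operator() (const unsigned int /*grid_size*/, const unsigned int /*block_index*/, float * /*data*/)
  {
    return ++iterations_done > 4;
  }
};

//Hypothetical Before/After: same arguments as Funcs, returns void.
struct DoNothing
{
  __device__ void operator() (const unsigned int, const unsigned int, StopAfterN &, float *) const
  {
  }
};

//Hypothetical Func: uses the logical grid size and block index it is given, never blockIdx.x.
struct ScaleStep
{
  __device__ void operator() (const unsigned int grid_size, const unsigned int block_index, StopAfterN &, float * data) const
  {
    for (unsigned int i = block_index * blockDim.x + threadIdx.x; i < 1024; i += grid_size * blockDim.x)
      {
        data[i] *= 0.5f;
      }
  }
};

//The holder only encodes the functor types.
using MyHolder = decltype(IterateUntilCondition::make_holder(StopAfterN{}, DoNothing{}, DoNothing{}, ScaleStep{}));

//Indicative launch of the non-cooperative variant, assuming d_store and d_data are device-allocated:
//IterateUntilCondition::normal_kernel<MyHolder><<<n_blocks, n_threads>>>(d_store, d_data);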

◆ normal_kernel()

template<class HolderLike, class ... Args>
__global__ void IterateUntilCondition::normal_kernel ( Storage * store,
Args ... args )

Definition at line 264 of file IterateUntilCondition.h.

265 {
266 normal_kernel_impl(HolderLike{}, store, args...);
267 }

◆ normal_kernel_impl()

template<class Condition, class Before, class After, class ... Funcs, class ... Args>
__device__ void IterateUntilCondition::normal_kernel_impl ( const Holder< Condition, Before, After, Funcs... > & ,
Storage * store,
Args &&... args )

Definition at line 153 of file IterateUntilCondition.h.

154 {
155 __shared__ bool is_participating;
156
157 const bool is_reference_thread = (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);
158
159 if (is_reference_thread)
160 {
161 is_participating = check_if_participating(store);
162 }
163
164 __syncthreads();
165
166 const unsigned int this_block_index = store->block_indices[blockIdx.x];
167 const unsigned int total_blocks = min(*static_cast<volatile unsigned int *>(&store->count), Storage::NumMaxBlocks);
168
169 if (is_participating)
170 {
171 const bool is_reference_block = (this_block_index == 0);
172
173 const unsigned int this_thread_index = threadIdx.z * blockDim.y * blockDim.x +
174 threadIdx.y * blockDim.x +
175 threadIdx.x;
176
177 const unsigned int num_threads_per_block = blockDim.x * blockDim.y * blockDim.z;
178
179 Condition checker;
180
181
182 Before{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
183
184 while (!checker(total_blocks, this_block_index, std::forward<Args>(args)...))
185 {
186 auto helper = [&](auto func)
187 {
188
189 func(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
190
191 //Technically, for the foreseeable future,
192 //this could be simply the if,
193 //as the maximum number of concurrent blocks
194 //in all devices is smaller than 1024...
195 if (is_reference_block)
196 {
197 for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
198 {
199 unsigned int last_check = 0;
200
201 volatile unsigned int * to_check = static_cast<volatile unsigned int*>(&store->wait_flags[block_to_check]);
202
203 do
204 {
205 last_check = *to_check;
206 }
207 while (last_check == 0);
208 //When porting to non-CUDA, this may need to be some form of atomic load.
209 }
210
211 __syncthreads();
212
213 for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
214 {
215 __threadfence();
216 //*static_cast<volatile unsigned int*>(&store->wait_flags[block_to_check]) = 0;
217 atomicAnd(&(store->wait_flags[block_to_check]), 0U);
218 }
219
220 __syncthreads();
221 }
222 else
223 {
224 __syncthreads();
225
226 if (is_reference_thread)
227 {
228 atomicOr(&(store->wait_flags[this_block_index]), 1U);
229
230 unsigned int last_check = 1;
231
232 volatile unsigned int * to_check = static_cast<volatile unsigned int*>(&store->wait_flags[this_block_index]);
233
234 __threadfence();
235
236 do
237 {
238 last_check = *to_check;
239 }
240 while (last_check != 0);
241 //When porting to non-CUDA, this may need to be some form of atomic load.
242 }
243
244 __syncthreads();
245 }
246
247 };
248
249 (helper(Funcs{}), ...);
250 }
251
252 After{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
253 }
254
255#if CALORECGPU_ITERATE_UNTIL_CONDITION_DEBUG
256 if (is_reference_thread)
257 {
258 printf("%d | %d | %u %u \n", blockIdx.x, static_cast<int>(is_participating), total_blocks, this_block_index);
259 }
260#endif
261 }
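For orientation, the device functions above access the following Storage members: mutex_ticket and mutex_check (the ticket lock), poll_closed and count (registration of participating blocks), block_indices (logical index per hardware block), wait_flags (the software barrier) and the NumMaxBlocks constant. The layout below is only an illustrative sketch consistent with those accesses; the array sizes, member order and the value of NumMaxBlocks are assumptions, and the actual definition is the Storage struct in IterateUntilCondition.h.

//Illustrative only; not the actual struct.
struct StorageSketch
{
  static constexpr unsigned int NumMaxBlocks = 1024;  //assumed value
  unsigned int mutex_ticket;                          //next ticket handed out by try_lock_mutex
  unsigned int mutex_check;                           //ticket currently served; top bit set once the mutex is disabled
  unsigned int poll_closed;                           //set to 1 once the set of participating blocks is frozen
  unsigned int count;                                 //number of blocks that registered before the poll closed
  unsigned int block_indices[NumMaxBlocks];           //logical index assigned to each hardware block (real sizing may differ)
  unsigned int wait_flags[NumMaxBlocks];              //per-block flags for the software barrier in normal_kernel_impl
};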

◆ try_lock_mutex()

__device__ bool IterateUntilCondition::try_lock_mutex ( Storage * store)
inline

Definition at line 86 of file IterateUntilCondition.h.

87 {
88 const unsigned int ticket = atomicAdd(&store->mutex_ticket, 1U);
89
90 unsigned int last_check = 0;
91
92 bool was_once_valid = false;
93
94 volatile unsigned int * to_check = static_cast<volatile unsigned int *>(&store->mutex_check);
95
96 do
97 {
98 last_check = *to_check;
99 was_once_valid = !(last_check & 0x80000000U);
100 }
101 while (last_check < ticket && !(last_check & 0x80000000U));
102
103 return was_once_valid;
104 }

◆ unlock_mutex()

__device__ void IterateUntilCondition::unlock_mutex ( Storage * store)
inline

Definition at line 106 of file IterateUntilCondition.h.

107 {
108 volatile unsigned int * ptr = static_cast<volatile unsigned int *>(&store->mutex_check);
109 *ptr = *ptr + 1;
110 }