{
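  // Participation flag: set by the reference thread (thread 0 of the block)
  // and made visible to the rest of the block by the __syncthreads() below.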
  __shared__ bool is_participating;

  const bool is_reference_thread = (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);

  if (is_reference_thread)
  {
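    // (Elided in this excerpt: the flag is presumably set here, e.g.
    //  is_participating = check_if_participating(store);, given the helper
    //  declared further below.)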
  }

  __syncthreads();

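  // Logical index of this block as recorded in the store; the block with
  // logical index 0 acts as the reference block for the barrier further down.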
  const unsigned int this_block_index = store->block_indices[blockIdx.x];

  if (is_participating)
  {
    const bool is_reference_block = (this_block_index == 0);

    const unsigned int this_thread_index = threadIdx.z * blockDim.y * blockDim.x +
                                           threadIdx.y * blockDim.x +
                                           threadIdx.x;

    const unsigned int num_threads_per_block = blockDim.x * blockDim.y * blockDim.z;

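    // Hook invoked once before the iterations start.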
    Before{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);

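    // Keep iterating until the stopping condition evaluated by checker holds.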
    while (!checker(total_blocks, this_block_index, std::forward<Args>(args)...))
    {
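      // helper runs one unit of work (func) and then performs a software
      // grid-wide barrier through store->wait_flags, so that no participating
      // block moves on before every other one has finished this step.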
      auto helper = [&](auto func)
      {
        func(total_blocks, this_block_index, checker, std::forward<Args>(args)...);

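        // Reference block: wait for every other block to raise its flag, then
        // clear the flags to release them. Other blocks: raise their own flag
        // and spin until the reference block clears it again.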
        if (is_reference_block)
        {
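          // Each thread of the reference block spins on the flags of the
          // blocks assigned to it (strided by the number of threads).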
          for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
          {
            unsigned int last_check = 0;

            volatile unsigned int * to_check = static_cast<volatile unsigned int *>(&store->wait_flags[block_to_check]);

            do
            {
              last_check = *to_check;
            }
            while (last_check == 0);
          }

          __syncthreads();

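          // All blocks have arrived: clear their flags to release them.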
          for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
          {
            __threadfence();

            atomicAnd(&(store->wait_flags[block_to_check]), 0U);
          }

          __syncthreads();
        }
        else
        {
          __syncthreads();

          if (is_reference_thread)
          {
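            // Signal this block's arrival, then wait until the reference
            // block clears the flag before proceeding.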
            atomicOr(&(store->wait_flags[this_block_index]), 1U);

            unsigned int last_check = 1;

            volatile unsigned int * to_check = static_cast<volatile unsigned int *>(&store->wait_flags[this_block_index]);

            __threadfence();

            do
            {
              last_check = *to_check;
            }
            while (last_check != 0);
          }

          __syncthreads();
        }
      };

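      // (Elided in this excerpt: helper is presumably invoked here with the
      //  per-iteration work functor.)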
    }

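    // Hook invoked once after the stopping condition has been met.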
    After{}(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
  }

#if CALORECGPU_ITERATE_UNTIL_CONDITION_DEBUG
  if (is_reference_thread)
  {
    printf("%d | %d | %u %u \n", blockIdx.x, static_cast<int>(is_participating), total_blocks, this_block_index);
  }
#endif
}
__device__ bool check_if_participating(Storage *store)