156 {
157 __shared__ bool is_participating;
158
159 const bool is_reference_thread = (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);
160
161 if (is_reference_thread)
162 {
164 }
165
166 __syncthreads();
167
168 const unsigned int this_block_index = store->block_indices[blockIdx.x];
170
171 if (is_participating)
172 {
173 const bool is_reference_block = (this_block_index == 0);
174
175 const unsigned int this_thread_index = threadIdx.z * blockDim.y * blockDim.x +
176 threadIdx.y * blockDim.x +
177 threadIdx.x;
178
179 const unsigned int num_threads_per_block = blockDim.x * blockDim.y * blockDim.z;
180
182
183
184 Before{}(total_blocks, this_block_index,
checker, std::forward<Args>(args)...);
185
186 while (!
checker(total_blocks, this_block_index, std::forward<Args>(args)...))
187 {
188 auto helper = [&](
auto func)
189 {
190
191 func(total_blocks, this_block_index, checker, std::forward<Args>(args)...);
192
193 return;
194
195
196
197
198
199 if (is_reference_block)
200 {
201
202 for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
203 {
204 while (
store->wait_flags[block_to_check] == 0);
205
206 }
207
208 __syncthreads();
209
210 for (unsigned int block_to_check = this_thread_index + 1; block_to_check < total_blocks; block_to_check += num_threads_per_block)
211 {
212 atomicAnd(&(
store->wait_flags[block_to_check]), 0U);
213 }
214 }
215 else
216 {
217 __syncthreads();
218
219 if (is_reference_thread)
220 {
221 atomicOr(&(
store->wait_flags[this_block_index]), 1U);
222
223 while (
store->wait_flags[this_block_index] != 0);
224
225 }
226
227 __syncthreads();
228 }
229
230 };
231
233 }
234
235 After{}(total_blocks, this_block_index,
checker, std::forward<Args>(args)...);
236 }
237
238#if CALORECGPU_ITERATE_UNTIL_CONDITION_DEBUG
239 if (is_reference_thread)
240 {
241 printf("%d | %d | %u %u \n", blockIdx.x, static_cast<int>(is_participating), total_blocks, this_block_index);
242 }
243#endif
244 }
__device__ bool check_if_participating(Storage *store)