ATLAS Offline Software
PileUpUtils.py
# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration

from copy import deepcopy
from math import ceil

from AthenaCommon.Logging import logging
from AthenaConfiguration.AutoConfigFlags import GetFileMD


def pileupInputCollections(inputFiles):
    if not len(inputFiles):
        return []  # Should never hit this but just in case
    rawCollections = [type_key[1] for type_key in GetFileMD(inputFiles).get("itemList", [])]
    collections = [col for col in rawCollections if not col.endswith('Aux.')]
    return collections
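
# Usage sketch (hypothetical file name): list the persistent collections
# recorded in the metadata of a pile-up input file, with the auxiliary
# 'Aux.' store entries filtered out, e.g.
#   puCollections = pileupInputCollections(['myMinbiasHITS.pool.root'])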


def pileUpCalc(nSignalEvts, refreshRate, nSubEvtPerBunch, nBunches):
    """Returns the total number of needed events"""
    totalSubEvts = nBunches * nSubEvtPerBunch
    totalSubEvts += totalSubEvts * refreshRate * nSignalEvts
    return totalSubEvts
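
# Worked example (hypothetical numbers): with 2 sub-events per bunch
# crossing and 7 bunch crossings there are 14 sub-events for the first
# signal event; with a refresh rate of 1.0, each signal event needs a
# further 14, so
#   pileUpCalc(nSignalEvts=100., refreshRate=1.0, nSubEvtPerBunch=2., nBunches=7)
# returns 14 + 14 * 1.0 * 100 = 1414 background events.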


def getNBkgEventsPerFile(initialList, logger):
    """Get number of events in a PU file"""
    nBkgEventsPerFile = 5000
    try:
        from PyUtils.MetaReader import read_metadata
        metadata = read_metadata(initialList[0])
        metadata = metadata[initialList[0]]  # promote all keys one level up
        nBkgEventsPerFile = int(metadata['nentries'])
        logger.info('Number of background events per file (read from file) = %s.', nBkgEventsPerFile)
    except Exception:
        import traceback
        traceback.print_exc()
        logger.warning('Failed to count the number of background events in %s. '
                       'Assuming 5000 - if this is an overestimate the job may die.', initialList[0])
    return nBkgEventsPerFile
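
# Usage sketch (hypothetical file name): read the event count from the first
# background file, falling back to the default of 5000 if the metadata is
# unreadable.
#   logger = logging.getLogger("PileUp")
#   nEvts = getNBkgEventsPerFile(['myMinbiasHITS.pool.root'], logger)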


def getInputCollectionOffset(flags, initialList):
    """Calculate random offset into the input PU files"""
    logger = logging.getLogger("PileUp")

    offsetrnd = 0
    if flags.Input.JobNumber >= 0:
        nBkgEventsPerFile = getNBkgEventsPerFile(initialList, logger)

        # Turn jobNumber into a random number following https://en.wikipedia.org/wiki/Xorshift
        # x ^= x << 13;
        # x ^= x >> 17;
        # x ^= x << 5;
        offsetrnd = int(flags.Input.JobNumber + nBkgEventsPerFile * len(initialList))
        offsetrnd = offsetrnd ^ (offsetrnd << 13)
        offsetrnd = offsetrnd ^ (offsetrnd >> 17)
        offsetrnd = offsetrnd ^ (offsetrnd << 5)
        offsetrnd = offsetrnd % (nBkgEventsPerFile * len(initialList))

        logger.info('Event offset into the collection = %s', offsetrnd)

    return offsetrnd
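
# Sketch of the hashing step above (hypothetical numbers): the three xorshift
# lines scramble the job number so that consecutive job numbers land at
# well-separated offsets, and the final modulo folds the hash back into
# [0, nBkgEventsPerFile * len(initialList)):
#   x = 1 + 5000 * 10            # jobNumber 1, ten files of 5000 events
#   x ^= x << 13; x ^= x >> 17; x ^= x << 5
#   offset = x % (5000 * 10)     # deterministic, pseudo-random offset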


def generateBackgroundInputCollections(flags, initialList, nBkgEvtsPerCrossing, correctForEmptyBunchCrossings):
    """Prepare the list of required input PU files"""
    logger = logging.getLogger("PileUp")

    finalList = []

    nSignalEvts = 1000
    if flags.Exec.MaxEvents > 0:
        nSignalEvts = int(flags.Exec.MaxEvents)
        logger.info('Number of signal events (from Exec.MaxEvents) = %s.', nSignalEvts)
    else:
        nSignalEvts = 0
        from PyUtils.MetaReader import read_metadata
        for inFile in list(flags.Input.Files):
            try:
                metadata = read_metadata(inFile)
                metadata = metadata[inFile]  # promote all keys one level up
                nSignalEvts += int(metadata['nentries'])
            except Exception as err:
                logger.warning("Unable to open file [%s]", inFile)
                logger.warning('caught:\n%s', err)
                import traceback
                traceback.print_exc()
        logger.info('Number of signal events (read from files) = %s.', nSignalEvts)

    nBkgEventsPerFile = getNBkgEventsPerFile(initialList, logger)
    nBunchesTotal = int(1 + flags.Digitization.PU.FinalBunchCrossing - flags.Digitization.PU.InitialBunchCrossing)
    nBunches = nBunchesTotal
    if correctForEmptyBunchCrossings:
        nBunches = int(ceil(float(nBunches) * float(flags.Digitization.PU.BunchSpacing) / float(flags.Beam.BunchSpacing)))
    logger.info('Simulating a maximum of %s colliding-bunch crossings (%s colliding+non-colliding total) per signal event', nBunches, nBunchesTotal)

    nBkgEventsForJob = pileUpCalc(float(nSignalEvts), 1.0, float(nBkgEvtsPerCrossing), nBunches)
    # Add the event offset to the number of required background events to ensure a sufficient duplication of the minbias files
    eventOffset = flags.Digitization.PU.HighPtMinBiasInputColOffset if flags.Digitization.PU.HighPtMinBiasInputColOffset > 0 else 0
    nBkgEventsForJob += eventOffset
    logger.info('Number of background events required: %s, including %s for the offset. Number of background events in input files: %s',
                nBkgEventsForJob, eventOffset, (nBkgEventsPerFile * len(initialList)))
    numberOfRepetitionsRequiredTmp = float(nBkgEventsForJob) / float(nBkgEventsPerFile * len(initialList))
    numberOfRepetitionsRequired = 1 + int(ceil(numberOfRepetitionsRequiredTmp))
    # FIXME many copies of this string seems rather inefficient
    for i in range(0, numberOfRepetitionsRequired):
        finalList += initialList
    logger.info('Expanding input list from %s to %s',
                len(initialList), len(finalList))
    return finalList
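
# Worked example of the repetition count (hypothetical numbers, zero
# HighPtMinBiasInputColOffset): needing 1414 background events from 2 input
# files of 5000 events each gives
#   numberOfRepetitionsRequiredTmp = 1414 / 10000 = 0.1414
#   numberOfRepetitionsRequired = 1 + ceil(0.1414) = 2
# so the returned list repeats the 2 input files twice (4 entries).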


def loadPileUpProfile(flags, fragment_string):
    """Load pile-up profile from file."""
    parts = fragment_string.split('.')
    if len(parts) < 2:
        raise ValueError('Pile-up profile configuration should be of the form Package.Module')

    from importlib import import_module
    loaded_module = import_module(fragment_string)
    return loaded_module.setupProfile(flags)
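
# Usage sketch: the fragment string names a Python module whose
# setupProfile(flags) function returns the profile (a list of lumiblock
# dictionaries), e.g., with a module name referenced elsewhere in this file:
#   profile = loadPileUpProfile(flags, 'RunDependentSimData.PileUpProfile_muRange')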


def generatePileUpProfile(flags,
                          profile,
                          randomMuSampling=False,
                          sequentialEventNumbers=False,
                          doNotCorrectMaxEvents=False):
    """Generate pile-up profile"""
    logger = logging.getLogger("PileUp")
    logger.info('Doing RunLumiOverride configuration from file.')

    jobNumber = flags.Input.JobNumber
    maxEvents = flags.Exec.MaxEvents
    totalEvents = flags.Exec.MaxEvents
    skipEvents = flags.Exec.SkipEvents

    # executor splitting
    if flags.ExecutorSplitting.TotalSteps > 1:
        totalEvents = flags.ExecutorSplitting.TotalEvents

    if maxEvents == -1:
        raise ValueError("maxEvents = -1 is not supported! Please set this to the number of events per file times the number of files per job.")
    if not doNotCorrectMaxEvents and not flags.ExecutorSplitting.TotalSteps > 1:
        # round up to nearest 100 events..
        corrMaxEvents = ceil(float(maxEvents) / 100.0) * 100.0
    else:
        if not flags.ExecutorSplitting.TotalSteps > 1:
            logger.warning("Using the actual number of HITS input events for this job -- not for production use!")
        corrMaxEvents = maxEvents

    # We may need to repeat this run for long production jobs.
    # NB: unlike vanilla variable-mu jobs, it's possible to waste
    # up to trfMaxEvents-1 events per complete run in prodsys if
    # the number of events specified by this run is not evenly
    # divisible by trfMaxEvents.
    generatedProfile = loadPileUpProfile(flags, profile)
    # do executor step filtering
    if flags.ExecutorSplitting.TotalSteps > 1:
        generatedProfile = list(filter(lambda lb: 'step' not in lb or lb['step'] == flags.ExecutorSplitting.Step, generatedProfile))

    runMaxEvents = sum(lb["evts"] for lb in generatedProfile)
    logger.info("There are %d events in this run.", runMaxEvents)
    jobsPerRun = int(ceil(float(runMaxEvents) / corrMaxEvents))
    logger.info("Assuming there are usually %d events per job. (Based on %d events in this job.)",
                corrMaxEvents, maxEvents)
    logger.info("There must be %d jobs per run.", jobsPerRun)

    # Override event numbers with sequential ones if requested
    if sequentialEventNumbers:
        logger.info("All event numbers will be sequential.")

    # Random mu sampling
    if randomMuSampling:
        logger.info("Mu values will be sampled randomly from the set profile.")
        # Load needed tools
        from RunDependentSimComps.RunDependentMCTaskIterator import getRandomlySampledRunLumiInfoFragment
        fragment = getRandomlySampledRunLumiInfoFragment(
            jobnumber=(jobNumber - 1),
            task=generatedProfile,
            maxEvents=maxEvents,
            totalEvents=totalEvents,
            skipEvents=skipEvents,
            sequentialEventNumbers=sequentialEventNumbers)
    else:
        # Load needed tools
        from RunDependentSimComps.RunDependentMCTaskIterator import getRunLumiInfoFragment
        fragment = getRunLumiInfoFragment(
            jobnumber=(jobNumber - 1),
            task=generatedProfile,
            maxEvents=maxEvents,
            totalEvents=totalEvents,
            skipEvents=skipEvents,
            sequentialEventNumbers=sequentialEventNumbers)

    # Remove lumiblocks with no events
    for element in fragment:
        if element['evts'] == 0:
            logger.warning('Found lumiblock with no events! This lumiblock will not be used:\n (' + str(element) + ')')
    fragment = [x for x in fragment if x['evts'] != 0]

    from RunDependentSimComps.RunLumiConfigTools import condenseRunLumiInfoFragment
    logger.info("Writing RunDMC trigger configuration fragment to file. listOfRunsEvents = %s",
                condenseRunLumiInfoFragment(fragment, "RunDMCTriggerRunsInfo.py"))

    flags.Input.RunAndLumiOverrideList = fragment
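
# Shape of the profile entries consumed above (a sketch; only 'evts', 'mu'
# and 'step' are used directly in this file, the other field names are
# assumptions for illustration):
#   {'run': 310000, 'lb': 1, 'starttstamp': 1550000000, 'evts': 100, 'mu': 30.0}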


def generateRunAndLumiProfile(flags,
                              profile,
                              sequentialEventNumbers=False,
                              doNotCorrectMaxEvents=False):
    """Generate RunAndLumiOverrideList"""
    logger = logging.getLogger("PileUp")
    logger.info('Doing RunLumiOverride configuration from file.')

    jobNumber = flags.Input.JobNumber
    maxEvents = flags.Exec.MaxEvents
    totalEvents = flags.Exec.MaxEvents
    skipEvents = flags.Exec.SkipEvents

    # executor splitting
    if flags.ExecutorSplitting.TotalSteps > 1:
        totalEvents = flags.ExecutorSplitting.TotalEvents

    if maxEvents == -1:
        errorMessage = "maxEvents = -1 is not supported! Please set this to the number of events per file times the number of files per job."
        raise SystemExit(errorMessage)
    if not doNotCorrectMaxEvents and not flags.ExecutorSplitting.TotalSteps > 1:
        # round up to nearest 25 events...
        corrMaxEvents = ceil(float(maxEvents) / 25.0) * 25.0
    else:
        logger.warning(
            "Using the actual number of EVNT input events for this job -- not for production use!")
        corrMaxEvents = maxEvents

    # We may need to repeat this run for long production jobs.
    # NB: unlike vanilla variable-mu jobs, it's possible to waste
    # up to trfMaxEvents-1 events per complete run in prodsys if
    # the number of events specified by this run is not evenly
    # divisible by trfMaxEvents.
    tempProfile = loadPileUpProfile(flags, profile)
    profileTotalEvents = sum(lb['evts'] for lb in tempProfile)
    corrTotalEvents = max(maxEvents, 50)
    scaleTaskLengthSim = float(corrTotalEvents) / float(profileTotalEvents)

    generatedProfile = []
    step = -1
    cacheElement = None

    def simEvts(x):
        return int(scaleTaskLengthSim * x)

    for el in tempProfile:
        if el['step'] != step:
            if cacheElement is not None:
                cacheElement['evts'] = simEvts(cacheElement['evts'])
                generatedProfile += [cacheElement]
            step = el['step']
            cacheElement = deepcopy(el)
            cacheElement['mu'] = step
        else:
            cacheElement['evts'] += el['evts']
    cacheElement['evts'] = simEvts(cacheElement['evts'])
    generatedProfile += [cacheElement]

    runMaxEvents = sum(lb["evts"] for lb in generatedProfile)
    logger.info("There are %d events in this run.", runMaxEvents)
    jobsPerRun = int(ceil(float(runMaxEvents) / corrMaxEvents))
    logger.info("Assuming there are usually %d events per job. (Based on %d events in this job.)",
                corrMaxEvents, maxEvents)
    logger.info("There must be %d jobs per run.", jobsPerRun)

    # Override event numbers with sequential ones if requested
    if sequentialEventNumbers:
        logger.info("All event numbers will be sequential.")

    # Load needed tools
    from RunDependentSimComps.RunDependentMCTaskIterator import getRunLumiInfoFragment
    fragment = getRunLumiInfoFragment(
        jobnumber=(jobNumber - 1),
        task=generatedProfile,
        maxEvents=maxEvents,
        totalEvents=totalEvents,
        skipEvents=skipEvents,
        sequentialEventNumbers=sequentialEventNumbers)

    # Remove lumiblocks with no events
    for element in fragment:
        if element['evts'] == 0:
            logger.warning('Found lumiblock with no events! This lumiblock will not be used:\n (' + str(element) + ')')
    fragment = sorted([x for x in fragment if x['evts'] != 0], key=lambda x: x['step'])

    flags.Input.RunAndLumiOverrideList = fragment
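
# Worked example of the scaling above (hypothetical numbers): a profile whose
# lumiblocks sum to profileTotalEvents = 500 events, run in a job with
# maxEvents = 100, gives corrTotalEvents = max(100, 50) = 100 and
# scaleTaskLengthSim = 100/500 = 0.2, so a mu-step that originally contained
# 50 events is simulated with simEvts(50) = int(0.2 * 50) = 10 events.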


def scaleNumberOfCollisions(flags):
    """Scale the number of events per crossing to the largest value in job.

    Note: beam halo and beam gas will NOT be scaled!"""
    logger = logging.getLogger("PileUp")

    maxMu = max(element['mu'] for element in flags.Input.RunAndLumiOverrideList)
    if not (maxMu > 0 and flags.Digitization.PU.NumberOfCollisions):
        return

    scale = maxMu / flags.Digitization.PU.NumberOfCollisions
    nCollisions = flags.Digitization.PU.NumberOfCollisions
    if nCollisions:
        flags.Digitization.PU.NumberOfCollisions = maxMu
        logger.info("Changing Digitization.PU.NumberOfCollisions from %s to %s",
                    nCollisions, flags.Digitization.PU.NumberOfCollisions)

    if flags.Digitization.PU.NumberOfLowPtMinBias:
        old = flags.Digitization.PU.NumberOfLowPtMinBias
        flags.Digitization.PU.NumberOfLowPtMinBias *= scale
        logger.info("Changing Digitization.PU.NumberOfLowPtMinBias from %s to %s",
                    old, flags.Digitization.PU.NumberOfLowPtMinBias)

    if flags.Digitization.PU.NumberOfHighPtMinBias:
        old = flags.Digitization.PU.NumberOfHighPtMinBias
        flags.Digitization.PU.NumberOfHighPtMinBias *= scale
        logger.info("Changing Digitization.PU.NumberOfHighPtMinBias from %s to %s",
                    old, flags.Digitization.PU.NumberOfHighPtMinBias)

    if flags.Digitization.PU.NumberOfCavern:
        old = flags.Digitization.PU.NumberOfCavern
        flags.Digitization.PU.NumberOfCavern *= scale
        logger.info("Changing Digitization.PU.NumberOfCavern from %s to %s",
                    old, flags.Digitization.PU.NumberOfCavern)
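
# Worked example (hypothetical numbers): with NumberOfCollisions = 20 and a
# profile whose largest mu is 60, scale = 60/20 = 3.0; NumberOfCollisions
# becomes 60 and NumberOfLowPtMinBias, NumberOfHighPtMinBias and
# NumberOfCavern are each tripled, while beam halo and beam gas rates are
# left untouched.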


def setupPileUpProfile(flags):
    bunchStructure = flags.Digitization.PU.BunchStructureConfig

    # custom pile-up
    if flags.Digitization.PU.CustomProfile:
        if isinstance(flags.Digitization.PU.CustomProfile, str):
            flags.Digitization.PU.CustomProfile = eval(flags.Digitization.PU.CustomProfile)
        if isinstance(flags.Digitization.PU.CustomProfile, dict):
            pileUpProfile = 'RunDependentSimData.PileUpProfile_muRange'
    else:
        pileUpProfile = flags.Digitization.PU.ProfileConfig

    # sanity check
    if not bunchStructure:
        raise ValueError('Bunch structure needs to be set')

    # Setup beam intensity pattern
    parts = bunchStructure.split('.')
    if len(parts) < 2:
        raise ValueError('Bunch structure configuration should be of the form Package.Module')

    from importlib import import_module
    loaded_module = import_module(bunchStructure)
    loaded_module.setupBunchStructure(flags)

    # Setup pile-up profile
    flags.Digitization.PU.NumberOfCollisions = flags.Digitization.PU.NumberOfLowPtMinBias + flags.Digitization.PU.NumberOfHighPtMinBias
    if pileUpProfile:
        generatePileUpProfile(flags, pileUpProfile,
                              sequentialEventNumbers=flags.Digitization.PU.ForceSequentialEventNumbers)
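
# Usage sketch: a job configuration would set the relevant flags and then
# call setupPileUpProfile once ('BunchStructure_2017' and
# 'PileUpProfile_run310000' are assumed module names, for illustration only):
#   flags.Digitization.PU.BunchStructureConfig = 'RunDependentSimData.BunchStructure_2017'
#   flags.Digitization.PU.ProfileConfig = 'RunDependentSimData.PileUpProfile_run310000'
#   setupPileUpProfile(flags)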