ATLAS Offline Software
DataFormatRates.py
# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration

# Script for checking the yield and skimming efficiency for derived data formats

# needed python imports
import os, argparse, subprocess

# use (rather than copy) some useful functions
from PROCTools import ExtractEvents as ee

# handle the CL parameters
parser = argparse.ArgumentParser(description='Extract yields and skimming efficiencies for derived data formats using ARQ and AMI. Written by C. Ohm.')
parser.add_argument('-r', '--runs', required=True, type=str, help='Run number range, e.g. 123456 or 123456-199999')
parser.add_argument('-f', '--fileformat', type=ee.validFileFormat, help='File format: (D)RAW(_XYZ), (D)ESD(_XYZ), (D)AOD(_XYZ)', default='AOD')
parser.add_argument('-b', '--baselineformat', type=ee.validFileFormat, help='File format to use for reference yields for the efficiency calculation (defaults to AOD)', default='AOD')
parser.add_argument('-m', '--matchingstring', type=str, nargs='?', default='', help='String for matching the dataset to look in, useful when there are several processings available, or both merged and unmerged datasets, e.g. "merge.DESDM_RPVLL*f620_m*" will do what you expect')
parser.add_argument('-s', '--stream', type=str, nargs='?', help='Stream name, defaults to "physics_Main"', default='physics_Main')
parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode, prints out eos paths and commands, file names, etc')
parser.add_argument('-q', '--quick', action='store_true', default=False, help='Quick mode, skips ARQ call and uses local pickle file from previous query')

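# Example invocation (illustrative only; the run range and matching string reuse the examples
# given in the help strings above):
#   python DataFormatRates.py -r 123456-199999 -f DESDM_RPVLL -m "merge.DESDM_RPVLL*f620_m*" -v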
args = parser.parse_args()
#print (args)

if args.quick:
    print ("Quick mode: will use pickle file on disk from previous query, will not call ARQ")
else:
    # run the query - all of the retrieved data is stored in a pickle file on disk
    cmd = "AtlRunQuery.py \"find run %s and ready and st %s 100k+ / show lumi\"" % (args.runs, args.stream)
    if args.verbose:
        print ("Will now execute the following ARQ query:")
        print (" %s" % cmd)
    env = os.environ.copy()
    output = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env)
    # drain the output so the query has finished before the pickle file is read below,
    # and echo it in verbose mode (decoded, since Popen returns bytes)
    for line in output.stdout.readlines():
        if args.verbose:
            print (" %s" % line.rstrip().decode())
    print ("Retrieved ARQ results")

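# Note for --quick mode: no new query is run, so the pickle file from a previous (non-quick)
# invocation must already exist at the path used below.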
print ("Will now open the pickle file containing the needed lumi info")

# read in the pickle file with the results
import pickle
with open("data/atlrunquery.pickle", 'rb') as f:
    d = pickle.load(f)

print ("Loaded pickle file containing info for %d runs" % len(d['Run']))

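# The code below relies on the layout of the ARQ result dictionary: d['Run'] is the list of
# selected runs, and d[run] holds the per-run columns, in particular '#LB' (lumiblock info)
# and an 'ofllumi:0:<tag>' column with per-LB luminosity values (this structure is inferred
# from the accesses below rather than from ARQ documentation).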
lumiPerRun = {}
eventsPerRun = {}
selectedEventsPerRun = {}

for run in d['Run']:
    print ("Will now sum up integrated luminosity for run %d" % run)
    # calculate the integrated luminosity based on the length of the LBs and the inst lumi per LB
    lbDict = d[run]['#LB']
    lumiTag = [key for key in d[run] if 'ofllumi:0:' in key][0]
    print (" Using lumi tag: %s" % lumiTag)
    lumiDict = d[run][lumiTag]
    print (lumiDict)
    integratedLumi = 0
    for lb in range(1,len(lbDict)):
        print ("lb: %d" % lb)
        print ("lumiDict[lb-1]:")
        print (lumiDict[lb-1])
        #print ("Will now add lumi for LB %d with length %f and lumi %f" % (lb, lbDict[1][lb]-lbDict[1][lb-1], lumiDict[lb-1]['value']))
        #integratedLumi += lumiDict[lb-1]['value']*(lbDict[1][lb]-lbDict[1][lb-1]) # this was good for online lumi which comes in std query.. /CO
        integratedLumi += float(lumiDict[lb-1]['value']) # this already has integrated lumi per LB, I guess? /CO
        #print (" LB %d: integrated lumi = %f" % (lb, integratedLumi))
    print (" => Done: %f x 10^30 cm^-2" % (integratedLumi))
    lumiPerRun[run] = integratedLumi

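# The summed per-LB values are integrated luminosities in units of 10^30 cm^-2, i.e. inverse
# microbarns (as printed above), which matches the "/#mub" axis label of the
# yield-per-luminosity histogram further down.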
# now go to AMI to get the event yields for the datasets you're interested in
import pyAMI.client
import pyAMI.atlas.api as AtlasAPI
client = pyAMI.client.Client('atlas')
AtlasAPI.init()

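# Note: the pyAMI queries below typically require valid AMI credentials (e.g. a grid/VOMS
# proxy); without them the dataset lookups will fail.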
for run in d['Run']:

    pattern = "data15_13TeV.%08d.physics_Main.merge.DESDM_RPVLL" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns = pattern, fields = ['events'], type = 'DESDM_RPVLL')
    #print (dslist)
    if len(dslist) > 0:
        print (dslist[0]['events'])
        selectedEventsPerRun[run] = dslist[0]['events']
    pattern = "data15_13TeV.%08d.physics_Main.merge.AOD" % run
    pattern += '.f%_m%'
    dslist = AtlasAPI.list_datasets(client, patterns = pattern, fields = ['events'], type = 'AOD')
    #print (dslist)
    if len(dslist) > 0:
        print (dslist[0]['events'])
        eventsPerRun[run] = dslist[0]['events']

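# Note: the patterns above hardcode data15_13TeV, physics_Main, DESDM_RPVLL and AOD, so the
# -f, -b and -m command-line options are not used in this AMI lookup (and the stream is taken
# from the hardcoded pattern rather than from -s). Also, a run with an AOD dataset but no
# DESDM_RPVLL dataset would lead to a KeyError in the plotting loop below.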
import ROOT as r
import math

bins = len(eventsPerRun)

yieldPerRunHisto = r.TH1F("YieldPerRun", "YieldPerRun", bins, 0, bins)
skimmingEffPerRunHisto = r.TH1F("SkimmingEfficiencyPerRun", "SkimmingEfficiencyPerRun", bins, 0, bins)
yieldPerLumiHisto = r.TH1F("YieldPerRunPerLumi", "YieldPerRunPerLumi", bins, 0, bins)
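# Each histogram gets one bin per run with an AOD yield; the bins are labelled with the run
# numbers in the filling loop below.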
runs = 0
for run in eventsPerRun:
    events = float(selectedEventsPerRun[run])
    allevents = float(eventsPerRun[run])
    lumi = lumiPerRun[run]
    yieldPerRunHisto.SetBinContent(runs+1, events)
    yieldPerRunHisto.SetBinError(runs+1, math.sqrt(events))
    yieldPerRunHisto.GetXaxis().SetBinLabel(runs+1, str(run))
    skimmingEffPerRunHisto.SetBinContent(runs+1, events/allevents)
    skimmingEffPerRunHisto.SetBinError(runs+1, events/allevents*math.sqrt(1/events + 1/allevents))
    skimmingEffPerRunHisto.GetXaxis().SetBinLabel(runs+1, str(run))
    yieldPerLumiHisto.SetBinContent(runs+1, events/lumi)
    yieldPerLumiHisto.SetBinError(runs+1, math.sqrt(events)/lumi)
    yieldPerLumiHisto.GetXaxis().SetBinLabel(runs+1, str(run))
    runs += 1

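# The skimming-efficiency uncertainty above treats the selected and total event counts as
# independent Poisson numbers; since the selected events are a subset of the AOD events this
# is only an approximation (a binomial treatment would be more precise).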
c1 = r.TCanvas("PerRun", "PerRun")
yieldPerRunHisto.GetYaxis().SetRangeUser(0, yieldPerRunHisto.GetMaximum()*1.2)
yieldPerRunHisto.SetTitle("Event yield per run;Run;Events in DESDM_RPVLL stream")
yieldPerRunHisto.Draw()
c2 = r.TCanvas("EffPerRun", "EffPerRun")
skimmingEffPerRunHisto.GetYaxis().SetRangeUser(0, skimmingEffPerRunHisto.GetMaximum()*1.2)
skimmingEffPerRunHisto.SetTitle("Skimming efficiency per run;Run;Fraction of events in DESDM_RPVLL stream")
skimmingEffPerRunHisto.Draw()
c3 = r.TCanvas("PerLumi", "PerLumi")
yieldPerLumiHisto.GetYaxis().SetRangeUser(0, yieldPerLumiHisto.GetMaximum()*1.2)
yieldPerLumiHisto.SetTitle("Event yield per #mub;Run;Events in DESDM_RPVLL/#mub")
yieldPerLumiHisto.Draw()
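# The canvases are only drawn, not saved; to keep the plots when running non-interactively
# one could add e.g. c1.SaveAs("YieldPerRun.pdf") (and likewise for c2 and c3).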