ATLAS Offline Software
Loading...
Searching...
No Matches
DataFormatRates.py
Go to the documentation of this file.
1# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
2
3# Script for checking the yield and skimming efficiency for derived data formats
4
5# needed python imports
6import os, argparse, subprocess
7
8# use (rather than copy) some useful functions
9from PROCTools import ExtractEvents as ee
10
11# handle the CL parameters
12parser = argparse.ArgumentParser(description='Extract yields and skimming efficiencies for derived data formats using ARQ and AMI. Written by C. Ohm.')
13parser.add_argument('-r', '--runs', required=True, type=str, help='Run number range, e.g. 123456 or 123456-199999')
14parser.add_argument('-f', '--fileformat', type=ee.validFileFormat, help='File format: (D)RAW(_XYZ), (D)ESD(_XYZ), (D)AOD(_XYZ)', default='AOD')
15parser.add_argument('-b', '--baselineformat', type=ee.validFileFormat, help='File format to use for reference yields for the efficiency calculation (defaults to AOD)', default='AOD')
16parser.add_argument('-m', '--matchingstring', type=str, nargs='?', default='', help='String for matching the dataset to look in, useful when there are several processings available, or both merged and unmerged datasets, e.g. "merge.DESDM_RPVLL*f620_m*" will do what you expect')
17parser.add_argument('-s', '--stream', type=str, nargs='?', help='Stream name, defaults to "physics_Main"', default='physics_Main')
18parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode, prints out eos paths and commands, file names, etc')
19parser.add_argument('-q', '--quick', action='store_true', default=False, help='Quick mode, skips ARQ call and uses local pickle file from previous query')
20
21args = parser.parse_args()
22#print (args)
23
24if args.quick:
25 print ("Quick mode: will use pickle file on disk from previous query, will not call ARQ")
26else:
27 # run the query - all of the retrieved data is stored in a pickle file on disk
28 cmd = "AtlRunQuery.py \"find run %s and ready and st %s 100k+ / show lumi\"" % (args.runs, args.stream)
29 if args.verbose:
30 print ("Will now execute the following ARQ query:")
31 print (" %s" % cmd)
32 env = os.environ.copy()
33 output = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env)
34 if args.verbose:
35 for line in output.stdout.readlines():
36 print (" %s" % line.rstrip())
37 print ("Retrieved ARQ results")
38
39print ("Will now open the pickle file containing the needed lumi info")
40
41# read in the pickle file with the results
42import pickle
43f = open("data/atlrunquery.pickle", 'rb')
44d = pickle.load(f)
45
46print ("Loaded pickle file containing info for %d runs" % len(d['Run']))
47
48lumiPerRun = {}
49eventsPerRun = {}
50selectedEventsPerRun = {}
51
52for run in d['Run']:
53 print ("Will now sum up integrated luminosity for run %d" % run)
54 # calculate the integrated luminosity based on the length of the LBs and the inst lumi per LB
55 lbDict = d[run]['#LB']
56 lumiTag = [key for key in d[run] if 'ofllumi:0:' in key][0]
57 print (" Using lumi tag: %s" % lumiTag)
58 lumiDict = d[run][lumiTag]
59 print (lumiDict)
60 integratedLumi = 0
61 for lb in range(1,len(lbDict)):
62 print ("lb: %d" % lb)
63 print ("lumiDict[lb-1]:")
64 print (lumiDict[lb-1])
65 #print ("Will now add lumi for LB %d with length %f and lumi %f" % (lb, lbDict[1][lb]-lbDict[1][lb-1], lumiDict[lb-1]['value']))
66 #integratedLumi += lumiDict[lb-1]['value']*(lbDict[1][lb]-lbDict[1][lb-1]) # this was good for online lumi which comes in std query.. /CO
67 integratedLumi += float(lumiDict[lb-1]['value']) # this already has integrated lumi per LB, I guess? /CO
68 #print (" LB %d: integrated lumi = %f" % (lb, integratedLumi))
69 print (" => Done: %f x 10^30 cm^-2" % (integratedLumi))
70 lumiPerRun[run] = integratedLumi
71
72# now go to AMI to get the event yields for the datasets you're interested in
73import pyAMI.client
74import pyAMI.atlas.api as AtlasAPI
75client = pyAMI.client.Client('atlas')
76AtlasAPI.init()
77
78for run in d['Run']:
79
80 pattern = "data15_13TeV.%08d.physics_Main.merge.DESDM_RPVLL" % run
81 pattern += '.f%_m%'
82 dslist = AtlasAPI.list_datasets(client, patterns = pattern, fields = ['events'], type = 'DESDM_RPVLL')
83 #print (dslist)
84 if len(dslist) > 0:
85 print (dslist[0]['events'])
86 selectedEventsPerRun[run] = dslist[0]['events']
87 pattern = "data15_13TeV.%08d.physics_Main.merge.AOD" % run
88 pattern += '.f%_m%'
89 dslist = AtlasAPI.list_datasets(client, patterns = pattern, fields = ['events'], type = 'AOD')
90 #print (dslist)
91 if len(dslist) > 0:
92 print (dslist[0]['events'])
93 eventsPerRun[run] = dslist[0]['events']
94
95import ROOT as r
96import math
97
98bins = len(eventsPerRun)
99
100yieldPerRunHisto = r.TH1F("YieldPerRun", "YieldPerRun", bins, 0, bins)
101skimmingEffPerRunHisto = r.TH1F("SkimmingEfficiencyPerRun", "SkimmingEfficiencyPerRun", bins, 0, bins)
102yieldPerLumiHisto = r.TH1F("YieldPerRunPerLumi", "YieldPerRunPerLumi", bins, 0, bins)
103runs = 0
104for run in eventsPerRun:
105 events = float(selectedEventsPerRun[run])
106 allevents = float(eventsPerRun[run])
107 lumi = lumiPerRun[run]
108 yieldPerRunHisto.SetBinContent(runs+1, events)
109 yieldPerRunHisto.SetBinError(runs+1, math.sqrt(events))
110 yieldPerRunHisto.GetXaxis().SetBinLabel(runs+1, str(run))
111 skimmingEffPerRunHisto.SetBinContent(runs+1, events/allevents)
112 skimmingEffPerRunHisto.SetBinError(runs+1, events/allevents*math.sqrt(1/events + 1/allevents))
113 skimmingEffPerRunHisto.GetXaxis().SetBinLabel(runs+1, str(run))
114 yieldPerLumiHisto.SetBinContent(runs+1, events/lumi)
115 yieldPerLumiHisto.SetBinError(runs+1, math.sqrt(events)/lumi)
116 yieldPerLumiHisto.GetXaxis().SetBinLabel(runs+1, str(run))
117 runs += 1
118
119c1 = r.TCanvas("PerRun", "PerRun")
120yieldPerRunHisto.GetYaxis().SetRangeUser(0, yieldPerRunHisto.GetMaximum()*1.2)
121yieldPerRunHisto.SetTitle("Event yield per run;Run;Events in DESDM_RPVLL stream")
122yieldPerRunHisto.Draw()
123c2 = r.TCanvas("EffPerRun", "EffPerRun")
124skimmingEffPerRunHisto.GetYaxis().SetRangeUser(0, skimmingEffPerRunHisto.GetMaximum()*1.2)
125skimmingEffPerRunHisto.SetTitle("Skimming efficiency per run;Run;Fraction of events in DESDM_RPVLL stream")
126skimmingEffPerRunHisto.Draw()
127c3 = r.TCanvas("PerLumi", "PerLumi")
128yieldPerLumiHisto.GetYaxis().SetRangeUser(0, yieldPerLumiHisto.GetMaximum()*1.2)
129yieldPerLumiHisto.SetTitle("Event yield per #mub;Run;Events in DESDM_RPVLL/#mub")
130yieldPerLumiHisto.Draw()