ATLAS Offline Software
Loading...
Searching...
No Matches
CscCalibQuery.py
Go to the documentation of this file.
#!/usr/bin/env python
# Copyright (C) 2002-2020 CERN for the benefit of the ATLAS collaboration
3
4
import datetime
import glob
import os
import pickle
import re
import subprocess
import sys
11
12
# This script looks for new CSC calibration files on castor and runs on them. It:
# 1: Checks whether a job is currently running by looking for a runningCalibration### file in the script directory. Quits if anything is running.
# 2: If no job is currently running, checks whether there are any new calibration directories that we haven't processed yet. The castor listing is compared against a local "castor contents" text file. This file isn't necessary (there is a redundancy later on to prevent duplicate calib jobs), but it speeds things up.
# 3: If there are new directories, checks whether any of them has at least numFilesToRun files. If not, we neither update the castor contents file nor run on them (it is assumed they are still being staged and will be picked up the next time CscCalibQuery is run).
# 4: If we find a valid calibration directory with enough files, checks whether a directory with calibration output for the new calibration run already exists. If it does, we don't run on it again.
# 5: If it is indeed a new calibration directory, a bash script is created and submitted to a batch queue. The bash script:
#    a) Runs the athena calibration algorithm
#    b) Produces a web page with calibration monitoring plots
#    c) Emails the mailing list about the state of the calibration (any problems)
#    d) Generates a database file for merging into COOL
#    e) (Disabled) automatically adds it to the database
# ...
25
# Minimum number of calibration files that must be present in a castor
# directory before a calibration job is started on it.
numFilesToRun = 3

# Address that receives the alert mails asking for human intervention.
responsiblePerson = "youzhou@email.arizona.edu"
# Recipient(s) of the routine "starting"/"finished" calibration mails.
maillist = responsiblePerson

# When False, the generated AtlCoolMerge.py command always carries --noexec.
CoolMergeByDefault = False
32
# Utility functions ################################
34
35
def updateList(oldListPath, newList):
    """Overwrite the stored castor listing with the current one.

    Args:
        oldListPath: Path of the text file holding the last recorded listing.
        newList: Listing text to store; it replaces the file's old contents.
    """
    print('updating file list')
    # The context manager closes the file even when the write fails.
    with open(oldListPath, 'w') as outFile:
        outFile.write(newList)
42
# Runs the calibration for one run.
# It creates a bash script which can set up the job, run it, and do the
# post-job processing. The bash script is submitted to lxbatch.
def runCalib(calType, runNumber, workDir, castorCopyCmd):
    """Create and submit the batch job that calibrates one run.

    A bash script is written to ``workDir`` and submitted with ``bsub``. The
    script copies the input files, runs the athena calibration job, converts
    the result into a ``.db`` file, calls AtlCoolMerge.py, mails the
    calibration report and builds the monitoring web page.

    Side effects, in order: the run-list pickle (relative to the current
    working directory) is read and, unless a run-number conflict is found,
    rewritten; a ``runningCalibration<run>`` marker file is touched; the bash
    script is written, made executable and submitted; a "starting" mail is
    sent to ``maillist``.

    Args:
        calType: Calibration type, ``'pulser'`` or ``'ped'``.
        runNumber: Run number as a string (it is concatenated into paths).
        workDir: Working directory of this run; results go to
            ``workDir/CalibResults`` and inputs to ``workDir/Bytestream``.
        castorCopyCmd: Shell text, inserted verbatim into the script, that
            copies the input files from castor.

    Raises:
        ValueError: If ``calType`` is neither ``'pulser'`` nor ``'ped'``.
        SystemExit: If ``runNumber`` is already in the run list (after the
            responsible person has been mailed).
    """
    # Initialize based on calibration type.
    scriptDir = '${HOME}/CSC/run/'
    if calType == 'pulser':
        calibScript = scriptDir + 'CscCalcSlopeMon.py'
        dbScript = scriptDir + 'cscWritePSlopeCool.py'
        webDir = '${HOME}/www/csc/pulser'
        runListFile = "pulserRunList.pickle"
        # NOTE(review): no online calibration file or web page URL was ever
        # defined for pulser runs (the names were simply unbound, so this
        # branch crashed). They are left out of the job options and mail text
        # here -- confirm what the pulser job actually needs.
        onlDbFile = None
        webPageUrl = None
    elif calType == 'ped':
        calibScript = scriptDir + 'CscCalcPedMon.py'
        dbScript = scriptDir + 'cscWritePedRefCool.py'
        onlDbFile = scriptDir + 'online.cal'
        webDir = '${HOME}/www/csc/ped'
        webPageUrl = 'https://atlas-csc-calib.web.cern.ch/atlas-csc-calib/ped/pedRun_' + runNumber
        runListFile = "pedRunList.pickle"
    else:
        raise ValueError("Unknown calibration type %r (expected 'pulser' or 'ped')" % (calType,))

    outputDir = workDir + "/CalibResults"
    bsDir = workDir + "/Bytestream"

    print('outputDir = ' + outputDir)

    # Setup finished email message. The doubled backslashes are deliberate:
    # the text is embedded in the athena -c option string of the bash script.
    emailMessage = 'Finished ' + calType + ' calibration on run number ' + runNumber
    emailMessage += '. Output in ' + outputDir + '.'
    if webPageUrl is not None:
        emailMessage += '\\nAnd website at:\\n'
        emailMessage += webPageUrl

    # Setup finished email subjects.
    # NOTE(review): there is no space between the calibration type and
    # 'calib', nor before the run number; left unchanged in case mail filters
    # rely on the exact subject.
    goodEmailSubject = '[CSC CALIB PROC]: SUCCESS with ' + calType + 'calib run' + runNumber
    badEmailSubject = '[CSC CALIB PROC]: PROBLEMS with ' + calType + 'calib run' + runNumber

    # Prepare bash script for batch system.
    bashFilePath = workDir + '/CscCalib_' + calType + '_' + runNumber + '.sh'

    bsubCmd = 'cd ' + outputDir + ';bsub -q 2nd -R "type==SLC5_64&&mem>420" ' + bashFilePath

    bashFileContents = "#!/bin/bash\n"
    bashFileContents += "#To resubmit this job, submit it to the atlasmuonqueu like so:\n#"
    bashFileContents += bsubCmd + "\n"
    bashFileContents += "source ~/CSC/CscSetup.sh\n"
    bashFileContents += "\n"
    bashFileContents += "resultDir=\"" + outputDir + "\"\n"
    bashFileContents += 'bytestreamDir="' + bsDir + '"\n'
    bashFileContents += 'maillist="' + maillist + '"\n'
    bashFileContents += 'webSiteDir="' + webDir + '"\n'

    calFilePrefix = "${resultDir}/" + runNumber
    inputPattern = "${bytestreamDir}/*.data"

    # Job options handed to athena through -c.
    athenaOptions = "outputPre='" + calFilePrefix + "';"
    if onlDbFile is not None:
        athenaOptions += "inputOnlCalibFile='" + onlDbFile + "';"
    athenaOptions += "inputPat='" + inputPattern + "';"
    athenaOptions += "reportPrefix='" + emailMessage + "';"

    calibCommand = 'echo "Running calibration"\n' \
        + 'mkdir ${resultDir}\n' \
        + 'athena.py -c "' + athenaOptions + '" ' \
        + calibScript

    goodEmailCommand = ' mail -s "' + goodEmailSubject + '" $maillist < ' + calFilePrefix + "CalibReport.txt"
    badEmailCommand = ' mail -s "' + badEmailSubject + '" $maillist < ' + calFilePrefix + "CalibReport.txt"

    # For the reference tag, we actually want the IOV to start just after the
    # LAST run number, so get the old run numbers.
    # NOTE: pickle.load must only ever see trusted files; the run list is a
    # local file written by this script itself.
    with open(runListFile, "rb") as infile:
        runList = pickle.load(infile)
    # Order numerically so that e.g. '99999' sorts before '100000'.
    runList.sort(key=_runSortKey)
    print("got runs")
    print(runList)

    if runNumber in runList:
        print("Mailing message")
        subject = 'New castor run directory found for previously processed run ' + str(runNumber)
        print(subject)
        _mailFile(subject, responsiblePerson, "runAlreadyProcessed.mail")
        sys.exit()

    # With an empty run list there is no earlier run; start the IOV at this run.
    highestRun = runList[-1] if runList else runNumber

    isRunNumberConflict = False

    if _runSortKey(highestRun) > _runSortKey(runNumber):
        # Something odd is happening: the input run number is lower than the
        # last run number this script (thinks it) processed.
        # Notify someone important, and don't add to cool when done...
        _mailFile("[CSC CALIB PROC] Wrong run number ordering on run " + str(runNumber)
                  + "! Human intervension required!",
                  responsiblePerson,
                  "runNumberConflict.mail")
        isRunNumberConflict = True
    else:
        # No problem, update run list.
        runList += [runNumber]
        with open(runListFile, "wb") as outfile:
            pickle.dump(runList, outfile)

    # Label that we're working, so that other instances of CscCalibQuery won't run.
    subprocess.call(['touch', '/afs/cern.ch/user/m/muoncali/CSC/run/runningCalibration' + runNumber])

    # Command to create .db file.
    DbCommand = 'athena.py -c "input=\'' + calFilePrefix + '.cal\';output=\'' \
        + calFilePrefix + '.db\';IOVRunStart=int(\'' + str(highestRun) + '\')" ' + dbScript

    # Command to upload .db file to database.
    # --noexec prevents the actual execution of the cool merge.
    UploadCommand = '/afs/cern.ch/user/a/atlcond/utils/AtlCoolMerge.py ' \
        + "--batch "
    if not CoolMergeByDefault or isRunNumberConflict:
        UploadCommand += " --noexec "

    # NOTE(review): the database password is hard-coded in the command below
    # and ends up in every generated script; consider moving it out of the
    # source.
    UploadCommand += "--comment=\'Automated reference update from " + calType \
        + ' run ' + runNumber + ' to IOV starting at ' + str(highestRun) + "' " \
        + calFilePrefix + '.db' \
        + ' COMP200 ATLAS_COOLWRITE ATLAS_COOLOFL_CSC_W WCOOLOFL4CSC17 '

    # NOTE(review): the link name and site builder are the pedestal ones even
    # for pulser runs.
    WebSiteCommand = '\nfs sa ${resultDir} webserver:afs read\n'
    WebSiteCommand += 'cd $resultDir\n'
    WebSiteCommand += 'ln -s ${resultDir} ${webSiteDir}/pedRun_' + runNumber + '\n'
    WebSiteCommand += 'MakeCscPedSite.exe ' + runNumber + '.root ' + runNumber + '.cal_online\n'

    t1 = '\t'

    # Run calibration, create and upload the .db file, then mail the report.
    bashFileContents += ''.join([
        '\ncd ' + workDir + '\n',
        "#Copying files from castor#\n",
        castorCopyCmd,
        '\n',
        "#Running calibration (and calib monitoring)\n",
        "#, resulting in .cal file and status report\n",
        calibCommand + '\n',
        "\n",
        "#Athena job to transform *.cal file to *.db file.\n",
        DbCommand + '\n',
        "#Python utility to upload *.db file to database. When entering by\n",
        "#hand, I recomend removing '--batch' flag so you can check puposed\n",
        "#operations before submision to database\n",
        UploadCommand + '\n',
        '#Check if AllCalibMonGood file was created, which means that\n',
        '#this run passed acceptable criteria in the calibration monitoring\n',
        'if [ -a AllCalibMonGood ]; then\n',
        t1 + "#Email list that the calibration looks good\n",
        t1 + goodEmailCommand + '\n',
        t1 + "################################################################\n",
        t1 + "#Execute next two commands if you want to submit database entry#\n",
        t1 + "#Useful if these steps were skipped due to suspicious behaviour#\n",
        t1 + "#during calibration. #\n",
        t1 + "###############################################################\n",
        t1 + "\n",
        t1 + '\n',
        'else\n',
        t1 + "#Suspicious behaviour in calibration. Notify mail list of this fact\n",
        t1 + badEmailCommand + '\n',
        'fi\n',
        '\n',
        '#Always create website',
        WebSiteCommand + '\n',
        'rm -rf $bytestreamDir\n',
        'rm -rf ' + scriptDir + "runningCalibration" + runNumber + '\n',
    ])

    # Write bashfile.
    print("Printing bash file to: " + bashFilePath)
    with open(bashFilePath, 'w') as bashFile:
        bashFile.write(bashFileContents)

    # Submit script.
    os.system('chmod +x ' + bashFilePath)
    bsubsMessage = os.popen(bsubCmd).read()

    # Send alert email. The text goes to mail on stdin instead of through a
    # shell string, so quotes in the bsub output cannot break the command.
    emailMessage = 'Starting ' + calType + ' calibration on run number ' + runNumber
    emailSubject = '[CSC CALIB PROC]: ' + emailMessage
    emailMessage += '\nbsubs output:\n' + bsubsMessage
    _mailText(emailSubject, maillist, emailMessage)


def _runSortKey(run):
    """Return a sort key that orders purely numeric run numbers by value."""
    text = str(run)
    if text.isdigit():
        return (0, int(text), text)
    # Non-numeric entries sort after all numeric ones, alphabetically.
    return (1, 0, text)


def _mailFile(subject, recipients, bodyPath):
    """Mail the contents of bodyPath to the whitespace-separated recipients."""
    command = ["mail", "-s", subject] + recipients.split()
    try:
        body = open(bodyPath, "rb")
    except IOError:
        # A missing template must not suppress the alert: send an empty body.
        body = open(os.devnull, "rb")
    with body:
        return subprocess.call(command, stdin=body)


def _mailText(subject, recipients, text):
    """Mail text (plus a final newline) to the whitespace-separated recipients."""
    command = ["mail", "-s", subject] + recipients.split()
    process = subprocess.Popen(command, stdin=subprocess.PIPE)
    process.communicate((text + "\n").encode("utf-8"))
    return process.returncode
249
250
253
# Driver: looks for new calibration directories on castor and launches a
# calibration job on the first suitable one.
print('running')
# The first command line argument should be the calibration type, pulser or ped.
# A missing argument is treated like an unknown type instead of raising IndexError.
calType = sys.argv[1] if len(sys.argv) > 1 else None
if calType == 'pulser':
    calibFileDir = '/castor/cern.ch/user/l/lampen/CalibRunTest/slope/'
    oldListFilePath = '/afs/cern.ch/user/m/muoncali/CSC/run/pulserList.txt'
    outputDir = '/afs/cern.ch/user/m/muoncali/w0/CSC/runs/pulser/pulser'
    # NOTE(review): no file name pattern was ever defined for pulser runs.
    calibRe = None
elif calType == 'ped':
    calibFileDir = '/castor/cern.ch/grid/atlas/DAQ/muon/csc/'
    oldListFilePath = '/afs/cern.ch/user/m/muoncali/CSC/run/pedDirList.txt'
    outputDir = '/afs/cern.ch/user/m/muoncali/w0/CSC/runs/ped/ped'
    calibRe = re.compile('data1.*_calib.*calibration_pedCSC\\.daq\\.RAW.*\\.data')
else:
    print('Need to specify pulser or ped')
    # sys.exit flushes the message above and reports the usage error.
    sys.exit(1)


# First, see if a calibration is already running.
# If already running, stop.
testFile = glob.glob("/afs/cern.ch/user/m/muoncali/CSC/run/runningCalibration*")
if testFile:
    print('already running: ' + str(testFile))
    sys.exit()


# Get the current list of directories in castor.
print('rfdir ' + calibFileDir)
currentLs = os.popen('rfdir ' + calibFileDir).read()

os.popen('touch testing')

# Get the old list of files from castor.
with open(oldListFilePath, 'r') as inFile:
    oldLs = inFile.read()

print('checking for changes')

now = datetime.datetime.now()
today = now.day

# Check if there are any changes between the lists.
if oldLs != currentLs:
    print('There has been a change')
    currentDirList = currentLs.split('\n')
    oldDirList = oldLs.split('\n')

    updateRunList = True
    runningDir = ""
    # Run on any new directories.
    for Dir in currentDirList:
        if Dir in oldDirList:
            continue

        print("**********************************************************")
        splitDir = Dir.split()
        try:
            # Third word from the end of the listing line is read as the day of month.
            day = int(splitDir[-3])
        except (IndexError, ValueError):
            print("Could not parse listing line, skipping: " + repr(Dir))
            continue
        DirName = splitDir[-1]  # last ' ' delimited word in line is Dirname

        print(splitDir)
        # NOTE(review): only the day of month is compared. A negative
        # difference means the directory dates from an earlier month, and the
        # '< -23' test then skips the most recent of those (e.g. today=2,
        # day=28) while keeping older ones -- this looks inverted; confirm.
        timediff = today - day
        if timediff > 7 or timediff < -23:
            print(timediff)
            print("Found old dir " + DirName + ", but its over at least a week old, so we're skipping it")
            continue

        print("day is " + str(day))

        cmd = 'rfdir ' + calibFileDir + DirName
        fileList = os.popen(cmd).read().split('\n')

        # The listing ends with a newline, so the split yields one empty extra entry.
        nFiles = len(fileList) - 1
        print("found " + str(nFiles) + " files")

        runNumber = DirName

        # Prepare output directory.
        outputDirFull = outputDir + 'Run_' + runNumber
        print('outputDirFull is ' + outputDirFull)

        # Loop through files in directory.
        # Start building castor copy cmd.
        # If any files don't match the calibration file requirement,
        # mark this directory as something we are not interested in.
        ThisCalibDir = False
        castorCopyCmd = "mkdir ${bytestreamDir}"
        for fileLine in fileList:
            fileName = (fileLine.split(' '))[-1]  # last ' ' delimited word in line is fileName
            if fileName == "":
                continue
            print("fileName: " + fileName)
            if calibRe is None:
                # Previously an undefined-name crash; stop with a clear reason.
                print("No calibration file pattern is defined for calibration type " + calType)
                sys.exit(1)
            ThisCalibDir = re.match(calibRe, fileName)
            if nFiles < numFilesToRun:
                print("only " + str(nFiles) + " files. Breaking.")
                if ThisCalibDir:
                    print("There is a calib dir, but it only has " + str(nFiles)
                          + " file. Will not process.")
                    # Keep the old list so this directory is seen again next time.
                    updateRunList = False
                break
            if ThisCalibDir:
                fullPath = calibFileDir + DirName + '/' + fileName
                castorCopyCmd += "\nxrdcp root://castoratlas/" + fullPath + " ${bytestreamDir}"
            else:
                break

        print("found " + str(nFiles) + " files")

        # Only run if we have enough files.
        if nFiles >= numFilesToRun:
            # Comment next 4 lines out if you want to run on multiple runs at once.
            if runningDir:
                updateRunList = False
                print("Found new calibration directory to run on (" + DirName + "), but already running on " + runningDir)
                continue
            print('running on ' + DirName)

            if nFiles > numFilesToRun:
                print('WARNING! Found ' + str(nFiles) + ' calib files, when only ' + str(numFilesToRun) + ' were expected!')

            if os.path.exists(outputDirFull):
                print("There is already a directory for run number " + runNumber)
                print("Will not run on this pedestal run")
                continue

            if ThisCalibDir:
                print(castorCopyCmd)
                os.makedirs(outputDirFull + "/CalibResults")

                runCalib(calType, runNumber, outputDirFull, castorCopyCmd)

                print('Launched job, but will continue to check if we have more runs to run on later')
                runningDir = DirName
                updateRunList = False
                continue

    if updateRunList:
        # We update the run list so long as there are no runs on it that we expect to run on in the future.
        # The safest thing to do is to wait until all runs are processed.
        print("No unprocessed or currently being processed calibration runs. Will update run list.")
        updateList(oldListFilePath, currentLs)
    else:
        print("NOT updating run list")

else:
    print('No changes between old and new runs')
if(febId1==febId2)
void print(char *figname, TCanvas *c1)
std::vector< std::string > split(const std::string &s, const std::string &t=":")
Definition hcg.cxx:177
runCalib(calType, runNumber, workDir, castorCopyCmd)
updateList(oldListPath, newList)
IovVectorMap_t read(const Folder &theFolder, const SelectionCriterion &choice, const unsigned int limit=10)