Collaboration diagram for BulkRun.BulkRun:

Public Member Functions
def	__init__ (self, inputPattern="/raid02/schernau/ped/csc/csc/*.data", processedFilesList="ProcessedFiles.list", outputDirBase="/raid02/lampen/datasets/csc/PedProcessing2", debug=False, allowDirOverwrite=False)

def	run (self, numToRun=10)

def	ReadProcessedFilesList (self)

def	AddProcessedFiles (self, newFiles)

def	FindFiles (self)

def	RunAthena (self, pattern, runNumber)

Public Attributes
	InputPattern

	ProcessedFilesList

	OutputDirBase

	debug

	AllowDirOverwrite

Detailed Description

Run on multiple pedestal files

Used to run on multiple pedestal bytestream files located somewhere on disk.
Puts results into separate directories by run-number. It extracts pertinent
information from the file names, which are expected to be of the form:

data10_calib.00157081.calibration_pedCSC.daq.RAW._lb0000._CSC-EB._0001.data

run(numToRun) - process at most numToRun runs from those not yet processed. This is the primary interface. 
ReadProcessedFilesList() - find all files already run on from a local text file
AddProcessedFiles() - save newly run on files to disk
FindFiles() - Find pattern and run number of a set of files that have not yet been run on
RunAthena() - run athena job for a set of runs

Definition at line 11 of file BulkRun.py.

Constructor & Destructor Documentation

◆ __init__()

def BulkRun.BulkRun.__init__	(	self,
		inputPattern = `"/raid02/schernau/ped/csc/csc/*.data"`,
		processedFilesList = `"ProcessedFiles.list"`,
		outputDirBase = `"/raid02/lampen/datasets/csc/PedProcessing2"`,
		debug = `False`,
		allowDirOverwrite = `False`
	)

initialize internal variables

Definition at line 27 of file BulkRun.py.

   def __init__(
       self,
       inputPattern = "/raid02/schernau/ped/csc/csc/*.data" ,
       processedFilesList = "ProcessedFiles.list",
       outputDirBase = "/raid02/lampen/datasets/csc/PedProcessing2",
       debug = False,
       allowDirOverwrite = False,
       ):
     """initialize internal variables"""
     self.InputPattern = inputPattern
     self.ProcessedFilesList = processedFilesList
     self.OutputDirBase = outputDirBase
     self.debug = debug
     self.AllowDirOverwrite = allowDirOverwrite
     print (self.InputPattern)
  

Member Function Documentation

◆ AddProcessedFiles()

def BulkRun.BulkRun.AddProcessedFiles	(	self,
		newFiles
	)

Save new processed files to disk

Definition at line 93 of file BulkRun.py.

   def AddProcessedFiles(self,newFiles):
     """Save new processed files to disk"""
     ProcessedFiles = self.ReadProcessedFilesList()
     ProcessedFiles += newFiles
     f = open(self.ProcessedFilesList,"wb")
     pickle.dump(ProcessedFiles,f)
     f.close()
  
     return True
  
  

◆ FindFiles()

def BulkRun.BulkRun.FindFiles ( self )

Definition at line 105 of file BulkRun.py.

   def FindFiles(self):
  
     #Initial setup
     FoundUnprocessedFile = False
  
     #Get processed file list
     ProcessedFiles = self.ReadProcessedFilesList()
  
     #Get list of files in input dir
     print ("Input pattern: " + self.InputPattern)
     inputFiles = glob(self.InputPattern)
  
     if(self.debug):
       print()
       print ("Searching for file")
       print ("InputPattern: " + self.InputPattern)
       #print ("inputFiles: ")
       #print (inputFiles)
  
     pattern = ""
     runNumber = ""
  
     #Loop through list until find file that is 
     for file in inputFiles:
       if not ProcessedFiles.count(file):
  
         index = file.find("data10")
         if(index == -1):
           index = file.find("data11")
         if(index == -1):
           print ("ERROR! Index of -1!")
           raise Exception("Index error") 
         FoundUnprocessedFile = True
         pattern = file[0:index + 22] + "*" #includes run number
         runNumber = file[index + 13: index + 21]
  
     if(not FoundUnprocessedFile):
       return "",""
  
     if(self.debug) : 
       print ("Found unprocessed file with pattern: " + pattern)
       print ("This includes files:")
       print (glob(pattern))
  
     return pattern, runNumber
  

◆ ReadProcessedFilesList()

def BulkRun.BulkRun.ReadProcessedFilesList ( self )

Definition at line 73 of file BulkRun.py.

   def ReadProcessedFilesList(self):
  
     ProcessedFiles = []
  
     #Get processed files
     f = open(self.ProcessedFilesList,"rb") 
     ProcessedFiles = pickle.load(f)
     f.close()
      
     #Remove newline character from each filename
  
     #for index in range(len(ProcessedFiles)) : 
     #  file = ProcessedFiles[index]
     #  ProcessedFiles[index] = file[0:len(file)-1] 
  
     print ('Processed String: ')
     print (ProcessedFiles)
  
     return ProcessedFiles
  

◆ run()

def BulkRun.BulkRun.run	(	self,
		numToRun = `10`
	)

Run over all run numbers, find files for each, and submit each set to
CscCalcPedMon.py

Definition at line 43 of file BulkRun.py.

   def run(self, numToRun = 10) :
     """
     Run over all run numbers, find files for each, and submit each set to
     CscCalcPedMon.py
     """
  
     print ("Running on " + str(numToRun) + " runsSet.")
  
  
     runNumbers = []
     for runCnt in range(numToRun) :
       print (">>>>Running on runSet " + str(runCnt+1) + " of " + str(numToRun))
       pattern,runNumber = self.FindFiles()
       if(pattern != ""):
         #have files to run on
         runNumbers += [runNumber]
         self.RunAthena(pattern,runNumber)
       else:
         print ("No more unprocessed files. Congrats!")
         print ("N runs done: " + str(runCnt +1))
         print (runNumbers)
         return
     print ("finished all " + str(numToRun) )
     print ("Run numbers include:")
     print (runNumbers)
     print()
     print ("All Processed files:" )
     print (self.ReadProcessedFilesList())
  

◆ RunAthena()

def BulkRun.BulkRun.RunAthena	(	self,
		pattern,
		runNumber
	)

Run athena on a particular set of files matching pattern

Definition at line 151 of file BulkRun.py.

   def RunAthena(self, pattern, runNumber):
     """Run athena on a particular set of files matching pattern"""
     outputDirPath = self.OutputDirBase + "/" + runNumber
       
     if(self.AllowDirOverwrite):
       subprocess.call("rm -rf " + outputDirPath)
  
     print ("Making directory" + outputDirPath)
     #Create output directory
     os.mkdir(outputDirPath,0o755)
  
     #Change directory to output directory
     #os.chdir(outputDirPath)
  
     #Athena options
     athOpt = "outputPre='" + outputDirPath +"/" + runNumber 
     athOpt += "';inputPat='" + pattern 
     athOpt += "';reportPrefix='runNumber = " 
     athOpt += runNumber + "'"
  
     #athena arguments
     athArgs = ["athena.py", "-c", athOpt, "CscCalcPedMon.py"]
  
     #Output log file
     logFile = open(outputDirPath + "/run.log","w")
  
     print()
     print ("**************************************")
     print ("Starting running on run " + str(runNumber))
     sys.stdout.flush()
     subprocess.Popen(athArgs,stdout=logFile,stderr=subprocess.STDOUT).wait()
     print ("Finished run " + str(runNumber))
     print ("**************************************")
     print()
  
     logFile.close()
  
  
     #Add files we just ran on to file list
     newProcessedFiles = glob(pattern)
     self.AddProcessedFiles(newProcessedFiles)

Member Data Documentation

◆ AllowDirOverwrite

BulkRun.BulkRun.AllowDirOverwrite

Definition at line 33 of file BulkRun.py.

◆ debug

BulkRun.BulkRun.debug

Definition at line 32 of file BulkRun.py.

◆ InputPattern

BulkRun.BulkRun.InputPattern

Definition at line 29 of file BulkRun.py.

◆ OutputDirBase

BulkRun.BulkRun.OutputDirBase

Definition at line 31 of file BulkRun.py.

◆ ProcessedFilesList

BulkRun.BulkRun.ProcessedFilesList

Definition at line 30 of file BulkRun.py.

The documentation for this class was generated from the following file:

BulkRun.py

Public Member Functions

Public Attributes