ATLAS Offline Software
getPipeDate.py
Go to the documentation of this file.
1 #Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
2 
3 # Python script from Nick Dann
4 # Clean-ups for clarity and additional comments from Jennet Dickinson
5 # Search for JENNET to find where to replace file paths etc
6 # Nick set this up multithreaded, but Jennet prefers not to run this way,
7 # so she can more easily see what is going on
8 
9 # Comments from Nick:
10 # Multithreaded python script which finds files downloaded from the ATLAS DDV, reads in the data, then saves the data in separate files for each DCS group.
11 # Files from DDV should be in format YYYY_MM_DD-YYYY_MM_DD.txt
12 # I save to SENSORNAME.ssv (space separated variable). I usually save times as YYYY-MM-DD HH:MM:SS UTC timestamp value. Times are CERN time (CEST or CET).
13 
14 # scanDataThread is the thread declaration
15 # scanData (dataType) is the function which you should call to start the searching; datatype should be a string.
16 
17 #!/usr/bin/env python
18 import os
19 import shutil
20 import subprocess
21 import datetime, time
22 import fileinput
23 import sys
24 import random
25 from os.path import expanduser
26 import threading
27 
def sensorToNumber (stave,side,module):
    """Convert an IBL stave number, side ('A'/'C') and module index to a
    zero-based DCS group number (< 112).

    Eight groups per stave: side A occupies the first four slots, side C
    the next four.
    """
    side_offset = 4 if side == 'C' else 0
    group = 8 * (int(stave) - 1) + module + side_offset
    # -1 converts from the 1-based counting above to a 0-based group id
    return group - 1
35 
def findFiles (searchFolderPath):
    """Recursively list every file under searchFolderPath.

    Returns an alphabetically sorted list of full paths.  If the folder
    cannot be entered, prints a message and returns an empty list.

    BUG FIX: the original returned -1 on failure, but the caller
    (scanData) immediately does len(fileList) and iterates the result,
    which would raise a TypeError; an empty list keeps it running.
    """
    try:
        # chdir doubles as an existence/permission check on the folder
        os.chdir(searchFolderPath)
    except OSError:  # covers IOError (alias) plus other chdir failures
        print('No entries in ' + searchFolderPath)
        return []

    todaysList = []
    # walk the tree and collect every file path found
    for src_dir, dirs, files in os.walk(searchFolderPath):
        for file_ in files:
            todaysList.append(str(os.path.join(src_dir, file_)))

    # sort alphabetically so processing order is deterministic
    todaysList.sort()
    return todaysList
56 
57 #Function that actually reads in data from fileName, saves it to... somewhere. Probably dataFolder/datatype/modulename.ssv . It also returns
58 #stuff like the most recent dates for all DCS groups (endDates), and number of datapoints found for each DCS group (tempDataNumbers).
59 #Probably says something if it can't read the data as well. Gonna be honest, DDV is a bit of a nightmare and I've spent too long messing
60 #around with it to remember what my bodges are.
def appendEntries(fileName,dataType,dataFolder,dateEntries,largestDates,firstDate,lastDate,borkDate):
    """Parse one raw DDV dump file and append its readings to the
    per-DCS-group .ssv files under dataFolder/dataType/.

    Parameters:
      fileName     -- path of the raw DDV text file to parse
      dataType     -- DCS quantity name; selects the output sub-folder
      dateEntries  -- per-group reading counters, mutated in place
      largestDates -- per-group most-recent timestamps, mutated in place
      firstDate/lastDate -- nominal date range of the file (checked +-2h)
      borkDate     -- sentinel; returned unchanged unless an error occurred

    Returns a five-element list
      [firstDate, largestDates, borkDate-or-lastDate, dateEntries, firstDate]
    (slot 2 is set to lastDate when corrupt data was seen -- the caller
    compares it against its sentinel to detect trouble).

    BUG FIXES vs. the original:
      * the could-not-open path returned a bare firstDate, which crashed
        the caller's five-way unpack; it now returns the full list;
      * bare "except:" narrowed to "except Exception:";
      * file handles are closed on the early-return paths.
    """
    # Every exit path must return a list of exactly this shape.
    returnList = [firstDate,largestDates,borkDate,dateEntries,firstDate]

    try:
        rawFiles = open(fileName,'r')
    except IOError:
        print ("could not open file " + str(fileName) + " fool")
        return returnList

    print(firstDate)

    index_Jennet = 0
    for dataLine in rawFiles:

        index_Jennet = index_Jennet + 1
        # Two DDV export flavours exist: one with many commas, one without;
        # len(commaSplit) distinguishes them.
        if len(dataLine)>5:
            commaSplit = dataLine.split(',')

            if '!!!!' in dataLine: # blank entry marker -- nothing to read
                rawFiles.close()
                return returnList

            if len(commaSplit)<2:
                print("Dead")
            else:
                # '!!!' separates the data belonging to individual DCS groups
                moduleList = dataLine.split('!!!')

                for module in moduleList:

                    elements = module.split(',')
                    name = elements[0]  # e.g. 'LI_S01_A...'; stave/side in fixed columns
                    A="A"
                    B="A"
                    C="A"
                    if len(name)>9:
                        A = name[4:6]  # two-digit stave number
                        B = name[7]    # side, 'A' or 'C'
                        C = 1

                    moduleName = 'LI_S' + str(A) +'_'+ str(B)
                    try:
                        # NOTE(review): this index differs from sensorToNumber();
                        # stave 1 side A maps to -1 (the last list slot).  Kept
                        # as-is since existing data relies on it -- confirm.
                        moduleNumber = int(2 * (int(A)-1)-1 )
                        if B =='C':
                            moduleNumber +=4

                        outName = dataFolder+dataType+'/' + moduleName +'.ssv'

                        # create the per-group file on first sight, append afterwards
                        if os.path.isfile(outName) == False:
                            output = open(outName,'w')
                        else:
                            output = open(outName,'a')

                        position=0

                        # each element after the first is one
                        # "value DD-MM-YYYY HH:MM:SS:ffff" reading
                        for element in elements:

                            if position!=0: # first element is the DCS group name

                                tempLines = element.split()
                                date= tempLines[1].split('-')
                                time1 = tempLines[2].split(':')
                                data = tempLines[0]

                                dateTime = datetime.datetime(int(date[2]), int(date[1]), int(date[0]), int(time1[0]), int(time1[1]), int(time1[2]),int(time1[3]))

                                # timestamps outside the nominal range (+-2h slack)
                                # mean the file is corrupt: flag it and bail out
                                if (dateTime>lastDate+datetime.timedelta(hours = 2)) or (dateTime<firstDate -datetime.timedelta(hours = 2)) :
                                    returnList[2] = lastDate
                                    print ("\nBORK BORK BORK! Error in date of " + fileName + " for " + dataType + " borkDate " + str(returnList[2]) )
                                    print ("First line reads " + dataLine + "\n")
                                    print ("Should read " + moduleName+"_"+dataType+"\n")

                                    borkRepork = open(dataFolder+dataType+"BORKBORKBORK.txt",'w')
                                    borkRepork.write("filename " + fileName + "date range " + str(firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                    borkRepork.close()
                                    output.close()
                                    rawFiles.close()
                                    return returnList

                                # only readings newer than the last one seen for this
                                # group are written (simple de-duplication)
                                if dateTime>returnList[1][moduleNumber]:
                                    returnList[3][moduleNumber]+=1
                                    outputLine = moduleName + "_" + dataType + " " + str(dateTime) + " " + str( time.mktime(dateTime.timetuple()) ) + " " + str(data) + "\n"

                                    returnList[1][moduleNumber] = dateTime

                                    output.write(outputLine)

                            position+=1

                        output.close()

                    except Exception: # something broke: log it, flag the bork, run away
                        returnList[2] = lastDate
                        print("Something broke :( \n")
                        print("Could be an error in data values of " + fileName + " for " + dataType + " borkDate " + str(returnList[2]) )
                        print("First line reads " + name + "\n")

                        borkRepork = open(dataFolder+dataType+"BORKBORKBORK.txt",'w')
                        borkRepork.write("filename " + fileName + "date range " + str(firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                        borkRepork.close()
                        print("Try running again... often this is an issue with the connection to eos")
                        rawFiles.close()
                        return returnList

    rawFiles.close()
    return returnList
172 
173 
def scanData (dataType):
    """Process all raw DDV dump files for one DCS data type.

    Reads (or creates) the bookkeeping file <dataFolder><dataType>.txt,
    which holds, per DCS group, the date range already processed plus file
    and data-point counts.  Then walks the raw-data folder, feeds every
    file in the period of interest to appendEntries(), records per-day
    entry counts, and rewrites the bookkeeping file.

    Returns a five-element list:
      [earliest start date, latest end date, bork-marker date,
       finished-cleanly flag, module-name placeholder]
    """
    # sentinel date used as "nothing seen yet" / "no error" marker
    fillerDate = datetime.datetime(2000,1,1,1,1,1,1)
    tempDate = datetime.datetime(2000,1,1,1,1,1,1)  # NOTE(review): unused
    returnList =[fillerDate,fillerDate,fillerDate,False,"LI_S00_0_M0"]
    # JENNET sets file paths
    homeDirectory = os.path.expanduser('/eos/atlas/user/j/jdickins/Pixel/LeakageCurrent/')

    # define paths to the processed-data, raw-data and entries-per-day folders
    dataFolder = homeDirectory + "/IBLData/processedData/"
    inputFolder = homeDirectory + "/IBLData/rawData/"+dataType+"/"
    entriesFolder = homeDirectory + "/IBLData/rawData/entriesPerDay/" + dataType + "/"

    # make output directories if they don't exist
    if not os.path.exists(dataFolder+dataType):
        os.mkdir(dataFolder+dataType)

    if not os.path.exists(entriesFolder):
        os.mkdir(entriesFolder)

    # per-DCS-group bookkeeping (28 entries: 14 staves x sides A/C)
    startingDates=[]
    endDates=[]
    filesProcessed=[]
    dataPoints=[]
    dailyPoints=[]
    smallestEndDate=fillerDate

    # check for the bookkeeping file holding the dates processed so far
    if not os.path.exists(dataFolder+dataType+".txt"):

        # no dates file found, so create one with default start values
        print("No any file found! at " + dataFolder+dataType +" Making default values")

        datesFile = open(dataFolder+dataType+".txt",'w')
        firstTempDate = datetime.datetime(2015,5,1,0,0,0,0)
        lastTempDate = datetime.datetime(2015,5,1,0,0,0,1)
        smallestEndDate = lastTempDate

        for stave in range(1,15):
            staveString = str(stave)
            if stave<10:
                staveString="0"+str(stave)  # zero-pad stave number to two digits
            for side in ['A','C']:
                moduleName = 'LI_S' + str(staveString) + '_' + side
                datesFile.write(moduleName +" " + str(firstTempDate) + " " + str(lastTempDate) +" 0 0\n")
                startingDates.append(firstTempDate)
                endDates.append(lastTempDate)
                filesProcessed.append(0)
                dataPoints.append(0)
                dailyPoints.append(0)

        datesFile.close()

    else: # dates file exists, so read dates for each DCS group
        print("Found " + dataFolder+dataType+".txt")
        datesFile = open(dataFolder+dataType+".txt",'r')

        holder=0

        for dateLine in datesFile: # one line per DCS group:
            # "name firstDate firstTime lastDate lastTime nFiles nPoints"
            tempDatesLine = dateLine.split()
            filesProcessed.append(int(tempDatesLine[5]) )
            dataPoints.append( int( tempDatesLine[6]) )
            dailyPoints.append(0)

            firstTemp = tempDatesLine[1].split('-')
            lastTemp = tempDatesLine[3].split('-')

            firstTempTime = tempDatesLine[2].split(':')
            lastTempTime = tempDatesLine[4].split(':')

            # the seconds field may or may not carry a ".microseconds" part
            firstTempTimes = firstTempTime[2].split('.')
            lastTempTimes = lastTempTime[2].split('.')

            if len(firstTempTimes)<2:
                firstTempTimes.append(0)
            if len(lastTempTimes)<2:
                lastTempTimes.append(0)

            firstTempDate = datetime.datetime(int(firstTemp[0]), int(firstTemp[1]), int(firstTemp[2]), int(firstTempTime[0]),int(firstTempTime[1]), int(firstTempTimes[0]), int(firstTempTimes[1]))
            lastTempDate = datetime.datetime(int(lastTemp[0]), int(lastTemp[1]), int(lastTemp[2]), int(lastTempTime[0]), int(lastTempTime[1]),int(lastTempTimes[0]), int(lastTempTimes[1]))

            startingDates.append(firstTempDate)
            endDates.append(lastTempDate)

            # track the overall earliest start / latest end, and the smallest
            # per-group end date (raw files ending before it are skipped below)
            if holder==0:
                returnList[0] = firstTempDate
                returnList[1] = lastTempDate
                smallestEndDate=lastTempDate

            else:
                if firstTempDate<returnList[0]:
                    returnList[0] = firstTempDate
                if lastTempDate>returnList[1]:
                    returnList[1] = lastTempDate
                if lastTempDate < smallestEndDate:
                    smallestEndDate = lastTempDate

            holder+=1

        datesFile.close()
        print ("Investigating " + dataType + " from " + str(smallestEndDate))

    holder = 0

    # call function to return list of all files in input folder
    fileList = findFiles(inputFolder)

    firstTempDate = startingDates[0]
    lastTempDate = endDates[0]
    numberFiles = len(fileList)
    fileNumber = 0

    # iterate through all files, opening those in the time period of interest
    for fileName in fileList:

        # print(fileName)

        # raw file names end in YYYY_MM_DD-YYYY_MM_DD.txt; slice from the tail
        end = len(fileName)
        endDate1 = fileName[end-4:end]    # expected '.txt'
        endDate2 = fileName[end-7:end-6]  # expected '_'

        if endDate1=='.txt' and endDate2=='_': # sanity-check the name format

            tempDataNumber = 0
            startDate = fileName[end-23:end-15]
            endDate = fileName[end-12:end-4]
            endDateSplit = endDate.split('_')
            endDateFile = datetime.datetime(2000+int(endDateSplit[0]),int(endDateSplit[1]),int(endDateSplit[2]),0,0,0,1)

            startDateSplit = startDate.split('_')
            startDateFile = datetime.datetime(2000+int(startDateSplit[0]),int(startDateSplit[1]),int(startDateSplit[2]),0,0,0,1)

            if endDateFile > smallestEndDate: # data from region of interest

                lastTempDate = endDateFile
                # appendEntries parses the file and writes the .ssv output.
                # NOTE(review): fillerDate is deliberately rebound here to the
                # returned bork marker; the comparison against returnList[2]
                # below relies on that rebinding.
                [firstTempDate,endDates,fillerDate,tempDataNumbers,smallestEndDate] = appendEntries(fileName,dataType,dataFolder,dailyPoints,endDates,startDateFile,endDateFile,fillerDate)

                for i in range(0,28): # 28 DCS groups for IBL; change for other detectors
                    filesProcessed[i] +=1 # number of files
                    dataPoints[i]+=tempDataNumbers[i] # data points per DCS group

                holderX=0

                # record the number of data points per day for each DCS group
                for stave in range(1,15):
                    staveString = str(stave)
                    if stave<10:
                        staveString="0"+str(stave)
                    for side in ['A','C']:
                        moduleName = 'LI_S' + str(staveString) + '_' + side
                        outName = entriesFolder + moduleName + ".txt"
                        dataLine = str(startDate) + " " + str(tempDataNumbers[holderX]) + "\n"
                        tempDataNumbers[holderX]=0

                        # create the per-group counts file if needed, else append
                        if os.path.isfile(outName) == False:
                            output = open(outName,'w')
                            output.write(dataLine)
                            output.close()

                        else:
                            output = open(outName,'a')
                            output.write(dataLine)
                            output.close()
                        holderX +=1

                # if appendEntries flagged corrupt data, save bookkeeping and bail out
                if returnList[2]!=fillerDate:
                    returnList[2] = fillerDate
                    datesFile = open(dataFolder+dataType+".txt",'w')
                    tempHolder=0
                    for stave in range(1,15):
                        staveString = str(stave)
                        if stave<10:
                            staveString="0"+str(stave)
                        for side in ['A','C']:
                            moduleName = 'LI_S' + str(staveString) + '_' + side
                            datesFile.write(moduleName +" " + str(startingDates[tempHolder]) + " " + str(endDates[tempHolder]) +" " + str(filesProcessed[tempHolder]) + " " + str ( dataPoints[tempHolder] ) + "\n")
                            tempHolder+=1

                    datesFile.close()

                    return returnList

            sys.stdout.flush()
            fileNumber+=1

        holder+=1

    # all files done: rewrite the bookkeeping file with the final state
    datesFile = open(dataFolder+dataType+".txt",'w')
    tempHolder=0
    for staveX in range(1,15):
        staveStringX = str(staveX)
        if staveX<10:
            staveStringX="0"+str(staveX)
        for sideX in ['A','C']:
            moduleNameX = 'LI_S' + str(staveStringX) + '_' + sideX
            datesFile.write(moduleNameX +" " + str(startingDates[tempHolder]) + " " + str(endDates[tempHolder]) +" " + str(filesProcessed[tempHolder]) + " " + str ( dataPoints[tempHolder] ) +"\n")

            tempHolder+=1

    datesFile.close()
    returnList[3]=True  # flag: finished without a bork

    return returnList
395 
class scanDataThread (threading.Thread):
    """Worker thread that runs scanData() for a single DCS data type."""

    def __init__(self, threadID, name):
        super(scanDataThread, self).__init__()
        self.threadID = threadID  # numeric identifier, informational only
        self.name = name          # DCS data type, e.g. 'ENV_TT'

    def run(self):
        print ("Starting getPipeDate.py for " + self.name)
        result = scanData(self.name)
        print ( "Exiting getPipeDate.py for " + self.name + " with no issues, data range from " + str(result[0]) + " to " + str(result[1]) )
406 
407 
def main():
    """Launch one worker thread for the 'ENV_TT' data type."""
    worker = scanDataThread(1, 'ENV_TT')
    worker.start()


if __name__ == "__main__":
    main()
getPipeDate.scanDataThread.__init__
def __init__(self, threadID, name)
Definition: getPipeDate.py:397
getPipeDate.scanData
def scanData(dataType)
Definition: getPipeDate.py:174
getPipeDate.appendEntries
def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate)
Definition: getPipeDate.py:61
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
getPipeDate.scanDataThread.threadID
threadID
Definition: getPipeDate.py:399
getPipeDate.scanDataThread.run
def run(self)
Definition: getPipeDate.py:402
getPipeDate.findFiles
def findFiles(searchFolderPath)
Definition: getPipeDate.py:36
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
getPipeDate.main
def main()
Definition: getPipeDate.py:410
Trk::open
@ open
Definition: BinningType.h:40
getPipeDate.scanDataThread
Definition: getPipeDate.py:396
Muon::print
std::string print(const MuPatSegment &)
Definition: MuonTrackSteering.cxx:28
str
Definition: BTagTrackIpAccessor.cxx:11
getPipeDate.sensorToNumber
def sensorToNumber(stave, side, module)
Definition: getPipeDate.py:28
getPipeDate.scanDataThread.name
name
Definition: getPipeDate.py:400
Trk::split
@ split
Definition: LayerMaterialProperties.h:38