ATLAS Offline Software
getPipeDate.py
Go to the documentation of this file.
1 # Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration
2 
3 # Python script from Nick Dann
4 # Clean-ups for clarity and additional comments from Jennet Dickinson
5 # Search for JENNET to find where to replace file paths etc
6 # Nick set this up multithreaded, but Jennet prefers not to run this way,
7 # so she can more easily see what is going on
8 
9 # Comments from Nick:
10 # Multithreaded python script which finds files downloaded from the ATLAS DDV, reads in the data, then saves the data in seperate files for each DCS group.
11 # Files from DDV should be in format YYYY_MM_DD-YYYY_MM_DD.txt
12 # I save to SENSORNAME.ssv (space seperated variable). I usually save times as YYYY-MM-DD HH:MM:SS UTC timestamp value. Times are CERN time (CEST or CET).
13 
14 # scanDataThread is the thread declaration
15 # scanData (dataType) is the function which you should call to start the searching; datatype should be a string.
16 
17 #!/usr/bin/env python
18 import os
19 import datetime
20 import time
21 import sys
22 import threading
23 
24 
25 # converts from IBL stave, senors and side to DCS group number (<112)
def sensorToNumber(stave, side, module):
    """Map an IBL stave number, side ('A' or 'C') and module index to a
    zero-based DCS group number (< 112)."""
    groupIndex = 8 * (int(stave) - 1) + module
    if side == 'C':
        groupIndex += 4
    return groupIndex - 1
32 
33 
def findFiles(searchFolderPath):
    """Return an alphabetically sorted list of every file found (recursively)
    under searchFolderPath.

    Prints a message and returns -1 when the folder cannot be entered.
    """
    try:
        os.chdir(searchFolderPath)
    except IOError:
        print('No entries in ' + searchFolderPath)
        return -1

    collected = []
    # Walk the whole tree, collecting the full path of each regular file.
    for currentDir, _subDirs, fileNames in os.walk(searchFolderPath):
        collected.extend(str(os.path.join(currentDir, entry))
                         for entry in fileNames)
    # Sort alphabetically so callers see a deterministic order.
    collected.sort()
    return collected
54 
55 # Function that actually reads in data from fileName, saves it to... somewhere. Probably dataFolder/datatype/modulename.ssv . It also returns
56 # stuff like the most recent dates for all DCS groups (endDates), and number of datapoints found for each DCS group (tempDataNumbers).
57 # Probably says something if it can't read the data as well. Gonna be honest, DDV is a bit of a nightmare and I've spent too long messing
58 # around with it to remember what my bodges are.
59 
60 
def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate):
    """Parse one raw DDV dump file and append its readings to per-group .ssv files.

    Output goes to <dataFolder><dataType>/<moduleName>.ssv, one line per
    reading: "<module>_<dataType> <datetime> <unix timestamp> <value>".

    Args:
        fileName: path of the raw DDV text file to read.
        dataType: DCS data type string; selects the output sub-folder.
        dataFolder: base folder holding one sub-folder per data type.
        dateEntries: per-DCS-group counters of points written (mutated in place).
        largestDates: per-DCS-group latest timestamp written (mutated in place).
        firstDate, lastDate: date range the file is expected to cover; readings
            more than 2 hours outside it are treated as corrupt.
        borkDate: sentinel datetime; echoed back unchanged unless an error
            occurred, in which case slot 2 of the result becomes lastDate.

    Returns:
        [firstDate, largestDates, borkDate-or-lastDate, dateEntries, firstDate]
        on the normal paths, but a bare datetime (firstDate) when the file
        cannot be opened -- callers that unpack five values must avoid that case.
    """

    try:
        with open(fileName, 'r') as rawFiles:

            # Slot 2 doubles as the error flag: it stays == borkDate unless a
            # date inconsistency or a parse failure is hit below.
            returnList = [firstDate, largestDates,
                          borkDate, dateEntries, firstDate]

            print(firstDate)

            index_Jennet = 0
            # Read each line of fileName; an empty file falls straight through
            # to the final return.
            for dataLine in rawFiles:

                index_Jennet = index_Jennet + 1
                # DDV produces two file flavours: one with many commas, one
                # without. len(commaSplit) below distinguishes them.

                if len(dataLine) > 5:
                    commaSplit = dataLine.split(',')

                    if '!!!!' in dataLine:  # entry is blank -- nothing to parse
                        return returnList

                    if len(commaSplit) < 2:
                        print("Dead")
                    else:
                        # Format from getDataSafely: '!!!' separates DCS groups.

                        moduleList = dataLine.split('!!!')

                        for module in moduleList:

                            elements = module.split(',')
                            name = elements[0]
                            # Defaults used when the group name is too short to
                            # carry stave/side information.
                            A = "A"
                            B = "A"
                            if len(name) > 9:
                                A = name[4:6]  # two-digit stave number
                                B = name[7]    # side letter, 'A' or 'C'

                            moduleName = 'LI_S' + str(A) + '_' + str(B)
                            try:
                                # NOTE(review): for stave "01" side A this is -1,
                                # which indexes the *last* element of the
                                # per-group lists in Python -- confirm intended.
                                moduleNumber = int(2 * (int(A)-1)-1)
                                if B == 'C':
                                    moduleNumber += 4

                                outName = dataFolder+dataType+'/' + moduleName + '.ssv'

                                # Append if the output file exists, create otherwise.
                                mode = 'a' if os.path.isfile(outName) else 'w'
                                with open(outName, mode) as output:
                                    position = 0

                                    # Each element after the first is one
                                    # (value, date, time) reading.
                                    for element in elements:

                                        if position != 0:  # element 0 is the DCS group name

                                            tempLines = element.split()
                                            date = tempLines[1].split('-')   # DD-MM-YYYY
                                            # assumes a 4-field HH:MM:SS:subsec time -- TODO confirm
                                            time1 = tempLines[2].split(':')
                                            data = tempLines[0]

                                            dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                                                date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))
                                            # Reading falls outside the file's declared range (+/- 2 h):
                                            # flag the error slot, log, and bail out of this file.
                                            if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                                                returnList[2] = lastDate
                                                print("\nBORK BORK BORK! Error in date of " + fileName +
                                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                                print(
                                                    "First line reads " + dataLine + "\n")
                                                print(
                                                    "Should read " + moduleName+"_"+dataType+"\n")

                                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                                    borkRepork.write("filename " + fileName + "date range " + str(
                                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                                return returnList

                                            # Only write readings newer than the newest
                                            # already stored for this group.
                                            if dateTime > returnList[1][moduleNumber]:
                                                returnList[3][moduleNumber] += 1
                                                outputLine = moduleName + "_" + dataType + " " + \
                                                    str(dateTime) + " " + str(time.mktime(
                                                        dateTime.timetuple())) + " " + str(data) + "\n"

                                                returnList[1][moduleNumber] = dateTime

                                                output.write(outputLine)

                                        position += 1

                            # NOTE(review): bare except hides the real error type;
                            # flag the error slot, log, and give up on this file.
                            except:
                                returnList[2] = lastDate
                                print("Something broke :( \n")
                                print("Could be an error in data values of " + fileName +
                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                print("First line reads " + name + "\n")

                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                    borkRepork.write("filename " + fileName + "date range " + str(
                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                print(
                                    "Try running again... often this is an issue with the connection to eos")
                                return returnList

    except IOError:
        print("could not open file " + str(fileName) + " fool")
        # NOTE(review): returns a bare datetime here instead of the usual
        # 5-element list -- unpacking callers will fail on this path.
        return firstDate

    return returnList
175 
176 
def scanData(dataType):
    """Process every raw DDV file for one DCS data type.

    Bookkeeping lives in <dataFolder><dataType>.txt: one line per DCS group
    (LI_S01_A ... LI_S14_C, 28 groups) holding the first/last processed
    timestamps plus file and data-point counts. Raw files whose end date
    (parsed from the ..._YY_MM_DD-YY_MM_DD.txt name) is newer than the
    smallest recorded end date are fed through appendEntries().

    Returns a 5-element list [earliest start date, latest end date,
    sentinel date, completion flag, "LI_S00_0_M0"]; the flag is True only
    when every file was processed without a date error.
    """

    # Sentinel date well before any real data; appendEntries echoes it back
    # unchanged unless it hit an error.
    fillerDate = datetime.datetime(2000, 1, 1, 1, 1, 1, 1)
    returnList = [fillerDate, fillerDate, fillerDate, False, "LI_S00_0_M0"]
    # JENNET sets file paths here
    homeDirectory = os.path.expanduser(
        '/eos/atlas/user/j/jdickins/Pixel/LeakageCurrent/')

    # define paths: processed output, raw input, per-day entry counts
    dataFolder = homeDirectory + "/IBLData/processedData/"
    inputFolder = homeDirectory + "/IBLData/rawData/"+dataType+"/"
    entriesFolder = homeDirectory + "/IBLData/rawData/entriesPerDay/" + dataType + "/"

    # make directories if they don't exist
    if not os.path.exists(dataFolder+dataType):
        os.mkdir(dataFolder+dataType)

    if not os.path.exists(entriesFolder):
        os.mkdir(entriesFolder)

    # Per-DCS-group bookkeeping, one entry per group in file order.
    startingDates = []    # first processed timestamp
    endDates = []         # last processed timestamp
    filesProcessed = []   # number of raw files seen
    dataPoints = []       # total data points written
    dailyPoints = []      # per-file point counters
    smallestEndDate = fillerDate

    # Check for the bookkeeping file recording how far we have processed.
    if not os.path.exists(dataFolder+dataType+".txt"):

        # No dates file found, so create one with default values.
        print("No any file found! at " + dataFolder +
              dataType + " Making default values")
        # set default max and min values for each sensor

        with open(dataFolder+dataType+".txt", 'w') as datesFile:
            firstTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 0)
            lastTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 1)
            smallestEndDate = lastTempDate

            # 14 staves x sides A/C -> 28 DCS groups.
            for stave in range(1, 15):
                staveString = str(stave)
                if stave < 10:
                    staveString = "0"+str(stave)
                for side in ['A', 'C']:
                    moduleName = 'LI_S' + str(staveString) + '_' + side
                    datesFile.write(
                        moduleName + " " + str(firstTempDate) + " " + str(lastTempDate) + " 0 0\n")
                    startingDates.append(firstTempDate)
                    endDates.append(lastTempDate)
                    filesProcessed.append(0)
                    dataPoints.append(0)
                    dailyPoints.append(0)

    else:  # dates file exists, so read dates for each DCS group
        print("Found " + dataFolder+dataType+".txt")
        with open(dataFolder+dataType+".txt", 'r') as datesFile:

            holder = 0

            for dateLine in datesFile:  # one line per DCS group

                # Line format: name first-date first-time last-date last-time files points
                tempDatesLine = dateLine.split()
                filesProcessed.append(int(tempDatesLine[5]))
                dataPoints.append(int(tempDatesLine[6]))
                dailyPoints.append(0)

                firstTemp = tempDatesLine[1].split('-')
                lastTemp = tempDatesLine[3].split('-')

                firstTempTime = tempDatesLine[2].split(':')
                lastTempTime = tempDatesLine[4].split(':')

                # The seconds field may or may not carry a fractional part.
                firstTempTimes = firstTempTime[2].split('.')
                lastTempTimes = lastTempTime[2].split('.')

                if len(firstTempTimes) < 2:
                    firstTempTimes.append(0)
                if len(lastTempTimes) < 2:
                    lastTempTimes.append(0)

                firstTempDate = datetime.datetime(int(firstTemp[0]), int(firstTemp[1]), int(firstTemp[2]), int(
                    firstTempTime[0]), int(firstTempTime[1]), int(firstTempTimes[0]), int(firstTempTimes[1]))
                lastTempDate = datetime.datetime(int(lastTemp[0]), int(lastTemp[1]), int(lastTemp[2]), int(
                    lastTempTime[0]), int(lastTempTime[1]), int(lastTempTimes[0]), int(lastTempTimes[1]))

                startingDates.append(firstTempDate)
                endDates.append(lastTempDate)

                # Track the overall earliest start, latest end and smallest end.
                if holder == 0:
                    returnList[0] = firstTempDate
                    returnList[1] = lastTempDate
                    smallestEndDate = lastTempDate

                else:
                    if firstTempDate < returnList[0]:
                        returnList[0] = firstTempDate
                    if lastTempDate > returnList[1]:
                        returnList[1] = lastTempDate
                    if lastTempDate < smallestEndDate:
                        smallestEndDate = lastTempDate

                holder += 1

    print("Investigating " + dataType + " from " + str(smallestEndDate))

    holder = 0

    # call function to return list of all files in input folder
    fileList = findFiles(inputFolder)

    firstTempDate = startingDates[0]
    lastTempDate = endDates[0]
    fileNumber = 0

    # iterate through all files, opening those in the time period of interest.
    for fileName in fileList:

        # print(fileName)

        end = len(fileName)
        endDate1 = fileName[end-4:end]    # expected '.txt'
        endDate2 = fileName[end-7:end-6]  # expected '_' inside the end date

        # check file ends with .txt and contains '_' in the expected place;
        # could make this more rigorous
        if endDate1 == '.txt' and endDate2 == '_':

            # File names look like ..._YY_MM_DD-YY_MM_DD.txt (2-digit years).
            startDate = fileName[end-23:end-15]
            endDate = fileName[end-12:end-4]
            endDateSplit = endDate.split('_')
            endDateFile = datetime.datetime(
                2000+int(endDateSplit[0]), int(endDateSplit[1]), int(endDateSplit[2]), 0, 0, 0, 1)

            startDateSplit = startDate.split('_')
            startDateFile = datetime.datetime(
                2000+int(startDateSplit[0]), int(startDateSplit[1]), int(startDateSplit[2]), 0, 0, 0, 1)

            if endDateFile > smallestEndDate:  # data from region of interest

                lastTempDate = endDateFile
                # NOTE(review): fillerDate is deliberately rebound here to
                # appendEntries' error slot; the comparison against
                # returnList[2] further down relies on this rebinding to
                # detect a date error inside appendEntries.
                [firstTempDate, endDates, fillerDate, tempDataNumbers, smallestEndDate] = appendEntries(
                    fileName, dataType, dataFolder, dailyPoints, endDates, startDateFile, endDateFile, fillerDate)

                # 28 DCS groups for the IBL; change this number for anything else.
                for i in range(0, 28):
                    filesProcessed[i] += 1  # number of files
                    # number of data points for each DCS group
                    dataPoints[i] += tempDataNumbers[i]

                holderX = 0

                # Record the number of data points per day for each DCS group.
                for stave in range(1, 15):
                    staveString = str(stave)
                    if stave < 10:
                        staveString = "0"+str(stave)
                    for side in ['A', 'C']:
                        moduleName = 'LI_S' + str(staveString) + '_' + side
                        outName = entriesFolder + moduleName + ".txt"
                        dataLine = str(startDate) + " " + \
                            str(tempDataNumbers[holderX]) + "\n"
                        tempDataNumbers[holderX] = 0

                        mode = 'a' if os.path.isfile(outName) else 'w'
                        with open(outName, mode) as output:
                            output.write(dataLine)
                        holderX += 1

                # If appendEntries reported a date error, save progress and stop.
                if returnList[2] != fillerDate:
                    returnList[2] = fillerDate
                    with open(dataFolder+dataType+".txt", 'w') as datesFile:
                        tempHolder = 0
                        for stave in range(1, 15):
                            staveString = str(stave)
                            if stave < 10:
                                staveString = "0"+str(stave)
                            for side in ['A', 'C']:
                                moduleName = 'LI_S' + \
                                    str(staveString) + '_' + side
                                datesFile.write(moduleName + " " + str(startingDates[tempHolder]) + " " + str(
                                    endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                                tempHolder += 1

                    return returnList

            sys.stdout.flush()
            fileNumber += 1

        holder += 1

    # All files processed: rewrite the bookkeeping file with updated
    # date ranges and counts for every DCS group.
    with open(dataFolder+dataType+".txt", 'w') as datesFile:
        tempHolder = 0
        for staveX in range(1, 15):
            staveStringX = str(staveX)
            if staveX < 10:
                staveStringX = "0"+str(staveX)
            for sideX in ['A', 'C']:
                moduleNameX = 'LI_S' + str(staveStringX) + '_' + sideX
                datesFile.write(moduleNameX + " " + str(startingDates[tempHolder]) + " " + str(
                    endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                tempHolder += 1

    returnList[3] = True

    return returnList
392 
393 
class scanDataThread (threading.Thread):
    """Worker thread that runs scanData for a single DCS data type."""

    def __init__(self, threadID, name):
        """Store the thread id and the data type name this worker will scan."""
        super().__init__()
        self.threadID = threadID
        self.name = name

    def run(self):
        """Run scanData for this data type and report the covered date range."""
        print("Starting getPipeDate.py for " + self.name)
        results = scanData(self.name)
        print("Exiting getPipeDate.py for " + self.name + " with no issues, data range from " +
              str(results[0]) + " to " + str(results[1]))
405 
406 
409 
410 
def main():
    """Launch a single scanDataThread worker for the ENV_TT data type."""
    worker = scanDataThread(1, 'ENV_TT')
    worker.start()


if __name__ == "__main__":
    main()
getPipeDate.scanDataThread.__init__
def __init__(self, threadID, name)
Definition: getPipeDate.py:395
getPipeDate.scanData
def scanData(dataType)
Definition: getPipeDate.py:177
getPipeDate.appendEntries
def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate)
Definition: getPipeDate.py:61
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
getPipeDate.scanDataThread.threadID
threadID
Definition: getPipeDate.py:397
getPipeDate.scanDataThread.run
def run(self)
Definition: getPipeDate.py:400
getPipeDate.findFiles
def findFiles(searchFolderPath)
Definition: getPipeDate.py:34
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
getPipeDate.main
def main()
Definition: getPipeDate.py:411
Trk::open
@ open
Definition: BinningType.h:40
getPipeDate.scanDataThread
Definition: getPipeDate.py:394
str
Definition: BTagTrackIpAccessor.cxx:11
getPipeDate.sensorToNumber
def sensorToNumber(stave, side, module)
Definition: getPipeDate.py:26
getPipeDate.scanDataThread.name
name
Definition: getPipeDate.py:398
Trk::split
@ split
Definition: LayerMaterialProperties.h:38