ATLAS Offline Software
getIBLDate.py
# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration

# Python script from Nick Dann
# Clean-ups for clarity and additional comments from Jennet Dickinson
# Search for JENNET to find where to replace file paths etc.
# Nick set this up multithreaded, but Jennet prefers not to run this way,
# so she can more easily see what is going on

# Comments from Nick:
# Multithreaded python script which finds files downloaded from the ATLAS DDV, reads in the data,
# then saves the data in separate files for each DCS group.
# Files from DDV should be in the format YYYY_MM_DD-YYYY_MM_DD.txt
# I save to SENSORNAME.ssv (space separated values). I usually save times as YYYY-MM-DD HH:MM:SS,
# followed by the UTC timestamp and the value. Times are CERN time (CEST or CET).

# scanDataThread is the thread declaration.
# scanData(dataType) is the function which you should call to start the searching; dataType should be a string.

#!/usr/bin/env python
import os
import datetime
import time
import sys
import threading

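# --- Illustrative note added for documentation (not part of the original script) ---
# Each line that appendEntries() writes to a per-group .ssv file below has the form
#   <group>_<dataType> <CERN-local datetime> <unix timestamp> <value>
# e.g. (values invented for illustration):
#   LI_S01_A_M1_TModule 2018-06-01 12:00:00 1527847200.0 -10.5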

# converts from IBL stave, sensor and side to DCS group number (< 112)
def sensorToNumber(stave, side, module):

    number = 8 * (int(stave)-1) + module
    if side == 'C':
        number += 4
    return number-1

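# --- Illustrative self-check added for documentation (not in the original script) ---
# sensorToNumber places the 112 IBL DCS groups at indices 0..111, e.g.
assert sensorToNumber(1, 'A', 1) == 0     # LI_S01_A_M1
assert sensorToNumber(1, 'C', 1) == 4     # LI_S01_C_M1
assert sensorToNumber(14, 'C', 4) == 111  # LI_S14_C_M4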
def findFiles(searchFolderPath):  # find all files in searchFolderPath, return as list

    try:
        os.chdir(searchFolderPath)
    except IOError:
        print('No entries in ' + searchFolderPath)
        return []  # return an empty list rather than -1, so callers can still iterate over the result

    todaysList = []

    # walk the search folder and collect every file found below it
    for src_dir, dirs, files in os.walk(searchFolderPath):
        for file_ in files:
            sortedFile = os.path.join(src_dir, file_)
            todaysList.append(str(sortedFile))

    # now we've got a list containing all the files we want, sort it alphabetically and return it
    todaysList.sort()
    return todaysList

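# Illustrative call (added for documentation; not in the original script):
#   fileList = findFiles("/some/raw/data/folder/")
# gives an alphabetically sorted list of every file below that folder.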
# Function that reads in data from fileName and saves it under dataFolder/dataType/moduleName.ssv. It also returns
# things like the most recent dates for all DCS groups (endDates) and the number of datapoints found for each DCS group (tempDataNumbers).
# It also reports if it can't read the data. Gonna be honest, DDV is a bit of a nightmare and I've spent too long messing
# around with it to remember what my bodges are.

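# Layout of the returned list, as inferred from the code below (added note, for convenience):
#   [0] firstDate (start of the file's date range, passed through)
#   [1] largestDates: per-DCS-group latest datetime seen so far (updated in place)
#   [2] borkDate: set to lastDate if bad data or an out-of-range timestamp is found
#   [3] dateEntries: per-DCS-group count of newly added data points (updated in place)
#   [4] firstDate again; the caller stores this slot as its new smallestEndDate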
def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate):

    try:
        with open(fileName, 'r') as rawFiles:

            returnList = [firstDate, largestDates,
                          borkDate, dateEntries, firstDate]

            print(firstDate)

            index_Jennet = 0
            # read in each line of fileName and process it (empty files simply fall through)
            for dataLine in rawFiles:

                print(index_Jennet)
                index_Jennet = index_Jennet + 1
                # We actually have two types of data file from DDV, one containing lots of commas, one which doesn't.
                # The length of commaSplit is how I differentiate between the two types.

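                # The two formats, as inferred from the parsing below (added note; not documented elsewhere):
                #  * DownloadTxtData-style lines:  <group>_<dataType> DD-MM-YYYY HH:MM:SS:cc <value>
                #  * getDataSafely-style lines:    '!!!'-separated blocks, one per DCS group, each a
                #    comma-separated list starting with the group name followed by "<value> DD-MM-YYYY HH:MM:SS:cc" entries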
                if len(dataLine) > 5:
                    commaSplit = dataLine.split(',')

                    if '!!!!' in dataLine:  # our entry is blank, leg it!
                        return returnList

                    if len(commaSplit) < 2:

                        # file is from DownloadTxtData. Rough idea: split the line, the 0th element is the sensor name,
                        # grab the DCS flags from the name and use them to build the module name. If that module name
                        # is unexpected, break and report an error.

                        tempLines = dataLine.split()
                        name = tempLines[0]
                        A = name[4:6]
                        B = name[7]
                        C = name[10]

                        moduleName = 'LI_S' + \
                            str(A) + '_' + str(B) + '_M' + str(C)

                        moduleNumber = int(8 * (int(A)-1) + int(C)-1)
                        if B == 'C':
                            moduleNumber += 4

                        # This section is true if the data is bad; report and return
                        if (name != moduleName+"_"+dataType) or (len(tempLines) < 3):
                            returnList[2] = lastDate
                            print("\nBORK BORK BORK! Error in data values of " + fileName +
                                  " for " + dataType + " borkDate " + str(returnList[2]))
                            print("First line reads " + dataLine + "\n")
                            print("Should read " + moduleName+"_"+dataType+"\n")

                            with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                borkRepork.write("filename " + fileName + " date range " + str(
                                    firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')

                            return returnList

                        outName = dataFolder+dataType+'/' + moduleName + \
                            '.ssv'  # save file name made here!

                        date = tempLines[1].split('-')
                        time1 = tempLines[2].split(':')
                        data = 0

                        # Oh yeah, DDV switched from reporting signed values to magnitudes for bias and current at some point.
                        if dataType == 'TModule':
                            data = tempLines[3]
                        else:  # if current or bias, only use absolute values, not signed values
                            data = abs(float(tempLines[3]))
                        dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                            date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))

                        # Check the timestamp is within the expected range; if not, report and return.
                        if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                            returnList[2] = lastDate
                            print("\nBORK BORK BORK! Error in date of " + fileName +
                                  " for " + dataType + " borkDate " + str(returnList[2]))
                            print("First line reads " + dataLine + "\n")
                            print("Should read " + moduleName+"_"+dataType+"\n")

                            with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                borkRepork.write("filename " + fileName + " date range " + str(
                                    firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                            return returnList

                        # only count and write the entry if its timestamp is newer than the previous maximum for this DCS group
                        if dateTime > returnList[1][moduleNumber]:
                            returnList[3][moduleNumber] += 1

                            # format the line for saving to the output file; change this if you want to change the output format
                            outputLine = moduleName+'_'+dataType + " " + \
                                str(dateTime) + " " + \
                                str(time.mktime(dateTime.timetuple())) + " " + str(data) + "\n"

                            returnList[1][moduleNumber] = dateTime

                            # append to the output file if it already exists, otherwise create it
                            mode = 'a' if os.path.isfile(outName) else 'w'
                            with open(outName, mode) as output:
                                output.write(outputLine)

                    else:
                        # from getDataSafely; this format separates DCS groups with '!!!'

                        moduleList = dataLine.split('!!!')

                        for module in moduleList:  # Hah, look at me using accurate labels for my variables!

                            elements = module.split(',')
                            name = elements[0]
                            A = "A"
                            B = "A"
                            C = "A"
                            if len(name) > 9:
                                # print (name)
                                A = name[4:6]
                                B = name[7]
                                C = name[10]

                            moduleName = 'LI_S' + \
                                str(A) + '_' + str(B) + '_M' + str(C)

                            try:
                                moduleNumber = int(8 * (int(A)-1) + int(C)-1)
                                if B == 'C':
                                    moduleNumber += 4

                                outName = dataFolder+dataType+'/' + moduleName + '.ssv'

                                # append to the output file if it already exists, otherwise create it
                                mode = 'a' if os.path.isfile(outName) else 'w'
                                with open(outName, mode) as output:
                                    position = 0

                                    # each element after the first is a single reading (timestamp plus sensor value),
                                    # which makes this process slow
                                    for element in elements:

                                        if position != 0:  # the first element is the DCS group name, so skip it

                                            tempLines = element.split()
                                            date = tempLines[1].split('-')
                                            time1 = tempLines[2].split(':')
                                            data = 0

                                            # Make sure we use absolute values for current and bias.
                                            if dataType == 'TModule':
                                                data = tempLines[0]
                                            else:
                                                data = abs(float(tempLines[0]))

                                            dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                                                date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))

                                            # Check the timestamp is within the expected range; if not, report and return.
                                            if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                                                returnList[2] = lastDate
                                                print("\nBORK BORK BORK! Error in date of " + fileName +
                                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                                print(
                                                    "First line reads " + dataLine + "\n")
                                                print(
                                                    "Should read " + moduleName+"_"+dataType+"\n")

                                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                                    borkRepork.write("filename " + fileName + " date range " + str(
                                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                                return returnList

                                            # only count and write the entry if its timestamp is newer than the previous maximum for this DCS group
                                            if dateTime > returnList[1][moduleNumber]:
                                                returnList[3][moduleNumber] += 1
                                                outputLine = moduleName + "_" + dataType + " " + \
                                                    str(dateTime) + " " + \
                                                    str(time.mktime(dateTime.timetuple())) + " " + str(data) + "\n"

                                                returnList[1][moduleNumber] = dateTime

                                                output.write(outputLine)

                                        position += 1


                            except Exception:  # we failed at something, so print an error message and run away screaming
                                returnList[2] = lastDate
                                print("Something broke :( \n")
                                print("Could be an error in data values of " + fileName +
                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                print("First line reads " + name + "\n")

                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                    borkRepork.write("filename " + fileName + " date range " + str(
                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                print(
                                    "Try running again... often this is an issue with the connection to eos")
                                return returnList

    except IOError:
        print("could not open file " + str(fileName))
        # Note this returns a bare date rather than the usual list if the file can't be opened. Try not to let that happen.
        return firstDate

    return returnList


def scanData(dataType):

    fillerDate = datetime.datetime(2000, 1, 1, 1, 1, 1, 1)
    returnList = [fillerDate, fillerDate, fillerDate, False, "LI_S00_0_M0"]

    # JENNET sets file paths: home directory definition
    homeDirectory = os.path.expanduser(
        '/eos/atlas/user/j/jdickins/Pixel/LeakageCurrent/')

    # define paths to the data folders
    dataFolder = homeDirectory + "/IBLData/processedData/"
    inputFolder = homeDirectory + "/IBLData/rawData/"+dataType+"/"
    entriesFolder = homeDirectory + "/IBLData/rawData/entriesPerDay/" + dataType + "/"

    # make directories if they don't exist
    if not os.path.exists(dataFolder+dataType):
        os.mkdir(dataFolder+dataType)

    if not os.path.exists(entriesFolder):
        os.mkdir(entriesFolder)

    # declare some stuff, so it's in scope when we need it
    startingDates = []
    endDates = []
    filesProcessed = []
    dataPoints = []
    dailyPoints = []
    smallestEndDate = fillerDate

    # This section checks whether there's a file recording the dates we've processed up to.
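    # Each line of that file has the form (added note, matching the write/read code below):
    #   <moduleName> <firstDate> <lastDate> <nFilesProcessed> <nDataPoints>
    # e.g. "LI_S01_A_M1 2015-05-01 00:00:00 2015-05-01 00:00:00.000001 0 0"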
    if not os.path.exists(dataFolder+dataType+".txt"):

        # No dates file found, so create one with default values.
        print("No dates file found at " + dataFolder +
              dataType + ", making default values")
        # set default max and min values for each sensor

        with open(dataFolder+dataType+".txt", 'w') as datesFile:
            firstTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 0)
            lastTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 1)
            smallestEndDate = lastTempDate

            for stave in range(1, 15):
                staveString = str(stave)
                if stave < 10:
                    staveString = "0"+str(stave)
                for side in ['A', 'C']:
                    for DCSGroup in range(1, 5):
                        moduleName = 'LI_S' + \
                            str(staveString) + '_' + \
                            side + '_M' + str(DCSGroup)
                        datesFile.write(
                            moduleName + " " + str(firstTempDate) + " " + str(lastTempDate) + " 0 0\n")
                        startingDates.append(firstTempDate)
                        endDates.append(lastTempDate)
                        filesProcessed.append(0)
                        dataPoints.append(0)
                        dailyPoints.append(0)

    else:  # dates file exists, so read dates for each DCS group
        print("Found " + dataFolder+dataType+".txt")
        with open(dataFolder+dataType+".txt", 'r') as datesFile:

            holder = 0

            for dateLine in datesFile:  # read each line; each line should correspond to one DCS group

                tempDatesLine = dateLine.split()
                filesProcessed.append(int(tempDatesLine[5]))
                dataPoints.append(int(tempDatesLine[6]))
                dailyPoints.append(0)

                firstTemp = tempDatesLine[1].split('-')
                lastTemp = tempDatesLine[3].split('-')

                firstTempTime = tempDatesLine[2].split(':')
                lastTempTime = tempDatesLine[4].split(':')

                firstTempTimes = firstTempTime[2].split('.')
                lastTempTimes = lastTempTime[2].split('.')

                if len(firstTempTimes) < 2:
                    firstTempTimes.append(0)
                if len(lastTempTimes) < 2:
                    lastTempTimes.append(0)

                firstTempDate = datetime.datetime(
                    int(firstTemp[0]), int(firstTemp[1]), int(firstTemp[2]),
                    int(firstTempTime[0]), int(firstTempTime[1]),
                    int(firstTempTimes[0]), int(firstTempTimes[1]))
                lastTempDate = datetime.datetime(
                    int(lastTemp[0]), int(lastTemp[1]), int(lastTemp[2]),
                    int(lastTempTime[0]), int(lastTempTime[1]),
                    int(lastTempTimes[0]), int(lastTempTimes[1]))

                startingDates.append(firstTempDate)
                endDates.append(lastTempDate)

                if holder == 0:
                    returnList[0] = firstTempDate
                    returnList[1] = lastTempDate
                    smallestEndDate = lastTempDate

                else:
                    if firstTempDate < returnList[0]:
                        returnList[0] = firstTempDate
                    if lastTempDate > returnList[1]:
                        returnList[1] = lastTempDate
                    if lastTempDate < smallestEndDate:
                        smallestEndDate = lastTempDate

                holder += 1

    print("Investigating " + dataType + " from " + str(smallestEndDate))

    holder = 0

    # call function to return list of all files in the input folder
    fileList = findFiles(inputFolder)

    firstTempDate = startingDates[0]
    lastTempDate = endDates[0]
    fileNumber = 0

    # iterate through all files from the file list, opening them if they're in the time period of interest
    for fileName in fileList:

        # print(fileName)

        end = len(fileName)
        endDate1 = fileName[end-4:end]
        endDate2 = fileName[end-7:end-6]

        # check the file ends with .txt and contains an underscore in the expected place; could make this more rigorous
        if endDate1 == '.txt' and endDate2 == '_':

            # pull the start and end dates out of the file name (two-digit years, hence the 2000+ below)
            startDate = fileName[end-23:end-15]
            endDate = fileName[end-12:end-4]
            endDateSplit = endDate.split('_')
            endDateFile = datetime.datetime(
                2000+int(endDateSplit[0]), int(endDateSplit[1]), int(endDateSplit[2]), 0, 0, 0, 1)

            startDateSplit = startDate.split('_')
            startDateFile = datetime.datetime(
                2000+int(startDateSplit[0]), int(startDateSplit[1]), int(startDateSplit[2]), 0, 0, 0, 1)

            if endDateFile > smallestEndDate:  # data from the region of interest

                # appendEntries is called here; editing of the data files is done at that location
                lastTempDate = endDateFile
                [firstTempDate, endDates, fillerDate, tempDataNumbers, smallestEndDate] = appendEntries(
                    fileName, dataType, dataFolder, dailyPoints, endDates, startDateFile, endDateFile, fillerDate)

                # 112 DCS groups for IBL; if you're doing something else, change that number
                for i in range(0, 112):

                    filesProcessed[i] += 1  # number of files
                    # number of data points for each DCS group
                    dataPoints[i] += tempDataNumbers[i]

                holderX = 0

                # record the number of data points found per day for each DCS group
                for stave in range(1, 15):
                    staveString = str(stave)
                    if stave < 10:
                        staveString = "0"+str(stave)
                    for side in ['A', 'C']:
                        for DCSGroup in range(1, 5):
                            moduleName = 'LI_S' + \
                                str(staveString) + '_' + \
                                side + '_M' + str(DCSGroup)
                            outName = entriesFolder + moduleName + ".txt"
                            dataLine = str(startDate) + " " + \
                                str(tempDataNumbers[holderX]) + "\n"
                            tempDataNumbers[holderX] = 0

                            mode = 'a' if os.path.isfile(outName) else 'w'
                            with open(outName, mode) as output:
                                output.write(dataLine)
                            holderX += 1

                # check if the list is bork-a-dorked. If borked, save progress and return
                if returnList[2] != fillerDate:
                    returnList[2] = fillerDate
                    with open(dataFolder+dataType+".txt", 'w') as datesFile:
                        tempHolder = 0
                        for stave in range(1, 15):
                            staveString = str(stave)
                            if stave < 10:
                                staveString = "0"+str(stave)
                            for side in ['A', 'C']:
                                for DCSGroup in range(1, 5):
                                    moduleName = 'LI_S' + \
                                        str(staveString) + '_' + \
                                        side + '_M' + str(DCSGroup)
                                    datesFile.write(moduleName + " " + str(startingDates[tempHolder]) + " " + str(
                                        endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                                    tempHolder += 1

                    return returnList

        sys.stdout.flush()
        fileNumber += 1

        holder += 1

    # All files have been processed: write out, for each DCS group, the first and last dates covered,
    # the number of files processed and the number of data points, so the next run can pick up from here.
    with open(dataFolder+dataType+".txt", 'w') as datesFile:
        tempHolder = 0
        for staveX in range(1, 15):
            staveStringX = str(staveX)
            if staveX < 10:
                staveStringX = "0"+str(staveX)
            for sideX in ['A', 'C']:
                for DCSGroupX in range(1, 5):
                    moduleNameX = 'LI_S' + \
                        str(staveStringX) + '_' + sideX + '_M' + str(DCSGroupX)
                    datesFile.write(moduleNameX + " " + str(startingDates[tempHolder]) + " " + str(
                        endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                    tempHolder += 1

    returnList[3] = True

    return returnList


class scanDataThread (threading.Thread):
    def __init__(self, threadID, name):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name

    def run(self):
        print("Starting getIBLDate.py for " + self.name)
        returnList = scanData(self.name)
        print("Exiting getIBLDate.py for " + self.name + " with no issues, data range from " +
              str(returnList[0]) + " to " + str(returnList[1]))

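# --- Usage note added for documentation (not part of the original script) ---
# main() below launches one thread per DCS data type. As mentioned in the header
# comments, a single-threaded alternative is simply to call scanData() directly
# for one data type at a time, e.g.:
#
#   scanData('TModule')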
def main():

    thread1 = scanDataThread(1, 'HV_VMeas')
    thread1.start()

    thread2 = scanDataThread(2, 'PP4LV')
    thread2.start()

    thread3 = scanDataThread(3, 'HV_IMeas')
    thread3.start()

    thread4 = scanDataThread(4, 'TModule')
    thread4.start()


if __name__ == "__main__":
    main()