ATLAS Offline Software
getIBLDate.py
#!/usr/bin/env python
# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration

# Python script from Nick Dann
# Clean-ups for clarity and additional comments from Jennet Dickinson
# Search for JENNET to find where to replace file paths etc
# Nick set this up multithreaded, but Jennet prefers not to run this way,
# so she can more easily see what is going on

# Comments from Nick:
# Multithreaded python script which finds files downloaded from the ATLAS DDV, reads in the data,
# then saves the data in separate files for each DCS group.
# Files from DDV should be in format YYYY_MM_DD-YYYY_MM_DD.txt
# I save to SENSORNAME.ssv (space separated values). I usually save times as
# YYYY-MM-DD HH:MM:SS UTC timestamp value. Times are CERN time (CEST or CET).

# scanDataThread is the thread declaration
# scanData(dataType) is the function which you should call to start the searching; dataType should be a string.
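
# Illustration (added comment, not from the original author): with the conventions above,
# a raw input file might be called 2022_01_01-2022_02_01.txt, and a line written to e.g.
# TModule/LI_S01_A_M1.ssv looks roughly like
#   LI_S01_A_M1_TModule 2022-01-15 06:30:00 <epoch seconds> -8.2
# i.e. sensor name, CERN-time date and time, the time.mktime timestamp and the value.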

import os
import datetime
import time
import sys
import threading


# converts from IBL stave, side and module to DCS group number (<112)
def sensorToNumber(stave, side, module):

    number = 8 * (int(stave)-1) + module
    if side == 'C':
        number += 4
    return number-1
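
# Worked examples (added for clarity, not in the original):
#   sensorToNumber(1, 'A', 1)  -> 0    (stave 1, A side, DCS group M1)
#   sensorToNumber(1, 'C', 1)  -> 4    (C-side groups are offset by 4)
#   sensorToNumber(14, 'C', 4) -> 111  (the largest of the 112 IBL DCS groups)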


def findFiles(searchFolderPath):  # find all files in searchFolderPath, return as list

    try:
        os.chdir(searchFolderPath)
    except OSError:
        print('No entries in ' + searchFolderPath)
        return []

    todaysList = []

    # walk searchFolderPath and collect every file contained anywhere below it
    for src_dir, dirs, files in os.walk(searchFolderPath):
        # build the full path of each file and append it to the list
        for file_ in files:
            sortedFile = os.path.join(src_dir, file_)
            todaysList.append(str(sortedFile))
    # now we've got a list containing all the files we want, sort it alphabetically and return it
    todaysList.sort()
    return todaysList
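
# Example use (illustration only; the path is hypothetical):
#   fileList = findFiles('/eos/some/rawData/TModule/')
#   -> ['/eos/some/rawData/TModule/2022_01_01-2022_02_01.txt', ...]
# i.e. an alphabetically sorted list of full paths, or an empty list if the folder
# cannot be entered.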

# Function that reads the data in fileName and saves it to dataFolder/dataType/MODULENAME.ssv.
# It also returns the most recent timestamp seen for each DCS group (largestDates) and the number
# of data points found for each DCS group (dateEntries). If the data cannot be read it writes a
# BORKBORKBORK.txt report and returns early; DDV output is messy, hence the workarounds below.
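
# For reference (added comment, inferred from the code below): appendEntries returns
#   [firstDate, largestDates, borkDate, dateEntries, firstDate]
# where largestDates holds the latest timestamp seen per DCS group, dateEntries the
# per-group count of new data points, and borkDate is set to lastDate when a problem
# is found.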


def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate):

    try:
        with open(fileName, 'r') as rawFiles:

            returnList = [firstDate, largestDates,
                          borkDate, dateEntries, firstDate]

            print(firstDate)

            index_Jennet = 0
            # read in each line of fileName and process it (an empty file simply yields no lines)
            for dataLine in rawFiles:

                print(index_Jennet)
                index_Jennet = index_Jennet + 1
                # We actually have two types of data file from DDV, one containing lots of commas and
                # one which doesn't. The len(commaSplit) check below is how I differentiate between the two.

                if len(dataLine) > 5:
                    commaSplit = dataLine.split(',')

                    if '!!!!' in dataLine:  # Our entry is blank, leg it!
                        return returnList

                    if len(commaSplit) < 2:

                        # file is from DownloadTxtData. Split the line on whitespace: the 0th element is
                        # the sensor name; grab the DCS flags from the name and use them to build the
                        # module name. If that module name is unexpected, report an error and return.

                        tempLines = dataLine.split()
                        name = tempLines[0]
                        A = name[4:6]
                        B = name[7]
                        C = name[10]

                        moduleName = 'LI_S' + \
                            str(A) + '_' + str(B) + '_M' + str(C)

                        moduleNumber = int(8 * (int(A)-1) + int(C)-1)
                        if B == 'C':
                            moduleNumber += 4
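
                        # Example (added for clarity): for a name like 'LI_S01_A_M1_TModule',
                        # A = name[4:6] = '01' (stave), B = name[7] = 'A' (side) and
                        # C = name[10] = '1' (module), giving moduleName 'LI_S01_A_M1' and
                        # moduleNumber 0.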

                        # If the data is bad (name doesn't match or too few fields), report and return
                        if (name != moduleName+"_"+dataType) or (len(tempLines) < 3):
                            returnList[2] = lastDate
                            print("\nBORK BORK BORK! Error in data values of " + fileName +
                                  " for " + dataType + " borkDate " + str(returnList[2]))
                            print("First line reads " + dataLine+"\n")
                            print("should read " + moduleName+"_"+dataType+"\n")

                            with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                borkRepork.write("filename " + fileName + " date range " + str(
                                    firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')

                            return returnList

                        outName = dataFolder+dataType+'/' + moduleName + '.ssv'  # save file name made here!

                        date = tempLines[1].split('-')
                        time1 = tempLines[2].split(':')
                        data = 0

                        # Oh yeah, DDV switched from reporting signed values to magnitudes for bias and current at some point.
                        if dataType == 'TModule':
                            data = tempLines[3]
                        else:  # if current or bias, store the absolute value rather than the signed one. Stupid DDV.
                            data = abs(float(tempLines[3]))
                        dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                            date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))
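
                        # Illustration (added comment): a DownloadTxtData line is expected to look like
                        #   LI_S01_A_M1_TModule 15-01-2022 06:30:00:00 -8.2
                        # i.e. name, DD-MM-YYYY date, colon-separated time and the value, which is what
                        # the tempLines indexing above assumes.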

                        # Check that the timestamp is within the expected range; if not, report and return
                        if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                            returnList[2] = lastDate
                            print("\nBORK BORK BORK! Error in date of " + fileName +
                                  " for " + dataType + " borkDate " + str(returnList[2]))
                            print("First line reads " + dataLine + "\n")
                            print("should read " + moduleName+"_"+dataType+"\n")

                            with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                borkRepork.write("filename " + fileName + " date range " + str(
                                    firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                            return returnList

                        # if the timestamp is newer than the previous maximum for this DCS group, count it
                        if dateTime > returnList[1][moduleNumber]:
                            returnList[3][moduleNumber] += 1

                        # format the line for saving to the finished file area. Change this if you want to change the output format.
                        outputLine = moduleName + '_' + dataType + " " + str(dateTime) + " " + \
                            str(time.mktime(dateTime.timetuple())) + " " + str(data) + "\n"

                        returnList[1][moduleNumber] = dateTime

                        # check if the output file already exists: create it if not, append to it if so
                        mode = 'a' if os.path.isfile(outName) else 'w'
                        with open(outName, mode) as output:
                            output.write(outputLine)

                    else:
                        # from getDataSafely; this format separates DCS groups with '!!!'

                        moduleList = dataLine.split('!!!')

                        for module in moduleList:  # loop over the DCS groups found in this line

                            elements = module.split(',')
                            name = elements[0]
                            A = "A"
                            B = "A"
                            C = "A"
                            if len(name) > 9:
                                # print (name)
                                A = name[4:6]
                                B = name[7]
                                C = name[10]

                            moduleName = 'LI_S' + \
                                str(A) + '_' + str(B) + '_M' + str(C)

                            try:
                                moduleNumber = int(8 * (int(A)-1) + int(C)-1)
                                if B == 'C':
                                    moduleNumber += 4

                                outName = dataFolder+dataType+'/' + moduleName + '.ssv'

                                # check if the output file exists: make it if not, append to it if so
                                mode = 'a' if os.path.isfile(outName) else 'w'
                                with open(outName, mode) as output:
                                    position = 0

                                    # each element is a single reading (timestamp plus sensor value), which makes this loop slow
                                    for element in elements:

                                        if position != 0:  # the first element is the DCS group name, so skip it

                                            tempLines = element.split()
                                            date = tempLines[1].split('-')
                                            time1 = tempLines[2].split(':')
                                            data = 0

                                            # Make sure we use absolute values for current and bias.
                                            if dataType == 'TModule':
                                                data = tempLines[0]
                                            else:
                                                data = abs(float(tempLines[0]))

                                            dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                                                date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))

                                            # timestamp outside the expected range: report and return
                                            if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                                                returnList[2] = lastDate
                                                print("\nBORK BORK BORK! Error in date of " + fileName +
                                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                                print(
                                                    "First line reads " + dataLine + "\n")
                                                print(
                                                    "Should read " + moduleName+"_"+dataType+"\n")

                                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                                    borkRepork.write("filename " + fileName + " date range " + str(
                                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                                return returnList

                                            # if the timestamp is in the right range, output it
                                            if dateTime > returnList[1][moduleNumber]:
                                                returnList[3][moduleNumber] += 1
                                                outputLine = moduleName + "_" + dataType + " " + str(dateTime) + " " + \
                                                    str(time.mktime(dateTime.timetuple())) + " " + str(data) + "\n"

                                                returnList[1][moduleNumber] = dateTime

                                                output.write(outputLine)

                                        position += 1

                            except Exception:  # we failed at something, so print an error message and bail out
                                returnList[2] = lastDate
                                print("Something broke :( \n")
                                print("Could be an error in data values of " + fileName +
                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                print("First line reads " + name + "\n")

                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                    borkRepork.write("filename " + fileName + " date range " + str(
                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                print(
                                    "Try running again... often this is an issue with the connection to eos")
                                return returnList

    except IOError:
        print("could not open file " + str(fileName))
        # Note: this returns a bare date rather than the usual list, which will break the
        # caller's unpacking, so try not to let unreadable files through here.
        return firstDate

    return returnList


def scanData(dataType):

    # home directory definition
    fillerDate = datetime.datetime(2000, 1, 1, 1, 1, 1, 1)
    returnList = [fillerDate, fillerDate, fillerDate, False, "LI_S00_0_M0"]
    # JENNET sets file paths here
    homeDirectory = os.path.expanduser(
        '/eos/atlas/user/j/jdickins/Pixel/LeakageCurrent/')

    # define paths to folders
    dataFolder = homeDirectory + "/IBLData/processedData/"
    inputFolder = homeDirectory + "/IBLData/rawData/"+dataType+"/"
    entriesFolder = homeDirectory + "/IBLData/rawData/entriesPerDay/" + dataType + "/"

    # make directories if they don't exist
    if not os.path.exists(dataFolder+dataType):
        os.mkdir(dataFolder+dataType)

    if not os.path.exists(entriesFolder):
        os.mkdir(entriesFolder)

    # declare some things here, so they're in scope when we need them
    startingDates = []
    endDates = []
    filesProcessed = []
    dataPoints = []
    dailyPoints = []
    smallestEndDate = fillerDate

    # This section checks whether there's a file recording the dates we've processed up to.
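    # Illustration (added comment): each line of the <dataType>.txt bookkeeping file holds
    #   moduleName firstDate firstTime lastDate lastTime filesProcessed dataPoints
    # e.g.
    #   LI_S01_A_M1 2015-05-01 00:00:00 2015-05-01 00:00:00.000001 0 0
    # which is the format written below and parsed again on the next run.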
    if not os.path.exists(dataFolder+dataType+".txt"):

        # No dates file found, so create one.
        print("No dates file found at " + dataFolder +
              dataType + "; making default values")
        # set default max and min values for each sensor

        with open(dataFolder+dataType+".txt", 'w') as datesFile:
            firstTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 0)
            lastTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 1)
            smallestEndDate = lastTempDate

            for stave in range(1, 15):
                staveString = str(stave)
                if stave < 10:
                    staveString = "0"+str(stave)
                for side in ['A', 'C']:
                    for DCSGroup in range(1, 5):
                        moduleName = 'LI_S' + \
                            str(staveString) + '_' + \
                            side + '_M' + str(DCSGroup)
                        datesFile.write(
                            moduleName + " " + str(firstTempDate) + " " + str(lastTempDate) + " 0 0\n")
                        startingDates.append(firstTempDate)
                        endDates.append(lastTempDate)
                        filesProcessed.append(0)
                        dataPoints.append(0)
                        dailyPoints.append(0)

    else:  # dates file exists, so read dates for each DCS group
        print("Found " + dataFolder+dataType+".txt")
        with open(dataFolder+dataType+".txt", 'r') as datesFile:

            holder = 0

            for dateLine in datesFile:  # read each line in, each line should correspond to one DCS group

                tempDatesLine = dateLine.split()
                filesProcessed.append(int(tempDatesLine[5]))
                dataPoints.append(int(tempDatesLine[6]))
                dailyPoints.append(0)

                firstTemp = tempDatesLine[1].split('-')
                lastTemp = tempDatesLine[3].split('-')

                firstTempTime = tempDatesLine[2].split(':')
                lastTempTime = tempDatesLine[4].split(':')

                firstTempTimes = firstTempTime[2].split('.')
                lastTempTimes = lastTempTime[2].split('.')

                if len(firstTempTimes) < 2:
                    firstTempTimes.append(0)
                if len(lastTempTimes) < 2:
                    lastTempTimes.append(0)

                firstTempDate = datetime.datetime(
                    int(firstTemp[0]), int(firstTemp[1]), int(firstTemp[2]),
                    int(firstTempTime[0]), int(firstTempTime[1]),
                    int(firstTempTimes[0]), int(firstTempTimes[1]))
                lastTempDate = datetime.datetime(
                    int(lastTemp[0]), int(lastTemp[1]), int(lastTemp[2]),
                    int(lastTempTime[0]), int(lastTempTime[1]),
                    int(lastTempTimes[0]), int(lastTempTimes[1]))

                startingDates.append(firstTempDate)
                endDates.append(lastTempDate)

                if holder == 0:
                    returnList[0] = firstTempDate
                    returnList[1] = lastTempDate
                    smallestEndDate = lastTempDate

                else:
                    if firstTempDate < returnList[0]:
                        returnList[0] = firstTempDate
                    if lastTempDate > returnList[1]:
                        returnList[1] = lastTempDate
                    if lastTempDate < smallestEndDate:
                        smallestEndDate = lastTempDate

                holder += 1

    print("Investigating " + dataType + " from " + str(smallestEndDate))

    holder = 0

    # call function to return a list of all files in the input folder
    fileList = findFiles(inputFolder)

    firstTempDate = startingDates[0]
    lastTempDate = endDates[0]
    fileNumber = 0

    # iterate through all files from the file list, opening them if they're in the time period of interest
    for fileName in fileList:

        # print(fileName)

        end = len(fileName)
        endDate1 = fileName[end-4:end]
        endDate2 = fileName[end-7:end-6]

        # check the file name ends with .txt and contains an underscore in the expected place; could make this more rigorous
        if endDate1 == '.txt' and endDate2 == '_':

            startDate = fileName[end-23:end-15]
            endDate = fileName[end-12:end-4]
            endDateSplit = endDate.split('_')
            endDateFile = datetime.datetime(
                2000+int(endDateSplit[0]), int(endDateSplit[1]), int(endDateSplit[2]), 0, 0, 0, 1)

            startDateSplit = startDate.split('_')
            startDateFile = datetime.datetime(
                2000+int(startDateSplit[0]), int(startDateSplit[1]), int(startDateSplit[2]), 0, 0, 0, 1)
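
            # Example (added comment): for a name ending in '2022_01_01-2022_02_01.txt' the
            # slices above give startDate = '22_01_01' and endDate = '22_02_01'; the century
            # digits are dropped by the slicing and 2000 is added back to the two-digit year.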

            if endDateFile > smallestEndDate:  # data from the region of interest
                # append the entries from this file

                lastTempDate = endDateFile
                [firstTempDate, endDates, fillerDate, tempDataNumbers, smallestEndDate] = appendEntries(
                    fileName, dataType, dataFolder, dailyPoints, endDates, startDateFile, endDateFile, fillerDate)
                # appendEntries is called here; the data files are edited at that location

                # 112 DCS groups for IBL; if you're doing something else, change that number
                for i in range(0, 112):

                    filesProcessed[i] += 1  # number of files
                    # number of data points for each DCS group
                    dataPoints[i] += tempDataNumbers[i]

                holderX = 0

                # record the number of data points found per day for each DCS group
                for stave in range(1, 15):
                    staveString = str(stave)
                    if stave < 10:
                        staveString = "0"+str(stave)
                    for side in ['A', 'C']:
                        for DCSGroup in range(1, 5):
                            moduleName = 'LI_S' + \
                                str(staveString) + '_' + \
                                side + '_M' + str(DCSGroup)
                            outName = entriesFolder + moduleName + ".txt"
                            dataLine = str(startDate) + " " + \
                                str(tempDataNumbers[holderX]) + "\n"
                            tempDataNumbers[holderX] = 0

                            mode = 'a' if os.path.isfile(outName) else 'w'
                            with open(outName, mode) as output:
                                output.write(dataLine)
                            holderX += 1

                # check whether appendEntries reported a problem; if so, save progress and return
                if returnList[2] != fillerDate:
                    returnList[2] = fillerDate
                    with open(dataFolder+dataType+".txt", 'w') as datesFile:
                        tempHolder = 0
                        for stave in range(1, 15):
                            staveString = str(stave)
                            if stave < 10:
                                staveString = "0"+str(stave)
                            for side in ['A', 'C']:
                                for DCSGroup in range(1, 5):
                                    moduleName = 'LI_S' + \
                                        str(staveString) + '_' + \
                                        side + '_M' + str(DCSGroup)
                                    datesFile.write(moduleName + " " + str(startingDates[tempHolder]) + " " + str(
                                        endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                                    tempHolder += 1

                    return returnList

            sys.stdout.flush()
            fileNumber += 1

        # open file, go through the lists: if a value is smaller than the current smallest, use it;
        # if a number is bigger, use that;
        # if any value is smaller than the previous largest value, bork bork!

        holder += 1

    with open(dataFolder+dataType+".txt", 'w') as datesFile:
        tempHolder = 0
        for staveX in range(1, 15):
            staveStringX = str(staveX)
            if staveX < 10:
                staveStringX = "0"+str(staveX)
            for sideX in ['A', 'C']:
                for DCSGroupX in range(1, 5):
                    moduleNameX = 'LI_S' + \
                        str(staveStringX) + '_' + sideX + '_M' + str(DCSGroupX)
                    datesFile.write(moduleNameX + " " + str(startingDates[tempHolder]) + " " + str(
                        endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                    tempHolder += 1

    returnList[3] = True

    return returnList


class scanDataThread (threading.Thread):
    def __init__(self, threadID, name):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name

    def run(self):
        print("Starting getIBLDate.py for " + self.name)
        returnList = scanData(self.name)
        print("Exiting getIBLDate.py for " + self.name + " with no issues, data range from " +
              str(returnList[0]) + " to " + str(returnList[1]))
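
# If you prefer to run single-threaded (as noted in the header comments), you can call
# scanData directly instead of starting the threads in main(), e.g.
#   returnList = scanData('TModule')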


def main():

    thread1 = scanDataThread(1, 'HV_VMeas')
    thread1.start()

    thread2 = scanDataThread(2, 'PP4LV')
    thread2.start()

    thread3 = scanDataThread(3, 'HV_IMeas')
    thread3.start()

    thread4 = scanDataThread(4, 'TModule')
    thread4.start()


if __name__ == "__main__":
    main()