#!/usr/bin/env python
# Copyright (C) 2002-2024 CERN for the benefit of the ATLAS collaboration

# Python script from Nick Dann
# Clean-ups for clarity and additional comments from Jennet Dickinson
# Search for JENNET to find where to replace file paths etc.
# Nick set this up multithreaded, but Jennet prefers not to run this way,
# so she can more easily see what is going on

# Comments from Nick:
# Multithreaded python script which finds files downloaded from the ATLAS DDV, reads in the data,
# then saves the data in separate files for each DCS group.
# Files from DDV should be in the format YYYY_MM_DD-YYYY_MM_DD.txt
# I save to SENSORNAME.ssv (space separated values). Each saved line holds YYYY-MM-DD HH:MM:SS,
# a UTC timestamp and the value. Times are CERN time (CEST or CET).

# scanDataThread is the thread declaration
# scanData(dataType) is the function you should call to start the searching; dataType should be a string.

import os
import datetime
import time
import sys
import threading


# converts from IBL stave, side and module number to DCS group number (< 112)
def sensorToNumber(stave, side, module):

    number = 8 * (int(stave)-1) + module
    if side == 'C':
        number += 4
    return number-1
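
# Quick worked example (from the formula above): stave 1, side 'A', module 1 gives
# 8*(1-1)+1 = 1 and returns 0; the same module on side 'C' would return 4 instead.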


def findFiles(searchFolderPath):  # find all files in searchFolderPath, return them as a sorted list

    try:
        os.chdir(searchFolderPath)
    except OSError:
        print('No entries in ' + searchFolderPath)
        return -1

    todaysList = []

    # os.walk yields (directory, sub-directories, files) for searchFolderPath and everything below it
    for src_dir, dirs, files in os.walk(searchFolderPath):
        # append the full path of every file found to the list
        for file_ in files:
            sortedFile = os.path.join(src_dir, file_)
            todaysList.append(str(sortedFile))
    # we now have a list of all the files we want; sort it alphabetically and return it
    todaysList.sort()
    return todaysList


# Function that reads in the data from fileName and saves it to dataFolder/dataType/MODULENAME.ssv.
# It returns the most recent dates for all DCS groups (endDates in the caller) and the number of
# data points found for each DCS group (tempDataNumbers), and prints an error message if it can't
# read the data. (Nick's note: DDV is a bit of a nightmare, hence the workarounds below.)

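# A sketch of the DDV line format the parser below assumes (reconstructed from the code,
# values illustrative): each data line holds several DCS groups separated by '!!!'; within
# a group, the first comma-separated element is the group name and each further element is
# one reading of the form "value DD-MM-YYYY HH:MM:SS:ffffff", e.g.
#   LI_S05_A_...,25.3 01-05-2015 12:00:00:000000,25.4 01-05-2015 12:05:00:000000!!!...
# Lines containing '!!!!' are treated as empty and skipped.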


def appendEntries(fileName, dataType, dataFolder, dateEntries, largestDates, firstDate, lastDate, borkDate):

    try:
        with open(fileName, 'r') as rawFiles:

            returnList = [firstDate, largestDates,
                          borkDate, dateEntries, firstDate]
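
            # Layout of returnList, matching how scanData unpacks it: [0] first date,
            # [1] per-group latest dates (largestDates), [2] "bork" date flagging bad files,
            # [3] per-group data-point counts (dateEntries), [4] initialised to firstDate
            # (read back by the caller as its smallestEndDate).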

            print(firstDate)

            index_Jennet = 0
            # read in each line of fileName and process it (empty files simply fall through)
            for dataLine in rawFiles:

                index_Jennet = index_Jennet + 1
                # DDV actually produces two types of data file, one containing lots of commas
                # and one which doesn't; the len(commaSplit) check below tells the two apart

                if len(dataLine) > 5:
                    commaSplit = dataLine.split(',')

                    if '!!!!' in dataLine:  # the entry is blank, bail out
                        return returnList

                    if len(commaSplit) < 2:
                        print("Dead")
                    else:
                        # format from getDataSafely: data for different DCS groups is separated by '!!!'

                        moduleList = dataLine.split('!!!')

                        for module in moduleList:

                            elements = module.split(',')
                            name = elements[0]
                            A = "A"
                            B = "A"
                            if len(name) > 9:
                                A = name[4:6]
                                B = name[7]
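                            # e.g. a group name like "LI_S05_A_..." (illustrative) gives
                            # A = name[4:6] = "05" (stave) and B = name[7] = "A" (side)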

                            moduleName = 'LI_S' + str(A) + '_' + str(B)
                            try:
                                moduleNumber = int(2 * (int(A)-1)-1)
                                if B == 'C':
                                    moduleNumber += 4

                                outName = dataFolder+dataType+'/' + moduleName + '.ssv'

                                # append to the output file if it already exists, otherwise create it
                                mode = 'a' if os.path.isfile(outName) else 'w'
                                with open(outName, mode) as output:
                                    position = 0

                                    # each element after the first is a single reading (time stamp
                                    # plus sensor value), which is what makes this loop slow
                                    for element in elements:

                                        if position != 0:  # the first element is the DCS group name, not a reading

                                            tempLines = element.split()
                                            date = tempLines[1].split('-')
                                            time1 = tempLines[2].split(':')
                                            data = tempLines[0]

                                            dateTime = datetime.datetime(int(date[2]), int(date[1]), int(
                                                date[0]), int(time1[0]), int(time1[1]), int(time1[2]), int(time1[3]))
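                                            # the reading is assumed to look like "25.3 01-05-2015 12:00:00:000000"
                                            # (illustrative values), i.e. value, DD-MM-YYYY, HH:MM:SS:microseconds,
                                            # hence the reversed date indices above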
                                            # flag the file if a timestamp sits well outside the date range in its name
                                            if (dateTime > lastDate+datetime.timedelta(hours=2)) or (dateTime < firstDate - datetime.timedelta(hours=2)):
                                                returnList[2] = lastDate
                                                print("\nBORK BORK BORK! Error in date of " + fileName +
                                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                                print(
                                                    "First line reads " + dataLine + "\n")
                                                print(
                                                    "Should read " + moduleName+"_"+dataType+"\n")

                                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                                    borkRepork.write("filename " + fileName + " date range " + str(
                                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                                return returnList

                                            # if the time is in the right range, write the reading out
                                            if dateTime > returnList[1][moduleNumber]:
                                                returnList[3][moduleNumber] += 1
                                                outputLine = moduleName + "_" + dataType + " " + \
                                                    str(dateTime) + " " + str(time.mktime(
                                                        dateTime.timetuple())) + " " + str(data) + "\n"

                                                returnList[1][moduleNumber] = dateTime

                                                output.write(outputLine)

                                        position += 1

                            except Exception:  # something went wrong, so print an error message and give up on this file
                                returnList[2] = lastDate
                                print("Something broke :( \n")
                                print("Could be an error in data values of " + fileName +
                                      " for " + dataType + " borkDate " + str(returnList[2]))
                                print("First line reads " + name + "\n")

                                with open(dataFolder+dataType+"BORKBORKBORK.txt", 'w') as borkRepork:
                                    borkRepork.write("filename " + fileName + " date range " + str(
                                        firstDate) + "-" + str(lastDate) + " first line " + dataLine + '\n')
                                print(
                                    "Try running again... often this is an issue with the connection to eos")
                                return returnList

    except IOError:
        print("could not open file " + str(fileName))
        # NB: this returns firstDate rather than the usual 5-element list, so the caller's
        # unpacking will fail if the file can't be opened; try not to let that happen
        return firstDate

    return returnList


def scanData(dataType):

    # default/filler date, also used later to spot when appendEntries flags a bad file
    fillerDate = datetime.datetime(2000, 1, 1, 1, 1, 1, 1)
    returnList = [fillerDate, fillerDate, fillerDate, False, "LI_S00_0_M0"]
    # JENNET sets the file paths here (home directory definition)
    homeDirectory = os.path.expanduser(
        '/eos/atlas/user/j/jdickins/Pixel/LeakageCurrent/')

    # define paths to the data folders
    dataFolder = homeDirectory + "/IBLData/processedData/"
    inputFolder = homeDirectory + "/IBLData/rawData/"+dataType+"/"
    entriesFolder = homeDirectory + "/IBLData/rawData/entriesPerDay/" + dataType + "/"
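
    # Expected layout under homeDirectory (taken from the paths defined above):
    #   IBLData/rawData/<dataType>/                raw DDV downloads (YYYY_MM_DD-YYYY_MM_DD.txt)
    #   IBLData/rawData/entriesPerDay/<dataType>/  per-module counts of readings per day
    #   IBLData/processedData/<dataType>/          per-module .ssv output
    #   IBLData/processedData/<dataType>.txt       bookkeeping of dates processed so far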

    # make directories if they don't exist
    if not os.path.exists(dataFolder+dataType):
        os.mkdir(dataFolder+dataType)

    if not os.path.exists(entriesFolder):
        os.mkdir(entriesFolder)

    # declare the bookkeeping lists here, so they're in scope when we need them
    startingDates = []
    endDates = []
    filesProcessed = []
    dataPoints = []
    dailyPoints = []
    smallestEndDate = fillerDate

    # This section checks for the file recording the dates we've processed up to.
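    # That file holds one line per DCS group, e.g. (values illustrative):
    #   LI_S01_A 2015-05-01 00:00:00 2015-05-01 00:00:00.000001 0 0
    # i.e. module name, first date/time, last date/time, files processed, data points.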
    if not os.path.exists(dataFolder+dataType+".txt"):

        # No dates file found, so create one.
        print("No dates file found at " + dataFolder +
              dataType + ", making default values")
        # set default max and min values for each sensor

        with open(dataFolder+dataType+".txt", 'w') as datesFile:
            firstTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 0)
            lastTempDate = datetime.datetime(2015, 5, 1, 0, 0, 0, 1)
            smallestEndDate = lastTempDate

            for stave in range(1, 15):
                staveString = str(stave)
                if stave < 10:
                    staveString = "0"+str(stave)
                for side in ['A', 'C']:
                    moduleName = 'LI_S' + str(staveString) + '_' + side
                    datesFile.write(
                        moduleName + " " + str(firstTempDate) + " " + str(lastTempDate) + " 0 0\n")
                    startingDates.append(firstTempDate)
                    endDates.append(lastTempDate)
                    filesProcessed.append(0)
                    dataPoints.append(0)
                    dailyPoints.append(0)

    else:  # dates file exists, so read dates for each DCS group
        print("Found " + dataFolder+dataType+".txt")
        with open(dataFolder+dataType+".txt", 'r') as datesFile:

            holder = 0

            for dateLine in datesFile:  # read each line in, each line should correspond to one DCS group

                tempDatesLine = dateLine.split()
                filesProcessed.append(int(tempDatesLine[5]))
                dataPoints.append(int(tempDatesLine[6]))
                dailyPoints.append(0)

                firstTemp = tempDatesLine[1].split('-')
                lastTemp = tempDatesLine[3].split('-')

                firstTempTime = tempDatesLine[2].split(':')
                lastTempTime = tempDatesLine[4].split(':')

                firstTempTimes = firstTempTime[2].split('.')
                lastTempTimes = lastTempTime[2].split('.')

                if len(firstTempTimes) < 2:
                    firstTempTimes.append(0)
                if len(lastTempTimes) < 2:
                    lastTempTimes.append(0)

                firstTempDate = datetime.datetime(int(firstTemp[0]), int(firstTemp[1]), int(firstTemp[2]), int(
                    firstTempTime[0]), int(firstTempTime[1]), int(firstTempTimes[0]), int(firstTempTimes[1]))
                lastTempDate = datetime.datetime(int(lastTemp[0]), int(lastTemp[1]), int(lastTemp[2]), int(
                    lastTempTime[0]), int(lastTempTime[1]), int(lastTempTimes[0]), int(lastTempTimes[1]))

                startingDates.append(firstTempDate)
                endDates.append(lastTempDate)

                if holder == 0:
                    returnList[0] = firstTempDate
                    returnList[1] = lastTempDate
                    smallestEndDate = lastTempDate

                else:
                    if firstTempDate < returnList[0]:
                        returnList[0] = firstTempDate
                    if lastTempDate > returnList[1]:
                        returnList[1] = lastTempDate
                    if lastTempDate < smallestEndDate:
                        smallestEndDate = lastTempDate

                holder += 1

    print("Investigating " + dataType + " from " + str(smallestEndDate))

    holder = 0

    # call function to return a list of all files in the input folder
    fileList = findFiles(inputFolder)

    firstTempDate = startingDates[0]
    lastTempDate = endDates[0]
    fileNumber = 0

    # iterate through all files from the file list, opening them if they're in the time period of interest
    for fileName in fileList:

        # print(fileName)

        end = len(fileName)
        endDate1 = fileName[end-4:end]
        endDate2 = fileName[end-7:end-6]

        # check that the file ends with .txt and contains an underscore in the expected place
        # (this could be made more rigorous)
        if endDate1 == '.txt' and endDate2 == '_':

            startDate = fileName[end-23:end-15]
            endDate = fileName[end-12:end-4]
            endDateSplit = endDate.split('_')
            endDateFile = datetime.datetime(
                2000+int(endDateSplit[0]), int(endDateSplit[1]), int(endDateSplit[2]), 0, 0, 0, 1)

            startDateSplit = startDate.split('_')
            startDateFile = datetime.datetime(
                2000+int(startDateSplit[0]), int(startDateSplit[1]), int(startDateSplit[2]), 0, 0, 0, 1)
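
            # Worked example (filename format from the header comments, values illustrative):
            # for ".../2015_05_01-2015_06_01.txt", fileName[end-23:end-15] is "15_05_01" and
            # fileName[end-12:end-4] is "15_06_01"; adding 2000 to the first field recovers
            # the full year.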

            if endDateFile > smallestEndDate:  # data from the region of interest

                lastTempDate = endDateFile
                # appendEntries does the actual reading and writes the .ssv files
                [firstTempDate, endDates, fillerDate, tempDataNumbers, smallestEndDate] = appendEntries(
                    fileName, dataType, dataFolder, dailyPoints, endDates, startDateFile, endDateFile, fillerDate)

                # 28 DCS groups for the IBL (14 staves x A/C sides); change this number for a different detector
                for i in range(0, 28):
                    filesProcessed[i] += 1  # number of files
                    # number of data points for each DCS group
                    dataPoints[i] += tempDataNumbers[i]

                holderX = 0

                # record the number of data points per day for each DCS group
                for stave in range(1, 15):
                    staveString = str(stave)
                    if stave < 10:
                        staveString = "0"+str(stave)
                    for side in ['A', 'C']:
                        moduleName = 'LI_S' + str(staveString) + '_' + side
                        outName = entriesFolder + moduleName + ".txt"
                        dataLine = str(startDate) + " " + \
                            str(tempDataNumbers[holderX]) + "\n"
                        tempDataNumbers[holderX] = 0

                        mode = 'a' if os.path.isfile(outName) else 'w'
                        with open(outName, mode) as output:
                            output.write(dataLine)
                        holderX += 1

                # if appendEntries flagged a bad ("bork") date, save the bookkeeping file and stop
                if returnList[2] != fillerDate:
                    returnList[2] = fillerDate
                    with open(dataFolder+dataType+".txt", 'w') as datesFile:
                        tempHolder = 0
                        for stave in range(1, 15):
                            staveString = str(stave)
                            if stave < 10:
                                staveString = "0"+str(stave)
                            for side in ['A', 'C']:
                                moduleName = 'LI_S' + \
                                    str(staveString) + '_' + side
                                datesFile.write(moduleName + " " + str(startingDates[tempHolder]) + " " + str(
                                    endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                                tempHolder += 1

                    return returnList

            sys.stdout.flush()
            fileNumber += 1

        # for each file: if a value is smaller than the current smallest, take it;
        # if a value is bigger than the current largest, take it;
        # if any value is smaller than the previous largest value, bork bork!

        holder += 1

    with open(dataFolder+dataType+".txt", 'w') as datesFile:
        tempHolder = 0
        for staveX in range(1, 15):
            staveStringX = str(staveX)
            if staveX < 10:
                staveStringX = "0"+str(staveX)
            for sideX in ['A', 'C']:
                moduleNameX = 'LI_S' + str(staveStringX) + '_' + sideX
                datesFile.write(moduleNameX + " " + str(startingDates[tempHolder]) + " " + str(
                    endDates[tempHolder]) + " " + str(filesProcessed[tempHolder]) + " " + str(dataPoints[tempHolder]) + "\n")
                tempHolder += 1

    returnList[3] = True

    return returnList


class scanDataThread(threading.Thread):
    def __init__(self, threadID, name):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name

    def run(self):
        print("Starting getPipeDate.py for " + self.name)
        returnList = scanData(self.name)
        print("Exiting getPipeDate.py for " + self.name + " with no issues, data range from " +
              str(returnList[0]) + " to " + str(returnList[1]))

def main():

    thread1 = scanDataThread(1, 'ENV_TT')
    thread1.start()


if __name__ == "__main__":
    main()
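
# Note: as described in the header comments, Nick's original setup ran several data types in
# parallel. A sketch of that usage (the thread number and second data type name below are
# purely illustrative, not taken from this file):
#
#   thread2 = scanDataThread(2, 'SOME_OTHER_DCS_TYPE')
#   thread2.start()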