ATLAS Offline Software
add-xsec-uncert-quadrature-N.py
Go to the documentation of this file.
1 import os
2 import re
3 import glob
4 import numpy as np
5 import random
6 import matplotlib.pyplot as plt
7 
8 # Define the file pattern and the array of N values
9 filename_pattern = "pwg-*-st2-stat.dat"
10 #N_values = [50, 100,150,200,250,300,350,360,370,380,390,400]
11 #N_values = [350,360,370,380,385,390,392,395,396,397,398,399,400]
12 N_values = [50, 100,200,400,800,1000,1100,1200,1250,1260,1270,1275,1278,1279,1280]
13 plot_file_postfix=""
14 #N_values = [1200,1250,1255,1260,1265,1270,1275,1276,1277,1278,1279,1280]
15 #plot_file_postfix="_small_range"
16 num_repeats = 30
17 #num_repeats = 2
18 
19 # List all files matching the pattern
20 file_list = glob.glob(filename_pattern)
21 
22 # Find and remove empty files from the list
23 empty_files = [f for f in file_list if os.path.getsize(f) == 0]
24 file_list = [f for f in file_list if f not in empty_files]
25 
26 # Save the number of empty files
27 num_empty_files = len(empty_files)
28 
29 # Output the results
30 print(f"Number of empty files: {num_empty_files}")
31 
32 # Now subtract N_values by the number of empty files
33 N_values = [N-num_empty_files for N in N_values]
34 
35 print("N_values after subtracting empty files:")
36 print(N_values)
37 
38 # Initialize a dictionary to store results for each N
39 results = {N: [] for N in N_values}
40 
41 # Loop over each N in N_values
42 for N in N_values:
43  for _ in range(num_repeats):
44  # Randomly select N files from the file list
45  selected_files = random.sample(file_list, min(N, len(file_list)))
46 
47  # Initialize a list to store extracted values for the current run
48  value_list = []
49 
50  # Loop over selected files and process each one
51  for file_path in selected_files:
52  with open(file_path, 'r') as file:
53  for line in file:
54  # Search for lines matching the required pattern
55  match = re.search(r"^ grand total total \‍(pos\.-\|neg\.\|\‍):\s+(\d+\.\d+|\d+)\s+\+-\s+(\d+\.\d+E[-+]\d+|\d+\.\d+)", line)
56  if match:
57  # Extract the scientific notation value (XXXE-00X)
58  value = float(match.group(2))
59  value_list.append(value)
60 
61  # Calculate the quadrature sum for this run
62  if value_list:
63  quadrature_sum = np.sqrt(np.sum(np.array(value_list) ** 2))/len(value_list)
64  results[N].append(quadrature_sum)
65  else:
66  results[N].append(0)
67 
68 # find upper and lower bounds
69 # Loop over each N in N_values
70 upper_bounds = []
71 lower_bounds = []
72 value_list = []
73 for file_path in file_list:
74  with open(file_path, 'r') as file:
75  for line in file:
76  # Search for lines matching the required pattern
77  match = re.search(r"^ grand total total \‍(pos\.-\|neg\.\|\‍):\s+(\d+\.\d+|\d+)\s+\+-\s+(\d+\.\d+E[-+]\d+|\d+\.\d+)", line)
78  if match:
79  # Extract the scientific notation value (XXXE-00X)
80  value = float(match.group(2))
81  value_list.append(value)
82 if value_list:
83  value_list.sort()
84  for N in N_values:
85  upper_bound_value_list = value_list[-N:]
86  lower_bound_value_list = value_list[:N]
87  upper_bounds.append(np.sqrt(np.sum(np.array(upper_bound_value_list) ** 2))/len(upper_bound_value_list))
88  lower_bounds.append(np.sqrt(np.sum(np.array(lower_bound_value_list) ** 2))/len(lower_bound_value_list))
89 
90 #print("upper bounds")
91 #print(upper_bounds)
92 #print("lower bounds")
93 #print(lower_bounds)
94 
95 # Calculate mean and standard deviation for each N
96 mean_values = []
97 std_dev_values = []
98 for N in N_values:
99  mean_values.append(np.mean(results[N]))
100  std_dev_values.append(np.std(results[N]))
101 
102 # Plotting the results
103 plt.figure(figsize=(10, 6))
104 plt.plot(N_values,upper_bounds,label="upper bound")
105 plt.plot(N_values,lower_bounds,label="lower bound")
106 plt.errorbar(N_values, mean_values, yerr=std_dev_values, fmt='o-', capsize=5, label="x-sec uncertainty")
107 plt.fill_between(N_values,
108  np.array(mean_values) - np.array(std_dev_values),
109  np.array(mean_values) + np.array(std_dev_values),
110  alpha=0.2, color="blue", label="Error band (std dev)")
111 
112 plt.xlabel("Number of files (N)")
113 plt.ylabel("x-sec uncertainty")
114 plt.title("x-sec with error band vs. N")
115 plt.legend()
116 plt.grid(True)
117 plt.savefig("sum_of_squares_vs_N"+plot_file_postfix+".png", format="png", dpi=300, bbox_inches="tight")
118 
119 for N in N_values:
120  plt.errorbar([N]*len(results[N]),results[N],fmt="o",color='darkgreen',markersize=2)
121 plt.savefig("sum_of_squares_vs_N_with_val"+plot_file_postfix+".png", format="png", dpi=300, bbox_inches="tight")
122 
123 #plt.show()
124 
min
constexpr double min()
Definition: ap_fixedTest.cxx:26
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
print
void print(char *figname, TCanvas *c1)
Definition: TRTCalib_StrawStatusPlots.cxx:25
Trk::open
@ open
Definition: BinningType.h:40
readCCLHist.float
float
Definition: readCCLHist.py:83