ATLAS Offline Software
Loading...
Searching...
No Matches
add-xsec-uncert-quadrature-N.py
Go to the documentation of this file.
1import os
2import re
3import glob
4import numpy as np
5import random
6import matplotlib.pyplot as plt
7
8# Define the file pattern and the array of N values
9filename_pattern = "pwg-*-st2-stat.dat"
10#N_values = [50, 100,150,200,250,300,350,360,370,380,390,400]
11#N_values = [350,360,370,380,385,390,392,395,396,397,398,399,400]
12N_values = [50, 100,200,400,800,1000,1100,1200,1250,1260,1270,1275,1278,1279,1280]
13plot_file_postfix=""
14#N_values = [1200,1250,1255,1260,1265,1270,1275,1276,1277,1278,1279,1280]
15#plot_file_postfix="_small_range"
16num_repeats = 30
17#num_repeats = 2
18
19# List all files matching the pattern
20file_list = glob.glob(filename_pattern)
21
22# Find and remove empty files from the list
23empty_files = [f for f in file_list if os.path.getsize(f) == 0]
24file_list = [f for f in file_list if f not in empty_files]
25
26# Save the number of empty files
27num_empty_files = len(empty_files)
28
29# Output the results
30print(f"Number of empty files: {num_empty_files}")
31
32# Now subtract N_values by the number of empty files
33N_values = [N-num_empty_files for N in N_values]
34
35print("N_values after subtracting empty files:")
36print(N_values)
37
38# Initialize a dictionary to store results for each N
39results = {N: [] for N in N_values}
40
41# Loop over each N in N_values
42for N in N_values:
43 for _ in range(num_repeats):
44 # Randomly select N files from the file list
45 selected_files = random.sample(file_list, min(N, len(file_list)))
46
47 # Initialize a list to store extracted values for the current run
48 value_list = []
49
50 # Loop over selected files and process each one
51 for file_path in selected_files:
52 with open(file_path, 'r') as file:
53 for line in file:
54 # Search for lines matching the required pattern
55 match = re.search(r"^ grand total total \‍(pos\.-\|neg\.\|\‍):\s+(\d+\.\d+|\d+)\s+\+-\s+(\d+\.\d+E[-+]\d+|\d+\.\d+)", line)
56 if match:
57 # Extract the scientific notation value (XXXE-00X)
58 value = float(match.group(2))
59 value_list.append(value)
60
61 # Calculate the quadrature sum for this run
62 if value_list:
63 quadrature_sum = np.sqrt(np.sum(np.array(value_list) ** 2))/len(value_list)
64 results[N].append(quadrature_sum)
65 else:
66 results[N].append(0)
67
68# find upper and lower bounds
69# Loop over each N in N_values
70upper_bounds = []
71lower_bounds = []
72value_list = []
73for file_path in file_list:
74 with open(file_path, 'r') as file:
75 for line in file:
76 # Search for lines matching the required pattern
77 match = re.search(r"^ grand total total \‍(pos\.-\|neg\.\|\‍):\s+(\d+\.\d+|\d+)\s+\+-\s+(\d+\.\d+E[-+]\d+|\d+\.\d+)", line)
78 if match:
79 # Extract the scientific notation value (XXXE-00X)
80 value = float(match.group(2))
81 value_list.append(value)
82if value_list:
83 value_list.sort()
84 for N in N_values:
85 upper_bound_value_list = value_list[-N:]
86 lower_bound_value_list = value_list[:N]
87 upper_bounds.append(np.sqrt(np.sum(np.array(upper_bound_value_list) ** 2))/len(upper_bound_value_list))
88 lower_bounds.append(np.sqrt(np.sum(np.array(lower_bound_value_list) ** 2))/len(lower_bound_value_list))
89
90#print("upper bounds")
91#print(upper_bounds)
92#print("lower bounds")
93#print(lower_bounds)
94
95# Calculate mean and standard deviation for each N
96mean_values = []
97std_dev_values = []
98for N in N_values:
99 mean_values.append(np.mean(results[N]))
100 std_dev_values.append(np.std(results[N]))
101
102# Plotting the results
103plt.figure(figsize=(10, 6))
104plt.plot(N_values,upper_bounds,label="upper bound")
105plt.plot(N_values,lower_bounds,label="lower bound")
106plt.errorbar(N_values, mean_values, yerr=std_dev_values, fmt='o-', capsize=5, label="x-sec uncertainty")
107plt.fill_between(N_values,
108 np.array(mean_values) - np.array(std_dev_values),
109 np.array(mean_values) + np.array(std_dev_values),
110 alpha=0.2, color="blue", label="Error band (std dev)")
111
112plt.xlabel("Number of files (N)")
113plt.ylabel("x-sec uncertainty")
114plt.title("x-sec with error band vs. N")
115plt.legend()
116plt.grid(True)
117plt.savefig("sum_of_squares_vs_N"+plot_file_postfix+".png", format="png", dpi=300, bbox_inches="tight")
118
119for N in N_values:
120 plt.errorbar([N]*len(results[N]),results[N],fmt="o",color='darkgreen',markersize=2)
121plt.savefig("sum_of_squares_vs_N_with_val"+plot_file_postfix+".png", format="png", dpi=300, bbox_inches="tight")
122
123#plt.show()
124
void print(char *figname, TCanvas *c1)
#define min(a, b)
Definition cfImp.cxx:40