ATLAS Offline Software
Functions | Variables
python.power_of_test Namespace Reference

Functions

def _hist_fill (data, bins, entries=0, freeze=None)
 
def rvs_pairs (data1, data2, bins=0, kind='binbybin', bin_mean=1, size=1, entries=0, freeze=False)
 
def power_of_test (data1, data2, rvs_func='rvs_pairs', tests=['chi2_2samp'], rvs_key={}, test_key={}, parallel=None, sync=True)
 

Variables

bool PARALLEL = True
 
dictionary ipp_profile
 

Function Documentation

◆ _hist_fill()

def python.power_of_test._hist_fill (   data,
  bins,
  entries = 0,
  freeze = None 
)
private

Definition at line 25 of file power_of_test.py.

25 def _hist_fill(data, bins, entries = 0, freeze = None):
26  if type(bins)==int:
27  if bins==0:
28  bins = np.arange(data.size + 1, dtype = float)
29  else:
30  bins = np.linspace(data.min(), data.max(), bins)
31  binc = (bins[1:] + bins[:-1]) / 2
32  if freeze:
33  return lambda : data
34  else:
35  rv = rv_discrete(values = (binc, data/data.sum()))
36  return lambda : np.histogram(rv.rvs(size=data.sum() if entries==0 else entries), bins)[0].astype(float)
37 

◆ power_of_test()

def python.power_of_test.power_of_test (   data1,
  data2,
  rvs_func = 'rvs_pairs',
  tests = ['chi2_2samp'],
  rvs_key = {},
  test_key = {},
  parallel = None,
  sync = True 
)
Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.

Parameters
----------
data1, data2 : sequence of 1-D ndarrays
    Input data. Observed samples or frequencies.
rvs_func : [callable|str], optional, default : "rvs_pairs"
    The random variates function. The rvs_func can be either a callable or one of the following strings::

        String                     Description
        "rvs_pairs"                Compute the histogram pairs from the random
                                   variates of the given 2 samples/frequencies
                                   for size_times. 

tests : ([callable|str],...), optional, default : ["chi2_2samp"]
    A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::

        String                     Description
        "chi2_2samp"               Read TS.chi2_2samp for further information.
        "BDM_2samp"                Read TS.BDM_2samp for further information.
        "likelihoodratio_ksamp"    Read TS.likelihoodratio_ksamp for further information.
        "likelihoodvalue_ksamp"    Read TS.likelihoodvalue_ksamp for further information.
        "ks_2samp"                 Read TS.ks_2samp for further information.
        "anderson_ksamp"           Read TS.anderson_ksamp for further information.
        "CVM_2samp"                Read TS.CVM_2samp for further information.

rvs_key : dict, optional, default : {}
    Keyword arguments for the rvs function, rvs_func.
test_key : dict, optional
    Keyword arguments for the test statistical function, test.
parallel : bool, optional, default : None
    If True, import IPyParallel package to do the parallel computation. If parallel is None,
    the global variable PARALLEL will be used instead.
sync : bool, optional, default : True
    When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect
    when parallel is True.

Returns
-------
[p1, p2, ...] : 1-D array
    The corresponding p-values for each histogram pairs.

Definition at line 80 of file power_of_test.py.

80 def power_of_test(data1, data2, rvs_func = 'rvs_pairs', tests = ['chi2_2samp'], rvs_key = {}, test_key = {}, parallel=None, sync=True):
81  """Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.
82 
83  Parameters
84  ----------
85  data1, data2 : sequence of 1-D ndarrays
86  Input data. Observed samples or frequencies.
87  rvs_func : [callable|str], optional, default : "rvs_pairs"
88  The random variates function. The rvs_func can be either a callable or one of the following strings::
89 
90  String Description
91  "rvs_pairs" Compute the histogram pairs from the random
92  variates of the given 2 samples/frequencies
93  for size_times.
94 
95  tests : ([callable|str],...), optional, default : ["chi2_2samp"]
96  A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::
97 
98  String Description
99  "chi2_2samp" Read TS.chi2_2samp for further information.
100  "BDM_2samp" Read TS.BDM_2samp for further information.
101  "likelihoodratio_ksamp" Read TS.likelihoodratio_ksamp for further information.
102  "likelihoodvalue_ksamp" Read TS.likelihoodvalue_ksamp for further information.
103  "ks_2samp" Read TS.ks_2samp for further information.
104  "anderson_ksamp" Read TS.anderson_ksamp for further information.
105  "CVM_2samp" Read TS.CVM_2samp for further information.
106 
107  rvs_key : dict, optional, default : {}
108  Keyword arguments for the rvs function, rvs_func.
109  test_key : dict, optional
110  Keyword arguments for the test statistical function, test.
111  parallel : bool, optional, default : None
112  If True, import IPyParallel package to do the parallel computation. If parallel is None,
113  the global variable PARALLEL will be used instead.
114  sync : bool, optional, default : True
115  When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect
116  when parallel is True.
117 
118  Returns
119  -------
120  [p1, p2, ...] : 1-D array
121  The corresponding p-values for each histogram pairs.
122  """
123  if parallel is None: parallel = PARALLEL
124  if parallel:
125  try:
126  global client
127  client=Client(**ipp_profile)
128  size = rvs_key['size']
129  N = len(client)
130  jobs = []
131  for i in range(N):
132  rvs_key['size'] = (size//N + 1) if (i < size % N) else size//N
133  jobs.append(client[client.ids[i]].apply_async(power_of_test, data1, data2, rvs_func, tests, rvs_key, test_key, False))
134  ars = client._asyncresult_from_jobs(jobs)
135  if sync:
136  ars.wait_interactive()
137  ret = {}
138  for key, val in ars.get():
139  ret.setdefault(key, []).extend(val)
140  else:
141  return ars
142  finally:
143  client.close()
144  return ret
145  if type(rvs_func) is str:
146  rvs_func = globals()[rvs_func]
147  if type(tests) not in (list, tuple):
148  tests = [tests]
149  tests = [(t, getattr(TS, t)) if type(t)==str else (str(t), t) for t in tests]
150  ret = {}
151  for rvs1, rvs2 in rvs_func(data1,data2,**rvs_key):
152  for tname, test in tests:
153  ret.setdefault(tname, []).append(test(rvs1, rvs2, binned=True, **test_key).pvalue)
154  return ret

◆ rvs_pairs()

def python.power_of_test.rvs_pairs (   data1,
  data2,
  bins = 0,
  kind = 'binbybin',
  bin_mean = 1,
  size = 1,
  entries = 0,
  freeze = False 
)
Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times. 

Parameters
----------
data1, data2 : sequence of 1-D ndarrays
    Input data. Observed samples or frequencies.
bins : [scalar|1-D array], optional, default : 0
    If bins is an int, it defines the number of equal-width bins in the given
    range. If bins is zero, the given data will be assumed to be frequency data,
    and it defines the bin index. If bins is an 1-D array, it defines the bin
    edges, including the rightmost edge, allowing for non-uniform bin widths.
kind : [str|(str,str)], optional, default : "binbybin"
    Bin-by-bin test
bin_mean : [scalar|(scalar,scalar)], optional, default : 1
    The desired mean count in each bin of the output histograms. For example, 
    if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is
    ignored when entries is given.
size : int, optional, default : 1
    The number of repetitions for computing the histograms.
entries : [scalar|(scalar,scalar)], optional, default : 0
    The desired entries of the output histograms. If entries is 0, the entries
    size of the input data will be used instead.
freeze : [bool,(bool,bool)], optional. default : False
    if True, the output histogram will be the frozen distribution of the input
    data

Yields
------
hist1, hist2 : (1-D array, 1-D array)
    Histogram pairs

Definition at line 38 of file power_of_test.py.

38 def rvs_pairs(data1, data2, bins = 0, kind = 'binbybin', bin_mean = 1, size = 1, entries = 0, freeze = False):
39  """
40  Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times.
41 
42  Parameters
43  ----------
44  data1, data2 : sequence of 1-D ndarrays
45  Input data. Observed samples or frequencies.
46  bins : [scalar|1-D array], optional, default : 0
47  If bins is an int, it defines the number of equal-width bins in the given
48  range. If bins is zero, the given data will be assumed to be frequency data,
49  and it defines the bin index. If bins is an 1-D array, it defines the bin
50  edges, including the rightmost edge, allowing for non-uniform bin widths.
51  kind : [str|(str,str)], optional, default : "binbybin"
52  Bin-by-bin test
53  bin_mean : [scalar|(scalar,scalar)], optional, default : 1
54  The desired mean count in each bin of the output histograms. For example,
55  if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is
56  ignored when entries is given.
57  size : int, optional, default : 1
58  The number of repetitions for computing the histograms.
59  entries : [scalar|(scalar,scalar)], optional, default : 0
60  The desired entries of the output histograms. If entries is 0, the entries
61  size of the input data will be used instead.
62  freeze : [bool,(bool,bool)], optional. default : False
63  if True, the output histogram will be the frozen distribution of the input
64  data
65 
66  Yields
67  ------
68  hist1, hist2 : (1-D array, 1-D array)
69  Histogram pairs
70  """
71  data1 = data1.astype(float)
72  data2 = data2.astype(float)
73  entries = np.resize(entries, 2)
74  freeze = np.resize(freeze, 2)
75  hist1 = _hist_fill(data1, bins, entries[0], freeze[0])
76  hist2 = _hist_fill(data2, bins, entries[1], freeze[1])
77  for _ in range(size):
78  yield hist1(), hist2()
79 

Variable Documentation

◆ ipp_profile

dictionary python.power_of_test.ipp_profile
Initial value:
1 = {'sshserver':'kingman@140.114.94.171',
2  'profile':'ssh@nthu'}

Definition at line 22 of file power_of_test.py.

◆ PARALLEL

bool python.power_of_test.PARALLEL = True

Definition at line 9 of file power_of_test.py.

python.power_of_test.power_of_test
def power_of_test(data1, data2, rvs_func='rvs_pairs', tests=['chi2_2samp'], rvs_key={}, test_key={}, parallel=None, sync=True)
Definition: power_of_test.py:80
FakeBkgTools::Client
Client
Definition: FakeBkgInternals.h:141
TrigInDetValidation_Base.test
test
Definition: TrigInDetValidation_Base.py:146
dumpHVPathFromNtuple.append
bool append
Definition: dumpHVPathFromNtuple.py:91
python.power_of_test.rvs_pairs
def rvs_pairs(data1, data2, bins=0, kind='binbybin', bin_mean=1, size=1, entries=0, freeze=False)
Definition: power_of_test.py:38
plotBeamSpotVxVal.range
range
Definition: plotBeamSpotVxVal.py:195
CompareRootFiles.hist1
hist1
Definition: CompareRootFiles.py:36
python.CaloScaleNoiseConfig.type
type
Definition: CaloScaleNoiseConfig.py:78
python.power_of_test._hist_fill
def _hist_fill(data, bins, entries=0, freeze=None)
Definition: power_of_test.py:25
str
Definition: BTagTrackIpAccessor.cxx:11
CompareRootFiles.hist2
hist2
Definition: CompareRootFiles.py:37