ATLAS Offline Software
Loading...
Searching...
No Matches
python.power_of_test Namespace Reference

Functions

 _hist_fill (data, bins, entries=0, freeze=None)
 rvs_pairs (data1, data2, bins=0, kind='binbybin', bin_mean=1, size=1, entries=0, freeze=False)
 power_of_test (data1, data2, rvs_func='rvs_pairs', tests=['chi2_2samp'], rvs_key={}, test_key={}, parallel=None, sync=True)

Variables

bool PARALLEL = True
dict ipp_profile

Function Documentation

◆ _hist_fill()

python.power_of_test._hist_fill ( data,
bins,
entries = 0,
freeze = None )
protected

Definition at line 25 of file power_of_test.py.

25def _hist_fill(data, bins, entries = 0, freeze = None):
26 if type(bins)==int:
27 if bins==0:
28 bins = np.arange(data.size + 1, dtype = float)
29 else:
30 bins = np.linspace(data.min(), data.max(), bins)
31 binc = (bins[1:] + bins[:-1]) / 2
32 if freeze:
33 return lambda : data
34 else:
35 rv = rv_discrete(values = (binc, data/data.sum()))
36 return lambda : np.histogram(rv.rvs(size=data.sum() if entries==0 else entries), bins)[0].astype(float)
37

◆ power_of_test()

python.power_of_test.power_of_test ( data1,
data2,
rvs_func = 'rvs_pairs',
tests = ['chi2_2samp'],
rvs_key = {},
test_key = {},
parallel = None,
sync = True )
Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.

Parameters
----------
data1, data2 : sequence of 1-D ndarrays
    Input data. Observed samples or frequencies.
rvs_func : [callable|str], optional, default : "rvs_pairs"
    The random variates function. The rvs_func can be either a callable or one of the following strings::

        String                     Description
        "rvs_pairs"                Compute the histogram pairs from the random
                                   variates of the given 2 samples/frequencies
                                   for size_times. 

tests : ([callable|str],...), optional, default : ["chi2_2samp"]
    A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::

        String                     Description
        "chi2_2samp"               Read TS.chi2_2samp for further information.
        "BDM_2samp"                Read TS.BDM_2samp for further information.
        "likelihoodratio_ksamp"    Read TS.likelihoodratio_ksamp for further information.
        "likelihoodvalue_ksamp"    Read TS.likelihoodvalue_ksamp for further information.
        "ks_2samp"                 Read TS.ks_2samp for further information.
        "anderson_ksamp"           Read TS.anderson_ksamp for further information.
        "CVM_2samp"                Read TS.CVM_2samp for further information.

rvs_key : dict, optional, default : {}
    Keyword arguments for the rvs function, rvs_func.
test_key : dict, optional
    Keyword arguments for the test statistical function, test.
parallel : bool, optional, default : None
    If True, import IPyParallel package to do the parallel computation. If parallel is None,
    the global variable PARALLEL will be used instead.
sync : bool, optional, default : True
    When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect
    when parallel is True.

Returns
-------
[p1, p2, ...] : 1-D array
    The corresponding p-values for each histogram pairs.

Definition at line 80 of file power_of_test.py.

80def power_of_test(data1, data2, rvs_func = 'rvs_pairs', tests = ['chi2_2samp'], rvs_key = {}, test_key = {}, parallel=None, sync=True):
81 """Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.
82
83 Parameters
84 ----------
85 data1, data2 : sequence of 1-D ndarrays
86 Input data. Observed samples or frequencies.
87 rvs_func : [callable|str], optional, default : "rvs_pairs"
88 The random variates function. The rvs_func can be either a callable or one of the following strings::
89
90 String Description
91 "rvs_pairs" Compute the histogram pairs from the random
92 variates of the given 2 samples/frequencies
93 for size_times.
94
95 tests : ([callable|str],...), optional, default : ["chi2_2samp"]
96 A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::
97
98 String Description
99 "chi2_2samp" Read TS.chi2_2samp for further information.
100 "BDM_2samp" Read TS.BDM_2samp for further information.
101 "likelihoodratio_ksamp" Read TS.likelihoodratio_ksamp for further information.
102 "likelihoodvalue_ksamp" Read TS.likelihoodvalue_ksamp for further information.
103 "ks_2samp" Read TS.ks_2samp for further information.
104 "anderson_ksamp" Read TS.anderson_ksamp for further information.
105 "CVM_2samp" Read TS.CVM_2samp for further information.
106
107 rvs_key : dict, optional, default : {}
108 Keyword arguments for the rvs function, rvs_func.
109 test_key : dict, optional
110 Keyword arguments for the test statistical function, test.
111 parallel : bool, optional, default : None
112 If True, import IPyParallel package to do the parallel computation. If parallel is None,
113 the global variable PARALLEL will be used instead.
114 sync : bool, optional, default : True
115 When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect
116 when parallel is True.
117
118 Returns
119 -------
120 [p1, p2, ...] : 1-D array
121 The corresponding p-values for each histogram pairs.
122 """
123 if parallel is None: parallel = PARALLEL
124 if parallel:
125 try:
126 global client
127 client=Client(**ipp_profile)
128 size = rvs_key['size']
129 N = len(client)
130 jobs = []
131 for i in range(N):
132 rvs_key['size'] = (size//N + 1) if (i < size % N) else size//N
133 jobs.append(client[client.ids[i]].apply_async(power_of_test, data1, data2, rvs_func, tests, rvs_key, test_key, False))
134 ars = client._asyncresult_from_jobs(jobs)
135 if sync:
136 ars.wait_interactive()
137 ret = {}
138 for key, val in ars.get():
139 ret.setdefault(key, []).extend(val)
140 else:
141 return ars
142 finally:
143 client.close()
144 return ret
145 if type(rvs_func) is str:
146 rvs_func = globals()[rvs_func]
147 if type(tests) not in (list, tuple):
148 tests = [tests]
149 tests = [(t, getattr(TS, t)) if type(t)==str else (str(t), t) for t in tests]
150 ret = {}
151 for rvs1, rvs2 in rvs_func(data1,data2,**rvs_key):
152 for tname, test in tests:
153 ret.setdefault(tname, []).append(test(rvs1, rvs2, binned=True, **test_key).pvalue)
154 return ret

◆ rvs_pairs()

python.power_of_test.rvs_pairs ( data1,
data2,
bins = 0,
kind = 'binbybin',
bin_mean = 1,
size = 1,
entries = 0,
freeze = False )
Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times. 

Parameters
----------
data1, data2 : sequence of 1-D ndarrays
    Input data. Observed samples or frequencies.
bins : [scalar|1-D array], optional, default : 0
    If bins is an int, it defines the number of equal-width bins in the given
    range. If bins is zero, the given data will be assumed to be frequency data,
    and it defines the bin index. If bins is an 1-D array, it defines the bin
    edges, including the rightmost edge, allowing for non-uniform bin widths.
kind : [str|(str,str)], optional, default : "binbybin"
    Bin-by-bin test
bin_mean : [scalar|(scalar,scalar)], optional, default : 1
    The desired mean count in each bin of the output histograms. For example, 
    if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is
    ignored when entries is given.
size : int, optional, default : 1
    The number of repetitions for computing the histograms.
entries : [scalar|(scalar,scalar)], optional, default : 0
    The desired entries of the output histograms. If entries is 0, the entries
    size of the input data will be used instead.
freeze : [bool,(bool,bool)], optional. default : False
    if True, the output histogram will be the frozen distribution of the input
    data

Yields
------
hist1, hist2 : (1-D array, 1-D array)
    Histogram pairs

Definition at line 38 of file power_of_test.py.

38def rvs_pairs(data1, data2, bins = 0, kind = 'binbybin', bin_mean = 1, size = 1, entries = 0, freeze = False):
39 """
40 Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times.
41
42 Parameters
43 ----------
44 data1, data2 : sequence of 1-D ndarrays
45 Input data. Observed samples or frequencies.
46 bins : [scalar|1-D array], optional, default : 0
47 If bins is an int, it defines the number of equal-width bins in the given
48 range. If bins is zero, the given data will be assumed to be frequency data,
49 and it defines the bin index. If bins is an 1-D array, it defines the bin
50 edges, including the rightmost edge, allowing for non-uniform bin widths.
51 kind : [str|(str,str)], optional, default : "binbybin"
52 Bin-by-bin test
53 bin_mean : [scalar|(scalar,scalar)], optional, default : 1
54 The desired mean count in each bin of the output histograms. For example,
55 if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is
56 ignored when entries is given.
57 size : int, optional, default : 1
58 The number of repetitions for computing the histograms.
59 entries : [scalar|(scalar,scalar)], optional, default : 0
60 The desired entries of the output histograms. If entries is 0, the entries
61 size of the input data will be used instead.
62 freeze : [bool,(bool,bool)], optional. default : False
63 if True, the output histogram will be the frozen distribution of the input
64 data
65
66 Yields
67 ------
68 hist1, hist2 : (1-D array, 1-D array)
69 Histogram pairs
70 """
71 data1 = data1.astype(float)
72 data2 = data2.astype(float)
73 entries = np.resize(entries, 2)
74 freeze = np.resize(freeze, 2)
75 hist1 = _hist_fill(data1, bins, entries[0], freeze[0])
76 hist2 = _hist_fill(data2, bins, entries[1], freeze[1])
77 for _ in range(size):
78 yield hist1(), hist2()
79

Variable Documentation

◆ ipp_profile

dict python.power_of_test.ipp_profile
Initial value:
1= {'sshserver':'kingman@140.114.94.171',
2 'profile':'ssh@nthu'}

Definition at line 22 of file power_of_test.py.

◆ PARALLEL

bool python.power_of_test.PARALLEL = True

Definition at line 9 of file power_of_test.py.