3 from scipy.stats
import rv_discrete
4 import test_statistics
as TS
12 from ipyparallel
import Client
22 ipp_profile = {
'sshserver':
'kingman@140.114.94.171',
28 bins = np.arange(data.size + 1, dtype = float)
30 bins = np.linspace(data.min(), data.max(), bins)
31 binc = (bins[1:] + bins[:-1]) / 2
35 rv = rv_discrete(values = (binc, data/data.sum()))
36 return lambda : np.histogram(rv.rvs(size=data.sum()
if entries==0
else entries), bins)[0].astype(float)
38 def rvs_pairs(data1, data2, bins = 0, kind = 'binbybin', bin_mean = 1, size = 1, entries = 0, freeze = False):
40 Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times.
44 data1, data2 : sequence of 1-D ndarrays
45 Input data. Observed samples or frequencies.
46 bins : [scalar|1-D array], optional, default : 0
47 If bins is an int, it defines the number of equal-width bins in the given
48 range. If bins is zero, the given data will be assumed to be frequency data,
49 and it defines the bin index. If bins is an 1-D array, it defines the bin
50 edges, including the rightmost edge, allowing for non-uniform bin widths.
51 kind : [str|(str,str)], optional, default : "binbybin"
53 bin_mean : [scalar|(scalar,scalar)], optional, default : 1
54 The desired mean count in each bin of the output histograms. For example,
55 if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is
56 ignored when entries is given.
57 size : int, optional, default : 1
58 The number of repetitions for computing the histograms.
59 entries : [scalar|(scalar,scalar)], optional, default : 0
60 The desired entries of the output histograms. If entries is 0, the entries
61 size of the input data will be used instead.
62 freeze : [bool,(bool,bool)], optional. default : False
63 if True, the output histogram will be the frozen distribution of the input
68 hist1, hist2 : (1-D array, 1-D array)
71 data1 = data1.astype(float)
72 data2 = data2.astype(float)
73 entries = np.resize(entries, 2)
74 freeze = np.resize(freeze, 2)
75 hist1 =
_hist_fill(data1, bins, entries[0], freeze[0])
76 hist2 =
_hist_fill(data2, bins, entries[1], freeze[1])
80 def power_of_test(data1, data2, rvs_func = 'rvs_pairs', tests = ['chi2_2samp'], rvs_key = {}, test_key = {}, parallel=None, sync=True):
81 """Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times.
85 data1, data2 : sequence of 1-D ndarrays
86 Input data. Observed samples or frequencies.
87 rvs_func : [callable|str], optional, default : "rvs_pairs"
88 The random variates function. The rvs_func can be either a callable or one of the following strings::
91 "rvs_pairs" Compute the histogram pairs from the random
92 variates of the given 2 samples/frequencies
95 tests : ([callable|str],...), optional, default : ["chi2_2samp"]
96 A list of *test* statistical functions. The *test* can be either a callable or one of the following strings::
99 "chi2_2samp" Read TS.chi2_2samp for further information.
100 "BDM_2samp" Read TS.BDM_2samp for further information.
101 "likelihoodratio_ksamp" Read TS.likelihoodratio_ksamp for further information.
102 "likelihoodvalue_ksamp" Read TS.likelihoodvalue_ksamp for further information.
103 "ks_2samp" Read TS.ks_2samp for further information.
104 "anderson_ksamp" Read TS.anderson_ksamp for further information.
105 "CVM_2samp" Read TS.CVM_2samp for further information.
107 rvs_key : dict, optional, default : {}
108 Keyword arguments for the rvs function, rvs_func.
109 test_key : dict, optional
110 Keyword arguments for the test statistical function, test.
111 parallel : bool, optional, default : None
112 If True, import IPyParallel package to do the parallel computation. If parallel is None,
113 the global variable PARALLEL will be used instead.
114 sync : bool, optional, default : True
115 When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect
116 when parallel is True.
120 [p1, p2, ...] : 1-D array
121 The corresponding p-values for each histogram pairs.
123 if parallel
is None: parallel = PARALLEL
127 client=
Client(**ipp_profile)
128 size = rvs_key[
'size']
132 rvs_key[
'size'] = (size//N + 1)
if (i < size % N)
else size//N
133 jobs.append(client[client.ids[i]].apply_async(power_of_test, data1, data2, rvs_func, tests, rvs_key, test_key,
False))
134 ars = client._asyncresult_from_jobs(jobs)
136 ars.wait_interactive()
138 for key, val
in ars.get():
139 ret.setdefault(key, []).
extend(val)
145 if type(rvs_func)
is str:
146 rvs_func = globals()[rvs_func]
147 if type(tests)
not in (list, tuple):
149 tests = [(t, getattr(TS, t))
if type(t)==str
else (
str(t), t)
for t
in tests]
151 for rvs1, rvs2
in rvs_func(data1,data2,**rvs_key):
152 for tname, test
in tests:
153 ret.setdefault(tname, []).
append(
test(rvs1, rvs2, binned=
True, **test_key).pvalue)