3 from scipy.stats 
import rv_discrete
 
    4 import test_statistics 
as TS
 
   12         from ipyparallel 
import Client
 
   22 ipp_profile = {
'sshserver':
'kingman@140.114.94.171',
 
   28             bins = np.arange(data.size + 1, dtype = float)
 
   30             bins = np.linspace(data.min(), data.max(), bins)
 
   31     binc = (bins[1:] + bins[:-1]) / 2 
 
   35         rv = rv_discrete(values = (binc, data/data.sum()))
 
   36         return lambda : np.histogram(rv.rvs(size=data.sum() 
if entries==0 
else entries), bins)[0].astype(float)
 
   38 def rvs_pairs(data1, data2, bins = 0, kind = 'binbybin', bin_mean = 1, size = 1, entries = 0, freeze = False):
 
   40     Compute the histogram pairs from the random variates of the given 2 samples/frequencies for size_times.  
   44     data1, data2 : sequence of 1-D ndarrays 
   45         Input data. Observed samples or frequencies. 
   46     bins : [scalar|1-D array], optional, default : 0 
   47         If bins is an int, it defines the number of equal-width bins in the given 
   48         range. If bins is zero, the given data will be assumed to be frequency data, 
   49         and it defines the bin index. If bins is an 1-D array, it defines the bin 
   50         edges, including the rightmost edge, allowing for non-uniform bin widths. 
   51     kind : [str|(str,str)], optional, default : "binbybin" 
   53     bin_mean : [scalar|(scalar,scalar)], optional, default : 1 
   54         The desired mean count in each bin of the output histograms. For example,  
   55         if a histogram has 10000 entries and 100 bins, the bin_mean is 100. It is 
   56         ignored when entries is given. 
   57     size : int, optional, default : 1 
   58         The number of repetitions for computing the histograms. 
   59     entries : [scalar|(scalar,scalar)], optional, default : 0 
   60         The desired entries of the output histograms. If entries is 0, the entries 
   61         size of the input data will be used instead. 
   62     freeze : [bool,(bool,bool)], optional. default : False 
   63         if True, the output histogram will be the frozen distribution of the input 
   68     hist1, hist2 : (1-D array, 1-D array) 
   71     data1 = data1.astype(float)
 
   72     data2 = data2.astype(float)
 
   73     entries = np.resize(entries, 2)
 
   74     freeze = np.resize(freeze, 2)
 
   75     hist1 = 
_hist_fill(data1, bins, entries[0], freeze[0])
 
   76     hist2 = 
_hist_fill(data2, bins, entries[1], freeze[1])
 
   80 def power_of_test(data1, data2, rvs_func = 'rvs_pairs', tests = ['chi2_2samp'], rvs_key = {}, test_key = {}, parallel=None, sync=True):
 
   81     """Compute the corresponding p-values for each histrogram pairs from the random variates of the given 2 samples/frequencies for size_times. 
   85     data1, data2 : sequence of 1-D ndarrays 
   86         Input data. Observed samples or frequencies. 
   87     rvs_func : [callable|str], optional, default : "rvs_pairs" 
   88         The random variates function. The rvs_func can be either a callable or one of the following strings:: 
   91             "rvs_pairs"                Compute the histogram pairs from the random 
   92                                        variates of the given 2 samples/frequencies 
   95     tests : ([callable|str],...), optional, default : ["chi2_2samp"] 
   96         A list of *test* statistical functions. The *test* can be either a callable or one of the following strings:: 
   99             "chi2_2samp"               Read TS.chi2_2samp for further information. 
  100             "BDM_2samp"                Read TS.BDM_2samp for further information. 
  101             "likelihoodratio_ksamp"    Read TS.likelihoodratio_ksamp for further information. 
  102             "likelihoodvalue_ksamp"    Read TS.likelihoodvalue_ksamp for further information. 
  103             "ks_2samp"                 Read TS.ks_2samp for further information. 
  104             "anderson_ksamp"           Read TS.anderson_ksamp for further information. 
  105             "CVM_2samp"                Read TS.CVM_2samp for further information. 
  107     rvs_key : dict, optional, default : {} 
  108         Keyword arguments for the rvs function, rvs_func. 
  109     test_key : dict, optional 
  110         Keyword arguments for the test statistical function, test. 
  111     parallel : bool, optional, default : None 
  112         If True, import IPyParallel package to do the parallel computation. If parallel is None, 
  113         the global variable PARALLEL will be used instead. 
  114     sync : bool, optional, default : True 
  115         When sync is False, an IPyParallel AsyncResult Object will be returned instead. Onyl affect 
  116         when parallel is True. 
  120     [p1, p2, ...] : 1-D array 
  121         The corresponding p-values for each histogram pairs. 
  123     if parallel 
is None: parallel = PARALLEL
 
  127             client=
Client(**ipp_profile)
 
  128             size = rvs_key[
'size']
 
  132                 rvs_key[
'size'] = (size//N + 1) 
if (i < size % N) 
else size//N
 
  133                 jobs.append(client[client.ids[i]].apply_async(power_of_test, data1, data2, rvs_func, tests, rvs_key, test_key, 
False))
 
  134             ars = client._asyncresult_from_jobs(jobs)
 
  136                 ars.wait_interactive()
 
  138                 for key, val 
in ars.get():
 
  139                     ret.setdefault(key, []).
extend(val)
 
  145     if type(rvs_func) 
is str:
 
  146         rvs_func = globals()[rvs_func]
 
  147     if type(tests) 
not in (list, tuple):
 
  149     tests = [(t, getattr(TS, t)) 
if type(t)==str 
else (
str(t), t) 
for t 
in tests]
 
  151     for rvs1, rvs2 
in rvs_func(data1,data2,**rvs_key):
 
  152         for tname, test 
in tests:
 
  153             ret.setdefault(tname, []).
append(
test(rvs1, rvs2, binned=
True, **test_key).pvalue)