Source code for pygeoda.sa.joincount

from ..libgeoda import gda_localjoincount, gda_localmultijoincount, VecBool, VecVecBool
from .lisa import lisa
import multiprocessing

__author__ = "Xun Li <lixun910@gmail.com>"

def local_joincount(w, data, **kwargs):
    """Local Join Count Statistics

    The function to apply local Join Count statistics.

    Args:
        w (Weight): An instance of Weight class.
        data (tuple/list/pandas.Series): A list of numeric values of selected variable.
            Every value must be 0 or 1.
        undefs (list, optional): A list of boolean values to indicate which value is
            undefined or null. Defaults to an empty ``VecBool``.
        permutations (int, optional): The number of permutations for the LISA
            computation. Default is 999.
        permutation_method (str, optional): The permutation method used for the LISA
            computation. Options are {'complete', 'lookup-table'}. Default is 'complete'.
        significance_cutoff (float, optional): A cutoff value for significance p-values
            to filter not-significant clusters. Default is 0.05.
        cpu_threads (int, optional): The number of cpu threads used for parallel LISA
            computation. Default is 6.
        seed (int, optional): The seed for random number generator. Default is 123456789.

    Returns:
        lisa: An instance of lisa class.

    Raises:
        ValueError: If ``w`` is None, or if ``data`` contains a value other than 0 or 1.
    """
    if w is None:
        raise ValueError("Weights is None.")

    # Materialise the input exactly once: the values are scanned for validation
    # and then handed to the native routine, so a one-shot iterable must not be
    # consumed by the validation pass.
    values = list(data)
    if any(v != 0 and v != 1 for v in values):
        raise ValueError("The input data is not binary.")

    # Only build the empty VecBool when the caller did not supply `undefs`.
    undefs = kwargs['undefs'] if 'undefs' in kwargs else VecBool()
    significance_cutoff = kwargs.get('significance_cutoff', 0.05)
    permutations = kwargs.get('permutations', 999)
    permutation_method = kwargs.get('permutation_method', 'complete')
    cpu_threads = kwargs.get('cpu_threads', 6)
    seed = kwargs.get('seed', 123456789)

    lisa_obj = gda_localjoincount(
        w.gda_w,
        values,
        list(undefs),
        significance_cutoff,
        cpu_threads,
        permutations,
        permutation_method,
        seed,
    )
    return lisa(lisa_obj)
def local_bijoincount(w, data, **kwargs):
    """Bivariate Local Join Count Statistics

    The function to apply (no-colocation) bivariate local Join Count statistics.
    The bivariate local join count only applies on two variables with no-colocation.

    Args:
        w (Weight): An instance of Weight class
        data (list or dataframe): A list of numeric vectors of selected variable or a
            data frame of selected variables e.g. guerry[['Crm_prs', 'Literacy']].
            Exactly two variables of equal length are required, every value must be
            0 or 1, and no observation may be 1 in both variables.
        undefs (optional): Undefined-value flags handed unchanged to the underlying
            routine. Defaults to an empty ``VecVecBool``.
        permutations (int, optional): The number of permutations for the LISA
            computation. Default is 999.
        permutation_method (str, optional): The permutation method used for the LISA
            computation. Options are {'complete', 'lookup-table'}. Default is 'complete'.
        significance_cutoff (float, optional): A cutoff value for significance p-values
            to filter not-significant clusters. Default is 0.05.
        cpu_threads (int, optional): The number of cpu threads used for parallel LISA
            computation. Default is 6.
        seed (int, optional): The seed for random number generator. Default is 123456789.

    Returns:
        lisa: An instance of lisa class

    Raises:
        ValueError: If ``w`` is None, if ``data`` does not hold exactly two variables,
            if the two variables differ in length, if any value is not 0 or 1, or if
            some observation is 1 in both variables (colocation).

    Examples:
        >>> import pygeoda
        >>> columbus = pygeoda.open("./data/columbus.shp")
        >>> columbus_q = pygeoda.queen_weights(columbus)
        >>> nsa = columbus.GetRealCol("nsa")
        >>> nsa_inv = [1-i for i in nsa]
        >>> lisa = pygeoda.local_bijoincount(columbus_q, [nsa, nsa_inv])
        >>> lisa.lisa_values()
        (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 3.0, 3.0, 2.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
        >>> lisa.lisa_pvalues()
        (nan, nan, nan, nan, nan, nan, nan, nan, 0.002, 0.034, nan, nan, nan, nan, nan, nan, 0.44, nan, nan, nan, nan, 0.262, nan, 0.125, 0.079, 0.053, nan, nan, nan, nan, 0.093, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan)
        >>> lisa.lisa_num_nbrs()
        (2, 3, 4, 4, 8, 2, 4, 6, 8, 4, 5, 6, 4, 6, 6, 8, 3, 4, 3, 10, 3, 6, 3, 7, 8, 6, 4, 9, 7, 5, 3, 4, 4, 4, 7, 5, 6, 6, 3, 5, 3, 2, 6, 5, 4, 2, 2, 4, 3)
    """
    if w is None:
        raise ValueError("Weights is None.")

    # Detect a DataFrame by class name so this module does not have to import
    # pandas; columns become the per-variable lists.
    if type(data).__name__ == "DataFrame":
        data = data.values.transpose().tolist()

    if len(data) != 2:
        raise ValueError("The input data should be a list of tuples of two selected variables.")

    data1, data2 = data[0], data[1]
    if len(data1) != len(data2):
        raise ValueError("The size of data1 does not match the size of data2.")

    if any(v != 0 and v != 1 for column in data for v in column):
        raise ValueError("The input data is not binary.")

    # Pair the observations positionally; lengths were verified equal above.
    if any(a == 1 and b == 1 for a, b in zip(data1, data2)):
        raise ValueError("The bivariate local join count only applies on two variables with no-colocation.")

    # Only build the empty VecVecBool when the caller did not supply `undefs`.
    undefs = kwargs['undefs'] if 'undefs' in kwargs else VecVecBool()
    significance_cutoff = kwargs.get('significance_cutoff', 0.05)
    permutations = kwargs.get('permutations', 999)
    permutation_method = kwargs.get('permutation_method', 'complete')
    cpu_threads = kwargs.get('cpu_threads', 6)
    seed = kwargs.get('seed', 123456789)

    lisa_obj = gda_localmultijoincount(
        w.gda_w,
        data,
        undefs,
        significance_cutoff,
        cpu_threads,
        permutations,
        permutation_method,
        seed,
    )
    return lisa(lisa_obj)
def local_multijoincount(w, data, **kwargs):
    """(Multivariate) Colocation Local Join Count Statistics

    The function to apply (multivariate) colocation local Join Count statistics

    Args:
        w (Weight): An instance of Weight class
        data (list or dataframe): A list of numeric vectors of selected variable or a
            data frame of selected variables e.g. guerry[['Crm_prs', 'Literacy']].
            At least two variables of equal length are required and every value must
            be 0 or 1.
        undefs (optional): Undefined-value flags handed unchanged to the underlying
            routine. Defaults to an empty ``VecVecBool``.
        permutations (int, optional): The number of permutations for the LISA
            computation. Default is 999.
        permutation_method (str, optional): The permutation method used for the LISA
            computation. Options are {'complete', 'lookup-table'}. Default is 'complete'.
        significance_cutoff (float, optional): A cutoff value for significance p-values
            to filter not-significant clusters. Default is 0.05.
        cpu_threads (int, optional): The number of cpu threads used for parallel LISA
            computation. Default is 6.
        seed (int, optional): The seed for random number generator. Default is 123456789.

    Returns:
        lisa: An instance of lisa class

    Raises:
        ValueError: If ``w`` is None, if fewer than two variables are given, if the
            variables differ in length, if any value is not 0 or 1, or if exactly two
            variables are given whose values sum to the number of observations while
            no observation is 1 in both (use ``local_bijoincount()`` instead).
    """
    if w is None:
        raise ValueError("Weights is None.")

    # Detect a DataFrame by class name so this module does not have to import
    # pandas; columns become the per-variable lists.
    if type(data).__name__ == "DataFrame":
        data = data.values.transpose().tolist()

    n_vars = len(data)
    if n_vars <= 1:
        raise ValueError("The multivariate local Join Count applies to multiple (more than one) variables.")

    n_obs = len(data[0])
    for column in data:
        if len(column) != n_obs:
            raise ValueError("The input data have variables with different size.")
        if any(v != 0 and v != 1 for v in column):
            raise ValueError("The input data is not binary.")

    # With exactly two variables whose ones add up to the observation count,
    # verify that at least one observation is 1 in both; otherwise the caller
    # is pointed at the bivariate (no-colocation) function.
    if n_vars == 2 and (sum(data[0]) + sum(data[1]) == n_obs):
        has_colocation = any(a == 1 and b == 1 for a, b in zip(data[0], data[1]))
        if not has_colocation:
            raise ValueError("The input two variables have no colocations. Please use bivariate local join count: local_bijoincount().")

    # Only build the empty VecVecBool when the caller did not supply `undefs`.
    undefs = kwargs['undefs'] if 'undefs' in kwargs else VecVecBool()
    significance_cutoff = kwargs.get('significance_cutoff', 0.05)
    permutations = kwargs.get('permutations', 999)
    permutation_method = kwargs.get('permutation_method', 'complete')
    cpu_threads = kwargs.get('cpu_threads', 6)
    seed = kwargs.get('seed', 123456789)

    lisa_obj = gda_localmultijoincount(
        w.gda_w,
        data,
        undefs,
        significance_cutoff,
        cpu_threads,
        permutations,
        permutation_method,
        seed,
    )
    return lisa(lisa_obj)