Source code for phyde.bootstrap

""" Class for processing HyDe bootsrapping output. """

import numpy as np


[docs] class Bootstrap: """ A class representing a dictionary of species triplets with info for each bootstrap replicate stored as a dictionary containing parameter values. :param str bootfile: name of file with bootstrapping results. Example: .. code:: py import phyde as hd boot = hd.Bootstrap("hyde-boot.txt") """ def __init__(self, bootfile): """ Bootstrap constructor. """ self.bootfile = bootfile self.breps = {} self.triples = [] self._read_bootstraps(bootfile)
[docs] def __call__(self, attr, p1, hyb, p2): """ A callable method (the object can be called as a function) to access attributes from a bootstrapped HyDe analysis. Returned as a list. :param str attr: name of hypothesis test attribute to plot (e.g., "Gamma", "Zscore", "Pvalue", etc.) :param str p1: parent one. :param str hyb: putative hybrid. :param str p2: parent two. :rtype: list Returns a list of the given attribute across all bootstrap replicates for the given triple ``(p1, hyb, p2)``. Example: .. code:: py import phyde as hd boot = hd.Bootstrap("hyde-boot.txt") boot("Gamma", "sp1", "sp2", "sp3") """ return [t[attr] for t in self.breps[(p1, hyb, p2)]]
def _read_bootstraps(self, bootfile): with open(bootfile) as b: boot_read_in = b.read() if boot_read_in[-1] == "\n": boot_reps = boot_read_in[:-1].split("####\n") else: boot_reps = boot_read_in.split("####\n") split_bootreps = [r.splitlines() for r in boot_reps] print("Number of boot reps:", len(boot_reps[0].splitlines()[1:])) for s in split_bootreps: lines = [line.split("\t") for line in s[1:]] for l in lines: tripl = (l[0], l[1], l[2]) if tripl not in self.triples: self.triples.append(tripl) self.breps[tripl] = [] else: pass self.breps[tripl].append(self._boot_info(l[3:])) def _boot_info(self, b): if len(b) != 18: raise ValueError( "** Warning: length of bootrep entry is incorrect. **" ) else: pass boot_info = { "Zscore": float(b[0]), "Pvalue": float(b[1]), "Gamma": float(b[2]), "AAAA": float(b[3]), "AAAB": float(b[4]), "AABA": float(b[5]), "AABB": float(b[6]), "AABC": float(b[7]), "ABAA": float(b[8]), "ABAB": float(b[9]), "ABAC": float(b[10]), "ABBA": float(b[11]), "BAAA": float(b[12]), "ABBC": float(b[13]), "CABC": float(b[14]), "BACA": float(b[15]), "BCAA": float(b[16]), "ABCD": float(b[17]), } return boot_info def summarize(self): # Summarizes the results of a bootsrapped HyDe analysis. summaries = {} for t in self.triples: summaries[t] = {} summaries[t]["Zscore"] = [ np.mean(self.zscore(t)), np.std(self.zscore(t)), ] summaries[t]["Pvalue"] = [ np.mean(self.pvalue(t)), np.std(self.pvalue(t)), ] summaries[t]["Gamma"] = [ np.mean(self.gamma(t)), np.std(self.gamma(t)), ] return summaries def gamma(self, p1, hyb, p2): # Return the values of gamma for the triple (p1, hyb, p2). return [t["Gamma"] for t in self.breps[(p1, hyb, p2)]] def zscore(self, p1, hyb, p2): # Return the values of the test statistic for the triple (p1, hyb, p2). return [t["Zscore"] for t in self.breps[(p1, hyb, p2)]] def pvalue(self, p1, hyb, p2): # Return the p-values for the triple (p1, hyb, p2). return [t["Pvalue"] for t in self.breps[(p1, hyb, p2)]]
[docs] def abba_baba(self, p1, hyb, p2): """ Calculate Patterson's D-Statistic for all bootstrap replicates for the triple (p1, hyb, p2). Uses vectorization with numpy arrays. :param str p1: parent 1. :param str hyb: putative hybrid. :param str p2: :rtype: numpy.array Example: .. code:: py import phyde as hd boot = hd.Bootstrap("hyde-boot.txt") boot.abba_baba("sp1", "sp2", "sp3") """ return ( np.array([t["ABBA"] for t in self.breps[(p1, hyb, p2)]]) - np.array([t["ABAB"] for t in self.breps[(p1, hyb, p2)]]) ) / ( np.array([t["ABBA"] for t in self.breps[(p1, hyb, p2)]]) + np.array([t["ABAB"] for t in self.breps[(p1, hyb, p2)]]) )
def site_patterns(self, p1, hyb, p2): # Get all site patterns for the triple (p1, hyb, p2) return { "AAAA": [t["AAAA"] for t in self.breps[(p1, hyb, p2)]], "AAAB": [t["AAAB"] for t in self.breps[(p1, hyb, p2)]], "AABA": [t["AABA"] for t in self.breps[(p1, hyb, p2)]], "AABB": [t["AABB"] for t in self.breps[(p1, hyb, p2)]], "AABC": [t["AABC"] for t in self.breps[(p1, hyb, p2)]], "ABAA": [t["ABAA"] for t in self.breps[(p1, hyb, p2)]], "ABAB": [t["ABAB"] for t in self.breps[(p1, hyb, p2)]], "ABAC": [t["ABAC"] for t in self.breps[(p1, hyb, p2)]], "ABBA": [t["ABBA"] for t in self.breps[(p1, hyb, p2)]], "BAAA": [t["BAAA"] for t in self.breps[(p1, hyb, p2)]], "ABBC": [t["ABBC"] for t in self.breps[(p1, hyb, p2)]], "CABC": [t["CABC"] for t in self.breps[(p1, hyb, p2)]], "BACA": [t["BACA"] for t in self.breps[(p1, hyb, p2)]], "BCAA": [t["BCAA"] for t in self.breps[(p1, hyb, p2)]], "ABCD": [t["ABCD"] for t in self.breps[(p1, hyb, p2)]], }
[docs] def write_summary(self, summary_file): """ Write a summary of the bootstrap replicates for each tested triple. The summary written to file includes the mean and standard deviation of the Zscore, P-value, and estimate of :math:`\gamma`. :param str summary_file: name of file. """ print("Writing summary to file:", summary_file) summ = self.summarize() with open(summary_file, "w") as f: print( "P1", "Hybrid", "P2", "Zscore_Mean", "Zscore_StdDev", "Pvalue_Mean", "Pvalue_StdDev", "Gamma_Mean", "Gamma_StdDev", sep="\t", file=f, ) for k in summ.keys(): for i in list(k): print(i, "\t", sep="", end="", file=f) print( summ[k]["Zscore"][0], summ[k]["Zscore"][1], summ[k]["Pvalue"][0], summ[k]["Pvalue"][1], summ[k]["Gamma"][0], summ[k]["Gamma"][1], sep="\t", file=f, )