""" Class for processing HyDe bootsrapping output. """
import numpy as np
[docs]
class Bootstrap:
"""
A class representing a dictionary of
species triplets with info for each
bootstrap replicate stored as a dictionary
containing parameter values.
:param str bootfile: name of file with bootstrapping results.
Example:
.. code:: py
import phyde as hd
boot = hd.Bootstrap("hyde-boot.txt")
"""
def __init__(self, bootfile):
"""
Bootstrap constructor.
"""
self.bootfile = bootfile
self.breps = {}
self.triples = []
self._read_bootstraps(bootfile)
[docs]
def __call__(self, attr, p1, hyb, p2):
"""
A callable method (the object can be called as a function) to access
attributes from a bootstrapped HyDe analysis. Returned as a list.
:param str attr: name of hypothesis test attribute to plot (e.g., "Gamma", "Zscore", "Pvalue", etc.)
:param str p1: parent one.
:param str hyb: putative hybrid.
:param str p2: parent two.
:rtype: list
Returns a list of the given attribute across all bootstrap replicates
for the given triple ``(p1, hyb, p2)``.
Example:
.. code:: py
import phyde as hd
boot = hd.Bootstrap("hyde-boot.txt")
boot("Gamma", "sp1", "sp2", "sp3")
"""
return [t[attr] for t in self.breps[(p1, hyb, p2)]]
def _read_bootstraps(self, bootfile):
with open(bootfile) as b:
boot_read_in = b.read()
if boot_read_in[-1] == "\n":
boot_reps = boot_read_in[:-1].split("####\n")
else:
boot_reps = boot_read_in.split("####\n")
split_bootreps = [r.splitlines() for r in boot_reps]
print("Number of boot reps:", len(boot_reps[0].splitlines()[1:]))
for s in split_bootreps:
lines = [line.split("\t") for line in s[1:]]
for l in lines:
tripl = (l[0], l[1], l[2])
if tripl not in self.triples:
self.triples.append(tripl)
self.breps[tripl] = []
else:
pass
self.breps[tripl].append(self._boot_info(l[3:]))
def _boot_info(self, b):
if len(b) != 18:
raise ValueError(
"** Warning: length of bootrep entry is incorrect. **"
)
else:
pass
boot_info = {
"Zscore": float(b[0]),
"Pvalue": float(b[1]),
"Gamma": float(b[2]),
"AAAA": float(b[3]),
"AAAB": float(b[4]),
"AABA": float(b[5]),
"AABB": float(b[6]),
"AABC": float(b[7]),
"ABAA": float(b[8]),
"ABAB": float(b[9]),
"ABAC": float(b[10]),
"ABBA": float(b[11]),
"BAAA": float(b[12]),
"ABBC": float(b[13]),
"CABC": float(b[14]),
"BACA": float(b[15]),
"BCAA": float(b[16]),
"ABCD": float(b[17]),
}
return boot_info
def summarize(self):
# Summarizes the results of a bootsrapped HyDe analysis.
summaries = {}
for t in self.triples:
summaries[t] = {}
summaries[t]["Zscore"] = [
np.mean(self.zscore(t)),
np.std(self.zscore(t)),
]
summaries[t]["Pvalue"] = [
np.mean(self.pvalue(t)),
np.std(self.pvalue(t)),
]
summaries[t]["Gamma"] = [
np.mean(self.gamma(t)),
np.std(self.gamma(t)),
]
return summaries
def gamma(self, p1, hyb, p2):
# Return the values of gamma for the triple (p1, hyb, p2).
return [t["Gamma"] for t in self.breps[(p1, hyb, p2)]]
def zscore(self, p1, hyb, p2):
# Return the values of the test statistic for the triple (p1, hyb, p2).
return [t["Zscore"] for t in self.breps[(p1, hyb, p2)]]
def pvalue(self, p1, hyb, p2):
# Return the p-values for the triple (p1, hyb, p2).
return [t["Pvalue"] for t in self.breps[(p1, hyb, p2)]]
[docs]
def abba_baba(self, p1, hyb, p2):
"""
Calculate Patterson's D-Statistic for all bootstrap replicates
for the triple (p1, hyb, p2). Uses vectorization with numpy arrays.
:param str p1: parent 1.
:param str hyb: putative hybrid.
:param str p2:
:rtype: numpy.array
Example:
.. code:: py
import phyde as hd
boot = hd.Bootstrap("hyde-boot.txt")
boot.abba_baba("sp1", "sp2", "sp3")
"""
return (
np.array([t["ABBA"] for t in self.breps[(p1, hyb, p2)]])
- np.array([t["ABAB"] for t in self.breps[(p1, hyb, p2)]])
) / (
np.array([t["ABBA"] for t in self.breps[(p1, hyb, p2)]])
+ np.array([t["ABAB"] for t in self.breps[(p1, hyb, p2)]])
)
def site_patterns(self, p1, hyb, p2):
# Get all site patterns for the triple (p1, hyb, p2)
return {
"AAAA": [t["AAAA"] for t in self.breps[(p1, hyb, p2)]],
"AAAB": [t["AAAB"] for t in self.breps[(p1, hyb, p2)]],
"AABA": [t["AABA"] for t in self.breps[(p1, hyb, p2)]],
"AABB": [t["AABB"] for t in self.breps[(p1, hyb, p2)]],
"AABC": [t["AABC"] for t in self.breps[(p1, hyb, p2)]],
"ABAA": [t["ABAA"] for t in self.breps[(p1, hyb, p2)]],
"ABAB": [t["ABAB"] for t in self.breps[(p1, hyb, p2)]],
"ABAC": [t["ABAC"] for t in self.breps[(p1, hyb, p2)]],
"ABBA": [t["ABBA"] for t in self.breps[(p1, hyb, p2)]],
"BAAA": [t["BAAA"] for t in self.breps[(p1, hyb, p2)]],
"ABBC": [t["ABBC"] for t in self.breps[(p1, hyb, p2)]],
"CABC": [t["CABC"] for t in self.breps[(p1, hyb, p2)]],
"BACA": [t["BACA"] for t in self.breps[(p1, hyb, p2)]],
"BCAA": [t["BCAA"] for t in self.breps[(p1, hyb, p2)]],
"ABCD": [t["ABCD"] for t in self.breps[(p1, hyb, p2)]],
}
[docs]
def write_summary(self, summary_file):
"""
Write a summary of the bootstrap replicates for each tested triple.
The summary written to file includes the mean and standard deviation
of the Zscore, P-value, and estimate of :math:`\gamma`.
:param str summary_file: name of file.
"""
print("Writing summary to file:", summary_file)
summ = self.summarize()
with open(summary_file, "w") as f:
print(
"P1",
"Hybrid",
"P2",
"Zscore_Mean",
"Zscore_StdDev",
"Pvalue_Mean",
"Pvalue_StdDev",
"Gamma_Mean",
"Gamma_StdDev",
sep="\t",
file=f,
)
for k in summ.keys():
for i in list(k):
print(i, "\t", sep="", end="", file=f)
print(
summ[k]["Zscore"][0],
summ[k]["Zscore"][1],
summ[k]["Pvalue"][0],
summ[k]["Pvalue"][1],
summ[k]["Gamma"][0],
summ[k]["Gamma"][1],
sep="\t",
file=f,
)