#########################################################################################################
#
# RevBayes Chromosome Evolution Tutorial:
#
# A BiChroM Analysis
#
# by Will Freyman
#
#
# This is an example of how to test for an association of phenotype and chromosome number evolution
# in RevBayes. Here we set up a binary phenotypic character and estimate separate rates of chromosome
# evolution for each state of the phenotype. The model describes the joint evolution of both the
# phenotypic character and chromosome evolution. This model (BiChroM) was introduced in ZenilāFerguson
# et al. (2017). In RevBayes the BiChroM model can easily be extended to multistate phenotypes and/or
# hidden states, plus cladogenetic changes could be incorporated into the model.
#
# We'll again use chromosome count data from Ohi-Toma et al. 2006 (Systematic Botany) for the plant
# genus Aristolochia (commonly called Dutchman's pipe).
#
# For the phenotype we will examine gynostemium morphology. Aristolochia flowers have an extensively
# modified perianth that traps and eventually releases pollinators to ensure cross pollination
# (this is why the flowers resemble pipes and are commonly called Dutchman's pipes). The gynostemium
# is a reproductive organ found only in Aristolchiaceae and Orchids that consists of fused stamens
# and pistil that pollinators must interact with during pollination. The subgenus Isotrema has highly
# reduced three-lobed gynostemium. Other members of Aristolochia have gynostemium subdivided into
# 5 to 24 lobes. We'll test for an association of this phenotype with changes in the rates of
# chromosome evolution. This is probably a silly example, but works well to demonstrate the model.
#
# phenotype state 0 = 3 lobed gynostemium
# phenotype state 1 = 5 t0 24 lobed gynostemium
#
# In this example the phylogeny is assumed known. See the ChromEvol_joint.Rev for an example of jointly
# estimating chromosome evolution and phylogeny.
#
#########################################################################################################
#########################
# Read in the data
#########################
# Create some vector for the moves and monitors of this analysis
moves = VectorMoves()
monitors = VectorMonitors()
# Read in the phylogeny. In this example the phylogeny is assumed known.
phylogeny <- readBranchLengthTrees("data/aristolochia.tree")[1]
# We need to limit the maximum number of chromosomes,
# so here we just use the largest chromosome count plus 10.
max_chromo = 26
# Get the chromosome counts from a tab-delimited file.
chromo_data = readCharacterDataDelimited("data/aristolochia_bichrom_counts.tsv", stateLabels=2*(max_chromo + 1), type="NaturalNumbers", delimiter="\t", headers=FALSE)
#########################
# Chromosome Model
#########################
# We'll use exponential priors to model the rates of polyploidy and
# dysploidy events along the branches of the phylogeny
# Here we set up two rate parameters for each type of chromosome change --
# one for phenotype state 0 and one for phenotype state 1
# rate of chromosome gains
gamma_0 ~ dnExponential(10.0)
gamma_1 ~ dnExponential(10.0)
# rate of chromosome losses
delta_0 ~ dnExponential(10.0)
delta_1 ~ dnExponential(10.0)
# rate of polyploidization
rho_0 ~ dnExponential(10.0)
rho_1 ~ dnExponential(10.0)
# Add MCMC moves for each of the rates.
moves.append( mvScale(gamma_0, lambda=1, weight=1) )
moves.append( mvScale(delta_0, lambda=1, weight=1) )
moves.append( mvScale(rho_0, lambda=1, weight=1) )
moves.append( mvScale(gamma_1, lambda=1, weight=1) )
moves.append( mvScale(delta_1, lambda=1, weight=1) )
moves.append( mvScale(rho_1, lambda=1, weight=1) )
# Now we create the rate matrix for the chromosome evolution model.
# We will set up two rate matrices, one for each phenotype state.
# We will use a simple ChromEvol model that includes
# only the rate of chromosome gain, loss, and polyploidization.
Q_0 := fnChromosomes(max_chromo, gamma_0, delta_0, rho_0)
Q_1 := fnChromosomes(max_chromo, gamma_1, delta_1, rho_1)
# Parameters for demi-polyploidization and rate modifiers could also
# be added at this step for more complex models. For example we
# could have included the rate of demi-polyploidization like this:
# Q := fnChromosomes(max_chromo, gamma, delta, rho, eta)
# Now we create the rates of transitioning between phenotype states:
q_01 ~ dnExponential(10.0)
q_10 ~ dnExponential(10.0)
moves.append( mvScale(q_01, lambda=1, weight=1) )
moves.append( mvScale(q_10, lambda=1, weight=1) )
# And finally we create the transition rate matrix for the joint
# model of phenotypic and chromosome evolution.
# First we will initialize the matrix with all zeros:
s = Q_0[1].size()
for (i in 1:(2 * s)) {
for (j in 1:(2 * s)) {
Q[i][j] := 0.0
}
}
# And now populate the matrix with transition rates:
for (i in 1:(2 * s)) {
for (j in 1:(2 * s)) {
if (i <= s) {
if (j <= s) {
if (i != j) {
# chromosome changes within phenotype state 0
Q[i][j] := abs(Q_0[i][j])
}
} else {
if (i == (j - s)) {
# transition from phenotype state 0 to 1
Q[i][j] := q_01
}
}
} else {
if (j <= s) {
if (i == (j + s)) {
# transition from phenotype state 1 to 0
Q[i][j] := q_10
}
} else {
if (i != j) {
# chromosome changes within phenotype state 1
k = i - s
l = j - s
Q[i][j] := abs(Q_1[k][l])
}
}
}
}
}
Q_b := fnFreeK(Q, rescaled=false)
# Here we assume the frequency of chromosome numbers at the root of the tree
# are equal. Alternatively, you may want to treat the root frequencies
# as a free variable and estimate them from the observed data (we discuss this elsewhere).
root_frequencies := simplex(rep(1, 2 * s))
# Now create the stochastic node for the chromosome evolution continuous-time Markov chain (CTMC).
chromo_ctmc ~ dnPhyloCTMC(Q=Q_b, tree=phylogeny, rootFreq=root_frequencies, type="NaturalNumbers")
# and clamp the observed chromosome counts data.
chromo_ctmc.clamp(chromo_data)
# Finally we wrap our model into a single model object.
mymodel = model(phylogeny)
#########################
# MCMC
#########################
# Create the MCMC monitors:
# First, a screen monitor with some useful variables:
monitors.append( mnScreen(printgen=10) )
# The ancestral state monitor for sampling ancestral states:
monitors.append( mnJointConditionalAncestralState(filename="output/BiChroM_anc_states.log", printgen=10, tree=phylogeny, ctmc=chromo_ctmc, type="NaturalNumbers") )
# And another monitor for all the model variables:
monitors.append( mnModel(filename="output/BiChroM_model.log", printgen=10) )
# Now set up the MCMC.
mymcmc = mcmc(mymodel, monitors, moves)
# Run the MCMC. Note that for a real analysis you'd want to run many more
# iterations and check for convergence.
mymcmc.run(200)
#########################
# Summarize the results
#########################
# Now let's summarize the sampled ancestral chromosome numbers.
# First, read in the ancestral state trace:
anc_state_trace = readAncestralStateTrace("output/BiChroM_anc_states.log")
# Finally, summarize the values from the traces over the phylogeny.
# Here we do a marginal reconstruction of the ancestral states, discarding the first 25% of samples
# as burnin.
ancestralStateTree(phylogeny, anc_state_trace, "output/BiChroM_final.tree", burnin=0.25, reconstruction="marginal")
#########################################################################################################
#
# Open BiChroM_final.tree in FigTree and look at the ancestral state node labels.
# The 3 states with the highest posterior probabilities are given, along with their marginal
# posterior probabilities.
#
# Finally, you could use the RevGadgets R package to generate a PDF plot of the ancestral states
# like in the other examples (see plot_BiChroM.R).
#
#########################################################################################################