################################################################################
#
# RevBayes Analysis: Bayesian inference of diversification rates under a
# cladogenetic character-dependent birth-death model (also
# called Cladogenetic State Speciation and Extinction or
# ClaSSE). In this example we use ClaSSE to model biogeographic
# range evolution similar to the DEC models. Unlike the DEC
# model, ClaSSE accounts for speciation events unobserved due
# to extinction or incomplete sampling.
#
# authors: Will Freyman
#
################################################################################
#######################
# Reading in the Data #
#######################
# Get the tree
observed_phylogeny <- readTrees("data/primates_biogeo.tre")[1]
# Get the taxa in the tree. We'll need this later on.
taxa = observed_phylogeny.taxa()
# Read biogeographic range data. The areas are represented as the
# following character states:
# 0 = 00 = the null state with no range
# 1 = 01 = New World only
# 2 = 10 = Old World only
# 3 = 11 = both New and Old World
data_biogeo = readCharacterDataDelimited("data/primates_biogeo.tsv", stateLabels="0123", type="NaturalNumbers", delimiter="\t", headers=TRUE)
# Create some vector for the moves and monitors of this analysis
moves = VectorMoves()
monitors = VectorMonitors()
###############################
# Set up the extinction rates #
###############################
# We are going to draw both anagenetic transition rates
# and diversification rates from a lognormal distribution.
# The mean of the prior distribution will be the expected net
# diversification rate, and the SD will be 1.0 so the 95%
# prior interval ranges well over 2 orders of magnitude.
num_species <- 424 # approximate total number of primate species
rate_mean <- ln( ln(num_species/2.0) / observed_phylogeny.rootAge() )
rate_sd <- 1.0
# The extinction rates will be stored in a vector where each element represents
# the extinction rate for the corresponding character state.
# We have chosen to allow a lineage to go extinct in both the New and Old World
# at the same time (like a global extinction event). As an alternative, you could
# restrict the model so that a lineage can only go extinct if it's range is limited
# to one area.
extinction_rates[1] <- 0.0 # the null state (state 0)
extinction_rates[2] ~ dnLognormal(rate_mean, rate_sd) # extinction when the lineage is in New World (state 1)
extinction_rates[3] ~ dnLognormal(rate_mean, rate_sd) # extinction when the lineage is in Old World (state 2)
extinction_rates[4] ~ dnLognormal(rate_mean, rate_sd) # extinction when in both (state 3)
# Note Rev vectors are indexed starting with 1, yet our character states start
# at 0. So extinction_rate[1] will represent the extinction rate for character
# state 0.
# add MCMC moves for each extinction rate
moves.append( mvSlide( extinction_rates[2], weight=4 ) )
moves.append( mvSlide( extinction_rates[3], weight=4 ) )
moves.append( mvSlide( extinction_rates[4], weight=4 ) )
# Let's also create a deterministic variable to monitor the overall extinction rate
total_extinction := sum(extinction_rates)
################################################
# Set up the anagenetic transition rate matrix #
################################################
# First, let's create the rates of anagenetic dispersal:
anagenetic_dispersal_13 ~ dnLognormal(rate_mean, rate_sd) # disperse from New to Old World 01 -> 11
anagenetic_dispersal_23 ~ dnLognormal(rate_mean, rate_sd) # disperse from Old to New World 10 -> 11
# and add MCMC moves for each anagenetic dispersal rate
moves.append( mvSlide( anagenetic_dispersal_13, weight=4 ) )
moves.append( mvSlide( anagenetic_dispersal_23, weight=4 ) )
# The anagenetic transitions will be stored in a 4 by 4
# instantaneous rate matrix. We will construct this by
# first creating a vector of vectors. Let's begin by
# initalizing all rates to 0.0:
for (i in 1:4) {
for (j in 1:4) {
r[i][j] <- 0.0
}
}
# Now we can populate non-zero rates into the anagenetic transition rate matrix:
r[2][4] := anagenetic_dispersal_13
r[3][4] := anagenetic_dispersal_23
r[4][2] := extinction_rates[3]
r[4][3] := extinction_rates[2]
# Note that we have modeled the rate of 11 -> 01 (3 -> 1) as being
# the rate of going extinct in area 2, and the rate of 11 -> 10 (3 -> 2)
# as being the rate of going extinct in area 1.
# Now we pass our vector of vectors into the fnFreeK function to create
# the instaneous rate matrix.
ana_rate_matrix := fnFreeK(r, rescaled=false)
##################################################
# Set up the cladogenetic speciation rate matrix #
##################################################
# Here we need to define each cladogenetic event type in the form
# [ancestor_state, daughter1_state, daughter2_state]
# and assign each cladogenetic event type a corresponding
# speciation rate.
# The first type of cladogenetic event we'll specify is widespread sympatry.
# Widespread sympatric cladogenesis is where the biogeographic range does
# not change; that is the daughter lineages inherit the same range as
# the ancestor. In this example we are not going to allow the change
# 11 -> 11, 11, as it seems biologically implausible. However if you wanted
# you could add this to your model.
# We'll set all these widespread sympatric events to have the same speciation rate:
speciation_wide_sympatry ~ dnLognormal(rate_mean, rate_sd)
moves.append( mvSlide( speciation_wide_sympatry, weight=4 ) )
# Define the widespread sympatric cladogenetic events:
clado_events[1] = [1, 1, 1] # 01 -> 01, 01
clado_events[2] = [2, 2, 2] # 10 -> 10, 10
# and assign each the same speciation rate:
speciation_rates[1] := speciation_wide_sympatry/2
speciation_rates[2] := speciation_wide_sympatry/2
# Subset sympatry is where one daughter lineage inherits the full
# ancestral range but the other lineage inherits only a single region.
speciation_sub_sympatry ~ dnLognormal(rate_mean, rate_sd)
moves.append( mvSlide( speciation_sub_sympatry, weight=4 ) )
# Define the subset sympatry events and assign each a speciation rate:
clado_events[3] = [3, 3, 1] # 11 -> 11, 01
clado_events[4] = [3, 1, 3] # 11 -> 10, 11
clado_events[5] = [3, 3, 2] # 11 -> 11, 10
clado_events[6] = [3, 2, 3] # 11 -> 10, 11
speciation_rates[3] := speciation_sub_sympatry/4
speciation_rates[4] := speciation_sub_sympatry/4
speciation_rates[5] := speciation_sub_sympatry/4
speciation_rates[6] := speciation_sub_sympatry/4
# Allopatric cladogenesis is when the two daughter lineages
# split the ancestral range:
speciation_allopatry ~ dnLognormal(rate_mean, rate_sd)
moves.append( mvSlide( speciation_allopatry, weight=4 ) )
# Define the allopatric events:
clado_events[7] = [3, 1, 2] # 11 -> 01, 10
clado_events[8] = [3, 2, 1] # 11 -> 10, 01
speciation_rates[7] := speciation_allopatry/2
speciation_rates[8] := speciation_allopatry/2
# Now let's create a deterministic variable to monitor the overall speciation rate
total_speciation := sum(speciation_rates)
# Finally, we construct the cladogenetic speciation rate
# matrix from the cladogenetic event types and the speciation rates
clado_matrix := fnCladogeneticSpeciationRateMatrix(clado_events, speciation_rates, 4)
# let's view the cladogenetic matrix to see if we have set it up correctly:
clado_matrix
# As we have specified the model, we did not allow cladogenetic long
# distance (jump) dispersal, for example 01 -> 01, 10
# As an exercise try implementing a model with cladogenetic
# long distance dispersal and see which model fits the data better.
########################################################################
# Set up the cladogenetic character state dependent birth death process #
#########################################################################
# For simplicity we will fix the root frequences to be equal except for the null state
# which has probability of 0.
root_frequencies <- simplex([0, 1, 1, 1])
# rho is the probability of sampling species at the present
rho <- observed_phylogeny.ntips()/num_species
# Now we construct a stochastic variable drawn from the cladogenetic
# character state dependent birth death process (more commonly
# called ClaSSE):
classe ~ dnCDCladoBDP( rootAge = observed_phylogeny.rootAge(),
cladoEventMap = clado_matrix,
extinctionRates = extinction_rates,
Q = ana_rate_matrix,
delta = 1.0,
pi = root_frequencies,
rho = rho,
condition = "time",
taxa = taxa )
# clamp the model with the observed data
classe.clamp( observed_phylogeny )
classe.clampCharData( data_biogeo )
#############
# The Model #
#############
# workspace model wrapper
mymodel = model(classe)
# set up the monitors that will output parameter values to file and screen
monitors.append( mnModel(filename="output/primates_ClaSSE.log", printgen=1) )
monitors.append( mnJointConditionalAncestralState(tree=observed_phylogeny, cdbdp=classe, type="NaturalNumbers", printgen=1, withTips=true, withStartStates=true, filename="output/anc_states_primates_ClaSSE.log") )
monitors.append( mnScreen(printgen=1, speciation_wide_sympatry, speciation_sub_sympatry, speciation_allopatry, extinction_rates) )
################
# The Analysis #
################
# workspace mcmc
mymcmc = mcmc(mymodel, monitors, moves, nruns=2, combine="mixed")
# run the MCMC
mymcmc.run(generations=3000, tuningInterval=200)
##############################
# Summarize ancestral states #
##############################
anc_states = readAncestralStateTrace("output/anc_states_primates_ClaSSE.log")
anc_tree = ancestralStateTree(tree=observed_phylogeny, ancestral_state_trace_vector=anc_states, include_start_states=true, file="output/anc_states_primates_ClaSSE_results.tree", burnin=0, summary_statistic="MAP", site=1)
q()