Twinning

Richèl J.C. Bilderbeek

2023-01-19

pirouette has the option to investigate the error BEAST2 makes on a twin tree. A twin tree is a tree with the same topology as the original phylogeny, yet where the branch lengths follow a birth-death branch length distribution.

library(pirouette)
library(ggplot2)

Twinning is useful to separate the effect of an unknown tree prior (i.e. speciation model) on a phylogeny’s shape from the noise (the minimal error) made by BEAST2.

In this example, the following tree is used:

phylogeny <- ape::read.tree(text = "((A:1, B:1):1, (C:1, D:1):1);")
ape::plot.phylo(phylogeny, main = "The True Phylogeny")

This phylogeny follows a speciation model unknown to BEAST2, as two speciation events happened at exactly the same time. Such a phylogeny will have a likelihood of zero for all of the BEAST2 tree priors.

Twinning:

twinning_params <- pirouette::create_twinning_params()
alignment_params <- pirouette::create_alignment_params(
  root_sequence = pirouette::create_blocked_dna(length = 100),
  rng_seed = 314
)

We select our inference models in two ways:

  1. Using the generative model: we use the same site and clock model the alignment was created with. We do have to specify a BEAST2 tree prior that is closest to what we think generated the phylogeny
  2. (for non-Windows users only) Using the inference model that has the most evidence (also: marginal likelihood) from a set of inference models. In this example, we pick all combinations of the one correct site model, the one correct clock model and two tree priors.
type run_if measure evidence inference model
generative always TRUE Birth-Death
candidate best_candidate TRUE Yule
experiment_1 <- create_test_gen_experiment(
  inference_model = create_test_inference_model(
    tree_prior = create_bd_tree_prior()
  )
)
if (rappdirs::app_dir()$os != "win") {
  experiment_2 <- create_test_cand_experiment(
    inference_model = create_test_inference_model(
      tree_prior = create_yule_tree_prior()
    )
  )
  experiment_1$inference_conditions$do_measure_evidence <- TRUE

  experiments <- list(experiment_1, experiment_2)

  twinning_params$twin_evidence_filename <- get_temp_evidence_filename()

  pir_params <- create_pir_params(
    alignment_params = alignment_params,
    experiments = experiments,
    twinning_params = twinning_params,
    evidence_filename = get_temp_evidence_filename()
  )

} else {
  experiments <- list(experiment_1)

  pir_params <- create_pir_params(
    alignment_params = alignment_params,
    experiments = experiments,
    twinning_params = twinning_params
  )
}

Run:

df <- NULL
if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  df <- pir_run(
    phylogeny = phylogeny,
    pir_params = pir_params
  )
} else {
  df <- create_test_pir_run_output()
}

Show as a table:

knitr::kable(df)
tree inference_model inference_model_weight site_model clock_model tree_prior error_1 error_2 error_3
true generative 0.5 JC69 relaxed_log_normal birth_death 0.1 0.11 0.12

Show as a figure:

pirouette::pir_plot(df)

Appendix

library(ggplot2)

True tree

See true tree again:

ape::plot.phylo(phylogeny, main = "The True Phylogeny")

See the alignment generated from the true tree:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  check_file_exists(pir_params$alignment_params$fasta_filename)
  ape::image.DNAbin(
    ape::read.FASTA(file = pir_params$alignment_params$fasta_filename)
  )
}

See the posterior trees generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  # BEAUti offers the '$(tree)' shorthand notation.
  # Here, do what BEAUti does...
  treelog_filename <-
    pir_params$experiments[[1]]$inference_model$mcmc$treelog$filename

  treelog_filename <- gsub(
    x = treelog_filename,
    pattern = "\\$\\(tree\\)",
    replacement = beautier::get_alignment_id(
      pir_params$alignment_params$fasta_filename
    )
  )
  check_file_exists(treelog_filename)
  babette::plot_densitree(tracerer::parse_beast_trees(treelog_filename))
}

See the evidence of the true alignments for the models:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  knitr::kable(readr::read_csv(pir_params$evidence_filename))
}

See the posterior trees generated from the true alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  experiment <- pir_params$experiments[[2]]
  trees_filename <- experiment$inference_model$mcmc$treelog$filename
  check_file_exists(trees_filename)
  babette::plot_densitree(tracerer::parse_beast_trees(trees_filename))
}

See the posterior parameter estimates generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
    # A tracelog's filename is set to NA by default.
    # Here, do what BEAUti does...
    tracelog_filename <-
      pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
    if (is.na(tracelog_filename)) {
      pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename <-
      paste0(
        beautier::get_alignment_id(pir_params$alignment_params$fasta_filename),
        ".log"
      )
    }

  check_file_exists(
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  )
  df <- tracerer::parse_beast_tracelog_file(
    pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
  )
  ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}

See the posterior parameter estimates generated from the true alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" &&
  is_beast2_installed() &&
  is_beast2_ns_pkg_installed()
) {
  experiment <- pir_params$experiments[[2]]
  log_filename <- experiment$inference_model$mcmc$tracelog$filename
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}

Twin tree

See the twin tree:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  ape::plot.phylo(
    ape::read.tree(pir_params$twinning_params$twin_tree_filename),
    main = "The Twin Tree"
  )
}

See the alignment generated from the twin tree:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() &&
    is_beast2_ns_pkg_installed()
) {
  check_file_exists(pir_params$twinning_params$twin_alignment_filename)
  ape::image.DNAbin(
    ape::read.FASTA(file = pir_params$twinning_params$twin_alignment_filename)
  )
}

See the posterior trees generated from the twin alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" &&
  is_beast2_installed() &&
  is_beast2_ns_pkg_installed()
) {
  trees_filename <- to_twin_filename(
    pir_params$experiments[[1]]$inference_model$mcmc$treelog$filename
  )
  check_file_exists(trees_filename)
  babette::plot_densitree(
    tracerer::parse_beast_trees(trees_filename)
  )
}

See the evidence of the true alignments for the models:

if (rappdirs::app_dir()$os != "win" &&
  is_beast2_installed() &&
  is_beast2_ns_pkg_installed()
) {
  twin_evidence_filename <- pir_params$twinning_params$twin_evidence_filename
  check_file_exists(twin_evidence_filename)
  knitr::kable(readr::read_csv(twin_evidence_filename))
}

See the posterior trees generated from the twin alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" &&
  is_beast2_installed() &&
  is_beast2_ns_pkg_installed()
) {
  trees_filename <- to_twin_filename(
    pir_params$experiments[[2]]$inference_model$mcmc$treelog$filename
  )
  check_file_exists(trees_filename)
  babette::plot_densitree(tracerer::parse_beast_trees(trees_filename))
}

See the posterior parameter estimates generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" &&
    is_beast2_installed() && is_beast2_ns_pkg_installed()) {
  log_filename <- to_twin_filename(
    pir_params$experiments[[2]]$inference_model$mcmc$tracelog$filename
    )
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}

See the posterior parameter estimates generated from the true alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" &&
  is_beast2_installed() &&
  is_beast2_ns_pkg_installed()
) {
  log_filename <- to_twin_filename(
    pir_params$experiments[[2]]$inference_model$mcmc$tracelog$filename
  )
  check_file_exists(log_filename)
  df <- tracerer::parse_beast_tracelog_file(log_filename)
  ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}