This vignette illustrates the most useful functions of yatah.
library(dplyr)
#> Warning: le package 'dplyr' a été compilé avec la version R 4.1.2
library(yatah)

For this example, we use data from Zeller et al. (2014). The dataset contains the abundances of bacteria present in 199 stool samples.
abundances <- as_tibble(yatah::abundances)
print(abundances, max_extra_cols = 2)
#> # A tibble: 1,585 × 200
#> lineages CCIS0…¹ CCIS0…² CCIS0…³ CCIS0…⁴ CCIS0…⁵ CCIS0…⁶ CCIS0…⁷ CCIS0…⁸
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 k__Bacteria 1.00e+2 9.98e+1 96.3 9.99e+1 9.76e+1 9.41e+1 1.00e+2 98.7
#> 2 k__Viruses 6.97e-3 1.28e-1 3.70 4.71e-3 2.39e+0 1.10e+0 1.37e-3 0
#> 3 k__Bacteria|… 6.62e+1 2.46e+1 74.2 4.51e+1 6.46e+1 6.34e+1 8.25e+1 50.5
#> 4 k__Bacteria|… 1.91e+1 7.44e+1 11.9 5.30e+1 2.26e+1 2.17e+1 1.24e+1 21.5
#> 5 k__Bacteria|… 1.21e+1 4.28e-2 7.22 7.88e-2 1.73e+0 4.66e+0 3.59e+0 21.1
#> 6 k__Bacteria|… 1.86e+0 4.28e-1 0.765 3.61e-1 6.93e-1 2.27e+0 1.17e-2 0.978
#> 7 k__Bacteria|… 7.58e-1 3.88e-1 2.28 9.85e-1 7.94e+0 2.07e+0 1.44e+0 4.66
#> 8 k__Viruses|p… 6.97e-3 1.28e-1 3.70 4.71e-3 2.39e+0 1.10e+0 1.37e-3 0
#> 9 k__Bacteria|… 1.55e-3 4.15e-3 0 0 6.43e-3 4.76e-3 2.05e-3 0.0042
#> 10 k__Bacteria|… 6.24e+1 2.17e+1 62.3 4.40e+1 6.00e+1 5.78e+1 7.41e+1 50.2
#> # … with 1,575 more rows, 191 more variables: `CCIS06260551ST-3-0` <dbl>,
#> # `CCIS07277498ST-4-0` <dbl>, …, and abbreviated variable names
#> # ¹`CCIS00146684ST-4-0`, ²`CCIS00281083ST-3-0`, ³`CCIS02124300ST-4-0`,
#> # ⁴`CCIS02379307ST-4-0`, ⁵`CCIS02856720ST-4-0`, ⁶`CCIS03473770ST-4-0`,
#> # ⁷`CCIS03857607ST-4-0`, ⁸`CCIS05314658ST-4-0`

taxonomy <- select(abundances, lineages)
taxonomy
#> # A tibble: 1,585 × 1
#> lineages
#> <chr>
#> 1 k__Bacteria
#> 2 k__Viruses
#> 3 k__Bacteria|p__Firmicutes
#> 4 k__Bacteria|p__Bacteroidetes
#> 5 k__Bacteria|p__Actinobacteria
#> 6 k__Bacteria|p__Verrucomicrobia
#> 7 k__Bacteria|p__Proteobacteria
#> 8 k__Viruses|p__Viruses_noname
#> 9 k__Bacteria|p__Candidatus_Saccharibacteria
#> 10 k__Bacteria|p__Firmicutes|c__Clostridia
#> # … with 1,575 more rows

Here, we have all the bacteria present, at all different ranks. As we
are only interested in genera that belong to the
Gammaproteobacteria class, we `filter()` the
lineages with `is_clade()` and `is_rank()`. The
genus name is accessible with `last_clade()`.
gammap_genus <-
taxonomy %>%
filter(is_clade(lineages, "Gammaproteobacteria"),
is_rank(lineages, "genus")) %>%
mutate(genus = last_clade(lineages))
gammap_genus
#> # A tibble: 26 × 2
#> lineages genus
#> <chr> <chr>
#> 1 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Esch…
#> 2 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Haem…
#> 3 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Ente…
#> 4 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadale… Pseu…
#> 5 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Ente…
#> 6 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Aggr…
#> 7 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Hafn…
#> 8 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Acti…
#> 9 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Xanthomonadale… Sino…
#> 10 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Citr…
#> # … with 16 more rows

It is useful to have a taxonomic table; `taxtable()` does
the job.
gammaprot_table <-
gammap_genus %>%
pull(lineages) %>%
taxtable()
as_tibble(gammaprot_table)
#> # A tibble: 26 × 6
#> kingdom phylum class order family genus
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Esch…
#> 2 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales Pasteure… Haem…
#> 3 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Ente…
#> 4 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomo… Pseu…
#> 5 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Ente…
#> 6 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales Pasteure… Aggr…
#> 7 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Hafn…
#> 8 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales Pasteure… Acti…
#> 9 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales Sinobact… Sino…
#> 10 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Citr…
#> # … with 16 more rows

To build a tree, use `taxtree()` with a taxonomic table as
input. By default, it collapses ranks with only one subrank.
gammaprot_tree <- taxtree(gammaprot_table)
gammaprot_tree
#>
#> Phylogenetic tree with 26 tips and 7 internal nodes.
#>
#> Tip labels:
#> Escherichia, Enterobacteriaceae_noname, Enterobacter, Hafnia, Citrobacter, Pantoea, ...
#> Node labels:
#> Gammaproteobacteria, Enterobacteriaceae, Pasteurellaceae, Pseudomonadales, Moraxellaceae, Xanthomonadales, ...
#>
#> Rooted; includes branch lengths.

plot(gammaprot_tree, show.node.label = TRUE, cex = 0.7,
main = "Taxonomy of Gammaproteobacteria")