Global Patterns Data Vignette

This vignette explores the Global Patterns microbiome data available from phyloseq, which includes water samples, land samples, and human samples.

Learn more about the phyloseq package here.

Additionally, the package speedyseq is necessary to use the function prep_mdf(). The package speedyseq provides faster versions of phyloseq’s plotting and taxonomic merging functions. Alternatively, the phyloseq object can be melted and transformed by using phyloseq functions tax_glom() and/or transform_sample_counts(), and melted by using psmelt().

Load the required packages

library(microshades)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(cowplot)
library(patchwork)
library(forcats)
library(tidyverse)

# The dataset Global Patterns is a phyloseq object available from the Phyloseq package
data(GlobalPatterns)

Use the microshades functions

# Agglomerate and normalize the phyloseq object, and melt to a data frame
mdf_prep <- prep_mdf(GlobalPatterns, remove_na = TRUE)

# There is an alternative to using this function if you do not have speedyseq:
# 
# mdf_prep <- GlobalPatterns %>%
#         tax_glom("Genus") %>%
#         phyloseq::transform_sample_counts(function(x) { x/sum(x) }) %>%
#         psmelt() %>%
#         filter(Abundance > 0)
#
# both options will produce the same results, however the microshades prep_mdf() uses the   
# speedyseq package to increase the speed of tax_glom() and psmelt(), which may be preferable  
# when working with large datasets

  

# Generate a color object for the specified data
color_objs_GP <- create_color_dfs(mdf_prep,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

# Extract
mdf_GP <- color_objs_GP$mdf
cdf_GP <- color_objs_GP$cdf

Plot

Use mdf_GP as the object to plot and use cdf_GP to assign the correct color assignments.

# Plot
plot <- plot_microshades(mdf_GP, cdf_GP)

plot_1 <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.key.size = unit(0.2, "cm"), text=element_text(size=10)) +
  theme(axis.text.x = element_text(size= 6)) 

plot_1 

The plot_microshades returns a ggplot object, that allows for additional specifications for the plot to be declared. For example, this allows users to facet samples and more descriptive elements.

plot_2 <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.key.size = unit(0.2, "cm"), text=element_text(size=10)) +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme (strip.text.x = element_text(size = 6))

plot_2

Plot with custom legend

To Ensure that all elements of the custom legend are visible, adjust the legend_key_size and legend_text_size. Additionaly, the fig.height and fig.width may need to be declared.

GP_legend <-custom_legend(mdf_GP, cdf_GP)

plot_diff <- plot + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none")  +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme(axis.text.x = element_text(size= 6)) + 
  theme(plot.margin = margin(6,20,6,6))

plot_grid(plot_diff, GP_legend,  rel_widths = c(1, .25))

Plot with extended Proteobacteria colors

new_groups <- extend_group(mdf_GP, cdf_GP, "Phylum", "Genus", "Proteobacteria", existing_palette = "micro_cvd_orange", new_palette = "micro_orange", n_add = 5)
## Joining with `by = join_by(Top_Phylum, Top_Genus, group, hex)`
## Joining with `by = join_by(Top_Phylum, Top_Genus, group, hex)`
GP_legend_new <-custom_legend(new_groups$mdf, new_groups$cdf)

plot_diff <- plot_microshades(new_groups$mdf, new_groups$cdf) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none")  +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2) +
  theme(axis.text.x = element_text(size= 6)) + 
  theme(plot.margin = margin(6,20,6,6))

plot_grid(plot_diff, GP_legend_new,  rel_widths = c(1, .25))

Re-examine data with smaller groups

# Subset Global Patterns to smaller groups in the dataset
ps_water <- subset_samples(GlobalPatterns, SampleType %in% c("Freshwater", "Freshwater (creek)", "Ocean"))

ps_land <- subset_samples(GlobalPatterns, SampleType %in% c("Soil", "Sediment (estuary)"))

ps_human <- subset_samples(GlobalPatterns, SampleType %in% c("Skin", "Feces", "Tongue"))

# Agglomerate and normalize the phyloseq objects, and melt to a data frame
mdf_water <- prep_mdf(ps_water)
mdf_land <- prep_mdf(ps_land)
mdf_human <- prep_mdf(ps_human)

# Generate a color object for the specified data
color_objs_water <- create_color_dfs(mdf_water,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_water <- reorder_samples_by(color_objs_water$mdf, color_objs_water$cdf)

color_objs_land <- create_color_dfs(mdf_land,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_land <- reorder_samples_by(color_objs_land$mdf, color_objs_land$cdf)

color_objs_human <- create_color_dfs(mdf_human,selected_groups = c("Verrucomicrobia", "Proteobacteria", "Actinobacteria", "Bacteroidetes",
    "Firmicutes") , cvd = TRUE)

color_objs_human <- reorder_samples_by(color_objs_human$mdf, color_objs_human$cdf)

# Extract
mdf_water <- color_objs_water$mdf
cdf_water <- color_objs_water$cdf

mdf_land <- color_objs_land$mdf
cdf_land <- color_objs_land$cdf

mdf_human <- color_objs_human$mdf
cdf_human <- color_objs_human$cdf
Water Samples
water_legend <-custom_legend(mdf_water, cdf_water)

water_plot <- plot_microshades(mdf_water, cdf_water) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(water_plot, water_legend,  rel_widths = c(1, .25))

Plot contributions
freshwater_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Freshwater")
## Adding missing grouping variables: `Sample`
creek_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Freshwater (creek)")
## Adding missing grouping variables: `Sample`
ocean_contribution <- plot_contributions(mdf_water, cdf_water, "SampleType", "Ocean")
## Adding missing grouping variables: `Sample`
freshwater_contribution$box + 
  creek_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  ocean_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

freshwater_contribution$mean + 
  creek_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  ocean_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

freshwater_contribution$median + 
  creek_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) +
  ocean_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

Land samples
land_legend <-custom_legend(mdf_land, cdf_land)

land_plot <- plot_microshades(mdf_land, cdf_land) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(land_plot, land_legend,  rel_widths = c(1, .25))

Plot contributions
sediment_contribution <- plot_contributions(mdf_land, cdf_land, "SampleType", "Sediment (estuary)")
## Adding missing grouping variables: `Sample`
soil_contribution <- plot_contributions(mdf_land, cdf_land, "SampleType", "Soil")
## Adding missing grouping variables: `Sample`
sediment_contribution$box + 
  soil_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 

sediment_contribution$mean + 
  soil_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 

sediment_contribution$median + 
  soil_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) 

Human Samples
human_legend <-custom_legend(mdf_human, cdf_human)

human_plot <- plot_microshades(mdf_human, cdf_human) + 
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size= 8)) +
  facet_wrap(~SampleType, scales = "free_x") +
  theme (strip.text.x = element_text(size = 8)) 


plot_grid(human_plot, human_legend,  rel_widths = c(1, .25))

Plot contributions
feces_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Feces")
## Adding missing grouping variables: `Sample`
skin_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Skin")
## Adding missing grouping variables: `Sample`
tongue_contribution <- plot_contributions(mdf_human, cdf_human, "SampleType", "Tongue")
## Adding missing grouping variables: `Sample`
feces_contribution$box + 
  skin_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  tongue_contribution$box + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

feces_contribution$mean + 
  skin_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) + 
  tongue_contribution$mean + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())

feces_contribution$median + 
  skin_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank()) +
  tongue_contribution$median + theme(axis.title.y=element_blank(), axis.text.y= element_blank(), axis.ticks.y=element_blank())