: No 1. Indicator Analysis

Data Preparation

Here, we use unrarefied data to identify the core. First, we get ps object containing all unrarefied sample fractions (i.e. fish, water, & potential prey items).

ps.slv.tree.mbio.bocas <- readRDS("rdata/p1/ps_16S_capis_bocas_all_unrar.rds")
ps.slv.tree.mbio.bocas

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 10711 taxa and 127 samples ]
sample_data() Sample Data:       [ 127 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 10711 taxa by 11 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 10711 tips and 10710 internal nodes ]

Here is a summary table of all samples.

Next, we make a column in taxonomy table for ASV IDs by number. Caution, ASV ID numbers will differ between the core (unrarefied) and the rarefied data set.

tax_table(ps.slv.tree.mbio.bocas) <- cbind(tax_table(ps.slv.tree.mbio.bocas),
                                           rownames(tax_table(ps.slv.tree.mbio.bocas)))

colnames(tax_table(ps.slv.tree.mbio.bocas)) <-
        c("Kingdom", "Phylum", "Class",
          "Order", "Family", "Genus", "ASV")

Next, we change row names in taxonomy table to AVS IDs for plotting purposes.

taxa_names(ps.slv.tree.mbio.bocas) <- 1:ntaxa(ps.slv.tree.mbio.bocas)
taxa_names(ps.slv.tree.mbio.bocas) <- paste("ASV_",
                                            taxa_names(ps.slv.tree.mbio.bocas),
                                            sep = "") #JJS# Changed

tax.unrar.bocas.all <- data.frame(tax_table(ps.slv.tree.mbio.bocas))
tax.unrar.new <- data.frame(ASV_ID = row.names(tax.unrar.bocas.all),
                            tax.unrar.bocas.all)
tax.unrar.new <- tax.unrar.new[, c(2,3,4,5,6,7,1,8)]
tax.unrar.new2 <- as.matrix(tax.unrar.new)
ps.slv.tree.mbio.bocas <- merge_phyloseq(otu_table(ps.slv.tree.mbio.bocas),
                                         tax_table(tax.unrar.new2),
                                         sample_data(ps.slv.tree.mbio.bocas),
                                         phy_tree(ps.slv.tree.mbio.bocas))
saveRDS(ps.slv.tree.mbio.bocas, "rdata/p1/ps_16S_bocas_unrar_all_ASVID.rds")

We will also rename all NA taxa by the next highest named rank. To do this we first, create a copy of the original ps object before renaming taxa. That way we have a copy for other analyses.

ps.slv.tree.mbio.bocas_o <- ps.slv.tree.mbio.bocas

tax.clean <- data.frame(tax_table(ps.slv.tree.mbio.bocas))
for (i in 1:6){ tax.clean[,i] <- as.character(tax.clean[,i])}
tax.clean[is.na(tax.clean)] <- ""

for (i in 1:nrow(tax.clean)){
    if (tax.clean[i,2] == ""){
        kingdom <- base::paste("k", tax.clean[i,1], sep = "_")
        tax.clean[i, 2:6] <- kingdom
    } else if (tax.clean[i,3] == ""){
        phylum <- base::paste("p", tax.clean[i,2], sep = "_")
        tax.clean[i, 3:6] <- phylum
    } else if (tax.clean[i,4] == ""){
        class <- base::paste("c", tax.clean[i,3], sep = "_")
        tax.clean[i, 4:6] <- class
    } else if (tax.clean[i,5] == ""){
        order <- base::paste("o", tax.clean[i,4], sep = "_")
        tax.clean[i, 5:6] <- order
    } else if (tax.clean[i,6] == ""){
        tax.clean$Genus[i] <- base::paste("f",tax.clean$Family[i], sep = "_")
        }
}
rm(class, order, phylum, kingdom)

Then, we create new ASV names that have lowest rank name attached.

tax.clean <- tax.clean %>% unite("ASV_IDa", Genus:ASV_ID,
                                 remove = FALSE, sep = "_")
tax.clean <- tax.clean %>% unite("ASV_IDb", ASV_ID:Genus,
                                 remove = FALSE, sep = "_")
tax.clean <- tax.clean[, c(1,2,3,4,5,8,9,6,7,10)]
tax.clean$ASV_IDa <-
  str_replace_all(tax.clean$ASV_IDa,
                  'Clostridium_sensu_stricto_[0-9]',
                  'Clostridium')
tax.clean$ASV_IDb <-
  str_replace_all(tax.clean$ASV_IDb,
                  'Clostridium_sensu_stricto_[0-9]',
                  'Clostridium')
tax.clean$ASV_IDc <- tax.clean$ASV_IDa
tax.clean$ASV_IDc <-
  str_replace_all(tax.clean$ASV_IDc,
                  '_ASV', '')
tax.clean <- tax.clean[, c(1,2,3,4,5,6,7,8,9,11,10)]
write.csv(tax.clean, "tables/p1/tax_all_new_no_na.csv")

And finally, add the taxonomy table back to the phyloseq object.

tax_table(ps.slv.tree.mbio.bocas) <- as.matrix(tax.clean)
rank_names(ps.slv.tree.mbio.bocas)
saveRDS(ps.slv.tree.mbio.bocas, "rdata/p1/ps_16S_bocas_unrar_all_ASVID_no_NA.rds")

Indicator Analysis to Identify Core Taxa

Now we can run the Indicator Analysis.

Step one is to generate an ASV table without row names.

data.IndVal.env.fish <- data.frame(otu_table(ps.slv.tree.mbio.bocas))
data.IndVal.env.fish
data.IndVal.ASV <- tibble::remove_rownames(data.IndVal.env.fish)

And then generate an Indicator value group file.

data.IndVal.group <- data.frame(sample_data(ps.slv.tree.mbio.bocas)) %>%
  select(Fraction)
data.IndVal.group$Status <- data.IndVal.group$Fraction
data.IndVal.group <- data.IndVal.group[, c(2,1)]
data.IndVal.group$Status <- str_replace(data.IndVal.group$Status, "Environment", "1")
data.IndVal.group$Status <- str_replace(data.IndVal.group$Status, "Fish", "2")
data.IndVal.group <- tibble::rownames_to_column(data.IndVal.group, "Label")
data.IndVal.group$Status <-  as.integer(data.IndVal.group$Status)
data.IndVal.group$Fraction <-  as.character(data.IndVal.group$Fraction)

Next, calculate the indicator values. We set a seed for reproducibility and saved a table of results.

set.seed(1280)
iva <- indval(data.IndVal.ASV, data.IndVal.group$Status)
#Table of the significant indicator species at p= 0.01
gr <- iva$maxcls[iva$pval <= 0.01]
iv <- iva$indcls[iva$pval <= 0.01]
pv <- iva$pval[iva$pval <= 0.01]
fr <- apply(data.IndVal.ASV > 0, 2, sum)[iva$pval <= 0.01]
indval.out <- data.frame(group = gr, indval = iv, pval = pv, freq = fr)
indval.out <- indval.out[order(indval.out$group, -indval.out$indval),]
indval.out
write.csv(indval.out,
          file = "tables/p1/IndVal_microbiome_env_fish_output_p01_new0.csv")

Finally, we correct p-values for repeated testing and save a new table of corrected results.

indval.out.prob.corrected = p.adjust(indval.out$pval, "bonferroni")
write.csv(indval.out.prob.corrected,
          file = "tables/p1/IndVal_microbiome_env_fish_bonf_new0.csv")

Fish Core Summary

Now we plot the results of the fish core by subsetting only the ASVs that were significant for fish. First, we grab the ASVs from the phyloseq object.

fish_ind_asvs <- indval.out[indval.out$group == 2, ]
fish_ind_asvs_list <- row.names(fish_ind_asvs)
ps.indv.core.01 <- subset_taxa(ps.slv.tree.mbio.bocas,
                               rownames(tax_table(ps.slv.tree.mbio.bocas)) %in%
                                       fish_ind_asvs_list)
ps.indv.core.01

Then pull out the fish-only samples.

ps.indv.core.fish <- subset_samples(ps.indv.core.01, Fraction == "Fish")
sample_data(ps.indv.core.fish)
tax.core <- tax_table(ps.indv.core.fish)

write.csv(tax.core, file = "tables/p1/tax_indVal_core.csv")
tax_core_mod <- read.csv("tables/p1/tax_indVal_core.csv", header = TRUE, row.names = 1)
tax_core_mod2 <- as.matrix(tax_core_mod)

And add the modified tax table to ps object and save ps object. This contains all ASVs identified with Indicator analysis at 0.01 for fish gut samples.

ps.indv.core.fish <- merge_phyloseq(otu_table(ps.indv.core.fish),
                                    tax_table(tax_core_mod2),
                                    sample_data(ps.indv.core.fish),
                                    phy_tree(ps.indv.core.fish))
saveRDS(ps.indv.core.fish, "rdata/p1/ps_indv01_core_fish.rds")
ps.indv.core.fish <- readRDS("rdata/p1/ps_indv01_core_fish.rds")

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 27 taxa and 89 samples ]
sample_data() Sample Data:       [ 89 samples by 11 sample variables ]
tax_table()   Taxonomy Table:    [ 27 taxa by 11 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 27 tips and 26 internal nodes ]

Here is a summary of the data set.

Metric	Results
Min. number of reads	326
Max. number of reads	63937
Total number of reads	2775814
Average number of reads	31189
Median number of reads	33658
Sparsity	0.513
Any ASVs sum to 1 or less?	FALSE
Number of singleton ASVs	NA
Percent of ASVs that are singletons	NA

And a look at the individual ASVs.

Fish Core Plots

First, we set all of the plot parameters

ASV_colors <- c(
        "mediumaquamarine","darkcyan", "darkseagreen2",
        "springgreen4", "lightseagreen","aquamarine2",
        "turquoise", "aquamarine","aquamarine4", "darkseagreen4",
        "dodgerblue1","dodgerblue3", "lightsalmon3", "coral",
        "coral2","darksalmon","lightsalmon","coral3", "salmon3" ,
        "indianred", "indianred2","salmon2", "salmon","lightsalmon4",
        "lavender","darkslategrey","plum3")

level_order <- c('Outer bay',
                 'Inner bay',
                 'Inner bay disturbed')
level_order2 <- c('SCR', 'PPR', 'CCR', 'ALR',
                  'SIS','ROL','RNW', 'PST', 'PBL')
asv_order <- rev(c("ASV_95", "ASV_589", "ASV_2", "ASV_25", "ASV_18",
               "ASV_15", "ASV_14", "ASV_94", "ASV_30", "ASV_10",
               "ASV_9", "ASV_24", "ASV_74", "ASV_19", "ASV_39",
               "ASV_41", "ASV_27", "ASV_7", "ASV_68", "ASV_6",
               "ASV_59", "ASV_5", "ASV_3", "ASV_17", "ASV_163",
               "ASV_11", "ASV_1"))

Then we agglomerate at phylum level and transform read counts to relative abundance.

capis.core <- ps.indv.core.fish %>%
        tax_glom(taxrank = "ASV_IDa") %>%
        transform_sample_counts(function(x) {x/sum(x)} ) %>%
        psmelt() %>%
        filter(Abundance > 0.00) %>%
        arrange(ASV_IDa)
capis.core$ASV_ID <- gdata::reorder.factor(capis.core$ASV_ID, new.order = asv_order)
capis.core <- capis.core %>% dplyr::arrange(ASV_ID)
capis.core$ASV_IDa <- factor(capis.core$ASV_IDa, levels = unique(capis.core$ASV_IDa))
levels(capis.core$ASV_IDa)
attributes(capis.core$ASV_IDa)

Plot first figure.

fig01 <- ggplot(capis.core, aes(x = factor(Zone, level = level_order),
                                y = Abundance, fill = ASV_IDa)) +
        geom_bar(stat = "identity", position = "fill") +
        scale_fill_manual(values = ASV_colors) +
        scale_x_discrete("Zone", expand = waiver(),
                         position = "bottom", drop = FALSE) +
        guides(fill = guide_legend(reverse = TRUE,
                                   keywidth = 1,
                                   keyheight = 1,
                                   title="ASV ID")) + #JJS# added new title
        ylab("") +
        ggtitle("", subtitle = "Core across zones") +
        theme(plot.title = element_text(size = 18)) +
        theme(plot.subtitle = element_text(size = 16)) +
        theme_cowplot()

Plot second figure.

fig02 <-
        ggplot(capis.core, aes(x = Species, y = Abundance, fill = ASV_IDa)) +
        geom_bar(stat = "identity", position = "fill") +
        scale_fill_manual(values = ASV_colors) +
        scale_x_discrete("", expand = waiver(),
                         position = "bottom", drop = FALSE) +
        guides(fill = guide_legend(reverse = TRUE,
                                   keywidth = 1,
                                   keyheight = 1,
                                   title = "ASV ID")) + #JJS# added new title
        ylab("Relative Abundance (ASV)") +
        ggtitle("Fish gut core microbiome", subtitle = "") +
        theme(plot.title = element_text(size = 18)) +
        theme_cowplot()

That’s the end of Script 1. In the next Script we compare taxonomic composition across different sample fractions.

Source Code

The source code for this page can be accessed on GitHub by clicking this link.

No 1. Indicator Analysis

Data Preparation

Indicator Analysis to Identify Core Taxa

Fish Core Summary

Fish Core Plots

Source Code

Corrections

Reuse