Loading the data

rm (list = ls())
library("phyloseq");library("ggplot2");library("gridExtra");library("RColorBrewer");library("colorspace");
library("lattice");library("plyr");library("pracma");library("ggthemes");library("pheatmap");library("ComplexHeatmap");
library("BiocParallel");library("genefilter");library("DESeq2");library("tidyr");library("viridis");library(data.table);
library("KEGGREST")

setwd("~/Desktop/metagenomics/Jan04_17_results")

register(MulticoreParam(30))
asinTransform <- function(p) { asin(sqrt(p)) }

mainDir <- "../metagenomics/"
subDir <- "metaphlan_profiling_cured"
dir.create(file.path(mainDir, subDir), showWarnings = TRUE,  recursive = TRUE)
results_folder <- paste(mainDir,subDir,sep="")
mtph_subDir <- "metaphlan_profiling/"
dir.create(file.path(mainDir, mtph_subDir), showWarnings = TRUE,  recursive = TRUE)
mtph_results_folder <- paste(mainDir,mtph_subDir,sep="")
mtph_dat <- read.csv("../tbru_ss_data/metaphlan2_taxonomic_table_joined.tsv",sep="\t")
mtph <-mtph_dat
rownames(mtph) <-mtph$X.SampleID
mtph$X.SampleID<-NULL
mtph_otudata <-otu_table(mtph,taxa_are_rows = TRUE)
mtph_taxmat <- matrix(sample(letters, nrow(mtph_otudata)/10, replace = TRUE), nrow = nrow(mtph_otudata), ncol = 7)
rownames(mtph_taxmat) <- rownames(mtph_otudata)
colnames(mtph_taxmat) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
mtph_taxmat <-tax_table(mtph_taxmat)
mtph_phy <- phyloseq(mtph_otudata, mtph_taxmat)

Import the metaphlan data to phyloseq

metadata.file <- "../tbru_ss_data/TBRU_Metadata_May2017.csv"
metadata <- read.csv(metadata.file,na.strings=c("N/A","99999")) 
metadata[is.na(metadata)]<-"99999"
metadata$sample<-gsub("-",".",metadata$sample)
samp <- metadata %>% data.frame() 

rownames(samp)<-metadata$sample
snames<-as.data.frame(do.call('rbind',strsplit(sample_names(mtph_phy),"_")))[,1]
sample_names(mtph_phy)<-snames
sample_data(mtph_phy)<-samp

# subsettings
mtph_phy_pos_cured_no_treat<-subset_samples(mtph_phy,!is.na(TB_status) & TB_status!="treatment")
mtph_phy_pos_cured_no_treat <- subset_samples(mtph_phy_pos_cured_no_treat,!is.na(Group6_TB_category) & Group6_TB_category!="family_member")

# keep only <38 years old in age
mtph_phy_pos_cured_no_treat <- subset_samples(mtph_phy_pos_cured_no_treat,age<=38)
sample_data(mtph_phy_pos_cured_no_treat)$TB_status <- relevel(sample_data(mtph_phy_pos_cured_no_treat)$TB_status,"healthy")

splitted_names_tbru<-strsplit(taxa_names(mtph_phy_pos_cured_no_treat),"\\|")
splitted_names_tbru_length<-lengths(splitted_names_tbru)
taxa_names(mtph_phy_pos_cured_no_treat)[splitted_names_tbru_length==7]
mtph_phy_pos_cured_no_treat.speclevel<-prune_taxa(taxa_names(mtph_phy_pos_cured_no_treat)[splitted_names_tbru_length==7],
                                                  mtph_phy_pos_cured_no_treat)
taxa_names(mtph_phy_pos_cured_no_treat.speclevel)<-do.call('rbind',strsplit(taxa_names(mtph_phy_pos_cured_no_treat.speclevel),"\\|"))[,7]

Principle corrdinate analysis

pdf(paste(mtph_results_folder,'pca_plot_cured_NMDS.pdf',sep=""),height = 7, width = 10)
mtph_GP.ord <- ordinate(mtph_phy_pos_cured_no_treat, "NMDS", "bray")
mtph.p.pca <- plot_ordination(mtph_phy_pos_cured_no_treat, mtph_GP.ord, type="samples", 
                              color="TB_status") 
Palette_LTBI_cured <- c("#377eb8","#e7298a")
mtph.p.pca <- mtph.p.pca + geom_point(size=4)+
  theme_base()+
  scale_color_manual(values=c(Palette_LTBI_cured))
print(mtph.p.pca)
dev.off()

metaphlan_profilingpca_plot_cured_NMDS

assignCols <- function(names,selection) {
  mymap<-list()
  availableCols <-c("#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
                    "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
                    "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
                    "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
                    "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
                    "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
                    "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
                    "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
                    "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
                    "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
                    "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
                    "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
                    "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C",
                    "#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800",
                    "#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51",
                    "#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58")
  mymap[[1]]<-rev(availableCols[match(intersect(names,selection),names)])
  mymap[[2]]<-intersect(names,selection)
  mymap
};
most_abundant_number <-40
topN =  most_abundant_number
most_abundant_taxa = sort(taxa_sums(mtph_phy_pos_cured_no_treat.speclevel), TRUE)[1:topN]
GP = prune_taxa(names(most_abundant_taxa), mtph_phy_pos_cured_no_treat.speclevel)
mGPr <- psmelt(GP)
mycol <- assignCols(unique(mGPr$OTU),unique(mGPr$OTU))
pdf(paste(results_folder,'barplot_all.pdf',sep=""),height = 7, width = 10)
p.bar<-ggplot(data=mGPr,aes(Patient.ID, Abundance, fill=OTU))+
  geom_bar(stat = "identity") +
  theme_base()+
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) +
  scale_fill_manual(values = mycol[[1]],breaks=mycol[[2]]) +
  facet_wrap(~TB_status,scales="free")+
  xlab("")
print(p.bar)
dev.off()

metaphlan_profilingbarplot_alls

Run LEfSe on Cured vs LTBI

system("echo $PATH")

# # #aquire sample data from phy
phy.lefse<-mtph_phy_pos_cured_no_treat.speclevel
class <- "TB_status"
subclass<-FALSE
subject<-"sample"
anova.alpha<-0.05
wilcoxon.alpha<-0.05
lda.cutoff<-2.0
wilcoxon.within.subclass <- TRUE
one.against.one <- FALSE 
mult.test.correction <- 0 
make.lefse.plots <- FALSE
by_otus <- FALSE
# 
sample.data <- phyloseq::sample_data(phy.lefse) %>% data.frame(stringsAsFactors = FALSE)
sample.data$sample <- rownames(sample.data)
# 
keepvars <- c("sample","TB_status")
keepvars <- unique(keepvars[!is.na(keepvars)])
lefse.samp <- sample.data[, keepvars]
# 
sample0 <- t(lefse.samp) %>% as.matrix()
colnames(sample0) <- sample0[1,]
sample0 <- as.data.frame(sample0)
# 
data0 <- otu_table(phy.lefse) %>% as.data.frame()
data1 <- data0 %>% as.data.table(keep.rownames=T)
sample1 <- sample0 %>% as.data.table(keep.rownames=T)
common <- intersect(colnames(data1), colnames(sample1))
pre.lefse <- rbind(sample1, data1,fill=T) %>% t() %>% na.omit() %>% t()
# 
write.table(pre.lefse,file =paste(results_folder,"lefse.txt",sep=""),sep = "\t",row.names = FALSE,col.names = FALSE,quote = FALSE)
# 
opt.class <- paste("-c", which(keepvars %in% class))
opt.subclass <- ifelse(is.na(subclass), "", paste("-s", which(keepvars %in% 
                                                                subclass)))
opt.subject <- ifelse(is.na(subject), "", paste("-u", which(keepvars %in% 
                                                              subject)))
format.command <- paste(paste("~/miniconda2/bin/python ~/lefse/format_input.py ",results_folder,"lefse.txt ",results_folder,"lefse.in",sep=""), 
                        opt.class, opt.subject, "-o 1000000")
system(format.command)
# 
lefse.command <- paste(paste("run_lefse.py ",results_folder,"lefse.in " ,results_folder, "lefse.res",sep=""), 
                       "-a", anova.alpha, "-w", wilcoxon.alpha, "-l", lda.cutoff, 
                       "-e", as.numeric(wilcoxon.within.subclass), "-y", as.numeric(one.against.one), 
                       "-s", mult.test.correction)
lefse.command
system(lefse.command)


lefse.out <- read.table(paste(results_folder,"lefse.res",sep=""), header = FALSE, sep = "\t")
names(lefse.out)<-c("taxon","log.max.pct","direction","lda","p.value")
(lefse.out<-na.omit(lefse.out))

if (make.lefse.plots) {
  system("plot_res.py lefse.res lefse_lda.png")
  print("Wrote lefse_lda.png")
  system("plot_cladogram.py lefse.res lefse_clado.pdf --format pdf")
  print("Wrote lefse_clado.pdf")
}
# 
pdf(paste(results_folder,'lda_plot_LEFSE.pdf',sep=""),height = 4, width = 10)
Palette_LTBI_cured <- c("#e7298a","#377eb8")
if(length(unique(lefse.out$direction))<3){
  lefse.out$lda[lefse.out$direction==unique(lefse.out$direction)[1]] <- 
    -1*lefse.out$lda[lefse.out$direction==unique(lefse.out$direction)[1]]
}
lefse.out$taxon<-factor(lefse.out$taxon,levels=lefse.out$taxon[order(lefse.out$lda)])
g1<-ggplot(data=lefse.out,aes(x=taxon,y=lda,color=direction,fill=direction))+
  geom_bar(stat="identity")+
  coord_flip()+
  theme_base()+
  xlab("LDA Score")+
  ylab("")
if(length(unique(lefse.out$direction))<3){
  g1<-g1+scale_color_manual(values=c(Palette_LTBI_cured))+
    scale_fill_manual(values=c(Palette_LTBI_cured))
}
print(g1)
dev.off()

metaphlan_profilinglda_plot_LEFSE

ltk<-as.character(lefse.out$taxon)
phy_ra_ltk<-prune_taxa(ltk,phy.lefse)
phy_ra_ltk_m<-psmelt(phy_ra_ltk)

phy_ra_ltk_m$OTU<-factor(phy_ra_ltk_m$OTU, levels=lefse.out$taxon[order(lefse.out$lda)])

Palette_LTBI_cured <- c("#377eb8","#e7298a")
g2<-ggplot(phy_ra_ltk_m,aes(x=OTU,
                            y=Abundance,color=TB_status,
                            fill=TB_status))+
  geom_boxplot(position=position_dodge(),
               colour="black", # Use black outlines,
               size=.3,alpha=0.5) +      # Thinner lines
  theme_base()+
  xlab("")+
  coord_flip()

if(length(unique(lefse.out$direction))<3){
  g2<-g2+scale_color_manual(values=c(Palette_LTBI_cured))+
    scale_fill_manual(values=c(Palette_LTBI_cured))
}
pdf(paste(results_folder,'significant_LEFSE.pdf',sep=""),height = 6, width = 10)
print(g2)
dev.off()

metaphlan_profilingsignificant_LEFSE