This repository was archived by the owner on Feb 26, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
2. Statistical analyses in R
ahkim87 edited this page Oct 1, 2021
·
84 revisions
- Fisher's exact test
- Shapiro normality test
- t-test
- Wilcoxon Mann Whitney test
- ANOVA test
- Kruskal-Wallis test
R markdown
rstatix.Rmd
Data frame
rstatix_df.txt
Load packages
# load packages
library("tidyverse"); packageVersion("tidyverse")
library("ggplot2"); packageVersion("ggplot2")
library("rstatix"); packageVersion("rstatix")
library("dplyr"); packageVersion("dplyr")
library("ggpubr"); packageVersion("ggpubr")
Read dataframe
# read rstatix.df.txt
rstatix.df <- read.delim(file.choose())
rstatix.df
# count how many samples have 0 ASV_abundance in each group
rstatix.df %>% group_by(Groups) %>% count(ASV_abundance == 0)
# generate matrix based on above counting
xtab <- as.table(rbind(c(10,7), c(0,3)))
dimnames(xtab) <- list(ASV_abudnance = c("present", "absent"), Group = c("Group2", "Group3"))
xtab
# run Fisher's exact test
fisher_test(xtab, detailed = TRUE)
# null hypothesis = normally distributed
# reject null if p < 0.05
# run normality test
rstatix.df %>%
group_by(Groups) %>%
shapiro_test(ASV_abundance)
# plot ASV abundance in each sample
rstatix.df.distribution = ggplot(rstatix.df, aes(x = SampleID, y = ASV_abundance)) +
geom_bar(stat = "identity") +
ggtitle("abundance distribution")+
facet_wrap("Groups")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
theme_bw()
rstatix.df.distribution
# seems like ASV 1 is not normally distributed while Group2 and Group3 are normally distributed
# run t_test for normally distributed group comparison
# keep only normally distributed groups in the dataframe
t.test <- filter(rstatix.df, Groups == "Group2" | Groups == "Group3") %>%
droplevels()
# run t-test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
t.test.stat <- as.tibble(t.test) %>%
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat
# generate box plot
t.test.box <- ggplot(t.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
t.test.box
# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "p", tip.length=0.02)
# multiple comparisons correction using holm
t.test.stat <- as.tibble(rstatix.df) %>%
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat
# generate box plot
t.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
t.test.box
# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)
# run wilcox_test for not normally distributed group comparison
# remove normally distributed group from the dataframe
w.test <- filter(rstatix.df, Groups == "Group1" | Groups == "Group2") %>%
droplevels()
# run wilcox_test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
w.test.stat <- w.test %>%
wilcox_test(ASV_abundance~ASV, p.adjust.method= "holm")
w.test.stat
# generate box plot
w.test.box <- ggplot(w.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
w.test.box
# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "p", tip.length=0.02)
# multiple comparisons correction using holm
w.test.stat <- rstatix.df %>%
wilcox_test(ASV_abundance~Groups, p.adjust.method= "holm")
w.test.stat
# generate box plot
w.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
w.test.box
# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)
# run anova_test for all groups comparison
A.test.stat <- rstatix.df %>%
anova_test(formula= ASV_abundance~Groups)
# run dunn_test for all groups comparison
A.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
A.test.stat.adj
# generate box plot
A.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
A.test.box
# generate box plot with p-values on the plot
A.test.stat.adj <-A.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
A.test.box + stat_pvalue_manual(A.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)
# run kruskal_test for all groups comparison
k.test.stat <- rstatix.df %>%
kruskal_test(formula= ASV_abundance~Groups)
k.test.stat
# run dunn_test for all groups comparison
k.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
k.test.stat.adj
# generate box plot
k.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
geom_boxplot()+
geom_point()+
theme(panel.grid = element_blank())+
theme_bw()
k.test.box
# generate box plot with p-values on the plot
k.test.stat.adj <- k.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
k.test.box + stat_pvalue_manual(k.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)