Skip to content
This repository was archived by the owner on Feb 26, 2025. It is now read-only.

2. Statistical analyses in R

ahkim87 edited this page Oct 1, 2021 · 84 revisions

Introduction

Topics

R markdown
rstatix.Rmd

Data frame
rstatix_df.txt

Load packages

# load packages
library("tidyverse"); packageVersion("tidyverse")
library("ggplot2"); packageVersion("ggplot2")
library("rstatix"); packageVersion("rstatix")
library("dplyr"); packageVersion("dplyr")
library("ggpubr"); packageVersion("ggpubr")

Read dataframe

# read rstatix.df.txt
rstatix.df <- read.delim(file.choose())
rstatix.df

Fisher's exact test

# count how many samples have 0 ASV_abundance in each group
rstatix.df %>% group_by(Groups) %>% count(ASV_abundance == 0)

# generate matrix based on above counting
xtab <- as.table(rbind(c(10,7), c(0,3)))
dimnames(xtab) <- list(ASV_abudnance = c("present", "absent"), Group = c("Group2", "Group3"))
xtab

# run Fisher's exact test
fisher_test(xtab, detailed = TRUE)

Shapiro normality test

# null hypothesis = normally distributed
# reject null if p < 0.05

# run normality test
rstatix.df %>% 
group_by(Groups) %>% 
shapiro_test(ASV_abundance)

# plot ASV abundance in each sample
rstatix.df.distribution = ggplot(rstatix.df, aes(x = SampleID, y = ASV_abundance)) +
  geom_bar(stat = "identity") +
  ggtitle("abundance distribution")+
  facet_wrap("Groups")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  theme_bw()
rstatix.df.distribution

# seems like ASV 1 is not normally distributed while Group2 and Group3 are normally distributed

t-test

# run t_test for normally distributed group comparison

# keep only normally distributed groups in the dataframe
t.test <- filter(rstatix.df, Groups == "Group2" | Groups == "Group3") %>%
droplevels()

# run t-test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
t.test.stat <- as.tibble(t.test) %>% 
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat

# generate box plot 
t.test.box <- ggplot(t.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
t.test.box

# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "p", tip.length=0.02)

# multiple comparisons correction using holm
t.test.stat <- as.tibble(rstatix.df) %>% 
t_test(ASV_abundance ~ Groups, p.adjust.method= "holm")
t.test.stat

# generate box plot 
t.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
t.test.box

# generate box plot with p-values on the plot
t.test.stat <- t.test.stat %>% add_xy_position (x = "Groups")
t.test.box + stat_pvalue_manual(t.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)

Wilcoxon Mann Whitney test

# run wilcox_test for not normally distributed group comparison

# remove normally distributed group from the dataframe
w.test <- filter(rstatix.df, Groups == "Group1" | Groups == "Group2") %>%
droplevels()

# run wilcox_test
# note: p-value correction is not feasible here because there are only 2 groups and you need more than 2 groups for multiple comparison correction.
w.test.stat <- w.test %>% 
wilcox_test(ASV_abundance~ASV, p.adjust.method= "holm")
w.test.stat

# generate box plot 
w.test.box <- ggplot(w.test,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
w.test.box

# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "p", tip.length=0.02)

# multiple comparisons correction using holm
w.test.stat <- rstatix.df %>% 
wilcox_test(ASV_abundance~Groups, p.adjust.method= "holm")
w.test.stat

# generate box plot 
w.test.box <- ggplot(rstatix.df,aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
w.test.box

# generate box plot with p-values on the plot
w.test.stat <- w.test.stat %>% add_xy_position (x = "Groups")
w.test.box + stat_pvalue_manual(w.test.stat, label = "{p.adj} {p.adj.signif}", tip.length=0.02)

ANOVA test

# run anova_test for all groups comparison
A.test.stat <- rstatix.df %>% 
anova_test(formula= ASV_abundance~Groups)

# run dunn_test for all groups comparison
A.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
A.test.stat.adj

# generate box plot 
A.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
A.test.box

# generate box plot with p-values on the plot
A.test.stat.adj <-A.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
A.test.box + stat_pvalue_manual(A.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)

Kruskal-Wallis test

# run kruskal_test for all groups comparison
k.test.stat <- rstatix.df %>% 
kruskal_test(formula= ASV_abundance~Groups)
k.test.stat

# run dunn_test for all groups comparison
k.test.stat.adj <- dunn_test(data=rstatix.df, formula=ASV_abundance~Groups, p.adjust.method = "BH")
k.test.stat.adj

# generate box plot 
k.test.box <- ggplot(rstatix.df, aes(x = Groups, y = ASV_abundance, colour = Groups)) +
  geom_boxplot()+
  geom_point()+
  theme(panel.grid = element_blank())+
  theme_bw()
k.test.box

# generate box plot with p-values on the plot
k.test.stat.adj <- k.test.stat.adj %>% add_xy_position (x = "Groups") %>% p_round(digits = 3)
k.test.box + stat_pvalue_manual(k.test.stat.adj, label = "{p.adj} {p.adj.signif}", tip.length=0.02)

Clone this wiki locally