# load the config file
yaml.file <- yaml.load_file('config_ongoing_run.yaml')
meta <- read.table(yaml.file$METAFILE, sep='\t', header=T)
#Argument Parser
RDATA_DMT <- params$param1
CONDI <- params$param2
RDATA_PREP_FIGS <- params$param3
PLOT_RDATA <- params$param4
OUTPUT_PATH <- params$param5
control.cond <- unlist(strsplit(CONDI, "_"))[1]
treat.cond <- unlist(strsplit(CONDI, "_"))[2]
mark <- unlist(strsplit(CONDI, "_"))[3]
LEVEL <- yaml.file$LEVEL
CUSTOM_ANNOT <- yaml.file$CUSTOM_ANNOT
# colors - pinkish
#full.color = "#de0000"
#high.color = "#ff8092"
#mid.color = "#f5e0e8"
#low.color = "#996d9a"
#unmeth.color = "#0a1661"
#hyper.color = "#de0000"
#hypo.color = "#0a1661"
#not.color = "#f5e0e8"
# moins rose
full.color = "#de0000"
high.color = "#e17e65"
mid.color = "#c6c6c6"
low.color = "#7c6fac"
unmeth.color = "#0f208f"
hyper.color = "#de0000"
hypo.color = "#0f208f"
not.color = "#c6c6c6"
# couleurs Elouan
#full.color = "#342A1F",
#high.color = "#C59434",
#mid.color = "#6F7498",
#low.color = "#A3B7F9",
#unmeth.color = "#F4D4D4"
titre <- paste0("Differential Methylation Analysis on ", LEVEL, " (", CONDI, ")")
MKit_differential.Rmd : Developped & Maintained by Olivier Kirsh and Elouan Bethuel.
Perform a differential methylation analysis between pair of conditions.
Launch as Rscript on HPC cluster with the workflow : WGBSflow.
Requires 2 RData environments produced by the following R scripts :
- MKit_diff_bed.R which performs MethylKit DM CpG (DMC) or Tiles (DMT) analysis,
- MKit_diff_fig.R which builds all dataframes (wide and long format) needed to build the figures.
#load packages
library(ggplot2)
library(dbplyr)
library(magrittr)
library(methylKit)
library(yaml)
library(stringr)
load(RDATA_DMT)
load(RDATA_PREP_FIGS)
# extract the information from the yaml file
MINCOV <- yaml.file$MINCOV
MINQUALI <- yaml.file$MINQUALI
UNITE <- yaml.file$UNITE
TILESIZE <- yaml.file$TILESIZE
STEPSIZE <- yaml.file$STEPSIZE
SIGNIDIF <- yaml.file$SIGNIDIF
QVALUE <- yaml.file$QVALUE
DESTRAND <- yaml.file$DESTRAND
merge_annot <- yaml.file$MERGE_WITH_BASICS_ANNOT
Analysis on CpG or Tiles - LEVEL : CpG
Merge reads from both strands? DESTRAND : TRUE
Minimum coverage - MINCOV : 5
Discards the bases that have more than COV.PERCth percentile of coverage - COV.PERC : 99.9
Type of unification (all or at least one per group) - UNITE : all
Threshold for significant differential methylation in % : 20
Qvalue for significant differential methylation: 0.05
head(dm_condi, n = 20)
## [[1]]
## methylRaw object with 77141 rows
## --------------
## chr start end strand coverage numCs numTs
## 1 chr19 3051793 3051793 - 1 0 1
## 2 chr19 3051820 3051820 - 1 0 1
## 3 chr19 3051822 3051822 - 1 0 1
## 4 chr19 3051861 3051861 - 1 1 0
## 5 chr19 3051903 3051903 - 2 0 2
## 6 chr19 3051908 3051908 - 2 0 2
## --------------
## sample.id: 1KO_1
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[2]]
## methylRaw object with 83572 rows
## --------------
## chr start end strand coverage numCs numTs
## 1 chr19 3050119 3050119 + 1 0 1
## 2 chr19 3050120 3050120 - 1 0 1
## 3 chr19 3050317 3050317 - 1 0 1
## 4 chr19 3050813 3050813 - 1 0 1
## 5 chr19 3050818 3050818 - 1 0 1
## 6 chr19 3050955 3050955 + 1 1 0
## --------------
## sample.id: DKO_1
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[3]]
## methylRaw object with 81732 rows
## --------------
## chr start end strand coverage numCs numTs
## 1 chr19 3050603 3050603 + 1 0 1
## 2 chr19 3051012 3051012 + 1 0 1
## 3 chr19 3051067 3051067 + 1 0 1
## 4 chr19 3051154 3051154 + 1 0 1
## 5 chr19 3051573 3051573 + 1 0 1
## 6 chr19 3051820 3051820 - 1 0 1
## --------------
## sample.id: 1KO_2
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[4]]
## methylRaw object with 67543 rows
## --------------
## chr start end strand coverage numCs numTs
## 1 chr19 3051154 3051154 + 2 0 2
## 2 chr19 3051895 3051895 - 1 0 1
## 3 chr19 3051903 3051903 - 2 0 2
## 4 chr19 3051908 3051908 - 2 1 1
## 5 chr19 3051914 3051914 - 2 1 1
## 6 chr19 3052077 3052077 + 1 1 0
## --------------
## sample.id: DKO_2
## assembly: mm39
## context: CpG
## resolution: base
head(f.dm_condi, n = 20)
## [[1]]
## methylRaw object with 38271 rows
## --------------
## chr start end strand coverage numCs numTs
## 38 chr19 3078799 3078799 + 5 3 2
## 44 chr19 3078955 3078955 + 6 1 5
## 53 chr19 3079664 3079664 - 5 0 5
## 65 chr19 3080405 3080405 + 5 0 5
## 67 chr19 3080609 3080609 + 11 1 10
## 71 chr19 3081126 3081126 + 7 1 6
## --------------
## sample.id: 1KO_1
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[2]]
## methylRaw object with 32249 rows
## --------------
## chr start end strand coverage numCs numTs
## 40 chr19 3078603 3078603 + 9 1 8
## 41 chr19 3078604 3078604 - 5 1 4
## 42 chr19 3078649 3078649 + 7 1 6
## 47 chr19 3078832 3078832 - 6 0 6
## 50 chr19 3078956 3078956 - 7 2 5
## 53 chr19 3079445 3079445 - 6 2 4
## --------------
## sample.id: DKO_1
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[3]]
## methylRaw object with 49836 rows
## --------------
## chr start end strand coverage numCs numTs
## 45 chr19 3076963 3076963 + 5 0 5
## 56 chr19 3078832 3078832 - 5 2 3
## 59 chr19 3078955 3078955 + 6 1 5
## 70 chr19 3079896 3079896 - 5 0 5
## 72 chr19 3079903 3079903 - 5 1 4
## 73 chr19 3080073 3080073 - 7 1 6
## --------------
## sample.id: 1KO_2
## assembly: mm39
## context: CpG
## resolution: base
##
##
## [[4]]
## methylRaw object with 44619 rows
## --------------
## chr start end strand coverage numCs numTs
## 44 chr19 3078832 3078832 - 5 0 5
## 46 chr19 3078875 3078875 - 5 0 5
## 47 chr19 3078955 3078955 + 5 0 5
## 66 chr19 3080168 3080168 + 8 3 5
## 67 chr19 3080169 3080169 - 5 0 5
## 68 chr19 3080405 3080405 + 6 1 5
## --------------
## sample.id: DKO_2
## assembly: mm39
## context: CpG
## resolution: base
head(methst, n = 20)
## Methstatus condi
## 1 Low 1KO
## 2 Unmeth 1KO
## 3 Low 1KO
## 4 Low 1KO
## 5 Low 1KO
## 6 Low 1KO
## 7 Low 1KO
## 8 Low 1KO
## 9 Low 1KO
## 10 Low 1KO
## 11 Low 1KO
## 12 Low 1KO
## 13 Low 1KO
## 14 Mid 1KO
## 15 Low 1KO
## 16 Mid 1KO
## 17 Low 1KO
## 18 Low 1KO
## 19 Low 1KO
## 20 Unmeth 1KO
head(AllDiffdm, n = 20)
## chr start end strand pvalue qvalue meth.diff coverage1
## 1 chr19 3078955 3078955 + 1.000000e+00 1.000000000 0.0000000 12
## 2 chr19 3080405 3080405 + 5.563483e-02 0.185740858 19.0476190 11
## 3 chr19 3080609 3080609 + 1.549374e-01 0.346780439 12.7976190 32
## 4 chr19 3083013 3083013 + 9.570106e-01 1.000000000 -0.7936508 18
## 5 chr19 3083062 3083062 + 1.306411e-01 0.312498799 19.4805195 22
## 6 chr19 3083094 3083094 + 2.667613e-02 0.120033045 30.1587302 18
## 7 chr19 3089018 3089018 + 2.537119e-01 0.453397326 12.3569794 23
## 8 chr19 3089404 3089404 + 8.699143e-01 1.000000000 1.8062397 29
## 9 chr19 3128966 3128966 + 3.796431e-01 0.580791108 8.7912088 35
## 10 chr19 3129682 3129682 + 5.862075e-01 0.791203761 7.5757576 11
## 11 chr19 3129995 3129995 + 2.134959e-01 0.414103616 20.0000000 15
## 12 chr19 3130088 3130088 + 1.219941e-01 0.298754874 18.0250784 29
## 13 chr19 3130109 3130109 + 8.483397e-01 1.000000000 -2.3529412 25
## 14 chr19 3130113 3130113 + 9.462780e-01 1.000000000 0.9469697 33
## 15 chr19 3130130 3130130 + 9.333123e-01 1.000000000 1.0101010 33
## 16 chr19 3130134 3130134 + 9.814118e-01 1.000000000 -0.3030303 33
## 17 chr19 3130170 3130170 + 2.234475e-01 0.424219365 15.5789474 25
## 18 chr19 3130191 3130191 + 4.556063e-02 0.164523756 24.7407407 25
## 19 chr19 3130195 3130195 + 2.950109e-01 0.494144784 10.0000000 30
## 20 chr19 3130209 3130209 + 8.962014e-05 0.004064613 38.0952381 27
## numCs1 numTs1 coverage2 numCs2 numTs2 Methyl.Ctrl Methyl.Cond Methstatus_Ct
## 1 2 10 12 2 10 0.16666667 0.1666667 Low
## 2 0 11 21 4 17 0.00000000 0.1904762 Unmeth
## 3 2 30 21 4 17 0.06250000 0.1904762 Low
## 4 4 14 14 3 11 0.22222222 0.2142857 Low
## 5 2 20 14 4 10 0.09090909 0.2857143 Low
## 6 1 17 14 5 9 0.05555556 0.3571429 Low
## 7 2 21 19 4 15 0.08695652 0.2105263 Low
## 8 5 24 21 4 17 0.17241379 0.1904762 Low
## 9 5 30 26 6 20 0.14285714 0.2307692 Low
## 10 1 10 12 2 10 0.09090909 0.1666667 Low
## 11 2 13 12 4 8 0.13333333 0.3333333 Low
## 12 4 25 22 7 15 0.13793103 0.3181818 Low
## 13 5 20 17 3 14 0.20000000 0.1764706 Low
## 14 10 23 16 5 11 0.30303030 0.3125000 Mid
## 15 7 26 18 4 14 0.21212121 0.2222222 Low
## 16 10 23 20 6 14 0.30303030 0.3000000 Mid
## 17 4 21 19 6 13 0.16000000 0.3157895 Low
## 18 4 21 27 11 16 0.16000000 0.4074074 Low
## 19 3 27 25 5 20 0.10000000 0.2000000 Low
## 20 0 27 21 8 13 0.00000000 0.3809524 Unmeth
## Methstatus_Cd Diff_expr DM_status
## 1 Low non-significant None
## 2 Low non-significant None
## 3 Low non-significant None
## 4 Low non-significant None
## 5 Mid non-significant None
## 6 Mid non-significant None
## 7 Low non-significant None
## 8 Low non-significant None
## 9 Low non-significant None
## 10 Low non-significant None
## 11 Mid non-significant None
## 12 Mid non-significant None
## 13 Low non-significant None
## 14 Mid non-significant None
## 15 Low non-significant None
## 16 Mid non-significant None
## 17 Mid non-significant None
## 18 Mid non-significant None
## 19 Low non-significant None
## 20 Mid Significant Hyper
head(dfpc, n = 20)
## chr start end strand Meth_perc Coverage condi Diff_expr
## 1 chr19 3078955 3078955 + 0.16666667 12 1KO non-significant
## 2 chr19 3080405 3080405 + 0.00000000 11 1KO non-significant
## 3 chr19 3080609 3080609 + 0.06250000 32 1KO non-significant
## 4 chr19 3083013 3083013 + 0.22222222 18 1KO non-significant
## 5 chr19 3083062 3083062 + 0.09090909 22 1KO non-significant
## 6 chr19 3083094 3083094 + 0.05555556 18 1KO non-significant
## 7 chr19 3089018 3089018 + 0.08695652 23 1KO non-significant
## 8 chr19 3089404 3089404 + 0.17241379 29 1KO non-significant
## 9 chr19 3128966 3128966 + 0.14285714 35 1KO non-significant
## 10 chr19 3129682 3129682 + 0.09090909 11 1KO non-significant
## 11 chr19 3129995 3129995 + 0.13333333 15 1KO non-significant
## 12 chr19 3130088 3130088 + 0.13793103 29 1KO non-significant
## 13 chr19 3130109 3130109 + 0.20000000 25 1KO non-significant
## 14 chr19 3130113 3130113 + 0.30303030 33 1KO non-significant
## 15 chr19 3130130 3130130 + 0.21212121 33 1KO non-significant
## 16 chr19 3130134 3130134 + 0.30303030 33 1KO non-significant
## 17 chr19 3130170 3130170 + 0.16000000 25 1KO non-significant
## 18 chr19 3130191 3130191 + 0.16000000 25 1KO non-significant
## 19 chr19 3130195 3130195 + 0.10000000 30 1KO non-significant
## 20 chr19 3130209 3130209 + 0.00000000 27 1KO Significant
## DM_status Methstatus Methstatus_Ct Methstatus_Cd
## 1 None Low Low Low
## 2 None Unmeth Unmeth Low
## 3 None Low Low Low
## 4 None Low Low Low
## 5 None Low Low Mid
## 6 None Low Low Mid
## 7 None Low Low Low
## 8 None Low Low Low
## 9 None Low Low Low
## 10 None Low Low Low
## 11 None Low Low Mid
## 12 None Low Low Mid
## 13 None Low Low Low
## 14 None Mid Mid Mid
## 15 None Low Low Low
## 16 None Mid Mid Mid
## 17 None Low Low Mid
## 18 None Low Low Mid
## 19 None Low Low Low
## 20 Hyper Unmeth Unmeth Mid
head(dfpc_annotated, n = 20)
## GRanges object with 20 ranges and 9 metadata columns:
## seqnames ranges strand | Meth_perc Coverage condi Diff_expr
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <factor> <factor>
## [1] chr19 3078955 + | 0.1666667 12 1KO non-significant
## [2] chr19 3080405 + | 0.0000000 11 1KO non-significant
## [3] chr19 3080609 + | 0.0625000 32 1KO non-significant
## [4] chr19 3083013 + | 0.2222222 18 1KO non-significant
## [5] chr19 3083062 + | 0.0909091 22 1KO non-significant
## ... ... ... ... . ... ... ... ...
## [16] chr19 3130088 + | 0.137931 29 1KO non-significant
## [17] chr19 3130109 + | 0.200000 25 1KO non-significant
## [18] chr19 3130109 + | 0.200000 25 1KO non-significant
## [19] chr19 3130113 + | 0.303030 33 1KO non-significant
## [20] chr19 3130113 + | 0.303030 33 1KO non-significant
## DM_status Methstatus Methstatus_Ct Methstatus_Cd annot
## <factor> <factor> <factor> <factor> <GRanges>
## [1] None Low Low Low chr19:1-3103070
## [2] None Unmeth Unmeth Low chr19:1-3103070
## [3] None Low Low Low chr19:1-3103070
## [4] None Low Low Low chr19:1-3103070
## [5] None Low Low Mid chr19:1-3103070
## ... ... ... ... ... ...
## [16] None Low Low Mid chr19:3125886-3195867:-
## [17] None Low Low Low chr19:3103071-3247732:-
## [18] None Low Low Low chr19:3125886-3195867:-
## [19] None Mid Mid Mid chr19:3103071-3247732:-
## [20] None Mid Mid Mid chr19:3125886-3195867:-
## -------
## seqinfo: 26 sequences from mm39 genome
#################################################################
# Methylation & coverage Statistics on raw data (cpg or tiles)
dfpc_dm <- NULL
groupe <- NULL
for(i in 1:length(dm_condi)){
print(paste0(LEVEL, " raw data"))
print("sample ID")
print(dm_condi[[i]]@sample.id)
groupe <- c(groupe, rep(strsplit(dm_condi[[i]]@sample.id, "_")[[1]][1], dim(dm_condi[[i]])[1]) )
dm_condi[[i]] %>% getData() %>%
dplyr::mutate(., Meth_perc = 100 * (numCs/coverage)) %>%
dplyr::mutate(., ID = dm_condi[[i]]@sample.id) -> df_dm
dfpc_dm <- rbind(dfpc_dm, df_dm)
rm(df_dm)
}
dfpc_dm <- cbind(dfpc_dm, groupe)
#Plot of CpG Methylation on raw data (before filtering coverage and unite)
p_dm <- ggplot(dfpc_dm, aes(x=Meth_perc, fill=groupe, color=groupe)) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("% CpG Methylation on raw data ", "(", LEVEL, ")")) +
facet_grid(. ~ ID ) +
labs(x = "% of methylation", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
p_dm
rm(groupe)
cov_dm <- ggplot(dfpc_dm, aes(x=coverage, fill=groupe, color=groupe)) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 200, 10)) +
ggtitle(paste0("Coverage on raw data ", "(", LEVEL, ")")) +
facet_grid(. ~ ID ) +
labs(x = "Coverage", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
cov_dm
rm(dfpc_dm)
#################################################################
# Methylation & coverage Statistics on filtered data
dfpc_fdm <- NULL
groupe <- NULL
for(i in 1:length(f.dm_condi)){
print("f.dm_condi, filtered data")
print("sample ID")
print(f.dm_condi[[i]]@sample.id)
groupe <- c(groupe, rep(strsplit(f.dm_condi[[i]]@sample.id, "_")[[1]][1], dim(f.dm_condi[[i]])[1]) )
f.dm_condi[[i]] %>% getData() %>%
dplyr::mutate(., Meth_perc = 100 * (numCs/coverage)) %>%
dplyr::mutate(., ID = f.dm_condi[[i]]@sample.id) -> df_fdm
dfpc_fdm <- rbind(dfpc_fdm, df_fdm)
rm(df_fdm)
}
dfpc_fdm <- cbind(dfpc_fdm, groupe)
#Plot of CpG Methylation on filtered data
p_fdm <- ggplot(dfpc_fdm, aes(x=Meth_perc, fill=groupe, color=groupe)) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("% CpG Methylation on filtered data ", "(", LEVEL, ")")) +
facet_grid(. ~ ID ) +
labs(x = "% of methylation", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
p_fdm
rm(groupe)
cov_fdm <- ggplot(dfpc_fdm, aes(x=coverage, fill=groupe, color=groupe)) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("Coverage on filtered data ", "(", LEVEL, ")")) +
facet_grid(. ~ ID) +
labs(x = "Coverage", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
cov_fdm
rm(dfpc_fdm)
For each condition, the plot shows the number of CpGs at different methylation status.
The methylation level is categorised into five classes:
- Full : 100% methylated
- High : between 75 and 100% excluded
- Mid : between 50 et 75% exluded
- Low : between 0 exluded and 25% excluded
- Unmeth : unmethylated
#################################################################
#Barplot Methylation Status bpst object
#requires methst object
bpst <- ggplot(data=methst ,
aes( x = condi, fill = factor( Methstatus, rev( levels(Methstatus) ) ) )
) +
geom_bar() +
labs(fill = '') +
scale_fill_manual("Methylation status : ",
values = c( 'Full' = full.color,
'High' = high.color,
'Mid' = mid.color,
'Low' = low.color,
'Unmeth' = unmeth.color)) +
ggtitle(paste0(CONDI, " Methylation status") ) +
theme_bw(base_size = 14) +
theme(axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5))
bpst
These plots give the proportion of differentially methylated tiles or CpG present in the three categories:
- Hyper: DKO > 1KO by more than 20 % and qvalue < 0.05
- Hypo: DKO < 1KO by more than 20 % and qvalue < 0.05
- None: no significant difference, qvalue > 0.05
The significance threshold of the methylation difference (20 %) is modifiable in the configuration file
as well as the threshold on the qvalue (0.05).
#################################################################
# Piechart Differentially methylated CpG or Tiles distribution
# piedm object on AllDiffdm$DM_status %>% table %>% data.frame
summary(AllDiffdm$DM_status)
## Hyper Hypo None
## 265 1285 11013
piedm <- ggplot(AllDiffdm$DM_status %>% table %>% data.frame,
aes(x="", y=Freq, fill=.)) +
geom_col() +
scale_fill_manual("", values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
coord_polar("y", start=0) +
theme_void() +
ggtitle(paste0(CONDI),
subtitle = paste0(" Differentially methylated ", LEVEL, " distribution (all)")
) +
theme(plot.title = element_text(hjust = 0.5))
piedm
# piedms object on filter(AllDiffdm, DM_status != \"None\") -> cnt
dplyr::filter(AllDiffdm, DM_status != "None") %>% dplyr::select(. , DM_status) %>% table %>% data.frame() -> cnt
piedms <- ggplot( cnt[-3,], aes(x="", y=Freq, fill=.) ) +
geom_col() +
scale_fill_manual("", values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color)
) +
coord_polar("y", start=0) +
theme_void() +
ggtitle(paste0(CONDI),
subtitle = paste0(" Differentially methylated " ,LEVEL, " distribution (Significant)")
) +
theme(plot.title = element_text(hjust = 0.5))
piedms
For each annotation track (promoters, exons …), the plots show the number of differentially methylated CpGs or tiles as a function of the methylation status in both conditions.
#################################################################
# New barplot fraction low mid high in Diff meth on All the Annots
for(i in IGV ){
dfpc_temp <- dfpc_annotated %>% data.frame
dfpc_temp <- dfpc_temp[ which( dfpc_temp$annot.type %in% i ) , ]
exist_annot <- row.names(table(dfpc_temp$annot.type))
if(is.na(match(x = i , exist_annot)) == FALSE){
dfpc_temp$annot.type <- factor(dfpc_temp$annot.type, levels = IGV[i] )
bp2ct <- ggplot( dfpc_temp, aes(x=condi, fill = DM_status)) +
geom_bar( ) +
facet_grid(~Methstatus, drop = FALSE) +
theme_bw(base_size = 14) +
scale_fill_manual("", values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
ggtitle(paste0(label = CONDI, " Differential methylation status"),
subtitle = paste0( "On annotation track ", i,
" and by ", LEVEL, " methylation level in ",
sample.ids[1])
) +
theme(
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic")
)
print(bp2ct)
}
}
rm(dfpc_temp)
#################################################################
#Histogram of methylation all
histmeth <- ggplot(dfpc %>% data.frame,
aes(x=Meth_perc*100, fill= condi, color= condi)
) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("% of methylation, for all unified ", LEVEL)) +
facet_grid(. ~ condi, scales = "free", space = "free" ) +
labs(x = "% of methylation", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
histmeth
#################################################################
#Histogram of methylation Signif
histmeths <- ggplot(dfpc_Sig %>% data.frame,
aes(x=Meth_perc*100, fill= condi, color= condi)
) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("% of methylation, for differentially methylated ", LEVEL)) +
facet_grid(. ~ condi, scales = "free", space = "free" ) +
labs(x = "% of methylation", y = "Counts") +
theme_bw(base_size = 14) +
theme(
legend.title = element_text(),
axis.title.x=element_text(),
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
strip.text.x = element_text(size = 12, color = "black", face = "bold.italic"))
histmeths
#################################################################
#Histogram of methylation all by group
histmethg <- ggplot(dfpc %>% data.frame,
aes(x=Meth_perc*100, fill= condi, color= condi)
) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("Methylation distribution on all ", LEVEL, ", by group") ) +
xlab("% of methylation") + ylab("Counts") +
theme_bw(base_size = 14) +
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5))
histmethg
#################################################################
#Histogram of methylation Signif by group
histmethsg <- ggplot(dfpc_Sig %>% data.frame,
aes(x=Meth_perc*100, fill= condi, color=condi)
) +
#scale_fill_manual(values=c("darkorange2", "cornflowerblue", "red", "yellow")) +
geom_histogram(position="identity",
alpha=0.6,
breaks = seq(0, 100, 5)) +
ggtitle(paste0("Methylation distribution on differentially methylated ", LEVEL, ", by group") ) +
xlab("% of methylation") + ylab("Counts") +
theme_bw(base_size = 14) +
theme(legend.title = element_blank(),
plot.title = element_text(hjust = 0.5))
histmethsg
#################################################################
#Violin plot of methylation All
#vp2
vp2 <- ggplot(dfpc, aes(x= condi, y=Meth_perc*100, fill = condi)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.05) +
labs(title = paste0("Methylation distribution on all ", LEVEL) ,
x = "Samples", y = "% of methylation" ) +
theme_bw(base_size = 14) +
theme(axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5),
legend.position = "none")
vp2
#Violin plot of methylation Signif
#vp2s
vps2 <- ggplot(dfpc_Sig, aes(x= condi , y=Meth_perc*100, fill = condi)) +
geom_violin(trim=FALSE) +
geom_boxplot(width=0.05) +
labs( title = paste0("Methylation distribution on differentially methylated ", LEVEL) ,
x = "Samples", y = "Methylation %" ) +
theme_bw(base_size = 14) +
theme(axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5),
legend.position = "none")
vps2
#################################################################
#Volcano plot
if(min(AllDiffdm$qvalue) == 0){
ymax <- .Machine$double.xmin # if qvalue == 0 , set the ylim max at the smallest representable number in R (2.225074e-308)
}else{
ymax <- min(AllDiffdm$qvalue)
}
vc <- ggplot(data = AllDiffdm,
aes(x = meth.diff, y = -log10(qvalue), col = DM_status)) +
geom_point() +
theme_bw(base_size = 14) +
labs(col = '') +
xlab("Methylation difference (%)") +
xlim(-100, 100) +
ylim(0, -log10(ymax)) +
scale_color_manual(values=c('Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)) +
geom_vline(xintercept = c(-DIF, DIF), linetype="dashed", col="black") +
geom_hline(yintercept = -log10(QV), linetype="dashed", col="black") +
ggtitle(paste0(CONDI, " (", LEVEL, ")")) +
theme(plot.title = element_text(hjust = 0.5))
vc
for(i in IGV ){
Adtdf <- AllDiffdm_annotated %>% data.frame
Adtdf <- Adtdf[ which( Adtdf$annot.type %in% i ) , ]
exist_annot <- row.names(table(Adtdf$annot.type))
if(is.na(match(x = i , exist_annot)) == FALSE){
Adtdf$annot.type <- factor(Adtdf$annot.type, levels = IGV[i] )
vc_temp <- ggplot(data = Adtdf,
aes(x = meth.diff, y = -log10(qvalue), col = DM_status)) +
geom_point() +
theme_bw(base_size = 14) +
labs(col = '') +
xlab("Methylation difference (%)") +
xlim(-100, 100) +
ylim(0, -log10(ymax)) +
scale_color_manual(values=c('Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)) +
geom_vline(xintercept = c(-DIF, DIF), linetype="dashed", col="black") +
geom_hline(yintercept = -log10(QV), linetype="dashed", col="black") +
ggtitle(paste0(CONDI, " (", LEVEL, "), on annotation ", i))
print(vc_temp)
}
}
rm(Adtdf)
rm(exist_annot)
# MH corrected for cases with low number of differences
if (length(AllDiffdm_annotated) > 40){
top_diff <- data.frame(AllDiffdm_annotated[AllDiffdm_annotated$pvalue <= sort(AllDiffdm_annotated$pvalue)[30] , ])
} else {
top_diff <- data.frame(AllDiffdm_annotated[AllDiffdm_annotated$pvalue <= sort(AllDiffdm_annotated$pvalue), ])
}
top_diff <- top_diff[, c(1,2,3,6,20, 28, 30)]
colnames(top_diff) <- c("Chr", "start", "end", "Pval", "Status", "Gene ID", "Annotation")
knitr::kable(top_diff , "pipe")
| Chr | start | end | Pval | Status | Gene ID | Annotation |
|---|---|---|---|---|---|---|
| chr19 | 3190364 | 3190364 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3190364 | 3190364 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3190364 | 3190364 | 0 | Hyper | ENSMUSG00000118966.1 | mm39_near_tss |
| chr19 | 3190364 | 3190364 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3190406 | 3190406 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3190406 | 3190406 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3190406 | 3190406 | 0 | Hyper | ENSMUSG00000118966.1 | mm39_near_tss |
| chr19 | 3190406 | 3190406 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3190828 | 3190828 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3190828 | 3190828 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3190828 | 3190828 | 0 | Hyper | ENSMUSG00000118966.1 | mm39_near_tss |
| chr19 | 3190828 | 3190828 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3190828 | 3190828 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_promoters |
| chr19 | 3192376 | 3192376 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3192376 | 3192376 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3192376 | 3192376 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3192376 | 3192376 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_promoters |
| chr19 | 3192378 | 3192378 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3192378 | 3192378 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3192378 | 3192378 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3192378 | 3192378 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_promoters |
| chr19 | 3192413 | 3192413 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3192413 | 3192413 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3192413 | 3192413 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3192413 | 3192413 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_promoters |
| chr19 | 3192943 | 3192943 | 0 | Hyper | ENSMUSG00000100969.8 | mm39_genes |
| chr19 | 3192943 | 3192943 | 0 | Hyper | NA | mm39_introns |
| chr19 | 3192943 | 3192943 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_near_tss |
| chr19 | 3192943 | 3192943 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_promoters |
| chr19 | 3192943 | 3192943 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_exons |
| chr19 | 3192943 | 3192943 | 0 | Hyper | ENSMUSG00000118182.2 | mm39_genes |
| chr19 | 3808599 | 3808599 | 0 | Hypo | NA | mm39_intergenic |
| chr19 | 4077308 | 4077308 | 0 | Hyper | NA | mm39_intergenic |
| chr19 | 4596039 | 4596039 | 0 | Hyper | ENSMUSG00000024892.18 | mm39_genes |
| chr19 | 4596039 | 4596039 | 0 | Hyper | NA | mm39_introns |
| chr19 | 4913792 | 4913792 | 0 | Hyper | ENSMUSG00000006457.5 | mm39_genes |
| chr19 | 4913792 | 4913792 | 0 | Hyper | NA | mm39_introns |
| chr19 | 5006821 | 5006821 | 0 | Hyper | NA | mm39_intergenic |
rm(top_diff)
if(LEVEL == "Tiles"){
df_alldiff <- data.frame(AllDiffdm_annotated)
sort_alldif_annot <- df_alldiff[order(df_alldiff$pvalue, decreasing=FALSE),]
sort_annotated_genes <- sort_alldif_annot[ sort_alldif_annot$annot.type == paste0(ORG, "_genes"),]
background <- sort_annotated_genes[!duplicated(sort_annotated_genes[, "start"]),]
write.csv(background, file= paste0(OUTPUT_PATH , CONDI, "_background_tiles.csv"))
sort_sig_annotated_genes <- sort_annotated_genes[sort_annotated_genes$Diff_expr == "Significant", ]
write.csv(sort_sig_annotated_genes, file= paste0(OUTPUT_PATH , CONDI, "_significant_annot_tiles.csv"))
print(table(sort_sig_annotated_genes$DM_status))
}
Show the differential methylation status :
- Hyper
- Hypo
- None
On basic genomic annotation tracks.
#################################################################
#plot categorical
dm_annotated <- AllDiffdm_annotated %>% data.frame
dm_annotated$annot.type <- factor(dm_annotated$annot.type , levels = IGV )
anno_re <- ggplot(data=dm_annotated, aes(x=annot.type, fill=DM_status)) +
geom_bar(position = "fill") +
scale_fill_manual("",
values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
scale_x_discrete("", labels = gsub("ORG_", "", IGV ) ) +
theme_bw(base_size = 14) +
ggtitle( paste0('DM status on annotation tracks (relative counts)'),
subtitle = paste0(CONDI) ) +
theme(axis.text.x = element_text( size = 12, angle = 45, vjust = 1, hjust = 1),
axis.title.x = element_blank(), legend.title = element_blank()
)
anno_re
anno_abs <- ggplot(data=dm_annotated, aes(x=annot.type, fill=DM_status)) +
geom_bar() +
scale_fill_manual("",
values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
scale_x_discrete("", labels = gsub("ORG_", "", IGV) ) +
theme_bw(base_size = 14) +
ggtitle( paste0('DM status on annotation tracks (absolute counts)'),
subtitle = paste0(CONDI) ) +
theme(axis.text.x = element_text( size = 8, angle = 45, vjust = 1, hjust = 1),
axis.title.x = element_blank(), legend.title = element_blank()
)
anno_abs
if((CUSTOM_ANNOT == TRUE) && (merge_annot == FALSE)){
library(GenomicRanges)
library(annotatr)
df_custom_tracks <- data.frame(list_custom_tracks[[1]])
for(i in 2:length(list_custom_tracks)){
df <- data.frame(list_custom_tracks[[i]])
df_custom_tracks <- rbind(df_custom_tracks, df)
}
GR_custom_tracks <- makeGRangesFromDataFrame(df_custom_tracks, keep.extra.columns = TRUE)
for(i in 1:length(unique(meta_annot$group))){
print(i)
AllDiffdm_annotated <- annotate_regions(regions = AllDiffdm_regions,
annotations = GR_custom_tracks[GR_custom_tracks$group == unique(meta_annot$group)[i] ],
ignore.strand = TRUE,
quiet = TRUE)
dm_annotated <- AllDiffdm_annotated %>% data.frame
dm_annotated$annot.type <- factor(dm_annotated$annot.type )
anno_re_custom <- ggplot(data=dm_annotated, aes(x=annot.type, fill=DM_status)) +
geom_bar(position = "fill") +
scale_fill_manual("",
values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
scale_x_discrete("", labels = gsub("ORG_", "", dm_annotated$annot.type ) ) +
theme_bw(base_size = 14) +
ggtitle( paste0('DM status on annotation tracks (relative counts)'),
subtitle = paste0(CONDI) ) +
theme(axis.text.x = element_text( size = 8, angle = 45, vjust = 1, hjust = 1),
axis.title.x = element_blank(), legend.title = element_blank()
)
print(anno_re_custom)
anno_abs_custom <- ggplot(data=dm_annotated, aes(x=annot.type, fill=DM_status)) +
geom_bar() +
scale_fill_manual("",
values = c( 'Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)
) +
scale_x_discrete("", labels = gsub("ORG_", "", dm_annotated$annot.type) ) +
theme_bw(base_size = 14) +
ggtitle( paste0('DM status on annotation tracks (absolute counts)'),
subtitle = paste0(CONDI) ) +
theme(axis.text.x = element_text( size = 8, angle = 45, vjust = 1, hjust = 1),
axis.title.x = element_blank(), legend.title = element_blank()
)
print(anno_abs_custom)
}
rm(df_custom_tracks)
rm(GR_custom_tracks)
rm(dm_annotated)
rm(AllDiffdm_annotated)
}
For each chromosome show the number of DMCs/DMTs hypo or hyper-methylated
The category named “others” correspond to chromsomes with unconventional names (like Unmapped chromsomes for example)
if( dim(dfpc_Sig)[1] > 0){
count_per_chromosome <- table(dfpc_Sig$chr, dfpc_Sig$DM_status)
count_df <- as.data.frame.matrix(count_per_chromosome)
count_df$chr <- rownames(count_df)
# test si absence de hypo ou hyper
if(dim(count_df)[2] == 2){
if(colnames(count_df[1]) == "hypo"){
count_df$Hyper <- rep(0, dim(count_df)[1])
}else{
count_df$Hypo <- rep(0, dim(count_df)[1])
}
}
chr_others <- count_df[1,]
rownames(chr_others) <- "others"
chr_others$Hypo <- chr_others$Hyper <- 1
chr_others$chr <- "others"
delete_row <- 0
for(i in 1:length(count_df$chr)){
if((nchar(count_df$chr[i]) > 5) == TRUE){
chr_others$Hypo <- (chr_others$Hypo + count_df[i,]$Hypo)
chr_others$Hyper <- (chr_others$Hyper + count_df[i,]$Hyper)
delete_row <- c(delete_row, i)
}
}
if(is.na(delete_row[2]) == FALSE){ # test si il n'y a aucune colonne à remove (delete_row = 0), car sinon génère des bugs
count_df <- count_df[-delete_row,]
}
count_df<- rbind(count_df, chr_others)
count_df$chr <- str_sort(count_df$chr, numeric = TRUE)
count_df_long <- tidyr::gather(count_df, key = "status", value = "count", Hypo, Hyper)
hist_dmr_chr <- ggplot(count_df_long, aes(x = chr, y = count, fill = status)) +
geom_bar(position = "dodge" , stat = "identity", colour = "black") +
labs(title = paste0("Number of differentially methylated ", LEVEL, " by chromosome"),
x = "Chromosome",
y = LEVEL) +
theme_bw(base_size = 14) +
scale_fill_manual("", values=c('Hyper' = hyper.color,
'Hypo' = hypo.color,
'None' = not.color)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
rm(delete_row)
rm(count_df, count_df_long)
rm(chr_others)
hist_dmr_chr
}
Saves the whole R session of the script (variables, objects …)
at the end of its execution. The RData file produced can then be opened in a local environment (if you have enough space)
to make modifications. For example to retouch figures.
save.image(file = PLOT_RDATA)
# R Session
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 11 (bullseye)
##
## Matrix products: default
## BLAS/LAPACK: /opt/conda/envs/wgbsflow/lib/libopenblasp-r0.3.21.so
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] stringr_1.5.0 methylKit_1.20.0 GenomicRanges_1.46.1
## [4] GenomeInfoDb_1.30.1 IRanges_2.28.0 S4Vectors_0.32.4
## [7] BiocGenerics_0.40.0 magrittr_2.0.3 dbplyr_2.3.0
## [10] ggplot2_3.3.6 yaml_2.3.5
##
## loaded via a namespace (and not attached):
## [1] MatrixGenerics_1.6.0 Biobase_2.54.0
## [3] tidyr_1.3.0 sass_0.4.5
## [5] jsonlite_1.8.4 splines_4.1.3
## [7] R.utils_2.12.2 gtools_3.9.4
## [9] bslib_0.4.2 assertthat_0.2.1
## [11] highr_0.10 GenomeInfoDbData_1.2.7
## [13] Rsamtools_2.10.0 numDeriv_2016.8-1.1
## [15] pillar_1.8.1 lattice_0.20-45
## [17] glue_1.6.2 limma_3.50.3
## [19] bbmle_1.0.25 digest_0.6.31
## [21] XVector_0.34.0 qvalue_2.26.0
## [23] colorspace_2.1-0 htmltools_0.5.4
## [25] Matrix_1.5-3 R.oo_1.25.0
## [27] plyr_1.8.8 XML_3.99-0.11
## [29] pkgconfig_2.0.3 emdbook_1.3.12
## [31] zlibbioc_1.40.0 purrr_1.0.1
## [33] mvtnorm_1.1-3 scales_1.2.1
## [35] BiocParallel_1.28.3 tibble_3.1.8
## [37] farver_2.1.1 generics_0.1.3
## [39] SummarizedExperiment_1.24.0 cachem_1.0.6
## [41] withr_2.5.0 cli_3.6.0
## [43] crayon_1.5.2 mclust_6.0.0
## [45] evaluate_0.20 R.methodsS3_1.8.2
## [47] fansi_1.0.4 MASS_7.3-58.2
## [49] tools_4.1.3 data.table_1.14.2
## [51] matrixStats_0.63.0 BiocIO_1.4.0
## [53] lifecycle_1.0.3 munsell_0.5.0
## [55] DelayedArray_0.20.0 Biostrings_2.62.0
## [57] compiler_4.1.3 jquerylib_0.1.4
## [59] fastseg_1.40.0 rlang_1.0.6
## [61] grid_4.1.3 RCurl_1.98-1.10
## [63] rjson_0.2.21 labeling_0.4.2
## [65] bitops_1.0-7 rmarkdown_2.20
## [67] restfulr_0.0.15 gtable_0.3.1
## [69] DBI_1.1.3 reshape2_1.4.4
## [71] R6_2.5.1 GenomicAlignments_1.30.0
## [73] knitr_1.42 dplyr_1.0.10
## [75] rtracklayer_1.54.0 bdsmatrix_1.3-6
## [77] fastmap_1.1.0 utf8_1.2.3
## [79] stringi_1.7.12 parallel_4.1.3
## [81] Rcpp_1.0.10 vctrs_0.5.2
## [83] tidyselect_1.2.0 xfun_0.37
## [85] coda_0.19-4