To identify a set of sites where ERG is changing with IK induction.
We’re going to use chipstne2 to help us understand different ways of defining differential sites and change our strategy accordingly.
suppressPackageStartupMessages({
library(IKdata)
library(chiptsne2)
library(seqsetvis)
library(GenomicRanges)
library(data.table)
})
options(mc.cores = 20)
treatment_colors = c("IK" = "purple", GFP = "green")
IKdata provides convenient shortcuts to setup bam and peak files with metadata.
bam_cfg_dt = cutnrun.setup_bam_files()
bam_cfg_dt = bam_cfg_dt[mark == "ERG"]
bam_cfg_dt = bam_cfg_dt[order(treatment)]
bam_cfg_dt$name = factor(bam_cfg_dt$name, levels = unique(bam_cfg_dt$name))
bam_cfg_dt$name = droplevels(bam_cfg_dt$name)
np_cfg_dt = cutnrun.setup_peak_files()
np_cfg_dt = np_cfg_dt[mark == "ERG"]
Load peaks.
peak_grs = easyLoad_narrowPeak(np_cfg_dt$file, file_names = np_cfg_dt$name)
ssvFeatureBars(peak_grs)
ssvFeatureEuler(peak_grs)
Overlap peaks.
olap_grs = ssvConsensusIntervalSets(peak_grs, min_number = 2, min_fraction = 0)
## min_number of 2 used for consensus threshold.
olap_grs = prepare_fetch_GRanges_names(olap_grs)
Define “differential” peaks such that peaks are in both cell lines in one condition and neither cell line in the other condition.
gr_peaks_GFP_only = subset(olap_grs, (PDX2_ERG_GFP_rep1 & MXP5_ERG_GFP_rep1) & !(PDX2_ERG_IK_rep1 | MXP5_ERG_IK_rep1))
gr_peaks_GFP_only$peak_group = "GFP"
gr_peaks_IK_only = subset(olap_grs, !(PDX2_ERG_GFP_rep1 | MXP5_ERG_GFP_rep1) & (PDX2_ERG_IK_rep1 & MXP5_ERG_IK_rep1))
gr_peaks_IK_only$peak_group = "IK"
diff_gr = c(
gr_peaks_GFP_only,
gr_peaks_IK_only
)
#invert selects regions NOT overlapping
notDiff_gr = subsetByOverlaps(olap_grs, diff_gr, invert = TRUE)
set.seed(0)
notDiff_gr = sample(notDiff_gr, length(diff_gr)/2)
notDiff_gr$peak_group = "not_diff"
query_gr = c(diff_gr, notDiff_gr)
Creating a FetchConfig to define how to fetch signal allows us to easily create a ChIPtsne2 object.
cfg = FetchConfig(bam_cfg_dt, read_mode = "bam_PE")
ct2.raw = ChIPtsne2.from_FetchConfig(cfg, query_gr)
setPositionVariable(ct2.raw, "bp")
setRegionVariable(ct2.raw, "ERG peak")
So far, ct2.raw only has fetched raw signal, no normalization has been applied, let alone t-SNE.
A standard workflow looks like this:
ct2 = ct2.raw %>%
setSeed(0) %>%
normalizeSignalRPM() %>%
calculateSignalCapValue() %>%
normalizeSignalCapValue() %>%
centerProfilesAndRefetch() %>%
normalizeSignalRPM() %>%
calculateSignalCapValue() %>%
normalizeSignalCapValue() %>%
flipProfilesToMatch() %>%
groupRegionsBySignalCluster() %>%
dimReduceTSNE() %>%
groupRegionsByDimReduceCluster() %>%
setValueVariable("normalized reads")
plotSignalHeatmap(ct2, group_VAR = "peak_group", name_FUN = function(x)x, relative_heatmap_width = .8)
plotSignalLinePlot(ct2,
group_VAR = "peak_group",
facet_VAR = "cell",
color_VAR = "treatment") +
scale_color_manual(values = treatment_colors)
plotSignalLinePlot(ct2,
group_VAR = "peak_group",
facet_VAR = "cell",
color_VAR = "treatment",
n_splines = 5)+
scale_color_manual(values = treatment_colors)
Treatment differences
plotDimReduceSummaryProfiles(ct2, x_bins = 10, color_VAR = "treatment") +
scale_color_manual(values = treatment_colors) +
facet_wrap(~cell) +
theme(panel.background = element_rect(fill = "gray65"),
panel.grid = element_line(color = "gray60"))
Cell to cell differences
plotDimReduceSummaryProfiles(ct2, x_bins = 10, color_VAR = "cell") +
facet_wrap(~treatment)
t-SNE arithmetic
cn = colnames(ct2)
names(cn) = cn
ct2.split = lapply(cn, function(x)ct2[, x])
lapply(ct2.split, dim)
## $PDX2_ERG_GFP_rep1
## [1] 7152 1
##
## $MXP5_ERG_GFP_rep1
## [1] 7152 1
##
## $PDX2_ERG_IK_rep1
## [1] 7152 1
##
## $MXP5_ERG_IK_rep1
## [1] 7152 1
ct2.gfp = (ct2.split$MXP5_ERG_GFP_rep1 + ct2.split$PDX2_ERG_GFP_rep1)/2
ct2.ik = (ct2.split$MXP5_ERG_IK_rep1 + ct2.split$PDX2_ERG_IK_rep1)/2
ct2.diff = ct2.ik - ct2.gfp
p.gfp = plotDimReducePoints(ct2.gfp, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.ik = plotDimReducePoints(ct2.ik, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.diff = plotDimReducePoints(ct2.diff,
has_symmetrical_limits = TRUE, point_color_limits = c(-.5, .5),
point_colors = c("green", "gray45", "gray35", "gray45", "purple"))+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.peaks = plotDimReducePoints(ct2, color_VAR = "peak_group") +
guides(color = guide_legend(override.aes = list(size = 3))) +
scale_color_manual(values = c(GFP = "green", IK = "purple", mixed = "gray"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cowplot::plot_grid(
p.gfp, p.ik, p.diff, p.peaks, nrow = 2
)
Peak group is not doing a good job capture areas where ERG differs between cell lines.
csaw_file = "~/R_workspace.combined/preB_IK_induction/output_csaw.091923/ERG_cutnrun/IK_vs_GFP_in_all.csaw_regions.csv"
dt = fread(csaw_file)
dt = dt[PValue < .05]
dt = dt[direction != "mixed"]
# split(dt, dt$direction)
table(dt$direction)
##
## down up
## 2477 1883
csaw_gr = GRanges(dt)
csaw_gr$up_treatment = ifelse(csaw_gr$direction == "up", "IK", "GFP")
csaw_gr$original_width = width(csaw_gr)
ct_gr = rowRanges(ct2)
olap = findOverlaps(query = ct_gr, subject = csaw_gr)
ct_gr$csaw_group = "ns"
ct_gr$csaw_group[queryHits(olap)] = csaw_gr$up_treatment[subjectHits(olap)]
ct_gr$csaw_width = NA
ct_gr$csaw_width[queryHits(olap)] = csaw_gr$original_width[subjectHits(olap)]
table(ct_gr$csaw_group)
##
## GFP IK ns
## 713 221 6218
ct2.csaw1 = setRegionMetaData(ct2, as.data.frame(ct_gr))
## setRegionMetaData ...
plotSignalHeatmap(ct2.csaw1, group_VARS = c("peak_group", "csaw_group"), name_FUN = function(x)x, relative_heatmap_width = .8)
plotSignalLinePlot(ct2.csaw1,
group_VAR = "csaw_group",
facet_VAR = "cell",
color_VAR = "treatment",
n_splines = 5)+
scale_color_manual(values = treatment_colors)
cn = colnames(ct2.csaw1)
names(cn) = cn
ct2.csaw1.split = lapply(cn, function(x)ct2.csaw1[, x])
lapply(ct2.csaw1.split, dim)
## $PDX2_ERG_GFP_rep1
## [1] 7152 1
##
## $MXP5_ERG_GFP_rep1
## [1] 7152 1
##
## $PDX2_ERG_IK_rep1
## [1] 7152 1
##
## $MXP5_ERG_IK_rep1
## [1] 7152 1
ct2.csaw1.gfp = (ct2.csaw1.split$MXP5_ERG_GFP_rep1 + ct2.csaw1.split$PDX2_ERG_GFP_rep1)/2
ct2.csaw1.ik = (ct2.csaw1.split$MXP5_ERG_IK_rep1 + ct2.csaw1.split$PDX2_ERG_IK_rep1)/2
ct2.csaw1.diff = ct2.csaw1.ik - ct2.csaw1.gfp
colnames(ct2.csaw1.gfp) = "mean GFP"
colnames(ct2.csaw1.ik) = "mean IK induced"
colnames(ct2.csaw1.diff) = "mean IK induced - GFP"
p.gfp = plotDimReducePoints(ct2.csaw1.gfp, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.ik = plotDimReducePoints(ct2.csaw1.ik, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.diff = plotDimReducePoints(ct2.csaw1.diff,
has_symmetrical_limits = TRUE, point_color_limits = c(-.5, .5),
point_colors = c("green", "gray45", "gray35", "gray45", "purple"))+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.peaks = plotDimReducePoints(ct2.csaw1, color_VAR = "csaw_group") +
guides(color = guide_legend(override.aes = list(size = 3))) +
scale_color_manual(values = c(GFP = "green", IK = "purple", mixed = "gray"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cowplot::plot_grid(
p.gfp, p.ik, p.diff, p.peaks, nrow = 2
)
Still not good!
plotDimReducePoints(subsetRow(ct2.csaw1, !is.na(csaw_width)), color_VAR = "csaw_width", point_size = 1) +
scale_color_gradientn(colours = c("blue", "red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Low width of csaw results (few diff windows in a row)
csaw_file = "~/R_workspace.combined/preB_IK_induction/output_csaw.091923/ERG_cutnrun/IK_vs_GFP_in_all.csaw_regions.csv"
dt = fread(csaw_file)
dt = dt[PValue < .05]
dt = dt[direction != "mixed"]
# split(dt, dt$direction)
table(dt$direction)
##
## down up
## 2477 1883
csaw_gr = GRanges(dt)
csaw_gr = subset(csaw_gr, width >= 1e3)
csaw_gr$up_treatment = ifelse(csaw_gr$direction == "up", "IK", "GFP")
csaw_gr$original_width = width(csaw_gr)
if(FALSE){
ct_gr = rowRanges(ct2)
olap = findOverlaps(query = ct_gr, subject = csaw_gr)
ct_gr$csaw_group = "ns"
ct_gr$csaw_group[queryHits(olap)] = csaw_gr$up_treatment[subjectHits(olap)]
ct_gr$csaw_width = NA
ct_gr$csaw_width[queryHits(olap)] = csaw_gr$original_width[subjectHits(olap)]
table(ct_gr$csaw_group)
ct2.csaw2 = setRegionMetaData(ct2, as.data.frame(ct_gr))
}
#adding annotation is now easy with GRanges
ct2.csaw2 = ct2 %>%
addRegionAnnotation(csaw_gr, anno_VAR = "up_treatment", anno_VAR_renames = "csaw_group", no_overlap_value = "ns") %>%
addRegionAnnotation(csaw_gr, anno_VAR = "original_width", anno_VAR_renames = "csaw_width", no_overlap_value = NA)
## addRegionAnnotation ...
## addRegionAnnotation ...
ct2.csaw3 = ct2 %>%
addRegionAnnotation(csaw_gr,
anno_VAR = c("up_treatment", "original_width"),
anno_VAR_renames = c("csaw_group", "csaw_width"),
no_overlap_value = c("ns", NA))
## addRegionAnnotation ...
plotSignalHeatmap(ct2.csaw2, group_VARS = c("peak_group", "csaw_group"), name_FUN = function(x)x, relative_heatmap_width = .8)
plotSignalLinePlot(ct2.csaw2,
group_VAR = "csaw_group",
facet_VAR = "cell",
color_VAR = "treatment",
n_splines = 5)+
scale_color_manual(values = treatment_colors)
cn = colnames(ct2.csaw2)
names(cn) = cn
ct2.csaw2.split = lapply(cn, function(x)ct2.csaw2[, x])
lapply(ct2.csaw2.split, dim)
## $PDX2_ERG_GFP_rep1
## [1] 7152 1
##
## $MXP5_ERG_GFP_rep1
## [1] 7152 1
##
## $PDX2_ERG_IK_rep1
## [1] 7152 1
##
## $MXP5_ERG_IK_rep1
## [1] 7152 1
ct2.csaw2.gfp = (ct2.csaw2.split$MXP5_ERG_GFP_rep1 + ct2.csaw2.split$PDX2_ERG_GFP_rep1)/2
ct2.csaw2.ik = (ct2.csaw2.split$MXP5_ERG_IK_rep1 + ct2.csaw2.split$PDX2_ERG_IK_rep1)/2
ct2.csaw2.diff = ct2.csaw2.ik - ct2.csaw2.gfp
p.gfp = plotDimReducePoints(ct2.csaw2.gfp, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.ik = plotDimReducePoints(ct2.csaw2.ik, has_symmetrical_limits = FALSE)+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.diff = plotDimReducePoints(ct2.csaw2.diff,
has_symmetrical_limits = TRUE, point_color_limits = c(-.5, .5),
point_colors = c("green", "gray35", "purple"))+
theme(panel.background = element_rect(fill = "gray30"), panel.grid = element_line(color = "gray40"))
p.peaks = plotDimReducePoints(subsetRow(ct2.csaw2, expr = csaw_group != "ns"), color_VAR = "csaw_group") +
guides(color = guide_legend(override.aes = list(size = 3))) +
scale_color_manual(values = c(GFP = "green", IK = "purple", mixed = "gray"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
cowplot::plot_grid(
p.gfp, p.ik, p.diff, p.peaks, nrow = 2
)