if(!file.exists("packages.R"))setwd("..")
source("packages.R")
## Warning in pkg_ok_have("R", Rvers, getRversion()): works with R version
## 3.5.1, have 3.5.2
## Loading required package: ggplot2
## Loading required package: PeakSegOptimal
## Loading required package: data.table
## Warning in pkg_ok_have(pkg, vers, packageVersion(pkg)): works with
## data.table version 1.11.8, have 1.12.0
pepseq9.labels <- fread("pepseq9.labels.csv")
all.labels.list <- list()
all.cov.list <- list()
ann.colors <- c(
noPeaks="#f6f4bf",
peakStart="#ffafaf",
peakEnd="#ff4c4c",
peaks="#a445ee")
for(uname in unique(pepseq9.labels$uncleaved.name)){
sample.labels <- pepseq9.labels[uname, on=list(uncleaved.name)]
sample.labels[, uncleaved_ann := "noPeaks"]
col.name.list <- list(
cleaved=c("labelStart", "labelEnd", "label"),
not=c("uncleaved_min", "uncleaved_max", "uncleaved_ann"))
cov.dt.list <- list()
show.labels.list <- list()
for(sample.type in names(col.name.list)){
f <- paste0(
"bedGraph/", uname,
ifelse(sample.type=="cleaved", "_cleaved", ""),
".bedGraph.gz")
suffix.cov <- fread(
cmd=paste("zcat", f), header=FALSE,
drop=3,
col.names=c("protein", "position", "coverage"))
cov.dt.list[[sample.type]] <- data.table(sample.type, suffix.cov)
col.name.vec <- c("protein", col.name.list[[sample.type]])
ldt <- unique(sample.labels[, col.name.vec, with=FALSE])
setnames(ldt, c("protein", col.name.list$cleaved))
show.labels.list[[sample.type]] <- data.table(
sample.type, ldt)
}
show.labels <- do.call(rbind, show.labels.list)
cov.dt <- do.call(rbind, cov.dt.list)
max.dt <- cov.dt[, list(
max.pos=max(position)
), by=list(protein)][order(max.pos)]
cov.dt[, Protein := factor(protein, max.dt$protein)]
show.labels[, Protein := factor(protein, max.dt$protein)]
all.labels.list[[uname]] <- data.table(uname, show.labels)
all.cov.list[[uname]] <- data.table(uname, cov.dt)
gg <- ggplot()+
coord_cartesian(xlim=range(cov.dt$position), expand=FALSE)+
theme_bw()+
suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
facet_grid(Protein + sample.type ~ ., scales="free")+
scale_fill_manual(
values=ann.colors, breaks=names(ann.colors))+
penaltyLearning::geom_tallrect(aes(
xmin=labelStart, xmax=labelEnd, fill=label),
alpha=0.5,
size=0.5,
color="grey",
data=show.labels)+
geom_point(aes(
position, coverage),
shape=1,
data=cov.dt)
cat(uname)
print(gg)
}
## unbioDRB1.01.01

## DRB5.01.01

## DRB4.01.01

## DRB1.15.02

## DRB1.15.01

## DRB1.14.03

## DRB1.14.01

## DRB1.09.01

## DRB1.08.03

## DRB1.07.01

## DRB1.04.06

## DRB1.04.05

## DRB1.04.01

## DRB1.03.01

## DRB1.01.01

## DQB1.05.01

## DQB1.03.02

## DPB1.04.01

all.labels <- do.call(rbind, all.labels.list)
all.cov <- do.call(rbind, all.cov.list)
cov.max.dt <- all.cov[, list(
max.cov=max(coverage)
), by=list(uname, sample.type)]
h.px <- sum(cov.max.dt$max.cov)/5
gg <- ggplot()+
theme_bw()+
suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
facet_grid(uname + sample.type ~ Protein, scales="free", space="free_x")+
coord_cartesian(expand=FALSE)+
scale_fill_manual(
values=ann.colors, breaks=names(ann.colors))+
penaltyLearning::geom_tallrect(aes(
xmin=labelStart, xmax=labelEnd, fill=label),
alpha=0.5,
size=0.5,
color="grey",
data=all.labels)+
geom_point(aes(
position, coverage),
shape=1,
data=all.cov)+
scale_x_continuous(breaks=seq(100, 900, by=100))+
scale_y_continuous(breaks=c(1, 10, seq(100, 10000, by=100)))
w.px <- 4000
png("figure-pepseq-labels/figure-all-free-x.png", w.px, h.px)
print(gg)
dev.off()
## png
## 2
png("figure-pepseq-labels/figure-all-free.png", w.px, h.px)
gg2 <- gg+
facet_grid(uname + sample.type ~ Protein, scales="free", space="free")+
scale_y_log10()
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
print(gg2)
## Warning: Transformation introduced infinite values in continuous y-axis
dev.off()
## png
## 2
pos.labels <- pepseq9.labels[c("peakStart", "peakEnd"), list(
posStart=min(labelStart),
posEnd=max(labelEnd)
), on=list(label), by=list(
uncleaved.name, protein, uncleaved_min, uncleaved_max)]
pos.labels <- pepseq9.labels[c("peakStart", "peakEnd"), on=list(label)]
pos.labels[, Protein := factor(protein, max.dt$protein)]
max.dt[, Protein := factor(protein, max.dt$protein)]
ggl <- ggplot()+
geom_segment(aes(
labelStart, uncleaved.name,
color=label,
xend=labelEnd, yend=uncleaved.name),
size=3,
data=pos.labels)+
geom_blank(aes(
max.pos, pos.labels$uncleaved.name[1]),
data=max.dt)+
geom_blank(aes(
0, pos.labels$uncleaved.name[1]),
data=max.dt)+
scale_color_manual(values=ann.colors, breaks=names(ann.colors))+
##coord_cartesian(expand=FALSE)+
theme_bw()+
ylab("uncleaved sample name")+
scale_x_continuous(
"positions/range of positive labels on cleaved samples",
breaks=seq(100, 900, by=100))+
suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
facet_grid(. ~ Protein, scales="free", space="free_x")
png("figure-pepseq-labels/figure-positive-labels.png", 1800, 200)
print(ggl)
dev.off()
## png
## 2