if(!file.exists("packages.R"))setwd("..")
source("packages.R")
## Warning in pkg_ok_have("R", Rvers, getRversion()): works with R version
## 3.5.1, have 3.5.2
## Loading required package: ggplot2
## Loading required package: PeakSegOptimal
## Loading required package: data.table
## Warning in pkg_ok_have(pkg, vers, packageVersion(pkg)): works with
## data.table version 1.11.8, have 1.12.0
pepseq9.labels <- fread("pepseq9.labels.csv")
all.labels.list <- list()
all.cov.list <- list()
ann.colors <- c(
  noPeaks="#f6f4bf",
  peakStart="#ffafaf",
  peakEnd="#ff4c4c",
  peaks="#a445ee")
for(uname in unique(pepseq9.labels$uncleaved.name)){
  sample.labels <- pepseq9.labels[uname, on=list(uncleaved.name)]
  sample.labels[, uncleaved_ann := "noPeaks"]
  col.name.list <- list(
    cleaved=c("labelStart", "labelEnd", "label"),
    not=c("uncleaved_min", "uncleaved_max", "uncleaved_ann"))
  cov.dt.list <- list()
  show.labels.list <- list()
  for(sample.type in names(col.name.list)){
    f <- paste0(
      "bedGraph/", uname,
      ifelse(sample.type=="cleaved", "_cleaved", ""),
      ".bedGraph.gz")
    suffix.cov <- fread(
      cmd=paste("zcat", f), header=FALSE,
      drop=3,
      col.names=c("protein", "position", "coverage"))
    cov.dt.list[[sample.type]] <- data.table(sample.type, suffix.cov)
    col.name.vec <- c("protein", col.name.list[[sample.type]])
    ldt <- unique(sample.labels[, col.name.vec, with=FALSE])
    setnames(ldt, c("protein", col.name.list$cleaved))
    show.labels.list[[sample.type]] <- data.table(
      sample.type, ldt)
  }
  show.labels <- do.call(rbind, show.labels.list)
  cov.dt <- do.call(rbind, cov.dt.list)
  max.dt <- cov.dt[, list(
    max.pos=max(position)
  ), by=list(protein)][order(max.pos)]
  cov.dt[, Protein := factor(protein, max.dt$protein)]
  show.labels[, Protein := factor(protein, max.dt$protein)]
  all.labels.list[[uname]] <- data.table(uname, show.labels)
  all.cov.list[[uname]] <- data.table(uname, cov.dt)
  gg <- ggplot()+
    coord_cartesian(xlim=range(cov.dt$position), expand=FALSE)+
    theme_bw()+
    suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
    facet_grid(Protein + sample.type ~ ., scales="free")+
    scale_fill_manual(
      values=ann.colors, breaks=names(ann.colors))+
    penaltyLearning::geom_tallrect(aes(
      xmin=labelStart, xmax=labelEnd, fill=label),
      alpha=0.5,
      size=0.5,
      color="grey",
      data=show.labels)+
    geom_point(aes(
      position, coverage),
      shape=1,
      data=cov.dt) 
  cat(uname)
  print(gg)
}
## unbioDRB1.01.01

## DRB5.01.01

## DRB4.01.01

## DRB1.15.02

## DRB1.15.01

## DRB1.14.03

## DRB1.14.01

## DRB1.09.01

## DRB1.08.03

## DRB1.07.01

## DRB1.04.06

## DRB1.04.05

## DRB1.04.01

## DRB1.03.01

## DRB1.01.01

## DQB1.05.01

## DQB1.03.02

## DPB1.04.01

all.labels <- do.call(rbind, all.labels.list)
all.cov <- do.call(rbind, all.cov.list)

cov.max.dt <- all.cov[, list(
  max.cov=max(coverage)
), by=list(uname, sample.type)]
h.px <- sum(cov.max.dt$max.cov)/5
gg <- ggplot()+
  theme_bw()+
  suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
  facet_grid(uname + sample.type ~ Protein, scales="free", space="free_x")+
  coord_cartesian(expand=FALSE)+
  scale_fill_manual(
    values=ann.colors, breaks=names(ann.colors))+
  penaltyLearning::geom_tallrect(aes(
    xmin=labelStart, xmax=labelEnd, fill=label),
    alpha=0.5,
    size=0.5,
    color="grey",
    data=all.labels)+
  geom_point(aes(
    position, coverage),
    shape=1,
    data=all.cov)+
  scale_x_continuous(breaks=seq(100, 900, by=100))+
  scale_y_continuous(breaks=c(1, 10, seq(100, 10000, by=100)))

w.px <- 4000
png("figure-pepseq-labels/figure-all-free-x.png", w.px, h.px)
print(gg)
dev.off()
## png 
##   2
png("figure-pepseq-labels/figure-all-free.png", w.px, h.px)
gg2 <- gg+
  facet_grid(uname + sample.type ~ Protein, scales="free", space="free")+
  scale_y_log10()
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
print(gg2)
## Warning: Transformation introduced infinite values in continuous y-axis
dev.off()
## png 
##   2
pos.labels <- pepseq9.labels[c("peakStart", "peakEnd"), list(
  posStart=min(labelStart),
  posEnd=max(labelEnd)
), on=list(label), by=list(
  uncleaved.name, protein, uncleaved_min, uncleaved_max)]
pos.labels <- pepseq9.labels[c("peakStart", "peakEnd"), on=list(label)]
pos.labels[, Protein := factor(protein, max.dt$protein)]
max.dt[, Protein := factor(protein, max.dt$protein)]
ggl <- ggplot()+
  geom_segment(aes(
    labelStart, uncleaved.name,
    color=label,
    xend=labelEnd, yend=uncleaved.name),
    size=3,
    data=pos.labels)+
  geom_blank(aes(
    max.pos, pos.labels$uncleaved.name[1]),
    data=max.dt)+
  geom_blank(aes(
    0, pos.labels$uncleaved.name[1]),
    data=max.dt)+
  scale_color_manual(values=ann.colors, breaks=names(ann.colors))+
  ##coord_cartesian(expand=FALSE)+
  theme_bw()+
  ylab("uncleaved sample name")+
  scale_x_continuous(
    "positions/range of positive labels on cleaved samples",
    breaks=seq(100, 900, by=100))+
  suppressWarnings(theme(panel.margin=grid::unit(0, "lines")))+
  facet_grid(. ~ Protein, scales="free", space="free_x")
png("figure-pepseq-labels/figure-positive-labels.png", 1800, 200)
print(ggl)
dev.off()
## png 
##   2