## the job of this script is to make zzz.stats.RData, which caches the
## results of the model smoothing in a nice R data structure
smoothdir <- file.path(Sys.getenv("HOME"),"smooth")
## TODO: maybe should actually check that all algo files are present
## Every entry of smoothdir is taken to be one profile id (cid).
processed.cids <- dir(smoothdir)
## nparams: named list with one element per cid. Each element is a
## numeric vector named by the algorithm directories found for that
## cid, holding the number of entries read from parameters.csv.gz, or
## 0 when that file does not exist.
nparams <- sapply(processed.cids,function(cid){
  algos <- dir(file.path(smoothdir,cid))
  ## vapply keeps the result a numeric vector even when algos is empty.
  vapply(algos,function(a){
    f <- file.path(smoothdir,cid,a,"parameters.csv.gz")
    if(file.exists(f)){
      as.numeric(length(scan(f,quiet=TRUE,what="char")))
    }else 0
  },numeric(1))
},simplify=FALSE)
processed.algos <- unique(unlist(lapply(nparams,names)))
## count.mat: algorithms x profiles, 0 where an algorithm directory is
## absent for a profile. It is built explicitly so that it stays a
## matrix with the right dimnames even with a single algorithm, a
## single profile, or a first profile that lacks some algorithms.
count.mat <- matrix(0,length(processed.algos),length(nparams),
                    dimnames=list(processed.algos,names(nparams)))
for(cid in names(nparams)){
  param.counts <- nparams[[cid]]
  count.mat[names(param.counts),cid] <- param.counts
}
## useful diagnostic == progress of the cluster
## One table per algorithm. apply(count.mat,1,table) would silently
## return a matrix instead of a list whenever all tables happen to
## have the same length, so build the list explicitly.
count.vecs <- sapply(rownames(count.mat),function(a){
  table(count.mat[a,])
},simplify=FALSE)
## An algorithm with more than one distinct count is not finished.
not.finished <- vapply(count.vecs,length,integer(1))>1
print(count.vecs[not.finished])
## assume max is done...
## done.mat: profiles x algorithms, TRUE where the count equals the
## largest count seen for that algorithm. The comparison recycles
## row.max down the rows of count.mat, i.e. one maximum per algorithm.
row.max <- vapply(seq_len(nrow(count.mat)),function(i){
  max(count.mat[i,])
},numeric(1))
done.mat <- t(count.mat==row.max)
## A profile is processed only if every known algorithm is done; a
## profile is never considered processed when no algorithm is known.
processed.cids <-
  rownames(done.mat)[rowSums(!done.mat)==0 & ncol(done.mat)>0]
data(neuroblastoma,package="neuroblastoma")
all.cids <- levels(neuroblastoma$profiles$profile.id)
to.process <- all.cids[!all.cids %in% processed.cids]
## Profiles without a directory in smoothdir have no column in
## count.mat, so only show the ones that have been started.
started <- to.process[to.process %in% colnames(count.mat)]
print(count.mat[not.finished,started,drop=FALSE])

## we have figured out which ones need processing, now do it.
## Index the data by profile id so each profile can be looked up by cid.
profiles <- split(neuroblastoma$profiles,neuroblastoma$profiles$profile.id)
annotations <-
  split(neuroblastoma$annotations,neuroblastoma$annotations$profile.id)
library(bams)
for(cid in to.process){
  print(cid)
  algos.to.run <- if(cid %in% rownames(done.mat)){
    ## Partially processed profile: only run what is not done yet.
    colnames(done.mat)[!done.mat[cid,]]
  }else{
    ## Profile with no row in done.mat (nothing on disk yet): indexing
    ## done.mat[cid,] would fail with "subscript out of bounds", so run
    ## every algorithm that done.mat knows about.
    colnames(done.mat)
  }
  pro <- profiles[[cid]]
  ann <- annotations[[cid]]
  ## smoothers comes from the bams package and is indexed by algorithm
  ## name here -- presumably a named list of smoothing functions.
  run.smoothers(pro,ann,smoothers[algos.to.run])
}
## The algorithm list is taken from the first profile's directory.
algos <- dir(file.path(smoothdir,all.cids[1]))
all.stats <- list()
chrom.order <- as.character(c(1,2,3,4,11,17))
## each all.stats array is nparam x nprofiles x nann
## (the third dimension is indexed by chrom.order)
for(a in algos){
  print(a)
  ## Read the parameter values from the same profile that defined
  ## algos. processed.cids only lists profiles that were already
  ## complete before this run, so processed.cids[1] is NA when there
  ## were none.
  f <- file.path(smoothdir,all.cids[1],a,"parameters.csv.gz")
  ## Try to read the parameters as numbers, fall back to strings.
  parameters <- tryCatch({
    scan(f,quiet=TRUE)
  },error=function(e){
    scan(f,quiet=TRUE,what="char")
  })
  param.names <- as.character(parameters)
  ## Per profile and chromosome: number of breakpoint / normal labels,
  ## 0 by default.
  breakpoint.anns <-
    matrix(0,length(all.cids),length(chrom.order),
           dimnames=list(profile=all.cids,chromosome=chrom.order))
  normal.anns <- breakpoint.anns
  ## NA marks (parameter, profile, chromosome) cells with no value.
  errors <-
    array(NA,
          c(length(param.names),length(all.cids),length(chrom.order)),
          list(param.names,all.cids,chrom.order))
  false.positive <- errors
  false.negative <- errors
  for(cid in all.cids){
    ## e: one row per parameter value, one column per label row of
    ## breakpoint.labels.csv.gz (columns are named by chromosome below).
    errfile <- file.path(smoothdir,cid,a,"errors.csv.gz")
    e <- as.matrix(read.csv(errfile,header=FALSE))
    labfile <- file.path(smoothdir,cid,a,"breakpoint.labels.csv.gz")
    anns <-
      read.csv(labfile,header=FALSE,
               col.names=c("profile.id","chromosome","min","max","annotation"))
    ## Same shape as e: every row repeats the annotation of each column.
    ann.mat <- matrix(as.character(anns$annotation),
                      nrow=nrow(e),ncol=ncol(e),byrow=TRUE)
    colnames(e) <- anns$chromosome
    errors[,cid,colnames(e)] <- e
    for(chr in colnames(e)){
      ## A chromosome whose first error entry is NA counts as having no
      ## labels at all.
      normal.anns[cid,chr] <- if(is.na(e[1,chr]))0 else
        nrow(subset(anns,chromosome==chr & annotation=="normal"))
      breakpoint.anns[cid,chr] <- if(is.na(e[1,chr]))0 else
        nrow(subset(anns,chromosome==chr & annotation=="breakpoint"))
    }
    ## Careful: is.na(NA & TRUE) but !is.na(NA & FALSE)
    ## so NA entries of e are mapped to NA explicitly.
    false.negative[,cid,colnames(e)] <-
      ifelse(is.na(e),NA,ifelse(e & ann.mat=="breakpoint",1L,0L))
    false.positive[,cid,colnames(e)] <-
      ifelse(is.na(e),NA,ifelse(e & ann.mat=="normal",1L,0L))
  }
  ## TODO: use "finished" file instead of checking for all these
  ## files.
  ## Read the contents of seconds.csv.gz for one profile and the
  ## current algorithm a.
  readsecs <- function(cid){
    secfile <- file.path(smoothdir,cid,a,"seconds.csv.gz")
    scan(secfile,quiet=TRUE)
  }
  seconds <- sapply(all.cids,readsecs)
  all.stats[[a]] <- list(errors=errors,
                         false.positive=false.positive,
                         false.negative=false.negative,
                         parameters=parameters,
                         seconds=seconds,
                         normal.anns=normal.anns,
                         breakpoint.anns=breakpoint.anns)
}
save(all.stats,file="zzz.stats.RData")