### Write down what package versions work with your R code, and ### attempt to download and load those packages. The first argument is ### the version of R that you used, e.g. "3.0.2" and then the rest of ### the arguments are package versions. For ### CRAN/Bioconductor/R-Forge/etc packages, write ### e.g. RColorBrewer="1.0.5" and if RColorBrewer is not installed ### then we use install.packages to get the most recent version, and ### warn if the installed version is not the indicated version. For ### GitHub packages, write "user/repo@commit" ### e.g. "tdhock/animint@f877163cd181f390de3ef9a38bb8bdd0396d08a4" and ### we use install_github to get it, if necessary. works_with_R <- function(Rvers,...){ local.lib <- file.path(getwd(), "library") dir.create(local.lib, showWarnings=FALSE, recursive=TRUE) .libPaths(c(local.lib, .libPaths())) pkg_ok_have <- function(pkg,ok,have){ stopifnot(is.character(ok)) if(!as.character(have) %in% ok){ warning("works with ",pkg," version ", paste(ok,collapse=" or "), ", have ",have) } } pkg_ok_have("R",Rvers,getRversion()) pkg.vers <- list(...) for(pkg.i in seq_along(pkg.vers)){ vers <- pkg.vers[[pkg.i]] pkg <- if(is.null(names(pkg.vers))){ "" }else{ names(pkg.vers)[[pkg.i]] } if(pkg == ""){# Then it is from GitHub. ## suppressWarnings is quieter than quiet. if(!suppressWarnings(require(requireGitHub))){ ## If requireGitHub is not available, then install it using ## devtools. if(!suppressWarnings(require(devtools))){ install.packages("devtools") require(devtools) } install_github("tdhock/requireGitHub") require(requireGitHub) } print(search()) requireGitHub(vers) }else{# it is from a CRAN-like repos. if(!suppressWarnings(require(pkg, character.only=TRUE))){ install.packages(pkg) } pkg_ok_have(pkg, vers, packageVersion(pkg)) library(pkg, character.only=TRUE) } } } options(repos=c( "http://www.bioconductor.org/packages/release/bioc", ##"http://r-forge.r-project.org", "http://cloud.r-project.org", "http://cran.r-project.org")) works_with_R( "4.1.0", data.table="1.14.0", future="1.21.0", future.apply="1.7.0", RJSONIO="1.3.1.4", R.utils="2.10.1", "tdhock/penaltyLearning@4e14a0b0e022d919884277d68b8e47bd158459f3", jointseg="1.0.2", gridExtra="2.3", neuroblastoma="1.0", tikzDevice="0.12.3.1", microbenchmark="1.4.7", animint2="1.0") curveAlignment <- readRDS("curveAlignment.rds") AUC.dt.list <- list() roc.dt.list <- list() err.dt.list <- list() roc.segs.list <- list() roc.win.err.list <- list() off.by <- 0.2 offset.prob <- curveAlignment$problems$prob.dir[2] for(offset in seq(-5, 5, by=off.by)){ print(offset) pred.dt <- data.table( curveAlignment$problems, pred.log.lambda=10+c(0, offset)) pred.eval <- curveAlignment$evaluation[pred.dt, on=list(prob.dir)] pred.eval[, possible.fn := possible.tp] roc <- penaltyLearning::ROChange( pred.eval, pred.dt, "prob.dir") ## compute derivative of Area under min(FP, FN). thresh.dt <- pred.eval[order(-min.log.lambda), { fp.diff <- diff(fp) fp.change <- fp.diff != 0 fn.diff <- diff(fn) fn.change <- fn.diff != 0 fp.dt <- if(any(fp.change))data.table( log.lambda=min.log.lambda[c(fp.change, FALSE)], fp=as.numeric(fp.diff[fp.change]), fn=0) fn.dt <- if(any(fn.change))data.table( log.lambda=min.log.lambda[c(fn.change, FALSE)], fp=0, fn=as.numeric(fn.diff[fn.change])) ##browser(expr=sample.id=="McGill0322") rbind(fp.dt, fn.dt) }, by=.(prob.dir)] pred.with.thresh <- thresh.dt[pred.dt, on=.(prob.dir), nomatch=0L] pred.with.thresh[, thresh := log.lambda - pred.log.lambda] first.dt <- pred.eval[max.log.lambda==Inf] thresh.ord <- pred.with.thresh[order(-thresh), .( prob.dir=c(NA, prob.dir), min.thresh=c(-Inf, log.lambda), max.thresh=c(log.lambda, Inf), fp = cumsum(c(sum(first.dt$fp), fp)), fn = cumsum(c(sum(first.dt$fn), fn)), change=c(0, ifelse(fp==0, fn, fp)) )] thresh.ord[, min.fp.fn := ifelse(fp=log.lambda)] roc.win.err.list[[paste(offset)]] <- curveAlignment$errors[roc.off, nomatch=0L, on=list( prob.dir, min.log.lambda<=log.lambda, max.log.lambda>=log.lambda)] off.min <- roc$roc[errors==min(errors)] roc$roc[, min.fp.fn := ifelse(fp0][1], by=offset][AUC.dt, .( offset, sample, min.thresh, AUM), on="offset"] animint( title="Changepoint detection ROC curve alignment problem", ##first=list(offset=0.5), out.dir="figure-curveAlignment", duration=list(offset=250), time=list(variable="offset", ms=250), profiles=ggplot()+ ylab("Number of aligned DNA sequence reads (coverage)")+ ggtitle( "Noisy coverage data, labels, and predicted model")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ theme_animint(width=1300)+ facet_grid(sample ~ window, scales="free", labeller=label_both)+ geom_text(aes( 43447, y, key=paste(offset, thresh, variable), label=paste0( variable, "=", value.str)), hjust=0, showSelected=c("offset", "thresh"), data=data.table( text.dt, window=1, sample="MSC83"))+ geom_tallrect(aes( xmin=labelStart/1e3, xmax=labelEnd/1e3, fill=annotation), data=curveAlignment$labels, alpha=0.5, color="grey")+ scale_linetype_manual( "Error type", values=c( correct=0, "false negative"=3, "false positive"=1))+ geom_tallrect(aes( xmin=chromStart/1e3, xmax=chromEnd/1e3, key=paste(chromStart, chromEnd), linetype=status), data=roc.win.err.dt, chunk_vars=chunk_vars, showSelected=c("offset", "thresh"), fill=NA, size=2, color="black")+ scale_fill_manual(values=ann.colors)+ geom_step(aes( chromStart/1e3, coverage), data=curveAlignment$profiles, color="grey50")+ geom_segment(aes( segStart/1e3, mean, key=paste(segStart, segEnd), xend=segEnd/1e3, yend=mean), color="green", alpha=0.7, chunk_vars=chunk_vars, showSelected=c("offset", "thresh"), data=roc.segs)+ geom_segment(aes( segStart/1e3, 0, key=paste(segStart, segEnd), xend=segEnd/1e3, yend=0), color="deepskyblue", showSelected=c("offset", "thresh"), chunk_vars=chunk_vars, size=3, alpha=0.7, data=roc.peaks)+ geom_point(aes( segStart/1e3, 0, key=paste(segStart, segEnd)), color="deepskyblue", showSelected=c("offset", "thresh"), chunk_vars=chunk_vars, size=4, fill="white", alpha=0.7, data=roc.peaks)+ scale_x_continuous( "Position on chrX (kb = kilo bases, reference genome hg19)", breaks=seq(4e4, 5e4, by=5)), metrics=ggplot()+ ggtitle( "AUC, select offset")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ facet_grid(variable ~ ., scales="free")+ geom_blank(aes( x, y), data=data.table( x=0, y=c(3.4, 1.6), variable="min.errors"))+ xlab("Offset = Difference between predicted values of samples")+ geom_point(aes( offset, value), fill=NA, data=AUC.tall)+ ylab("")+ make_tallrect(AUC.dt, "offset"), error=ggplot()+ ggtitle( "Error curves, select threshold")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ facet_grid(sample ~ ., scales="free")+ scale_color_manual(values=c( fp="red", fn="deepskyblue", "min(fp,fn)"="black"))+ scale_size_manual(values=c( fp=5, fn=3, "min(fp,fn)"=1))+ xlab("Prediction threshold")+ scale_y_continuous( "Number of incorrectly predicted labels", breaks=seq(0, 20, by=2))+ geom_tallrect(aes( xmin=min.thresh, xmax=max.thresh, key=min.thresh), showSelected=c("offset", "Errors"), color="grey50", alpha=0.5, data=min.tallrects)+ geom_text(aes( min.thresh, labels*0.9, key=1, label=paste0( "Min Error=", errors)), showSelected=c("offset", "Errors"), hjust=0, color="grey50", data=data.table( AUC.dt, Errors="Min", sample="Total"))+ geom_rect(aes( xmin=min.thresh, ymin=0, key=piece, xmax=max.thresh, ymax=value), chunk_vars=chunk_vars, showSelected="offset", fill="black", data=area.show)+ geom_segment(aes( min.thresh, value, key=paste(piece, error.type), color=error.type, size=error.type, xend=max.thresh, yend=value), chunk_vars=chunk_vars, showSelected="offset", data=err.dt.show)+ geom_text(aes( min.thresh, 0.5, key=1, label=sprintf("AUM=%.1f", AUM)), showSelected="offset", hjust=1, data=AUM.text)+ geom_tallrect(aes( xmin=min.thresh, xmax=max.thresh, tooltip=sprintf( "%.1f