### Write down what package versions work with your R code, and ### attempt to download and load those packages. The first argument is ### the version of R that you used, e.g. "3.0.2" and then the rest of ### the arguments are package versions. For ### CRAN/Bioconductor/R-Forge/etc packages, write ### e.g. RColorBrewer="1.0.5" and if RColorBrewer is not installed ### then we use install.packages to get the most recent version, and ### warn if the installed version is not the indicated version. For ### GitHub packages, write "user/repo@commit" ### e.g. "tdhock/animint@f877163cd181f390de3ef9a38bb8bdd0396d08a4" and ### we use install_github to get it, if necessary. works_with_R <- function(Rvers,...){ local.lib <- file.path(getwd(), "library") dir.create(local.lib, showWarnings=FALSE, recursive=TRUE) .libPaths(c(local.lib, .libPaths())) pkg_ok_have <- function(pkg,ok,have){ stopifnot(is.character(ok)) if(!as.character(have) %in% ok){ warning("works with ",pkg," version ", paste(ok,collapse=" or "), ", have ",have) } } pkg_ok_have("R",Rvers,getRversion()) pkg.vers <- list(...) for(pkg.i in seq_along(pkg.vers)){ vers <- pkg.vers[[pkg.i]] pkg <- if(is.null(names(pkg.vers))){ "" }else{ names(pkg.vers)[[pkg.i]] } if(pkg == ""){# Then it is from GitHub. ## suppressWarnings is quieter than quiet. if(!suppressWarnings(require(requireGitHub))){ ## If requireGitHub is not available, then install it using ## devtools. if(!suppressWarnings(require(devtools))){ install.packages("devtools") require(devtools) } install_github("tdhock/requireGitHub") require(requireGitHub) } print(search()) requireGitHub(vers) }else{# it is from a CRAN-like repos. if(!suppressWarnings(require(pkg, character.only=TRUE))){ install.packages(pkg) } pkg_ok_have(pkg, vers, packageVersion(pkg)) library(pkg, character.only=TRUE) } } } options(repos=c( "http://www.bioconductor.org/packages/release/bioc", ##"http://r-forge.r-project.org", "http://cloud.r-project.org", "http://cran.r-project.org")) works_with_R( "4.1.0", data.table="1.14.0", future="1.21.0", future.apply="1.7.0", RJSONIO="1.3.1.4", R.utils="2.10.1", "tdhock/penaltyLearning@4e14a0b0e022d919884277d68b8e47bd158459f3", jointseg="1.0.2", gridExtra="2.3", neuroblastoma="1.0", tikzDevice="0.12.3.1", microbenchmark="1.4.7", animint2="1.0") data(neuroblastomaProcessed, package="penaltyLearning") counts <- neuroblastomaProcessed$errors[, { diff.tab <- table(factor(diff(errors), c("-1", "0", "1"))) L <- as.list(diff.tab) size <- max.log.lambda-min.log.lambda for(fun.name in c("min", "max")){ fun <- get(fun.name) L[[paste0(fun.name, ".size")]] <- min(size[errors==fun(errors)]) } L$mean.size <- with(L, (min.size+max.size)/2) L }, by=list(profile.id, chromosome)] two.changes <- counts[1 < `-1` | 1 < `1`] two.changes <- counts[order(-`-1`, -`1`, -mean.size)][profile.id != 481][1:8] two.changes[, panel := paste0( ifelse(`-1`==2, "p", "n"), #positive or negative label profile.id, ".", chromosome)] some.err <- neuroblastomaProcessed$errors[two.changes, on=list( profile.id, chromosome)] err.sizes <- c( fp=3, fn=2, errors=1) err.colors <- c( fp="red", fn="deepskyblue", errors="black") some.err.tall <- melt( some.err, measure.vars=names(err.colors)) ggplot()+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ facet_grid(profile.id + chromosome ~ .)+ geom_segment(aes( min.log.lambda, value, xend=max.log.lambda, yend=value, color=variable, size=variable), data=some.err.tall)+ scale_y_continuous( "errors", breaks=c(0,1), limits=c(-0.2, 1.2))+ scale_color_manual(values=err.colors)+ scale_size_manual(values=err.sizes) some.err.tall[, value.i := cumsum( c(FALSE, diff(value) != 0) ), by=list(panel, profile.id, chromosome, variable)] segs.err.tall <- some.err.tall[, list( min.log.lambda=min(min.log.lambda), max.log.lambda=max(max.log.lambda), value=value[1] ), by=list(panel, profile.id, chromosome, variable, value.i)] segs.min.tall <- segs.err.tall[, { .SD[value==min(value)] }, by=list(panel, profile.id, chromosome, variable)] segs.min.err <- segs.min.tall[variable=="errors"] segs.min.err[, mid.log.lambda := (min.log.lambda+max.log.lambda)/2] set.seed(1) size <- segs.min.err[is.finite(mid.log.lambda), log(1+rexp(.N, mean( max.log.lambda-min.log.lambda)))] size <- 0.1 segs.min.err[, pred.log.lambda := ifelse( min.log.lambda == -Inf, max.log.lambda-size, ifelse( max.log.lambda == Inf, min.log.lambda+size, mid.log.lambda))] segs.min.err[, interval := ifelse( is.finite(mid.log.lambda), "finite", "infinite")] auc.dt <- segs.min.err[, { L <- penaltyLearning::ROChange( some.err, .SD, c("panel")) with(L, data.table(auc, thresholds[threshold=="predicted"])) }, by=list(interval)] roc.dt <- segs.min.err[, { L <- penaltyLearning::ROChange( some.err, .SD, c("panel")) L$roc }, by=list(interval)] roc.dt[, row.i := 1:.N, by=interval] ggplot()+ theme_bw()+ geom_path(aes( FPR, TPR, color=interval, size=interval), data=roc.dt)+ geom_point(aes( FPR, TPR, color=interval, size=interval), fill=NA, shape=21, data=auc.dt)+ scale_size_manual(values=c( finite=2, infinite=1)) ggplot()+ theme_bw()+ theme(panel.margin=grid::unit(0, 'lines'))+ facet_grid(. ~ interval)+ geom_path(aes( FPR, TPR), data=roc.dt)+ geom_point(aes( FPR, TPR), fill=NA, shape=21, data=auc.dt)+ geom_text(aes( FPR, TPR, label=row.i), data=roc.dt) roc.tall <- melt( roc.dt, measure.vars=names(err.colors) )[order(interval, variable, -min.thresh)] vert.dt <- roc.tall[, { data.table( thresh=min.thresh[-.N], value=value[-.N], next.value=value[-1]) }, by=list(interval, variable)] ggplot()+ theme_bw()+ theme(panel.margin=grid::unit(0, 'lines'))+ facet_grid(. ~ interval)+ geom_segment(aes( min.thresh, value, xend=max.thresh, yend=value, size=variable, color=variable), data=roc.tall)+ geom_segment(aes( thresh, value, xend=thresh, yend=next.value, color=variable), data=vert.dt)+ scale_color_manual(values=err.colors)+ scale_size_manual(values=err.sizes) data(neuroblastoma, package="neuroblastoma") some.profiles <- data.table(neuroblastoma$profiles)[two.changes, on=list( profile.id, chromosome)] some.labels <- data.table(neuroblastoma$annotations)[two.changes, on=list( profile.id, chromosome)] some.changes.list <- list() some.models.list <- list() for(profile.i in 1:nrow(two.changes)){ profile.info <- two.changes[profile.i] one.profile <- some.profiles[profile.info, on=list( profile.id, chromosome, panel)] max.segments <- 20L fit <- jointseg::Fpsn(one.profile$logratio, max.segments) some.models.list[[profile.i]] <- data.table( profile.info, n.segments=1:max.segments, loss=fit$J.est) for(n.segments in 2:max.segments){ end.i <- fit$t.est[n.segments, 1:n.segments] before <- end.i[-length(end.i)] after <- before+1 change.pos <- one.profile[, (position[before]+position[after])/2] some.changes.list[[paste(profile.i, n.segments)]] <- data.table( profile.info, n.segments, change.pos) } } some.models <- do.call(rbind, some.models.list) some.changes <- do.call(rbind, some.changes.list) err.list <- penaltyLearning::labelError( some.models, some.labels, some.changes, change.var="change.pos", label.vars=c("min", "max"), model.vars="n.segments", problem.vars="panel") thresh.dt <- roc.dt[, { data.table(thresh=seq( floor(min(max.thresh)), ceiling(max(min.thresh)), by=0.04)) }] min.thresh <- -2.1 max.thresh <- 2 thresh.dt <- data.table(thresh=seq(min.thresh, max.thresh, by=0.04)) thresh.pred <- data.table(id=1, thresh.dt)[data.table( id=1, segs.min.err), on=list(id), allow.cartesian=TRUE] thresh.pred[, pred.plus.thresh := pred.log.lambda+thresh] thresh.pred[, pred0 := pred.plus.thresh] setkey(thresh.pred, panel, pred0, pred.plus.thresh) setkey(some.err, panel, min.log.lambda, max.log.lambda) show.pred <- foverlaps( some.err, thresh.pred[, .(thresh, interval, panel, pred0, pred.plus.thresh)], nomatch=0L) show.pred[, value.fac := ifelse(errors==0, "correct", "error")] show.label.errors <- show.pred[err.list$label.errors, nomatch=0L, on=list( panel, n.segments)] show.changes <- show.pred[some.changes, nomatch=0L, on=list( panel, n.segments), allow.cartesian=TRUE] thresh.dt[, thresh0 := thresh] setkey(thresh.dt, thresh, thresh0) setkey(roc.dt, min.thresh, max.thresh) roc.points <- foverlaps(roc.dt, thresh.dt, nomatch=0L) tail.size <- 0.5 roc.points[, tail.thresh := thresh-tail.size] setkey(roc.points, interval, tail.thresh, thresh) setkey(roc.dt, interval, min.thresh, max.thresh) roc.tails <- foverlaps( roc.points[, .(thresh, interval, tail.thresh)], roc.dt[, .(interval, min.thresh, max.thresh, FPR, TPR)], nomatch=0L) err.sizes.animint <- c( fp=5, fn=2.5, errors=1.5) roc.segs <- roc.dt[order(interval, min.thresh), { dt <- data.table( from.FPR=FPR[-.N], to.FPR=FPR[-1], from.TPR=TPR[-.N], to.TPR=TPR[-1]) print(dt) ##browser() dt }, by=list(interval)] yexp <- 1 roc.u <- roc.points[, .SD[1], by=list(FPR, TPR, interval)] roc.color <- "violet" vstat <- c( fp="false positive", fn="false negative", errors="errors") vert.dt[, status := vstat[variable] ] roc.tall[, status := vstat[variable] ] segs.err.tall[, status := vstat[variable] ] err.leg <- "Error type" animint( title="ROC curves for neuroblastoma data with several minima", duration=list(thresh=250), time=list(variable="thresh", ms=300), samples=ggplot()+ ggtitle("Sample label error curves")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ theme_animint(width=400)+ facet_grid(panel ~ .)+ geom_point(aes( pred.plus.thresh, errors, key=interval, fill=value.fac), color=roc.color, size=4, showSelected=c("interval", "thresh"), data=show.pred)+ scale_fill_manual( "Prediction", values=c(correct="white", error="black"))+ geom_tallrect(aes( xmin=pred.plus.thresh-tail.size, xmax=pred.plus.thresh, key=interval), fill=roc.color, color=NA, alpha=0.2, showSelected=c("interval", "thresh"), data=show.pred)+ geom_vline(aes( xintercept=pred.plus.thresh, key=interval, linetype=interval), color=roc.color, showSelected="thresh", data=show.pred)+ geom_segment(aes( min.log.lambda, value, xend=max.log.lambda, yend=value, color=status, size=status), data=segs.err.tall)+ scale_y_continuous( "errors", breaks=c(0,1), limits=c(0-yexp, 1+yexp))+ scale_x_continuous("log(penalty)")+ scale_color_manual( err.leg, values=structure(err.colors[names(vstat)], names=vstat))+ scale_size_manual( err.leg, values=structure(err.sizes[names(vstat)], names=vstat)), thresholds=ggplot()+ coord_cartesian(xlim=c(min.thresh, max.thresh))+ ggtitle("Total label error curves, select threshold")+ scale_color_manual( err.leg, values=structure(err.colors[names(vstat)], names=vstat))+ scale_size_manual( err.leg, values=structure(err.sizes[names(vstat)], names=vstat))+ theme_bw()+ theme_animint(width=600)+ facet_grid(. ~ interval)+ geom_segment(aes( min.thresh, value, xend=max.thresh, yend=value, size=status, color=status), showSelected="interval", data=roc.tall)+ geom_segment(aes( thresh, value, xend=thresh, yend=next.value, color=status), showSelected="interval", data=vert.dt)+ geom_tallrect(aes( xmin=thresh-tail.size, xmax=thresh, key=1), data=thresh.dt, fill=roc.color, color=NA, showSelected="thresh", alpha=0.2)+ make_tallrect(thresh.dt, "thresh")+ scale_y_continuous( "errors", breaks=seq(0, 20, by=2))+ guides(color="none", size="none")+ scale_x_continuous("Threshold = constant added to predicted values"), roc=ggplot()+ ggtitle("ROC curves, select threshold")+ theme_bw()+ theme(panel.margin=grid::unit(0, 'lines'))+ facet_grid(. ~ interval)+ geom_text(aes( 0.2, 0.7, label=sprintf("AUC=%.2f", auc)), showSelected="interval", data=auc.dt)+ geom_segment(aes( from.FPR, from.TPR, xend=to.FPR, yend=to.TPR), data=roc.segs, showSelected="interval", alpha=0.2)+ geom_path(aes( FPR, TPR, key=1), color=roc.color, showSelected=c("thresh", "interval"), size=3, data=roc.tails)+ coord_equal()+ scale_x_continuous( "False Positive Rate", breaks=seq(0, 1, by=0.2))+ scale_y_continuous( "True Positive Rate", breaks=seq(0, 1, by=0.2))+ geom_point(aes( FPR, TPR, label=row.i), clickSelects="thresh", size=4, alpha=0.7, showSelected="interval", data=roc.u)+ geom_point(aes( FPR, TPR, key=1, label=row.i), showSelected=c("interval", "thresh"), color=roc.color, size=4, data=roc.points), profiles=ggplot()+ ggtitle("Noisy data with predicted changes and label errors")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ theme_animint(height=400, width=1400)+ facet_grid(panel ~ interval, scales="free")+ geom_tallrect(aes( xmin=min/1e6, xmax=max/1e6, fill=annotation), data=some.labels, alpha=0.5, color="grey")+ scale_linetype_manual( err.leg, values=c( correct=0, "false negative"=3, "false positive"=1))+ geom_tallrect(aes( xmin=min/1e6, xmax=max/1e6, key=paste(min, max), linetype=status), data=show.label.errors, showSelected=c("thresh", "interval"), fill=NA, size=2, color="black")+ scale_fill_manual(values=penaltyLearning::change.colors)+ geom_point(aes( position/1e6, logratio), color="grey50", data=some.profiles)+ xlab("Position on chromosome (Mb = mega bases)")+ geom_vline(aes( xintercept=change.pos/1e6, key=change.pos), data=show.changes, showSelected=c("thresh", "interval"), color="green"), out.dir="figure-neuroblastomaProcessed-complex")