source("packages.R") requireGitHub::requireGitHub_package( "tdhock", "postCP_Improvement/postCP", "99f02e241bb66fd2c9cb80e4673c152d75f605da", "postCP") source("packages.R") ## Read clinical data for six patients (relapse or ok, five years ## after treatment). clinical.limited <- read.csv("clinical-limited.csv") ids.str <- paste(clinical.limited$profile.id) relapse.profile <- with(clinical.limited, paste(relapse, profile.id)) names(relapse.profile) <- ids.str ## Consider the subset of profiles and labels for these six patients. someProfiles <- function(all.profiles){ some <- subset(all.profiles, profile.id %in% ids.str) some$relapse.profile <- relapse.profile[paste(some$profile.id)] data.table(some) } data(neuroblastoma) profiles <- someProfiles(neuroblastoma$profiles) labels <- someProfiles(neuroblastoma$annotations) ## Plot noisy data sets. ggplot()+ ggtitle("unsupervised change-point detection = only noisy data series")+ theme_bw()+ theme(panel.margin=grid::unit(0, "lines"))+ facet_grid(relapse.profile ~ chromosome, scales="free", space="free_x")+ geom_point(aes(position/1e6, logratio), data=profiles, shape=1)+ scale_x_continuous( "position on chromosome (mega bases)", breaks=c(100, 200)) zoom.pid <- 4 zoom.chr <- 2 zoom.pro <- profiles[profile.id==zoom.pid & chromosome==zoom.chr] max.segments <- 4 fit <- Segmentor3IsBack::Segmentor(zoom.pro$logratio, model=2, Kmax=max.segments) zoom.segs.list <- list() zoom.loss.vec <- rep(NA, max.segments) for(n.segments in 1:max.segments){ end <- fit@breaks[n.segments, 1:n.segments] data.before.change <- end[-n.segments] data.after.change <- data.before.change+1 pos.before.change <- as.integer( (zoom.pro$position[data.before.change]+ zoom.pro$position[data.after.change])/2) start <- c(1, data.after.change) chromStart <- c(zoom.pro$position[1], pos.before.change) chromEnd <- c(pos.before.change, max(zoom.pro$position)) seg.mean.vec <- fit@parameters[n.segments, 1:n.segments] zoom.segs.list[[n.segments]] <- data.frame( profile.id=zoom.pid, chromosome=zoom.chr, n.segments, start, end, chromStart, chromEnd, mean=seg.mean.vec, row.names=NULL) data.mean.vec <- rep(seg.mean.vec, end-start+1) stopifnot(length(data.mean.vec)==nrow(zoom.pro)) sq.res.vec <- (zoom.pro$logratio-data.mean.vec)^2 zoom.loss.vec[n.segments] <- sum(sq.res.vec) } n.segs <- 4 var.est <- zoom.loss.vec[n.segs]/nrow(zoom.pro) sd.est <- sqrt(var.est) segs <- data.table(zoom.segs.list[[n.segs]]) changes <- segs[-.N, ] fit <- postcp(logratio ~ 1, family=gaussian(), data=zoom.pro, bp=changes$end) str(fit) prob.dt <- with(fit, data.table( prob=as.numeric(post.cp), observation=as.integer(row(post.cp)), change=as.integer(col(post.cp)))) ggplot()+ geom_point(aes(seq_along(logratio), logratio),data=data.table(zoom.pro, what="data"))+ geom_line(aes(observation, prob, color=factor(change)), data=data.table(prob.dt, what="prob"))+ geom_vline(aes(xintercept=end+0.5, color=factor(seq_along(end))), data=changes) prob.dt[, prob.norm := prob/sum(prob), by=change] ggplot()+ geom_point(aes(seq_along(logratio), logratio),data=data.table(zoom.pro, what="data"))+ geom_line(aes(observation, prob.norm, color=factor(change)), data=data.table(prob.dt, what="prob"))+ geom_vline(aes(xintercept=end+0.5, color=factor(seq_along(end))), data=changes)+ ylim(0, 0.1) prob.dt[, list(total=sum(prob.norm)), by=change] changes[, change := 1:.N] join.dt <- prob.dt[changes, on=list(change)] prob.dist <- join.dt[, list( total.prob=sum(prob.norm) ), by=list(change, dist.from.change=abs(end-observation))] setkey(prob.dist, change, dist.from.change) prob.dist[, cum.prob := cumsum(total.prob), by=change] min.dist <- prob.dist[0.67 < cum.prob, .SD[1,], by=change] error.bands <- changes[min.dist, on=list(change)] gg <- ggplot()+ geom_point(aes(seq_along(logratio), logratio),data=data.table(zoom.pro, what="data"))+ penaltyLearning::geom_tallrect(aes( xmin=end-dist.from.change, xmax=end+dist.from.change, fill=factor(change)), alpha=0.5, color=NA, data=error.bands)+ geom_vline(aes(xintercept=end+0.5, color=factor(seq_along(end))), data=changes) print(gg) pdf("figure-postCP.pdf") print(gg) dev.off()