R version 3.3.3 (2017-03-06) -- "Another Canoe"
Copyright (C) 2017 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ### Write down what package versions work with your R code, and
> ### attempt to download and load those packages. The first argument
> ### is the version of R that you used, e.g. "3.0.2", and the rest of
> ### the arguments are package versions. For
> ### CRAN/Bioconductor/R-Forge/etc. packages, write
> ### e.g. RColorBrewer="1.0.5"; if RColorBrewer is not installed,
> ### then we use install.packages to get the most recent version, and
> ### warn if the installed version is not the indicated version. For
> ### GitHub packages, write "user/repo@commit",
> ### e.g. "tdhock/animint@f877163cd181f390de3ef9a38bb8bdd0396d08a4",
> ### and we use install_github to get it, if necessary.
> works_with_R <- function(Rvers, ...){
+   pkg_ok_have <- function(pkg, ok, have){
+     stopifnot(is.character(ok))
+     if(!as.character(have) %in% ok){
+       warning("works with ", pkg, " version ",
+               paste(ok, collapse=" or "),
+               ", have ", have)
+     }
+   }
+   pkg_ok_have("R", Rvers, getRversion())
+   pkg.vers <- list(...)
+   for(pkg.i in seq_along(pkg.vers)){
+     vers <- pkg.vers[[pkg.i]]
+     pkg <- if(is.null(names(pkg.vers))){
+       ""
+     }else{
+       names(pkg.vers)[[pkg.i]]
+     }
+     if(pkg == ""){# Then it is from GitHub.
+       ## suppressWarnings() is quieter than require(..., quietly=TRUE).
+       if(!suppressWarnings(require(requireGitHub))){
+         ## If requireGitHub is not available, then install it using
+         ## devtools.
+         if(!suppressWarnings(require(devtools))){
+           install.packages("devtools")
+           require(devtools)
+         }
+         install_github("tdhock/requireGitHub")
+         require(requireGitHub)
+       }
+       requireGitHub(vers)
+     }else{# It is from a CRAN-like repository.
+       if(!suppressWarnings(require(pkg, character.only=TRUE))){
+         install.packages(pkg)
+       }
+       pkg_ok_have(pkg, vers, packageVersion(pkg))
+       library(pkg, character.only=TRUE)
+     }
+   }
+ }
> works_with_R(
+   "3.3.3",
+   partykit="2.0.2",# R CMD INSTALL partykit/pkg/devel/partykit/
+   libcoin="0.9.1",
+   mlt="0.1.3",
+   trtf="0.1.1",# R CMD INSTALL ctm/pkg/trtf/
+   "tdhock/penaltyLearning@2e9ad040a97e2baf4623549f114d3817b10e5fbf",
+   survival="2.41.2")
>
> data(neuroblastomaProcessed, package="penaltyLearning")
> finite.targets <- with(neuroblastomaProcessed, {
+   data.frame(log.penalty=target.mat[is.finite(target.mat)])
+ })
> m <- ctm(as.basis(~log.penalty, data=finite.targets), todistr="Normal")
> train.Surv <- with(neuroblastomaProcessed, {
+   Surv(target.mat[, 1], target.mat[, 2], type="interval2")
+ })
>
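> ## Notes on the setup above: ctm() with a linear basis in log.penalty
> ## and todistr="Normal" specifies a normal transformation model,
> ## i.e. effectively a Gaussian model on the log.penalty scale, and
> ## type="interval2" treats -Inf/Inf entries of target.mat as left-
> ## and right-censoring. A sketch (not run in this session) to check
> ## the resulting censoring pattern; for interval-type Surv objects
> ## the status column codes 0=right censored, 1=exact event, 2=left
> ## censored, 3=interval censored:
> ## table(unclass(train.Surv)[, "status"])
>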
> ## Train on n=50 observations and p=2 features => learned constant
> ## model (predict all 1).
> train.feature.mat <- neuroblastomaProcessed$feature.mat[, c("log.n", "log.mad")]
> train.df <- data.frame(log.penalty=train.Surv, train.feature.mat)[1:50, ]
> mlt.fit <- mlt(m, data=train.df)
> tree.fit <- trafotree(
+   m, formula = log.penalty ~ ., data=train.df,
+   mltargs=list(theta=coef(mlt.fit)))
> predict(tree.fit)
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
>
> ## Train on n=30 observations and p=2 features => learned constant
> ## model (predict all 1), and Warning message: In
> ## object$optimfct(theta, weights = weights, scale = scale, optim =
> ## optim) : Optimisation did not converge.
> train.feature.mat <- neuroblastomaProcessed$feature.mat[, c("log.n", "log.mad")]
> train.df <- data.frame(log.penalty=train.Surv, train.feature.mat)[1:30, ]
> mlt.fit <- mlt(m, data=train.df)
> tree.fit <- trafotree(
+   m, formula = log.penalty ~ ., data=train.df,
+   mltargs=list(theta=coef(mlt.fit)))
> predict(tree.fit)
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
>
> ## Train on all n=3418 observations and p=2 features => learns
> ## non-trivial model.
> train.feature.mat <- neuroblastomaProcessed$feature.mat[, c("log.n", "log.mad")]
> train.df <- data.frame(log.penalty=train.Surv, train.feature.mat)
> mlt.fit <- mlt(m, data=train.df)
> tree.fit <- trafotree(
+   m, formula = log.penalty ~ ., data=train.df,
+   mltargs=list(theta=coef(mlt.fit)))
> node_party(tree.fit) # 15-node tree.
[1] root
|   [2] V2 <= 6.48768
|   |   [3] V2 <= 6.03069
|   |   |   [4] V3 <= -2.09554
|   |   |   |   [5] V3 <= -2.99647 *
|   |   |   |   [6] V3 > -2.99647
|   |   |   |   |   [7] V2 <= 5.05625 *
|   |   |   |   |   [8] V2 > 5.05625 *
|   |   |   [9] V3 > -2.09554 *
|   |   [10] V2 > 6.03069
|   |   |   [11] V3 <= -2.04939
|   |   |   |   [12] V3 <= -3.37111 *
|   |   |   |   [13] V3 > -3.37111 *
|   |   |   [14] V3 > -2.04939 *
|   [15] V2 > 6.48768 *
> tree.fit$coef # 8 leaf nodes.
   (Intercept) log.penalty
5    1.0440323    1.746008
7    0.3447949    1.644704
8   -0.3755673    1.814851
9   -1.5051100    2.351602
12  -3.8683970   22.648800
13  -2.8250245    2.278834
14 -29.3307337   13.870975
15  -4.6792816    1.256380
>
> ## I'm not sure if this is the right way to get predicted output
> ## values:
> pred.vec <- predict(tree.fit)
> pred.log.penalty <- tree.fit$coef[names(pred.vec), "(Intercept)"]
> is.lo <- pred.log.penalty < neuroblastomaProcessed$target.mat[, 1]
> is.hi <- neuroblastomaProcessed$target.mat[, 2] < pred.log.penalty
> is.error <- is.lo | is.hi
> mean(is.error)
[1] 0.2244002
>
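> ## A note on the lookup above: the values of pred.vec appear to be
> ## terminal node ids (all 1 = root node for the constant fits above),
> ## and the rows of tree.fit$coef are named by node id, so an arguably
> ## more explicit version of the same lookup (a sketch, not run in
> ## this session) would index by the predicted values themselves:
> ## pred.log.penalty <- tree.fit$coef[as.character(pred.vec), "(Intercept)"]
>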
> ## Ultimately, I would like to train on all n=3418 observations and
> ## all p=117 features.
> train.df <- data.frame(
+   log.penalty=train.Surv, neuroblastomaProcessed$feature.mat)
> mlt.fit <- mlt(m, data=train.df)
> tree.fit <- trafotree(
+   m, formula = log.penalty ~ ., data=train.df,
+   mltargs=list(theta=coef(mlt.fit)))
> node_party(tree.fit) # 19-node tree.
[1] root
|   [2] V19 <= 265.69344
|   |   [3] V12 <= 0
|   |   |   [4] V35 <= 2.35227
|   |   |   |   [5] V96 <= 0
|   |   |   |   |   [6] V24 <= 1.87282
|   |   |   |   |   |   [7] V37 <= 1.39945
|   |   |   |   |   |   |   [8] V17 <= 2.50753
|   |   |   |   |   |   |   |   [9] V112 <= 0 *
|   |   |   |   |   |   |   |   [10] V112 > 0 *
|   |   |   |   |   |   |   [11] V17 > 2.50753 *
|   |   |   |   |   |   [12] V37 > 1.39945 *
|   |   |   |   |   [13] V24 > 1.87282 *
|   |   |   |   [14] V96 > 0 *
|   |   |   [15] V35 > 2.35227
|   |   |   |   [16] V34 <= 10.98913 *
|   |   |   |   [17] V34 > 10.98913 *
|   |   [18] V12 > 0 *
|   [19] V19 > 265.69344 *
> tree.fit$coef
    (Intercept) log.penalty
9    0.44226704    1.927508
10   1.73954183    2.212042
11   0.03508709    3.475329
12  -0.60404144    1.173998
13  -0.60057859    2.291982
14  -2.15048796    3.113779
16  -3.97839478    3.313253
17 -29.26513323   13.843061
18  -4.21761316    1.551078
19  -7.40259579    1.962968
>
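> ## The splits above print as V2, V19, etc. instead of the original
> ## feature names. Assuming Vk simply numbers the columns of train.df
> ## (an assumption consistent with V2/V3 standing for log.n/log.mad,
> ## columns 2 and 3, in the p=2 tree above), a sketch (not run in this
> ## session) to recover the names of the split variables:
> ## split.cols <- c(19, 12, 35, 96, 24, 37, 17, 112, 34)
> ## colnames(train.df)[split.cols]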