Comparison of R graphics subsystems
Different R graphics packages/functions are good for different things. This tutorial is meant to give you a feel for the best uses of each.
| | good | bad | multipanel axes | speed/animations | legends | direct labels | repetition | documentation |
|---|---|---|---|---|---|---|---|---|
| base | flexibility | | different | fast | bad | bad | often | good |
| lattice | | | same | fast | 1 | good | sometimes | good |
| ggplot2 | grammar | splom | same | slow | several | good | little | extension? |
Example 1: base graphics wins for multipanel heterogeneous displays where the axes are not the same.
## Example 1: four heterogeneous panels (three scatterplots and one
## boxplot) on a single page, each panel with its own axes.

## Split the device into a 2 x 2 grid of panels.
par(mfrow = c(2, 2))

## Scatterplot of iris column j (y axis) against iris column i (x axis),
## with points coloured by species.
##   i, j: column indices into iris (also used to look up the axis labels).
##   ...:  further arguments passed on to plot(), e.g. main.
iplot <- function(i, j, ...) {
  plot(iris[, i], iris[, j],
       col = iris$Species, las = 1,
       xlab = names(iris)[i], ylab = names(iris)[j],
       ...)
}

iplot(1, 2, main = "scatterplot of 2 vars")
iplot(2, 3, main = "scatterplot of another 2 vars")
iplot(3, 4, main = "scatterplot of another 2 vars")

## Fourth panel: all four measurements (column 5, Species, is dropped).
boxplot(iris[, -5], xlab = "centimeters", horizontal = TRUE, las = 1,
        main = "univariate distributions")

## Switch back to a single-panel layout before adding the overall title, so
## that title() annotates the page as a whole rather than the last panel.
par(mfrow = c(1, 1))
title("several views of the iris data", line = 3)
But base graphics are rather clumsy for multipanel displays where the axes are the same: you have to define the axis scales manually, and sometimes fiddle with the axis annotation parameters. You also have to change the code quite a bit to swap which variables go on the axes and which go on the panels.
## First calculate the range of all the data to plot, so that every panel
## can be forced onto the same scale via ylim.
irange <- range(iris[, -5])

## Plot vertical boxplots, with one panel per species going left to right.
## omi reserves 0.5 inches of outer margin at the bottom of the page.
par(mfrow = c(1, 3), omi = c(0.5, 0, 0, 0))
for (sp in levels(iris$Species)) {
  boxplot(iris[iris$Species == sp, -5], main = sp, las = 2, ylim = irange)
}

## Plot horizontal boxplots, with one panel per species going top to bottom.
## The 0.5 inch outer margin moves to the left-hand side of the page.
par(mfrow = c(3, 1), omi = c(0, 0.5, 0, 0))
for (sp in levels(iris$Species)) {
  boxplot(iris[iris$Species == sp, -5], main = sp, ylim = irange,
          horizontal = TRUE, las = 1)
}
In contrast, lattice takes care of axis scaling and annotation for you, and lets you experiment flexibly with which variables go where. To use lattice, you need to convert your data to long form, and the easiest way to do that is with melt() from the reshape2 package:
## Attach package l, installing it first if it is not available yet.
##   l: the package name, as a single character string.
library.install <- function(l) {
  ## requireNamespace() only tests availability (without the warning that
  ## require() emits for a missing package); library() then attaches the
  ## package, or stops with an error if the installation did not succeed.
  if (!requireNamespace(l, quietly = TRUE)) {
    install.packages(l)
  }
  library(l, character.only = TRUE)
}

library.install("reshape2")

## Long form: Species is kept as the identifier, and every other column of
## iris is stacked into the (variable, value) pair of columns used by the
## lattice formulas. id.vars is spelled out in full rather than relying on
## partial matching of the abbreviation "id".
melted.iris <- melt(iris, id.vars = "Species")
Also note that each lattice plot is 1 line of code instead of several for the corresponding base graphics version.
library(lattice)

## Each call below is one complete multipanel display. The formula reads
## y ~ x | panels, and layout = c(columns, rows) arranges the panels.
## Note: these plots are drawn because the calls are auto-printed at top
## level; inside a function, a loop or source() wrap each call in print().

## horizontal boxplots, one panel per species, panels side by side
bwplot(variable ~ value | Species, melted.iris, layout = c(3, 1))

## vertical boxplots, one panel per species, panels stacked
bwplot(value ~ variable | Species, melted.iris, layout = c(1, 3))

## some other ways to plot these data:
bwplot(value ~ variable | Species, melted.iris)
bwplot(value ~ Species | variable, melted.iris)
bwplot(variable ~ value | Species, melted.iris)
bwplot(Species ~ value | variable, melted.iris)
Example 2: lattice and ggplot2 win for directly labeled plots, e.g. the lasso path.
## Example 2: the lasso regularization path, with each curve labeled
## directly instead of through a legend.
library(lattice)  # xyplot(); attached here so that this example stands alone
library.install("lars")
data(diabetes)
fit <- with(diabetes, lars(x, y))

## L1 norm of the coefficient vector at every step of the path
## (fit$beta has one row per step and one column per variable).
arc.length <- rowSums(abs(fit$beta))

## Long form: one row per (step, variable) pair.
path <- do.call(rbind, lapply(colnames(fit$beta), function(N) {
  data.frame(arc.length, beta = fit$beta[, N], variable = N)
}))

lasso.plot <- xyplot(beta ~ arc.length, path, groups = variable, type = "l",
                     main = "The regularization path of lasso coefficients")
print(lasso.plot)

## The same Positioning Method is applied to the lattice and ggplot2 versions.
library.install("directlabels")
my.labels <- list(cex = 1.5, dl.combine(lasso.labels, last.qp))
print(direct.label(lasso.plot, my.labels))

## qplot() is deprecated in current ggplot2, so the plot is built with
## ggplot() + geom_line(), which maps the same aesthetics.
library.install("ggplot2")
gglasso <- ggplot(path, aes(arc.length, beta,
                            group = variable, colour = variable)) +
  geom_line()
print(direct.label(gglasso, my.labels))
Example 3: ggplot2 wins for multipanel displays that share the same axes, and makes it easy to encode multiple variables using color, size, and linetype, e.g. comparing norms and weights from my clusterpath paper.
set.seed(19)

## Simulate K Gaussian classes of N points each in D dimensions.
##   N:  number of points per class.
##   K:  number of classes.
##   D:  number of dimensions.
##   SD: standard deviation of the points around their class mean.
## Returns a list with
##   mat:   the simulated points, one column per dimension,
##   means: the K x D matrix of class means (standard normal draws),
##   class: the class labels 1..K, each repeated N times.
gendata <- function(N = 5, K = 2, D = 3, SD = 0.1) {
  means <- matrix(rnorm(K * D), K, D)
  ## Draw dimension by dimension and, within a dimension, class by class.
  ## This order must not change: a different order of rnorm() calls would
  ## give different points for the same seed.
  mat <- apply(means, 2, function(dim.means) {
    vapply(dim.means, function(class.mean) rnorm(N, class.mean, SD),
           numeric(N))
  })
  list(mat = mat, means = means, class = rep(1:K, each = N))
}

sim <- gendata(N = 5, D = 2, K = 2, SD = 0.6)
par(mfrow = c(1, 1))
plot(sim$mat, asp = 1)

## The solutions can be recomputed with the clusterpath package:
## if(!require(clusterpath)){
##   install.packages("clusterpath",repos="http://R-Forge.R-Project.org")
##   library(clusterpath)
## }
## cvx <- data.frame()
## for(norm in c(1,2,"inf"))for(gamma in c(0,1)){
##   cvx <- rbind(cvx,cvxmod.cluster(sim$mat,norm=norm,gamma=gamma,
##                                   regularization.points=50))
## }
## Instead, load the precomputed result (presumably the data frame cvx built
## by the code above -- the contents of the file are not visible from here).
## The connection is closed explicitly once the data are loaded.
cvx.con <- url("http://cbio.ensmp.fr/~thocking/clusterpath-figure-2.RData")
load(cvx.con)
close(cvx.con)

## One-row data frame (columns alpha.1, alpha.2) holding the overall mean.
means <- data.frame(alpha = t(colMeans(sim$mat)))

## Format the values of one faceting variable as LaTeX math labels.
##   var: name of the faceting variable (a single string).
##   val: its values.
## Returns one string of the form "$var=val$" per value; "gamma" is written
## as \gamma, other names are wrapped in \textrm{}, "inf" becomes \infty.
normweights <- function(var, val) {
  val <- as.character(val)
  var <- if (var == "gamma") "\\gamma" else sprintf("\\textrm{%s}", var)
  val[val == "inf"] <- "\\infty"
  sprintf("$%s=%s$", var, val)
}

## Adapter for facet_grid(): current ggplot2 calls a labeller with a single
## data frame holding one column per faceting variable, and expects a list of
## character vectors back; apply normweights() to each column in turn.
normweights.labeller <- function(labels) {
  Map(normweights, names(labels), labels)
}

## Layers and scales shared by both figures.
## breaks = NULL suppresses the axis breaks (current ggplot2 rejects NA).
cp <- ggplot(cvx, aes(alpha.2, alpha.1)) +
  geom_text(data = means, label = "$\\bar X$", col = "grey") +
  scale_colour_discrete("$\\gamma$") +
  coord_equal() +
  scale_size("$s$") +
  geom_point(data = data.frame(alpha = sim$mat), pch = 21, fill = "white") +
  scale_x_continuous("", breaks = NULL) +
  scale_y_continuous("", breaks = NULL) +
  theme_bw()
#opts(title=paste("Solutions of\n",
#"$\\operatorname{argmin}_\\alpha ||\\alpha-X||_2^2$\n",
#"subject to\n",
#"$\\sum_{i<j}||\\alpha_i-\\alpha_j||_q w_{ij}",
#"\\leq s\\sum_{i<j}||X_i-X_j||_q w_{ij}$\n",
#"$w_{ij}=\\exp(-\\gamma||X_i-X_j||^2_2)$",
#sep=""))

## Figure 1: one panel per (gamma, norm) combination.
cp1 <- cp +
  geom_path(aes(group = row), colour = "black", lwd = 1) +
  facet_grid(gamma ~ norm, labeller = normweights.labeller)
print(cp1)

## Figure 2: one panel per norm; gamma is encoded by colour and s by point
## size. The colour scale titled "$\\gamma$" is already part of cp, so it is
## not added a second time here.
cp2 <- cp +
  geom_point(aes(size = s, colour = gamma), pch = 21) +
  geom_path(aes(group = interaction(row, gamma), colour = gamma)) +
  facet_grid(. ~ norm, labeller = normweights.labeller)
print(cp2)
Bonus: use the tikzDevice for LaTeX markup in figures.
## Bonus: render cp2 through LaTeX, so that the "$...$" strings in the
## labels are typeset as math.
library.install("tikzDevice")

## Write the figure into the session's temporary directory. The R working
## directory is left untouched; only the shell command below changes
## directory.
fig.dir <- tempdir()
tikz(file.path(fig.dir, "fig.tex"), standAlone = TRUE)
print(cp2)
dev.off()

## Compile the standalone document and open the resulting PDF. This needs
## pdflatex and xpdf on the PATH, and a shell that understands "&&".
system(paste("cd", shQuote(fig.dir), "&& pdflatex fig&&xpdf fig.pdf"))
Org version 7.5 with Emacs version 22
Validate XHTML 1.0