getwd() #data.path="S:/ST732data" #setwd(data.path) # set the working directory where the data file is saved. dental.data <- read.table(file="dental.txt") ## define the names of the columns mynames = c("obs", "subj", "ages", "distance", "gender") names(dental.data) = mynames # stydy the structure of the data str(dental.data) #'data.frame': 108 obs. of 5 variables: # $ obs : int 1 2 3 4 5 6 7 8 9 10 ... # $ subj : int 1 1 1 1 2 2 2 2 3 3 ... # $ ages : int 8 10 12 14 8 10 12 14 8 10 ... # $ distance: num 21 20 21.5 23 21 21.5 24 25.5 20.5 24 ... # $ gender : int 0 0 0 0 0 0 0 0 0 0 ... ################################################################# ## recover the vector of times (ages) t=unique(dental.data$ages) ## arrange the response in a matrix format with n=4 rows and m=27 columns Y=matrix(dental.data$distance, nrow=length(t)) ## get the vector of subjects and their corresponding geneder index.subj = unique(dental.data$subj) gender.vec = as.vector(by(dental.data$gender,dental.data$subj, function(x) x[1])) ## separate the data Y into two data sets Yboys and Ygirls corresponding to boys and girls Yboys = Y[,which(gender.vec==1)] Ygirls = Y[,which(gender.vec==0)] ## find the min and max values of distance so that we can ask ## R to scale the vertical axis of the plots accordingly ymin <- min(Y) ymax <- max(Y) ## Spaghetti plot pdf(file="dentalplot.pdf" ,paper= "USr" ,onefile=FALSE,height=6,width=12, pagecentre =TRUE, pointsize=12) matplot(t, Y, type="b",lty=1,pch=as.character(gender.vec), cex=0.7, lwd=2, col=rgb(0, 0, 0, alpha=.20) , ylim=c(ymin, ymax), xlab="Age (years)", ylab="Distance for girls (0) and boys (1)") dev.off() ## Plot standardized residuals ## define function which does the standardization using the sample mean and sample standard deviation standardize.fn = function(x){ (x - mean(x))/ sqrt(var(x))} cs.Yboys= apply(Yboys, 1, function(x) standardize.fn(x)) ; cs.Yboys=t(cs.Yboys) cs.Ygirls= apply(Ygirls, 1, function(x) standardize.fn(x)) ; cs.Ygirls=t(cs.Ygirls) ## find the min and max values of standardized distance so that we can ask csymin=min(cbind(cs.Yboys, cs.Ygirls)) csymax=max(cbind(cs.Yboys, cs.Ygirls)) ## Plot the standarddized residuals for boys/girls pdf(file="girlsandboys.pdf" ,paper= "USr" ,onefile=FALSE,height=6,width=12, pagecentre =TRUE, pointsize=12) par(mfrow=c(1,2)) matplot(t, cs.Yboys, type="b",lty=1,pch=as.character(1), cex=0.7, lwd=2, ylim=c(csymin, csymax), xlab="Age (years)", ylab="Standardized distance for boys (1)") matplot(t, cs.Ygirls, type="b",lty=1,pch=as.character(0), cex=0.7, lwd=2 , ylim=c(csymin, csymax), xlab="Age (years)", ylab="Standardized distance for girls (0)") dev.off() ## Scatterplot matrix pdf(file="girls_scatterplot.pdf" ,paper= "USr" ,onefile=FALSE,height=6,width=12, pagecentre =TRUE, pointsize=12) pairs(t(cs.Ygirls), labels=c("age8", "age10", "age12", "age14"), main="Scatetrplot matrix for girls") dev.off() pairs(t(cs.Yboys), labels=c("age8", "age10", "age12", "age14"), main="Scatetrplot matrix for boys")