# R syntax for bootstrap confidence intervals for effect sizes

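The bootstrap can be used to obtain confidence intervals for effect sizes: the data are resampled with replacement a large number of times, the effect size is computed in each resample, and the percentiles of the resulting distribution that correspond to the desired confidence level (e.g. the 2.5th and 97.5th percentiles for a 95% interval) are taken as the interval limits.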

The SPSS data file, skewed data.sav, is read into R, and a 95% confidence interval for Cohen's d, representing the standardised difference between males and females in the sample, is evaluated and plotted. The data file consists of a hypothetical set of id numbers, gender and time to an operation. The program assumes the data file is located in the *My Documents* folder and requires the extension package *foreign*, which is downloadable from CRAN and can be inserted into the R library folder located in C:\Program Files\R\R2.x.

This will also work with Excel spreadsheet files, since they can be opened in SPSS, saved as an SPSS (.sav) file and then read into R.

# Bootstrap percentile confidence interval for Cohen's d (males vs females).
#
# Reads the SPSS file "skewed data.sav" from the working directory, bootstraps
# the mean, variance and SD of time_o2 for the whole sample and for each sex,
# plots the bootstrap distributions, and prints a 95% percentile interval for
# Cohen's d.
#
# Bootstrap loop adapted from the syntax in Gilbert MacKenzie's Crash course
# on R handout; the per-sex analysis is the R equivalent of the SPSS macro of
# RW (2004).

library(foreign)

# Draw `nb` bootstrap resamples (with replacement, same size as `values`) and
# return an nb x 3 matrix with columns "mean", "var" and "sd", one row per
# resample.
bootstrap_moments <- function(values, nb) {
  if (!is.numeric(values) || length(values) < 2) {
    # var() needs two observations, and sample() treats a single number k as
    # the range 1:k, so a one-element sample would be resampled incorrectly.
    stop("need at least two numeric observations to bootstrap", call. = FALSE)
  }
  if (length(nb) != 1 || is.na(nb) || nb < 1) {
    stop("`nb` must be a single positive number", call. = FALSE)
  }
  n_obs <- length(values)
  stats <- vapply(
    seq_len(nb),
    function(i) {
      bs <- sample(values, n_obs, replace = TRUE)
      c(mean(bs), var(bs))
    },
    numeric(2)
  )
  cbind(mean = stats[1, ], var = stats[2, ], sd = sqrt(stats[2, ]))
}

# Pooled standard deviation of two independent groups. The *variances* are
# pooled (weighted by degrees of freedom) and the square root is taken last;
# averaging the standard deviations directly gives a different, wrong value.
pooled_sd <- function(var1, n1, var2, n2) {
  sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
}

# Cohen's d: difference in means divided by the pooled standard deviation.
# Vectorised, so whole columns of bootstrap results can be passed in.
cohens_d <- function(mean1, var1, n1, mean2, var2, n2) {
  (mean1 - mean2) / pooled_sd(var1, n1, var2, n2)
}

# Two-sided percentile interval at confidence `level`, e.g. level = 0.95
# returns the 2.5th and 97.5th percentiles of `x`.
percentile_ci <- function(x, level = 0.95) {
  alpha <- 1 - level
  quantile(x, probs = c(alpha / 2, 1 - alpha / 2))
}

# Look at the shape of the bootstrap distributions of the mean, variance and
# standard deviation held in a matrix returned by bootstrap_moments().
plot_bootstrap <- function(boot, label) {
  hist(boot[, "mean"], main = paste("Bootstrap means:", label), xlab = "mean")
  hist(boot[, "var"], main = paste("Bootstrap variances:", label),
       xlab = "variance")
  hist(boot[, "sd"], main = paste("Bootstrap SDs:", label), xlab = "SD")
  invisible(boot)
}

data_file <- "skewed data.sav"
nb <- 1000  # number of bootstrap resamples

if (!file.exists(data_file)) {
  # Report the missing file once instead of letting every later statement
  # fail in turn.
  warning(
    "Data file '", data_file, "' not found in ", getwd(),
    "; bootstrap analysis skipped.",
    call. = FALSE
  )
} else {
  x <- read.spss(data_file)
  x1 <- data.frame(x)

  # Drop cases with a missing time from BOTH variables so that sex and time
  # stay aligned row by row.
  observed <- !is.na(x1$time_o2)
  y <- x1$time_o2[observed]
  sex <- x1$sex[observed]
  n <- length(y)

  # Whole sample.
  boot_all <- bootstrap_moments(y, nb)
  plot_bootstrap(boot_all, "all cases")

  # Do separately for Males and Females as in RW macro.
  # %in% treats a missing sex as "no match" rather than propagating NA.
  ymale <- y[sex %in% "Male"]
  yfemale <- y[sex %in% "Female"]
  nm <- length(ymale)
  nf <- length(yfemale)

  boot_male <- bootstrap_moments(ymale, nb)
  plot_bootstrap(boot_male, "males")

  boot_female <- bootstrap_moments(yfemale, nb)
  plot_bootstrap(boot_female, "females")

  # Same layout as the original result matrix: columns 1-3 hold the male
  # mean/variance/SD, columns 4-6 the female mean/variance/SD.
  boot <- unname(cbind(boot_male, boot_female))

  poolsd <- pooled_sd(boot_male[, "var"], nm, boot_female[, "var"], nf)
  cohensd <- (boot_male[, "mean"] - boot_female[, "mean"]) / poolsd

  hist(cohensd)

  # 95% percentile interval: 2.5th and 97.5th percentiles (the 5th and 95th
  # percentiles would only give a 90% interval).
  print(percentile_ci(cohensd, level = 0.95))
}