## R-code for assignment 2 ## AHW3 ## Feb 19, 2008 #### 1 rm(list = ls()) ## code in the data y1 <- c(65,82,81,67,57,59,66,75,82,70) y2 <- c(64,56,71,69,83,74,59,82,65,79) y <- c(y1,y2) x <- c(rep(1,10), rep(2,10)) ## conduct the randomization test g.m <- real() g.v <- real() for(nsim in 1:5000){ xsim <- sample(x) g.m[nsim] <- abs(mean(y[xsim==1])-mean(y[xsim==2])) g.v[nsim] <- abs(var(y[xsim==1])-var(y[xsim==2])) } g.m.obs <- abs(mean(y[x==1])-mean(y[x==2])) g.v.obs <- abs(var(y[x==1])-var(y[x==2])) p.value.m <- length(g.m[g.m>=g.m.obs])/length(g.m) p.value.v <- length(g.v[g.v>=g.v.obs])/length(g.v) p.value.m p.value.v ##### 2 rm(list = ls()) ## read in the data data <- read.table("http://faculty.unlv.edu/westveld/Teaching/Sta713/Data/HW2.txt") yA <- data$A yB <- data$B summary(yA) summary(yB) ## make the plots pdf("fig1.pdf",family="Times",height=5,width=6) par(mfrow=c(2,2)) boxplot(yA, col="yellow", main="Boxplot of yA") boxplot(yB, col="yellow", main="Boxplot of yB") hist(yA, col="yellow") hist(yB, col="yellow") dev.off() ## Use the absolute value of the difference of the IQR as test statistic ## conduct the randomization test y <- c(yA, yB) x <- c(rep("A", length(yA)), rep("B", length(yB))) g <- real() for(nsim in 1:5000){ xsim <- sample(x) IQR.A.sim <- quantile(y[xsim=="A"], prob=0.75)-quantile(y[xsim=="A"], prob=0.25) IQR.B.sim <- quantile(y[xsim=="B"], prob=0.75)-quantile(y[xsim=="B"], prob=0.25) g[nsim] <- abs(IQR.A.sim-IQR.B.sim) } IQR.A.obs <- quantile(y[x=="A"], prob=0.75)-quantile(y[x=="A"], prob=0.25) IQR.B.obs <- quantile(y[x=="B"], prob=0.75)-quantile(y[x=="B"], prob=0.25) g.obs <- abs(IQR.A.obs-IQR.B.obs) p.value<- length(g[g>=g.obs])/length(g) p.value ##### 3 rm(list = ls()) n.size <- c(5, 50, 100, 1000) p.value.out <- real() for(i in 1:4){ print(i) p.value.avg <- real() for(j in 1:100){ ## sample size n <- n.size[i] yA <- rnorm(n, 5, 2) yB <- rnorm(n, 6, 2) y <- c(yA, yB) x <- c(rep("A", n), rep("B", n)) ## randomization test g <- real() for(nsim in 1:5000){ xsim <- sample(x) g[nsim] <- abs(mean(y[xsim=="A"])-mean(y[xsim=="B"])) } g.obs <- abs(mean(y[x=="A"])-mean(y[x=="B"])) p.value <- length(g[g>=g.obs])/length(g) ## save the p-values for the 100 different simulations p.value.avg[j] <- p.value } ## save the mean of the p-values p.value.out[i] <- mean(p.value.avg) }