## Empirical CDF ## AHW3 ## January 30, 2008 rm(list = ls()) ## generate 5 values from a normal distribution with mean 5 and standard deviation 2 n <- 5 y <- rnorm(n, mean=0, sd=2) round(sort(y),3) ## the empirical probability of Pr(a,b] a <- 0 b <- 1 length(y[y>a & y<=b])/n ## now lets sort the data ## the empirical cdf puts i/n mass at each data point y.sort <- sort(y) y.sort ## lets plot the empirical CDF plot(ecdf(y), xlab="y", lwd=2, cex.axis=1.5, cex.lab=1.5) ### par(mfrow=c(1,2)) ## so in this case what is the 20% quantile? plot(ecdf(y), xlab="y", lwd=2, cex.axis=1.5, cex.lab=1.5) abline(h=0.20, lwd=2, col="red") quantile(y, prob=0.20, type=4) ## so in this case what is the 80% quantile? plot(ecdf(y), xlab="y", lwd=2, cex.axis=1.5, cex.lab=1.5) abline(h=0.80, lwd=2, col="red") quantile(y, prob=0.80, type=4) ## so in this case what is the 70% quantile? par(mfrow=c(1,1)) plot(ecdf(y), xlab="y", lwd=2, cex.axis=1.5, cex.lab=1.5) abline(h=0.75, lwd=2, col="red") # the length between F(y(3)) and F(y(4)) l.y4.y3 <- 0.8 - 0.6 # the length between F(y[3]) and 0.75 l.yq.y3 <- 0.75 - 0.6 gamma <- l.yq.y4/l.y5.y4 # thus the 75% quantile is gamma*y(0.60) + (1-gamma)*y(0.80) (1-gamma)*y.sort[3] + gamma*y.sort[4] # or quantile(y, prob=0.75, type=4)