## R-code (September 4, 2007)
##
## Simple linear regression of `statistics` on `algebra` from the mathmarks
## data set: first via the closed-form least-squares solution, then via
## numerical minimization of the residual sum of squares with optim().

## get the mathmarks data set
## note: if the first row in the txt file is one field shorter than the rest,
## then read.table() automatically sets header = TRUE; it is set explicitly
## here so the intent is visible.
data <- read.table("mathmarks.txt", header = TRUE)

## look at the first few rows of the data
head(data, 3)

## set the variable statistics to be the response (y) and algebra to be the
## covariate (x)
y <- data[["statistics"]]
x <- data[["algebra"]]

## plot y against x
plot(x, y)

## change the axis labels
plot(x, y, xlab = "algebra", ylab = "statistics", main = "scatter plot")

## let's solve for the least-squares estimate of the best fitting line.
## remember that we are estimating an intercept and a slope, so we have to
## create a vector of 1's and bind it to the vector x to make the matrix X.
X <- cbind(rep(1, length(x)), x)
head(X, 4)

## solve the normal equations (X'X) beta = X'y directly instead of forming
## the explicit inverse of X'X; the result is a 2 x 1 matrix holding
## (intercept, slope).
beta.lse <- solve(crossprod(X), crossprod(X, y))

## add the least-squares line to the scatter plot
plot(x, y, xlab = "algebra", ylab = "statistics", main = "scatter plot")
abline(beta.lse)

## put two plots on one page; keep the previous graphics settings so they can
## be restored once the two-panel figure has been drawn.
old.par <- par(mfrow = c(2, 1))
plot(x, y, xlab = "algebra", ylab = "statistics", main = "scatter plot")
plot(x, y, xlab = "algebra", ylab = "statistics", main = "scatter plot")
abline(beta.lse)
par(old.par)

## suppose we couldn't determine the solution. use optim() to optimize an
## expression. note the default is minimization.

## residual sum of squares for a candidate coefficient vector.
##   beta: numeric vector c(intercept, slope)
## uses the design matrix X and the response y defined above in the script.
## returns a single number, (y - X beta)'(y - X beta), as optim() expects of
## its objective function.
ls.func <- function(beta) {
  resid <- y - X %*% beta
  sum(resid^2)
}

## test the function. pick some beta to try.
beta.try <- c(2, 5)
ls.func(beta.try)

## ok use optim() to minimize ls.func wrt beta, starting from (0, 0).
## the $par component of the result should be close to beta.lse.
optim(par = c(0, 0), fn = ls.func, method = "BFGS")