##Linear Regression: Head Circumference Example data_lbwi<-read.csv("C:/YU/teaching/致远交叉创新/data/low birth weight infants.csv",header=TRUE) fit_lbwi<-lm(headcirc~gestage,data=data_lbwi) summary(fit_lbwi) plot(data_lbwi$gestage,data_lbwi$headcirc) abline(fit_lbwi,col="red") par(mfrow=c(2,2)) plot(fit_lbwu,main="Head Circumference") ## Multiple linear regression library(usdm) library(caret) vif(data_lbwi[,2:6]) #calculate the VIF. summary(lm(gestage~+length+birthwt+momage+toxemia ,data=data_lbwi)) ##calculate the R-square of gestage mfit_lbwi<-lm(headcirc~gestage+length+gestage+birthwt+momage+toxemia ,data=data_lbwi) par(mfrow=c(2,2)) plot(mfit_lbwi,main="Head Circumference") ## Variable selection install.packages(olsrr) library(olsrr) ##stepwise ols_step_both_p(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) stsel=ols_step_both_p(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) plot(stsel) ##AIC ols_step_backward_aic(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) aicsel=ols_step_backward_aic(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) plot(aicsel) ##All possible models ols_step_all_possible(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) allpos=ols_step_all_possible(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) plot(allpos) ##Best subset ols_step_best_subset(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) bestsubset=ols_step_best_subset(mfit_lbwi,pent = 0.2, prem = 0.15, progress = TRUE, details = FALSE) plot(bestsubset) ##LASSO library(glmnet) x=as.matrix(data_lbwi[,2:6]) y=as.vector(data_lbwi[,1]) las=glmnet(x,y, family = c("gaussian"),alpha=1) las$beta lascv=cv.glmnet(x,y, family = c("gaussian"),alpha=1) plot(cv.glmnet) ## Local Kernel Method library(np) ord=order(data_lbwi$length) data_lbwi=data_lbwi[ord,] model.np <- npreg(data_lbwi$headcir~data_lbwi$length ,regtype = "ll",bwmethod = "cv.aic",gradients = TRUE) summary(model.np) npsigtest(model.np) model.par <- lm(data_lbwi$headcir~data_lbwi$length) plot(data_lbwi$length, data_lbwi$headcir, xlab = "length", ylab = "head circumference", cex=.1) lines(data_lbwi$length, fitted(model.np), lty=1, col = "blue") lines(data_lbwi$length, fitted(model.par), lty = 2, col = " red") plot(model.np, plot.errors.method = "asymptotic") plot(model.np, gradients = TRUE) plot(model.np, gradients = TRUE, plot.errors.method = "asymptotic") ## Linear Regression (Birth rate) data_gnp<-read.csv("C:/YU/teaching/致远交叉创新/data/gross national product.csv",header=TRUE) gnp<-data_gnp$gnp birthrt<-data_gnp$birthrt fit_gnp<-lm(birthrt~gnp) plot(gnp,birthrt ) abline(fit_gnp) par(mfrow=c(2,2)) plot(fit_gnp,main="birthrt~gnp") ln_gnp<-log(gnp) fit_ln_gnp<-lm(birthrt~ln_gnp) plot(ln_gnp,birthrt ) abline(fit_ln_gnp) par(mfrow=c(2,2)) plot(fit_ln_gnp,main="birthrt~ln_gnp") ##local kernel method ord=order(data_gnp$gnp) data_gnp=data_gnp[ord,] model.np <- npreg(data_gnp$birthrt~data_gnp$gnp ,regtype = "ll",bwmethod = "cv.aic",gradients = TRUE) summary(model.np) npsigtest(model.np) model.par <- lm(data_gnp$birthrt~data_gnp$gnp) plot(data_gnp$gnp, data_gnp$birthrt, xlab = "gnp", ylab = "birth rate", cex=.1) lines(data_gnp$gnp, fitted(model.np), lty=1, col = "blue") lines(data_gnp$gnp, fitted(model.par), lty = 2, col = " red") plot(model.np, plot.errors.method = "asymptotic") plot(model.np, gradients = TRUE) plot(model.np, gradients = TRUE, plot.errors.method = "asymptotic") ##change bandwidth model.np <- npreg(data_gnp$birthrt~data_gnp$gnp ,regtype = "ll",bws=1000,gradients = TRUE) #plot(data_gnp$gnp, data_gnp$birthrt, xlab = "gnp", ylab = "birth rate", cex=.1) lines(data_gnp$gnp, fitted(model.np), lty=1, col = "blue") model.np <- npreg(data_gnp$birthrt~data_gnp$gnp ,regtype = "ll",bws=2000,gradients = TRUE) lines(data_gnp$gnp, fitted(model.np), lty=1, col = "red") model.np <- npreg(data_gnp$birthrt~data_gnp$gnp ,regtype = "ll",bws=3000,gradients = TRUE) lines(data_gnp$gnp, fitted(model.np), lty=1, col = "green") ## Smoothing spline library(mgcv) b <- gam(birthrt~gnp,family=gaussian(),data=data_gnp) b <- gam(birthrt~s(gnp),family=gaussian(),bs=cr, data=data_gnp) plot(b)