2.3 Lab: Introduction to R
阿新 • • 發佈:2019-01-01
2.3 Lab: Introduction to R
# ISL 2.3 Lab: Introduction to R
# 2018.12.24
# by Qikun

################################ 2.3.1 Basic Commands ##########################

# c() ("concatenate") creates a vector of numbers.
# At top level, `<-` and `=` both perform assignment.
x <- c(1, 3, 2, 5)
x

# ?funcname shows additional information about the function funcname.
x <- c(1, 6, 2)
y <- c(1, 4, 3)
length(x)
length(y)
x + y

ls() # look at a list of all of the objects
rm(x, y) # delete the named objects
# rm(list = ls()) removes all objects at once. It is shown commented out
# because it would wipe the whole workspace of whoever runs this script.
# rm(list = ls())

# By default R creates matrices by successively filling in columns.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
# The argument names data=, nrow= and ncol= may be omitted.
x <- matrix(c(1, 2, 3, 4), 2, 2)
x
# byrow = TRUE fills the matrix row by row instead.
matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)
sqrt(x)
x^2

# rnorm() draws from a normal distribution with mean 0 and standard
# deviation 1 unless mean= and sd= are given.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = .1)
# Use the cor() function to compute the correlation between them.
cor(x, y)

# set.seed() lets us reproduce the exact same set of random numbers.
set.seed(3)
y <- rnorm(100)
# mean() and var() compute the mean and variance of a vector of numbers.
mean(y)
var(y)
# Applying sqrt() to the output of var() gives the standard deviation.
sqrt(var(y))
# Or we can simply use the sd() function.
sd(y)

############################# 2.3.2 Graphics #############################

x <- rnorm(100)
y <- rnorm(100)
# plot(x, y) produces a scatterplot of the numbers in x versus the numbers in y.
plot(x, y)
# For example, passing in the argument xlab will result in a label on the
# x-axis.
plot(x, y,
  xlab = "this is the x-axis", ylab = "this is the y-axis",
  main = "Plot of X vs Y"
)

# pdf() opens a PDF file as the graphics device, so the next plot is written
# to Figure.pdf.
pdf("Figure.pdf")
plot(x, y, col = "green")
# dev.off() indicates to R that we are done creating the plot.
dev.off()

# seq(a, b) makes a sequence of integers between a and b.
x <- seq(1, 10)
# Typing 3:11 is a shorthand for seq(3, 11) for integer arguments.
x <- 1:10
# seq(0, 1, length = 10) makes a sequence of 10 numbers that are equally
# spaced between 0 and 1.
# 50 equally spaced points between -pi and pi, used as both grid axes.
x <- seq(-pi, pi, length.out = 50)
x
plot.new()
y <- x
# outer() evaluates the function on every (x, y) pair, giving a 50 x 50 matrix.
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
f
# contour() draws a contour plot of the matrix f over the x/y grid.
contour(x, y, f)
# add = TRUE overlays the finer contours on the existing plot.
contour(x, y, f, nlevels = 45, add = TRUE)
# Antisymmetric part of f.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)
fa

# The image() function works the same way as contour(), except that it
# produces a color-coded plot whose colors depend on the z value (a heatmap).
image(x, y, fa)

# persp() produces a three-dimensional plot.
persp(x, y, fa)
# The arguments theta and phi control the angles at which the plot is viewed.
persp(x, y, fa, theta = 30)
persp(x, y, fa, theta = 30, phi = 20)
persp(x, y, fa, theta = 30, phi = 70)
persp(x, y, fa, theta = 30, phi = 40)

########################### 2.3.3 Indexing Data #############################

A <- matrix(1:16, 4, 4)
A
A[2, 3]
A[c(1, 3), c(2, 4)]
A[1:3, 2:4]
A[1:2, ]
A[, 1:2]
# R treats a single row or column of a matrix as a vector.
A[1, ]
# The use of a negative sign - in the index tells R to keep all rows or
# columns except those indicated in the index.
A[-c(1, 3), -c(1, 3, 4)]
dim(A)

########################### 2.3.4 Loading Data ##############################

library(ISLR)

# First attempt: without header= the variable names in the first line of the
# file are read as data.
Auto <- read.table("Auto.data")
# fix() opens an interactive data editor, so only call it in an interactive
# session.
if (interactive()) {
  fix(Auto)
}

# Using the option header = TRUE in the read.table() function tells R that the
# first line of the file contains the variable names, and using the option
# na.strings tells R that any time it sees a particular character or set of
# characters (such as a question mark), it should be treated as a missing
# element of the data matrix.
# stringsAsFactors = TRUE keeps text columns as factors (the default before
# R 4.0), which later calls such as pairs(Auto) require.
Auto <- read.table("Auto.data",
  header = TRUE, na.strings = "?", stringsAsFactors = TRUE
)
Auto <- read.csv("Auto.csv",
  header = TRUE, na.strings = "?", stringsAsFactors = TRUE
)
if (interactive()) {
  fix(Auto)
}
dim(Auto)
Auto[1:4, ]

# In this case, only five of the rows contain missing observations, and so we
# choose to use the na.omit() function to simply remove these rows.
Auto <- na.omit(Auto)
dim(Auto)
# Once the data are loaded correctly, we can use names() to check the
# variable names.
names(Auto)

####################### 2.3.5 Additional Graphical and Numerical Summaries #################

# R does not know to look in the Auto data set for those variables.
# To refer to a variable, we must type the data set and the variable name
# joined with a $ symbol.
plot(Auto$cylinders, Auto$mpg)

# attach() puts the columns of Auto on the search path so that they can be
# referenced by bare name.
attach(Auto)
plot(cylinders, mpg)

# The cylinders variable is stored as a numeric vector, so R has treated it
# as quantitative. However, since there are only a small number of possible
# values for cylinders, one may prefer to treat it as a qualitative variable.
# The as.factor() function converts quantitative variables into qualitative
# variables.
cylinders <- as.factor(cylinders)
cylinders

# If the variable plotted on the x-axis is categorical, then boxplots will
# automatically be produced by the plot() function.
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
plot(cylinders, mpg, col = "red", varwidth = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
plot(cylinders, mpg,
  col = "red", varwidth = TRUE,
  xlab = "cylinders", ylab = "MPG"
)

# The hist() function can be used to plot a histogram.
# Note that col = 2 has the same effect as col = "red".
hist(mpg)
hist(mpg, col = 2)
hist(mpg, col = 2, breaks = 15)

# The pairs() function creates a scatterplot matrix, i.e. a scatterplot for
# every pair of variables for any given data set.
# NOTE: pairs() accepts only numeric, logical or factor columns; if `name`
# was read as a character column (the read.csv() default since R 4.0) this
# call stops with "non-numeric argument to 'pairs'".
pairs(Auto)
# We can also produce scatterplots for just a subset of the variables.
pairs(~ mpg + displacement, Auto)
# pairs(~ Auto$mpg + Auto$displacement, Auto)

plot(horsepower, mpg)
# identify() waits for mouse clicks on the plot and labels the chosen points
# with `name`, so it is only meaningful in an interactive session.
if (interactive()) {
  identify(horsepower, mpg, name)
}

# The summary() function produces a numerical summary of each variable in a
# particular data set.
summary(Auto)
summary(mpg)
Reference
《An Introduction to Statistical Learning》
若需引用請註明出處。
若有錯誤歡迎指正、討論。