1. 程式人生 > >2.3 Lab: Introduction to R

2.3 Lab: Introduction to R

2.3 Lab: Introduction to R

#ISL2.3 Lab Intro to R
#2018.12.24
#by Qikun

# 2.3.1 Basic Commands ----

# c() ("concatenate") joins its arguments into a single vector.
# At top level `<-` and `=` both perform assignment.
x <- c(1, 3, 2, 5)
x
# Type ?funcname to open the help page for the function funcname.

x <- c(1, 6, 2)
y <- c(1, 4, 3)
length(x)
length(y)
# Vectors of equal length are added element by element.
x + y

# ls() lists the objects in the workspace; rm() deletes the ones it is given.
ls()
rm(x, y)
# Handing the whole ls() listing to rm() clears every object at once.
rm(list = ls())

# matrix() fills its entries column by column unless told otherwise.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
# The same call with data, nrow and ncol matched by position.
x <- matrix(c(1, 2, 3, 4), 2, 2)
x

# byrow = TRUE fills the matrix row by row instead.
matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)

# sqrt() and ^ are applied to each element of the matrix.
sqrt(x)
x^2

# rnorm() draws from a normal distribution with mean 0 and standard
# deviation 1 unless mean and sd are supplied.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
# cor() computes the correlation between the two vectors.
cor(x, y)

# set.seed() makes the random draws that follow reproducible.
set.seed(3)
y <- rnorm(100)
# mean() and var() give the mean and variance of a numeric vector.
mean(y)
var(y)
# The standard deviation is the square root of the variance ...
sqrt(var(y))
# ... and sd() computes it directly.
sd(y)

# 2.3.2 Graphics ----

x <- rnorm(100)
y <- rnorm(100)
# plot(x, y) draws a scatterplot of the values in x against those in y.
plot(x, y)
# xlab, ylab and main set the axis labels and the plot title.
plot(x, y,
  xlab = "this is the x-axis", ylab = "this is the y-axis",
  main = "Plot of X vs Y"
)

# pdf() opens a PDF file as the active graphics device, so the next plot is
# written to Figure.pdf; dev.off() tells R the plot is finished and closes it.
pdf("Figure.pdf")
plot(x, y, col = "green")
dev.off()

# seq(a, b) builds the integer sequence from a to b.
x <- seq(1, 10)
# a:b is shorthand for seq(a, b) when the arguments are integers.
x <- 1:10
# length.out asks for that many equally spaced values between the end points,
# e.g. seq(0, 1, length.out = 10) gives 10 numbers between 0 and 1.
x <- seq(-pi, pi, length.out = 50)
x
# Advance to a fresh, empty plotting frame.
plot.new()
y <- x
# outer() evaluates the function on every (x, y) pair, giving a 50 x 50 matrix.
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
f
# contour() draws a contour plot of the surface f over the grid (x, y).
contour(x, y, f)
# add = TRUE overlays the finer contours on the existing plot.
contour(x, y, f, nlevels = 45, add = TRUE)
# Antisymmetric part of f: fa equals -t(fa).
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)
fa

# image() takes the same arguments as contour() but produces a colour-coded
# plot (a heatmap) whose colours depend on the z value.
image(x, y, fa)
# persp() produces a three-dimensional plot of the surface. The arguments
# theta and phi control the angles from which the plot is viewed.
persp(x, y, fa)
persp(x, y, fa, theta = 30)
persp(x, y, fa, theta = 30, phi = 20)
persp(x, y, fa, theta = 30, phi = 70)
persp(x, y, fa, theta = 30, phi = 40)

# 2.3.3 Indexing Data ----

# A 4 x 4 matrix holding 1 to 16, filled column by column.
A <- matrix(1:16, nrow = 4, ncol = 4)
A
# One element: row 2, column 3.
A[2, 3]
# Rows 1 and 3 crossed with columns 2 and 4.
A[c(1, 3), c(2, 4)]
# Ranges of rows and columns.
A[1:3, 2:4]
# Leaving an index empty selects every column (or every row).
A[1:2, ]
A[, 1:2]
# A single row or column of a matrix comes back as a plain vector.
A[1, ]
# A negative index keeps every row or column except the ones listed.
A[-c(1, 3), -c(1, 3, 4)]
# dim() reports the number of rows followed by the number of columns.
dim(A)

# 2.3.4 Loading Data ----
library(ISLR)

# The files Auto.data and Auto.csv are read from the working directory.
# First attempt: without header = TRUE the line of variable names is read as
# an ordinary data row.
Auto <- read.table("Auto.data")
# fix() opens the data in a spreadsheet-like editor, which needs a user at the
# console, so it is skipped when the script runs unattended.
if (interactive()) {
  fix(Auto)
}
# header = TRUE tells read.table() that the first line of the file holds the
# variable names. na.strings lists the character strings (here a question
# mark) that should be treated as missing values.
# stringsAsFactors = TRUE is given explicitly because R >= 4.0.0 no longer
# converts character columns to factors by default; without it the `name`
# column stays character and pairs(Auto) stops with a non-numeric error.
Auto <- read.table(
  "Auto.data",
  header = TRUE, na.strings = "?", stringsAsFactors = TRUE
)

# The same data read from the comma-separated version of the file.
Auto <- read.csv(
  "Auto.csv",
  header = TRUE, na.strings = "?", stringsAsFactors = TRUE
)
if (interactive()) {
  fix(Auto)
}
dim(Auto)
Auto[1:4, ]
# The lab text notes that only five rows contain missing observations, so
# na.omit() is used to simply drop those rows.
Auto <- na.omit(Auto)
dim(Auto)
# Once the data are loaded correctly, names() lists the variable names.
names(Auto)

# 2.3.5 Additional Graphical and Numerical Summaries ----

# A bare column name is not visible by itself: R does not know to look inside
# Auto for it. Qualify the column with the data set name and a $ ...
plot(Auto$cylinders, Auto$mpg)
# ... or attach() the data frame so its columns can be referenced by name.
attach(Auto)
plot(cylinders, mpg)

# cylinders is stored as a numeric vector, so R treats it as quantitative.
# Because it takes only a small number of distinct values, one may prefer to
# treat it as qualitative; as.factor() performs that conversion.
# The assignment creates a workspace variable named cylinders; the column
# inside Auto is left unchanged.
cylinders <- as.factor(cylinders)
cylinders

# When the variable on the x-axis is categorical, plot() automatically
# produces boxplots.
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
plot(cylinders, mpg, col = "red", varwidth = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE,
  xlab = "cylinders", ylab = "MPG"
)

# hist() plots a histogram.
# Note that col = 2 has the same effect as col = "red".
hist(mpg)
hist(mpg, col = 2)
hist(mpg, col = 2, breaks = 15)

# pairs() creates a scatterplot matrix, i.e. a scatterplot for every pair of
# variables in a data set. A formula restricts it to a subset of the
# variables.
pairs(Auto)
pairs(~ mpg + displacement, Auto)
# pairs(~ Auto$mpg + Auto$displacement, Auto)

plot(horsepower, mpg)
# identify() labels the points the user clicks on, using the values in name.
identify(horsepower, mpg, name)

# summary() produces a numerical summary of every variable in a data set, or
# of a single variable.
summary(Auto)
summary(mpg)

Reference

《An Introduction to Statistical Learning》

若需引用請註明出處。
若有錯誤歡迎指正、討論。