Sharing a few notes, mainly commands, that helped me when I started playing with R:
Initial set up:
· Download:
o R: http://cran.r-project.org/bin/windows/base/
o R Studio: http://www.rstudio.com/products/rstudio/download/
· Display available packages to be installed:
a <- available.packages()
— Please select a CRAN mirror for use in this session —
head(rownames(a),10)
[1] "A3" "ABCExtremes" "ABCoptim" "ABCp2" "ACCLMA"
[6] "ACD" "ACNE" "ACTCD" "ADGofTest" "ADM3"
· Installing a package – using the command line:
May need to extend the permissions on R’s folder ("C:/Program Files/R/R-3.1.1/library")
install.packages ("A3")
(Installs all dependencies as well)
Files are temporarily downloaded to:
C:\Users\<user>\AppData\Local\Temp\Rtmp6JVTkp\downloaded_packages
· Installing a package – using R studio:
· After installing a package you need to load it to be able to use the functions. The load is done using the library() command (you shouldn’t use quotes) – All dependencies are loaded as well:
library(A3)
Loading required package: xtable
Loading required package: pbapply
· Installing R tools and Dev tools
http://cran.r-project.org/bin/windows/Rtools/
install.packages("devtools")
library(devtools)
Basic Commands:
· Check working directory: getwd()
o On the R console: Go to file -> Change dir to change the working directory
· Read data: read.csv("test.csv")
· Show what’s loaded on the workspace: ls()
· Load an R function: source("mycode.r")
· Show the object class:
o x <- 0:6
o class(x)
o [1] "integer"
· Create vector of objects:
o c()
§ x <- c(0,2,0.4)
§ x
§ [1] 0.0 2.0 0.4
o vector
§ x <- vector("numeric", length = 5)
§ x
§ [1] 0 0 0 0 0
· Converting data
o x <- 0:6
o as.character(x)
o [1] "0" "1" "2" "3" "4" "5" "6"
o ——
o x <- c("foo","foo2")
o as.numeric(x)
o [1] NA NA
o Warning message:
o NAs introduced by coercion
· Matrices
o Basic
§ m <- matrix(nrow = 2, ncol = 3)
§ m
[,1] [,2] [,3]
[1,] NA NA NA
[2,] NA NA NA
§ dim(m)
[1] 2 3
§ attributes(m)
$dim
[1] 2 3
o Option 2
§ m <- 1:10
§ m
[1] 1 2 3 4 5 6 7 8 9 10
§ dim(m) <- c(2,5) #assign vector (2,5) to the dim attribute of m
§ m
[,1] [,2] [,3] [,4] [,5]
[1,] 1 3 5 7 9
[2,] 2 4 6 8 10
o Option 3 (binding)
§ x <- 1:3
§ y <- 10:12
§ foo1 <- cbind(x, y)
§ foo1
x y
[1,] 1 10
[2,] 2 11
[3,] 3 12
§ foo2 <- rbind (x,y)
§ foo2
[,1] [,2] [,3]
x 1 2 3
y 10 11 12
· Factors
o x <- factor(c("one","two","two","three","one"))
o x
[1] one two two three one
Levels: one three two
o table(x)
x
one three two
2 1 2
o unclass(x)
[1] 1 3 3 2 1
attr(,"levels")
[1] "one" "three" "two"
o Setting the levels:
§ x <- factor(c("one","two","two","three","one"), levels = c("one", "two", "three"))
§ x
[1] one two two three one
Levels: one two three
· Data Frames
o x <- data.frame(foo = 1:4, bar = c(T,T,F,F))
o x
foo bar
1 1 TRUE
2 2 TRUE
3 3 FALSE
4 4 FALSE
o nrow(x)
[1] 4
o ncol(x)
[1] 2
· Names:
o Objects
§ x <- 1:3
§ names(x)
NULL
§ names(x) <- c("foo","bar","norf")
§ x
foo bar norf
1 2 3
o Lists
§ x <- list(a=1,b=2,c=3)
§ x
$a
[1] 1
$b
[1] 2
$c
[1] 3
o Matrices
§ m <- matrix (1:4, nrow =2, ncol=2)
§ dimnames(m) <- list(c("a","b"), c("c","d"))
§ m
c d
a 1 3
b 2 4
· Subsetting
o a Matrix:
§ x <- matrix(1:6, 2, 3)
§ x[1, ]
[1] 1 3 5
§ x[1, , drop = FALSE]
§ [,1] [,2] [,3]
§ [1,] 1 3 5
o A list:
§ x <- list (foo = 1:4, bar = 0.6)
§ x
$foo
[1] 1 2 3 4
$bar
[1] 0.6
§ x[1] ##produces a list that contains 1,2,3,4
$foo
[1] 1 2 3 4
§ x[[1]] ## produces just the sequence
[1] 1 2 3 4
o List2
§ x <- list(foo = 1:4, bar = 0.6, baz = "hello")
§ x
$foo
[1] 1 2 3 4
$bar
[1] 0.6
$baz
[1] "hello"
§ name <- "foo" #variable with the string foo
§ x[[name]]
[1] 1 2 3 4
§ x[name]
$foo
[1] 1 2 3 4
o Nested elements:
§ x <- list (a=list(1,2,3), b = list(4,5,6))
§ x[[c(1,3)]]
[1] 3
§ x[[c(2,1)]]
[1] 4
· Partial Matching
o x <- list(awrajhf = 1:5)
o x
$awrajhf
[1] 1 2 3 4 5
o x$a #matches the partial name
o [1] 1 2 3 4 5
o x[["a"]] #name doesn’t exist
NULL
o x[["a", exact = FALSE]]
[1] 1 2 3 4 5
· Removing NA values
o x <- c(1,2,NA,4,NA,5)
o bad <- is.na(x)
o bad
[1] FALSE FALSE TRUE FALSE TRUE FALSE
o y <- x[!bad]
y
o [1] 1 2 4 5
· Removing NA values – 2 vectors
o x <- c (1,2,NA,4,NA,5)
o y <- c("a","b", NA,"d", NA, "f")
o good <- complete.cases(x,y) #TRUE at the positions where neither x nor y is missing
o good
[1] TRUE TRUE FALSE TRUE FALSE TRUE
o x[good]
[1] 1 2 4 5
o y[good]
[1] "a" "b" "d" "f"
· Read data
o Pass the columns types:
§ initial <- read.table("foo.txt", nrows=10)
§ classes <-sapply(initial, class)
§ all <- read.table("foo.txt", colClasses = classes)
· Dput-ting Objects:
o y <- data.frame (a=1, b="a")
o dput(y)#writes R code that can be used to reconstruct an R object
o
o structure(list(a = 1, b = structure(1L, .Label = "a", class = "factor")), .Names = c("a",
o "b"), row.names = c(NA, -1L), class = "data.frame")
o
o dput(y, file="y.R") # creates the y.R file
o
o new.z <- dget("y.R")
o new.z
o a b
o 1 1 a
o
o
o foo <- dget("y.R")
o foo
o a b
· Dumping Objects
o x<-"foo"
o y<-data.frame(a=1, b="a")
o dump(c("x","y"),file ="data.R") # dump can be used on multiple R objects
o rm(x,y) # remove objects
o x
Error: object ‘x’ not found
o source("data.R")
o x
[1] "foo"
· Data Frame:
o Print first n rows: head(mydf, n=2)
o Last two rows: tail(mydf,2)
o Number of rows: nrow(mydf)
o Show line 47: mydf[47,]
o Find the number of missing values in a column:
§ length(which(is.na(mydf$Ozone)))
§ miss <- is.na(mydf[, "Ozone"]) ## A vector of TRUE/FALSE
§ sum(miss)
o Subset of rows of the data frame where Ozone values are above 31 and Temp values are above 90:
§ mydf_sub <- subset(mydf, Ozone >31 & Temp >90)
o Mean:
§ Option1:
· mean(mydf[, "Ozone"], na.rm = TRUE)
§ Option2:
· use <- !is.na(mydf[, "Ozone"])
· mean(mydf[use, "Ozone"])
· CSV:
o cameradata <- read.table ("c:\\rwd\\cameras\\cameras.csv", sep=",", header = TRUE)
o head(cameradata)
· Excel:
o cameraData <- read.xlsx("cameras\\cameras.xlsx",sheetIndex=1,header=TRUE)
o head(cameraData)
· library(XML)
· XML – basic
o fileUrl <- "http://www.w3schools.com/xml/simple.xml"
o doc <- xmlTreeParse(fileUrl,useInternal=TRUE)
o rootNode <- xmlRoot(doc)
o xmlName(rootNode)
o [1] "breakfast_menu"
· XML
o xpathSApply(rootNode,"//name",xmlValue)
o [1] "Belgian Waffles" "Strawberry Belgian Waffles" "Berry-Berry Belgian Waffles" "French Toast" "Homestyle Breakfast"
· Json
o install.packages("jsonlite")
o library(jsonlite)
§ Dependency: install.packages("httr")
o jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")
o names(jsonData) #shows the name of the attributes (names of the data frame)
§ names(jsonData$owner)
o Writing data frames to JSON:
§ myjson <- toJSON(iris, pretty=TRUE)
§ cat(myjson)
o Convert back from JSON to a data frame:
§ iris2 <- fromJSON(myjson)
§ head(iris2)
· Data Table
o data.table is an extension of data.frame. Should be used for fast aggregation of large data
§ http://cran.r-project.org/web/packages/data.table/index.html
o install.packages("data.table")
o library(data.table)
o DF = data.frame(x=rnorm(9),y=rep(c("a","b","c"),each=3),z=rnorm(9))
o DT = data.table(x=rnorm(9),y=rep(c("a","b","c"),each=3),z=rnorm(9))
o tables() #see all tables in memory
o DT[2,] #Subsetting rows
o DT[DT$y=="a",] # looking at rows based on criteria
o DT[c(2,3)] #subsets second and third rows
o Calculating values for variables with expressions
§ DT[,list(mean(x),sum(z))] #applies mean and sum functions on variables x and z on the DT
o Create table of the Y values:
§ DT[,table(y)]
o Adding new columns: DT[,w:=z^2]
o Set all values in column y to 2: DT[, y:= 2]
o Multiple operations
§ DT[,m:= {tmp <- (x+z); log2(tmp+5)}]
§ Both expressions inside the braces are evaluated, and the result of the last one is assigned to the new column m