Um blog sobre nada

Um conjunto de inutilidades que podem vir a ser úteis

Basic R

Posted by Diego em Dezembro 2, 2014


 

Sharing a few notes, mainly commands, that helped me when I started playing with R:

Initial set up:

·         Download:

o   R: http://cran.r-project.org/bin/windows/base/

o   R Studio: http://www.rstudio.com/products/rstudio/download/

 

·         Display available packages to be installed:

a <- available.packages()

— Please select a CRAN mirror for use in this session —

 head(rownames(a),10)

 [1] "A3"          "ABCExtremes" "ABCoptim"    "ABCp2"       "ACCLMA"    

 [6] "ACD"         "ACNE"        "ACTCD"       "ADGofTest"   "ADM3" 

 

·         Installing a package – using the command line:
May need to extend the permissions on R’s folder ("C:/Program Files/R/R-3.1.1/library")

install.packages ("A3")

(Installs all dependencies as well)

Files are temporarily downloaded to:
C:\Users\<user>\AppData\Local\Temp\Rtmp6JVTkp\downloaded_packages
image

·         Installing a package – using R studio:
image

·         After installing a package you need to load it to be able to use the functions. The load is done using the library() command (you shouldn’t use quotes) – All dependencies are loaded as well:

 

 library(A3)

Loading required package: xtable

Loading required package: pbapply

 

·         Installing R tools and Dev tools
http://cran.r-project.org/bin/windows/Rtools/

install.packages("devtools")

library(devtools)

 

Basic Commands:

 

·         Check working directory:  getwd()

o   On the R console: Go to file -> Change dir to change the working directory

·         Read data: read.csv("test.csv")

·         Show what’s loaded on the workspace: ls()

·         Load an R function: source("mycode.r")

·         Show the object class:

o   x <- 0:6

o   class(x)

o   [1] "integer"

·         Create vector of objects:

o   c()

§  x <- c(0,2,0.4)

§  x

§  [1] 0.0 2.0 0.4

o   vector

§  x <- vector("numeric", length = 5)

§  x

§  [1] 0 0 0 0 0

·         Converting data

o   x <- 0:6

o    as.character(x)

o   [1] "0" "1" "2" "3" "4" "5" "6"

o   ——

o    x <- c("foo","foo2")

o    as.numeric(x)

o   [1] NA NA

o   Warning message:

o   NAs introduced by coercion

·         Matrices

o   Basic

§   m <- matrix(nrow = 2, ncol = 3)

§   m

     [,1] [,2] [,3]

[1,]   NA   NA   NA

[2,]   NA   NA   NA

§   dim(m)

[1] 2 3

§   attributes(m)

$dim

[1] 2 3

o   Option 2

§   m <- 1:10

§   m

 [1]  1  2  3  4  5  6  7  8  9 10

§   dim(m) <- c(2,5) #assign vector (2,5) to the dim attribute of m

§   m

     [,1] [,2] [,3] [,4] [,5]

[1,]    1    3    5    7    9

[2,]    2    4    6    8   10

o   Option 3 (binding)

§   x <- 1:3

§   y <- 10:12

§   foo1 <- cbind(x, y)

§   foo1

     x  y

[1,] 1 10

[2,] 2 11

[3,] 3 12

§   foo2 <- rbind (x,y)

§   foo2

  [,1] [,2] [,3]

x    1    2    3

y   10   11   12

·         Factors

o    x <- factor(c("one","two","two","three","one"))

o    x

[1] one   two   two   three one 

Levels: one three two

o    table(x)

x

  one three   two

    2     1     2  

o    unclass(x)

[1] 1 3 3 2 1

attr(,"levels")

[1] "one"   "three" "two" 

o   Setting the levels:

§   x <- factor(c("one","two","two","three","one"), levels = c("one", "two", "three"))

§   x

[1] one   two   two   three one 

Levels: one two three

·         Data Frames

o   x <- data.frame(foo = 1:4, bar = c(T,T,F,F))

o   x

  foo   bar

1   1  TRUE

2   2  TRUE

3   3 FALSE

4   4 FALSE

o    nrow(x)

[1] 4

o    ncol(x)

[1] 2

·         Names:

o   Objects

§   x <- 1:3

§   names(x)

NULL

§   names(x) <- c("foo","bar","norf")

§   x

 foo  bar norf

   1    2    3

o   List

§   x <- list(a=1,b=2,c=3)

§   x

$a

[1] 1

 

$b

[1] 2

 

$c

[1] 3

o   Matrices

§   m <- matrix (1:4, nrow =2, ncol=2)

§   dimnames(m) <- list(c("a","b"), c("c","d"))

§   m

  c d

a 1 3

b 2 4

·         Sub setting

o   a Matrix:

§   x <- matrix(1:6, 2, 3)

§   x[1, ]

[1] 1 3 5

§   x[1, , drop = FALSE]

     [,1] [,2] [,3]

[1,]    1    3    5

o   A list:

§   x <- list (foo = 1:4, bar = 0.6)

§   x

$foo

[1] 1 2 3 4

 

$bar

[1] 0.6

 

§   x[1]     ##produces a list that contains 1,2,3,4

$foo

[1] 1 2 3 4

 

§   x[[1]]   ## produces just the sequence

[1] 1 2 3 4

o   List2

§   x <- list(foo = 1:4, bar = 0.6, baz = "hello")

§   x

$foo

[1] 1 2 3 4

 

$bar

[1] 0.6

 

$baz

[1] "hello"

 

§   name <- "foo" #variable with the string foo

§   x[[name]]

[1] 1 2 3 4

§   x[name]

$foo

[1] 1 2 3 4

o   Nested elements:

§   x <- list (a=list(1,2,3), b = list(4,5,6))

§   x[[c(1,3)]]

[1] 3

§   x[[c(2,1)]]

[1] 4

·         Partial Matching

o    x <- list(awrajhf = 1:5)

o    x

$awrajhf

[1] 1 2 3 4 5

o    x$a #matches the partial name

o   [1] 1 2 3 4 5

o    x[["a"]] #name doesn’t exist

NULL

o    x[["a", exact = FALSE]]

[1] 1 2 3 4 5

·         Removing NA values

o    x <- c(1,2,NA,4,NA,5)

o    bad <- is.na(x)

o    bad

[1] FALSE FALSE  TRUE FALSE  TRUE FALSE

o    y <- x[!bad]

o    y

o   [1] 1 2 4 5

·         Removing NA values – 2 vectors

o    x <- c (1,2,NA,4,NA,5)

o    y <- c("a","b", NA,"d", NA, "f")

o    good <- complete.cases(x,y) #TRUE at the positions where neither x nor y is missing

o    good

[1]  TRUE  TRUE FALSE  TRUE FALSE  TRUE

o    x[good]

[1] 1 2 4 5

o    y[good]

[1] "a" "b" "d" "f"

·         Read data

o   Pass the columns types:

§   initial <- read.table("foo.txt", nrows=10)

§   classes <-sapply(initial, class)

§   all <- read.table("foo.txt", colClasses = classes)

·         Dput-ting Objects:

o    y <- data.frame (a=1, b="a")

o    dput(y)#writes R code that can be used to reconstruct an R object

o    

o   structure(list(a = 1, b = structure(1L, .Label = "a", class = "factor")), .Names = c("a",

o   "b"), row.names = c(NA, -1L), class = "data.frame")

o    

o    dput(y, file="y.R")   # creates the y.R file

o    

o    new.z <- dget("y.R")

o    new.z

o     a b

o   1 1 a

o    

o    

o    foo <- dget("y.R")

o    foo

o     a b

·         Dumping Objects

o    x<-"foo"

o    y<-data.frame(a=1, b="a")

o    dump(c("x","y"),file ="data.R") # dump can be used on multiple R objects

o    rm(x,y) # remove objects

 

o    x

Error: object ‘x’ not found

o    source("data.R")

o    x

[1] "foo"

·         Data Frame:

o   Print first n rows: head(mydf, n=2)

o   Last two rows: tail(mydf,2)

o   Number of rows: nrow(mydf)

o   Show line 47: mydf[47,]

o   Find the number of missing values in a column:

§  length(which(is.na(mydf$Ozone)))

§   miss <- is.na(mydf[, "Ozone"])  ## A vector of TRUE/FALSE

§   sum(miss)

o   Subset of rows of the data frame where Ozone values are above 31 and Temp values are above 90:

§  mydf_sub <- subset(mydf, Ozone >31 & Temp >90)

o   Mean:

§  Option1:

·         mean(mydf[, "Ozone"], na.rm = TRUE)

§  Option2:

·          use <- !is.na(mydf[, "Ozone"])

·          mean(mydf[use, "Ozone"])

 

 

·         CSV:

o    cameradata <- read.table ("c:\\rwd\\cameras\\cameras.csv", sep=",", header = TRUE)

o    head(cameradata)

·         Excel:

o    cameraData <- read.xlsx("cameras\\cameras.xlsx",sheetIndex=1,header=TRUE)

o    head(cameraData)

 

·          library(XML)

·         XML – basic

o    fileUrl <- "http://www.w3schools.com/xml/simple.xml"

o    doc <- xmlTreeParse(fileUrl,useInternal=TRUE)

o    rootNode <- xmlRoot(doc)

o    xmlName(rootNode)

o   [1] "breakfast_menu"

·         XML

o    xpathSApply(rootNode,"//name",xmlValue)

o   [1] "Belgian Waffles"             "Strawberry Belgian Waffles"  "Berry-Berry Belgian Waffles" "French Toast"                "Homestyle Breakfast"       

·         Json

o   install.packages("jsonlite")

o   library(jsonlite)

§  Dependency: install.packages("httr")

o   jsonData <- fromJSON("https://api.github.com/users/jtleek/repos")

o    names(jsonData)   #shows the name of the attributes (names of the data frame)

§   names(jsonData$owner)

o   Writing data frames to JSON:

§  myjson <- toJSON(iris, pretty=TRUE)

§  cat(myjson)

o   Convert back from JSON (to a data frame)

§  iris2 <- fromJSON(myjson)

§  head(iris2)

·         Data Table

o   data.table is an extension of data.frame. Should be used for fast aggregation of large data

§  http://cran.r-project.org/web/packages/data.table/index.html

o   install.packages("data.table")

o   library(data.table)

o   DF = data.frame(x=rnorm(9),y=rep(c("a","b","c"),each=3),z=rnorm(9))

o   DT = data.table(x=rnorm(9),y=rep(c("a","b","c"),each=3),z=rnorm(9))

o   tables() #see all tables in memory

o    DT[2,] #Subsetting rows

o    DT[DT$y=="a",] # looking at rows based on criteria

o   DT[c(2,3)] #subsets second and third rows

o   Calculating values for variables with expressions

§  DT[,list(mean(x),sum(z))]  #applies mean and sum functions on variables x and z on the DT

o   Create table of the Y values:

§  DT[,table(y)]

o   Adding new columns: DT[,w:=z^2]

o   Set all values of a column to 2: DT[, y:= 2]

o   Multiple operations

§  DT[,m:= {tmp <- (x+z); log2(tmp+5)}]    

§  It evaluates both expressions inside the braces and returns the result of the last one

Deixe uma Resposta

Preencha os seus detalhes abaixo ou clique num ícone para iniciar sessão:

Logótipo da WordPress.com

Está a comentar usando a sua conta WordPress.com Terminar Sessão / Alterar )

Imagem do Twitter

Está a comentar usando a sua conta Twitter Terminar Sessão / Alterar )

Facebook photo

Está a comentar usando a sua conta Facebook Terminar Sessão / Alterar )

Google+ photo

Está a comentar usando a sua conta Google+ Terminar Sessão / Alterar )

Connecting to %s

 
%d bloggers like this: