September 11, 2014

From last time

Application Exercise 4

Below are 100 values,

# The 100 values that the subsetting questions below refer to.
x = c(56, 3, 17, 2, 4, 9, 6, 5, 19, 5, 2, 3, 5, 0, 13, 12, 6, 31, 10, 21, 8, 4, 1, 1, 2, 5, 16, 1, 3, 8, 1, 
      3, 4, 8, 5, 2, 8, 6, 18, 40, 10, 20, 1, 27, 2, 11, 14, 5, 7, 0, 3, 0, 7, 0, 8, 10, 10, 12, 8, 82, 
      21, 3, 34, 55, 18, 2, 9, 29, 1, 4, 7, 14, 7, 1, 2, 7, 4, 74, 5, 0, 3, 13, 2, 8, 1, 6, 13, 7, 1, 10, 
      5, 2, 4, 4, 14, 15, 4, 17, 1, 9)

write down how you would create a subset to accomplish each of the following:

  • Select all observations with values greater than or equal to 40

  • Select all observations with values less than 30 or greater than 50

  • Select all observations with values between 35 and 75

  • Remove all observations with an odd index (e.g. 1, 3, etc.)

Factor Subsetting

# Wrapping the assignment in parentheses also prints the assigned value.
(x = factor(c("BS", "MS", "PhD", "MS")))
## [1] BS  MS  PhD MS 
## Levels: BS MS PhD
# Plain subsetting keeps every level, even those absent from the subset.
x[1:2]
## [1] BS MS
## Levels: BS MS PhD
# drop=TRUE discards the levels that no longer occur in the subset.
x[1:2, drop=TRUE]
## [1] BS MS
## Levels: BS MS

Data Frame Subsetting

# Small example data frame: 2 rows, columns a, b and c.
df = data.frame(a = 1:2, b = 3:4, c = 5:6)
# Row index only: the first row with all columns.
df[1,]
##   a b c
## 1 1 3 5
# A negative column index excludes that column (here the second one, b).
df[,-2]
##   a c
## 1 1 5
## 2 2 6
# Columns can also be selected by name.
df[, c("a","b")]
##   a b
## 1 1 3
## 2 2 4

Subsetting and assignment

Subsetting and assignment

Subsets can also be used with assignment to update specific values within an object.

x = c(1, 4, 7)
# Assigning to a subset overwrites only the selected element.
x[2] = 2
x
## [1] 1 2 7
# The right-hand side may read the element that is being replaced.
x[1] = x[1] + 1
x
## [1] 2 2 7

Assignment with factors

x = c(1,2,1,3,2,1,2,1,3)
# Assigning a string into the numeric vector turns the whole vector into
# character (see the str() output below); the later comparisons against 2 and 3
# still match the remaining "2" and "3" entries.
x[x == 1] = "male"
x[x == 2] = "female"
x[x == 3] = "other"
str(x)
##  chr [1:9] "male" "female" "male" "other" "female" "male" ...
# Convert to a factor with an explicit level order.
x = factor(x, levels = c("male","female","other")); str(x)
##  Factor w/ 3 levels "male","female",..: 1 2 1 3 2 1 2 1 3
# Removing the "other" observations by subsetting still keeps all 3 levels.
y = x[x != "other"]; str(y)
##  Factor w/ 3 levels "male","female",..: 1 2 1 2 1 2 1
# drop = TRUE also removes the now-unused "other" level.
w = x[x != "other", drop = TRUE]; str(w)
##  Factor w/ 2 levels "male","female": 1 2 1 2 1 2 1

Simple Looping

for loops

Simplest, and most common type of loop in R - iterate through the elements of a vector and evaluate the code block for each.

# Loop over an integer sequence and print the square of each element.
for (x in seq_len(10)) {
  cat(x^2, " ", sep = "") # cat: concatenate and print
}
## 1 4 9 16 25 36 49 64 81 100
# Loop over a list: the elements may differ in type and length.
for (y in list(1:3, LETTERS[1:7], c(TRUE, FALSE))) {
  cat(length(y), " ", sep = "")
}
## 3 7 2

Storing results

Almost always it is better to create an object to store your results first, rather than growing the object as you go.

# Good
# Create the full-length result first, then fill it in by index.
res = rep(NA,10)
for(x in 1:10)
{
  res[x] = x^2
}
res
##  [1]   1   4   9  16  25  36  49  64  81 100
# Bad
# Starts empty and builds a new, longer vector with c() on every iteration.
res = c()
for (x in 1:10)
{
  res = c(res,x^2)
}
res
##  [1]   1   4   9  16  25  36  49  64  81 100

Alternative loops: while

Repeat until the given condition is not met (FALSE)

# Fill y with the squares of 1 to 10, advancing the counter by hand.
y = rep(NA, 10)
i = 1
while (i <= 10) {
  y[i] = i^2
  i = i + 1
}
y
##  [1]   1   4   9  16  25  36  49  64  81 100

Alternative loops: repeat

Repeat indefinitely until a break statement is executed

# Same computation with repeat: the loop has no condition of its own and
# only ends when break is reached.
y = rep(NA, 10)
i = 1
repeat {
  y[i] = i^2
  i = i + 1
  if (i > 10) {
    break
  }
}
y
##  [1]   1   4   9  16  25  36  49  64  81 100

if statements

if statements

Many tasks you use an if statement for could be done with subsetting as well.

# Label each element of x as "lt 5" or "geq 5" with an explicit loop.
x = c(1,2,4,9,16)
n = length(x)
y = rep(NA, n)
# seq_len(n) yields an empty sequence when n is 0, whereas 1:n would
# iterate over c(1, 0) and index past the end of x.
for(i in seq_len(n))
  {
    # The two cases are mutually exclusive, so a single if / else suffices.
    if(x[i] < 5){ y[i] = "lt 5" } else { y[i] = "geq 5" }
  }
y
## [1] "lt 5"  "lt 5"  "lt 5"  "geq 5" "geq 5"

ifelse

x = c(1,2,4,9,16)
# ifelse() is vectorized: it tests every element of x at once and picks the
# matching label, so no loop, counter, or preallocated result is needed.
y = ifelse(x < 5, "lt 5", "geq 5")
y
## [1] "lt 5"  "lt 5"  "lt 5"  "geq 5" "geq 5"

Application Exercise 6

  1. Goal: a vector of length 100, where the first 50 elements are the numbers 1 through 50, and the next 50 are the squares of the numbers 1 through 50. Your task is to create this vector two ways:

    • First, without a for loop.
    • Then, with a for loop. Remember, you will first need to create a vector of NAs of length 100, then populate it to obtain the desired vector.
  2. Seasons, two ways: Create a data frame that looks like the one printed below (columns ID and birth_mo):
    • Then, add a new column, season_loop to the dataset, and populate it using a for loop. This column should tell which season the person is born in.
    • Now add another column, season_noloop, and do the same thing without using a for loop.
##    ID birth_mo
## 1   1      Jan
## 2   2      Jul
## 3   3      May
## 4   4      Aug
## 5   5      Apr
## 6   6      Sep
## 7   7      Nov
## 8   8      Oct
## 9   9      Feb
## 10 10      Jun

Misc.

HW