for
loopsSimplest, and most common type of loop in R - given a vector (atomic or generic) iterate through the elements and evaluate the code block for each.
for(x in 1:10)
{
cat(x^2," ", sep="")
}
## 1 4 9 16 25 36 49 64 81 100
for(y in list(1:3, LETTERS[1:7], c(TRUE,FALSE)))
{
cat(length(y)," ",sep="")
}
## 3 7 2
Almost always it is better to create an object to store your results first, rather than growing the object as you go.
# Good
res = rep(NA,10)
for(x in 1:10)
{
res[x] = x^2
}
res
## [1] 1 4 9 16 25 36 49 64 81 100
# Bad
res = c()
for (x in 1:10)
{
res = c(res,x^2)
}
res
## [1] 1 4 9 16 25 36 49 64 81 100
while
)Repeat until the given condition is not met (FALSE
)
i = 1
res = rep(NA,10)
while (i <= 10)
{
res[i] = i^2
i = i+1
}
res
## [1] 1 4 9 16 25 36 49 64 81 100
repeat
)Repeat until the given condition is not met
i = 1
res = rep(NA,10)
repeat
{
res[i] = i^2
i = i+1
if (i > 10) break
}
res
## [1] 1 4 9 16 25 36 49 64 81 100
for
loopsOften we want to use a loop across the indexes of an object and not the elements themselves. There are several useful functions to help you do this: :
, seq
, seq_along
, seq_len
, etc.
l = list(1:3, LETTERS[1:7], c(TRUE,FALSE))
res = rep(NA, length(l))
for(x in seq_along(l))
{
res[x] = length(l[[x]])
}
res
## [1] 3 7 2
1:length(l)
## [1] 1 2 3
seq_along(l)
## [1] 1 2 3
seq_len(length(l))
## [1] 1 2 3
Now lets go back to the evals.csv
data set and see how we can use looping to address the issues we were looking to correct.
Note that everything here can also be done using judicious use of subsetting (which also tends to be faster as well).
There are almost multiple possible approaches - for a first pass the best approach is the one you can get working the quickest, once something is working you can worry about making it faster / efficient.
In R functions are first order objects, this means we can work with them like any other object in R.
f = function(x) x*x
list(f)
## [[1]]
## function (x)
## x * x
typeof(f)
## [1] "closure"
Similarly, functions can return other functions (functor)
f2 = function(x) function(y) x+y
f2(1)
## function(y) x+y
## <environment: 0x7ff84c43ae38>
f2(1)(1)
## [1] 2
The two parts of a function are the arguments (formals
) and the code (body
).
gcd = function(loc1, loc2)
{
deg2rad = function(deg) return(deg*pi/180)
lat1 = deg2rad( loc1[1] )
lat2 = deg2rad( loc2[1] )
long1 = deg2rad( loc1[2] )
long2 = deg2rad( loc2[2] )
R = 6371 # Earth mean radius in km
d = acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2) * cos(long2-long1)) * R
return(d) # distance in km
}
formals(gcd)
## $loc1
##
##
## $loc2
body(gcd)
## {
## deg2rad = function(deg) return(deg * pi/180)
## lat1 = deg2rad(loc1[1])
## lat2 = deg2rad(loc2[1])
## long1 = deg2rad(loc1[2])
## long2 = deg2rad(loc2[2])
## R = 6371
## d = acos(sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) *
## cos(long2 - long1)) * R
## return(d)
## }
los_angeles = c(34.052235, -118.243683)
durham = c(36.002453, -78.905869)
gcd(los_angeles, durham)
## [1] 3564
In the preceding slides we have seen two approaches for returning values: explicit and implicit return values. The former should be preferred of the later except in the case of very simple functions.
Explicit - includes one or more return
s
f = function(x)
return(x*x)
Implicit - value from last statement is returned.
f = function(x)
x*x
If we want a function to return more than one value we can group things using either vectors or lists.
f = function(x) list(x, x^2, x^3)
f(2)
## [[1]]
## [1] 2
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 8
f(2:3)
## [[1]]
## [1] 2 3
##
## [[2]]
## [1] 4 9
##
## [[3]]
## [1] 8 27
When defining a function we are also implicitly defining names for the arguments, when calling the function we can use these names to
f = function(x,y,z) paste0("x=",x," y=",y," z=",z)
f(1,2,3)
## [1] "x=1 y=2 z=3"
f(z=1,x=2,y=3)
## [1] "x=2 y=3 z=1"
f(y=2,1,3)
## [1] "x=1 y=2 z=3"
f(y=2,1,x=3)
## [1] "x=3 y=2 z=1"
f(1,2,3,m=1)
## Error: unused argument (m = 1)
In R it is possible to give function arguments default values,
f = function(x=1,y=1,z=1) paste0("x=",x," y=",y," z=",z)
f()
## [1] "x=1 y=1 z=1"
f(2)
## [1] "x=2 y=1 z=1"
f(z=3)
## [1] "x=1 y=1 z=3"
R has generous scoping rules, if it can’t find a variable in the functions body’s scope, it will look for it in the next higher scope, and so on.
y = 1
f = function(x)
{
x+y
}
f(3)
## [1] 4
g = function(x)
{
y=2
x+y
}
g(3)
## [1] 5
Additionally, variables defined within a scope only persist for the duration of that scope, and do not overwrite variables at the higher scopes.
x = 1
y = 1
z = 1
f = function()
{
y = 2
g = function()
{
z = 3
return(x + y + z)
}
return(g())
}
f()
## [1] 6
c(x,y,z)
## [1] 1 1 1
typeof(`+`)
## [1] "builtin"
x = 4:1
`[`(x,2)
## [1] 3
`&`
## function (e1, e2) .Primitive("&")
Prefixing any function name with a ?
will open the related help file for that function.
?`+`
?sum
For functions not in the base package, you can also see their implementation by entering the function name without parentheses (or using body
function).
lm
## function (formula, data, subset, weights, na.action, method = "qr",
## model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
## contrasts = NULL, offset, ...)
## {
## ret.x <- x
## ret.y <- y
## cl <- match.call()
## mf <- match.call(expand.dots = FALSE)
## m <- match(c("formula", "data", "subset", "weights", "na.action",
## "offset"), names(mf), 0L)
## mf <- mf[c(1L, m)]
## mf$drop.unused.levels <- TRUE
## mf[[1L]] <- quote(stats::model.frame)
## mf <- eval(mf, parent.frame())
## if (method == "model.frame")
## return(mf)
## else if (method != "qr")
## warning(gettextf("method = '%s' is not supported. Using 'qr'",
## method), domain = NA)
## mt <- attr(mf, "terms")
## y <- model.response(mf, "numeric")
## w <- as.vector(model.weights(mf))
## if (!is.null(w) && !is.numeric(w))
## stop("'weights' must be a numeric vector")
## offset <- as.vector(model.offset(mf))
## if (!is.null(offset)) {
## if (length(offset) != NROW(y))
## stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
## length(offset), NROW(y)), domain = NA)
## }
## if (is.empty.model(mt)) {
## x <- NULL
## z <- list(coefficients = if (is.matrix(y)) matrix(, 0,
## 3) else numeric(), residuals = y, fitted.values = 0 *
## y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
## 0) else if (is.matrix(y)) nrow(y) else length(y))
## if (!is.null(offset)) {
## z$fitted.values <- offset
## z$residuals <- y - offset
## }
## }
## else {
## x <- model.matrix(mt, mf, contrasts)
## z <- if (is.null(w))
## lm.fit(x, y, offset = offset, singular.ok = singular.ok,
## ...)
## else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...)
## }
## class(z) <- c(if (is.matrix(y)) "mlm", "lm")
## z$na.action <- attr(mf, "na.action")
## z$offset <- offset
## z$contrasts <- attr(x, "contrasts")
## z$xlevels <- .getXlevels(mt, mf)
## z$call <- cl
## z$terms <- mt
## if (model)
## z$model <- mf
## if (ret.x)
## z$x <- x
## if (ret.y)
## z$y <- y
## if (!qr)
## z$qr <- NULL
## z
## }
## <bytecode: 0x7ff84c50c008>
## <environment: namespace:stats>
The goal of a function should be to encapsulate a small reusable piece of code.
Name should make it clear what the function does (think in terms of simple verbs).
Functionality should be simple enough to be quickly understood.
The smaller and more modular the code the easier it will be to reuse elsewhere.
Better to change code in one location than code everywhere.
We can define our own infix functions like +
or *
, the only requirement is that they must start and end with a %
.
`%nand%` = function(x, y) !(x & y)
TRUE %nand% TRUE
## [1] FALSE
TRUE %nand% FALSE
## [1] TRUE
FALSE %nand% TRUE
## [1] TRUE
FALSE %nand% FALSE
## [1] TRUE
We can also define functions that allow for ‘inplace’ modification like attr
or names
.
`last<-` = function(x, value)
{
x[length(x)] = value
x
}
x = 1:10
last(x) = 5L
x
## [1] 1 2 3 4 5 6 7 8 9 5
last(1)
## Error: could not find function "last"
Using what we have just learned about functions and looping, create two new R infix functions %op%
and %ip%
that calculate the outer and inner product of two numeric vectors (no error checking is necessary). These functions should be generic enough that they will accept vectors of any length. You should not use any of Rs built in matrix multiplication functionality.
`%op%` = function(x,y)
{
stopifnot(length(x) == length(y))
...
}
Some examples of the correct function output is included on the next slide.
c(1,2) %op% c(3,4)
## [,1] [,2]
## [1,] 3 4
## [2,] 6 8
c(1,2,3) %op% c(4,5,6)
## [,1] [,2] [,3]
## [1,] 4 5 6
## [2,] 8 10 12
## [3,] 12 15 18
c(1,2) %ip% c(3,4)
## [1] 11
c(1,2,3) %ip% c(4,5,6)
## [1] 32
1:50 %ip% 51:100
## [1] 106675
Above materials are derived in part from the following sources: