Programming Exercises

Exercise 1

`for` loop

Task 1.1: Compute the mean of each row in myMA by applying the mean function in a for loop.

# Simulate a 100 x 5 matrix of standard-normal values with row labels 1..100
# and column labels C1..C5.
myMA <- matrix(rnorm(500), nrow = 100, ncol = 5,
               dimnames = list(1:100, paste0("C", 1:5)))

# Preallocate the result instead of growing it with c() on every iteration;
# appending inside a loop copies the whole vector each time.
myve_for <- numeric(nrow(myMA))
for (i in seq_len(nrow(myMA))) {
  # as.numeric() strips the column names from the extracted row.
  myve_for[i] <- mean(as.numeric(myMA[i, ]))
}

# Append the row means as an extra column and show the first four rows.
myResult <- cbind(myMA, mean_for = myve_for)
myResult[1:4, ]

##           C1         C2         C3          C4         C5   mean_for
## 1 -0.9832766  0.8446066  0.4196481  0.23814667  0.3493797  0.1737009
## 2  1.2980835  0.6924483  0.5996748 -0.51642965  0.6701380  0.5487830
## 3 -1.1466949  0.6752775 -0.9384848  0.07464206 -0.7651351 -0.4200791
## 4  0.8122978 -1.3107710  0.6664631  0.12316103 -0.1726270  0.0237048

`while` loop

Task 1.2: Compute the mean of each row in myMA by applying the mean function in a while loop.

# Row counter for the while loop; it ends at nrow(myMA) + 1.
z <- 1

# Preallocate the result instead of growing it with c() on every iteration.
myve_while <- numeric(nrow(myMA))
while (z <= nrow(myMA)) {
  # as.numeric() strips the column names from the extracted row.
  myve_while[z] <- mean(as.numeric(myMA[z, ]))
  z <- z + 1
}

# Rebuild the result table with both mean columns; print the first four rows
# without the first two data columns.
myResult <- cbind(myMA, mean_for = myve_for, mean_while = myve_while)
myResult[1:4, -c(1, 2)]

##           C3          C4         C5   mean_for mean_while
## 1  0.4196481  0.23814667  0.3493797  0.1737009  0.1737009
## 2  0.5996748 -0.51642965  0.6701380  0.5487830  0.5487830
## 3 -0.9384848  0.07464206 -0.7651351 -0.4200791 -0.4200791
## 4  0.6664631  0.12316103 -0.1726270  0.0237048  0.0237048

Task 1.3: Confirm that the results from both mean calculations are identical

# Compare the two mean columns by name rather than by position, and with a
# numeric tolerance rather than `==`, which is fragile for floating-point data.
isTRUE(all.equal(myResult[, "mean_for"], myResult[, "mean_while"]))

## [1] TRUE

`apply` loop

Task 1.4: Compute the mean of each row in myMA by applying the mean function in an apply loop

# MARGIN = 1 makes apply() call mean() once per row of myMA.
myve_apply <- apply(X = myMA, MARGIN = 1, FUN = mean)

# Collect all three mean columns next to the data; print the first four rows
# without the first two data columns.
myResult <- cbind(
  myMA,
  mean_for = myve_for,
  mean_while = myve_while,
  mean_apply = myve_apply
)
myResult[seq_len(4), -(1:2)]

##           C3          C4         C5   mean_for mean_while mean_apply
## 1  0.4196481  0.23814667  0.3493797  0.1737009  0.1737009  0.1737009
## 2  0.5996748 -0.51642965  0.6701380  0.5487830  0.5487830  0.5487830
## 3 -0.9384848  0.07464206 -0.7651351 -0.4200791 -0.4200791 -0.4200791
## 4  0.6664631  0.12316103 -0.1726270  0.0237048  0.0237048  0.0237048

Avoiding loops

Task 1.5: When operating on large data sets it is much faster to use the rowMeans function

# rowMeans() computes all row means in a single vectorized call.
mymean <- rowMeans(x = myMA)

# Collect all four mean columns next to the data; print the first four rows
# without the first three data columns.
myResult <- cbind(
  myMA,
  mean_for = myve_for,
  mean_while = myve_while,
  mean_apply = myve_apply,
  mean_int = mymean
)
myResult[seq_len(4), -(1:3)]

##            C4         C5   mean_for mean_while mean_apply   mean_int
## 1  0.23814667  0.3493797  0.1737009  0.1737009  0.1737009  0.1737009
## 2 -0.51642965  0.6701380  0.5487830  0.5487830  0.5487830  0.5487830
## 3  0.07464206 -0.7651351 -0.4200791 -0.4200791 -0.4200791 -0.4200791
## 4  0.12316103 -0.1726270  0.0237048  0.0237048  0.0237048  0.0237048

Exercise 2

Custom functions

Task 2.1: Use the following code as a basis to implement a function that allows the user to compute a summary statistic (e.g. the mean) for any combination of columns in a matrix or data frame. The first argument of this function should specify the input data set, the second the mathematical function to be passed on (e.g. mean, sd, max), and the third should allow the selection of the columns by providing a grouping vector.

# Simulate a 10000 x 10 matrix of standard-normal values with row labels
# 1..10000 and column labels C1..C10, then show its first two rows.
myMA <- matrix(
  data = rnorm(100000),
  nrow = 10000,
  ncol = 10,
  dimnames = list(seq_len(10000), paste0("C", seq_len(10)))
)
myMA[seq_len(2), ]

##           C1         C2         C3        C4         C5        C6         C7         C8         C9
## 1 0.17958477 -0.5714262 -0.8866647  2.463907 -0.8126814 -0.933438 -0.8118745 -0.5546591 -0.9026947
## 2 0.04191538  1.2456075  0.4953736 -2.703100  0.2055566 -1.945671 -2.8696620 -1.5043589 -1.8783809
##          C10
## 1 -0.5698361
## 2 -1.6321152

# Grouping vector with one label per column of myMA; tapply() collects the
# column names that share a label into one list element per group.
myList <- tapply(colnames(myMA), c(1, 1, 1, 2, 2, 2, 3, 3, 4, 4), list)

# Name each group after its member columns, e.g. "C1_C2_C3".
names(myList) <- vapply(myList, paste, FUN.VALUE = character(1), collapse = "_")

# Row-wise mean within each column group. drop = FALSE keeps a single-column
# group as a matrix so apply() still works; vapply() pins the result to one
# numeric column of length nrow(myMA) per group.
myMAmean <- vapply(
  myList,
  function(x) apply(myMA[, x, drop = FALSE], 1, mean),
  FUN.VALUE = numeric(nrow(myMA))
)
myMAmean[1:4, ]

##     C1_C2_C3   C4_C5_C6      C7_C8     C9_C10
## 1 -0.4261687  0.2392626 -0.6832668 -0.7362654
## 2  0.5942988 -1.4810715 -2.1870104 -1.7552480
## 3  0.1217488 -0.7225502 -0.6295343  0.4990018
## 4 -0.9118941 -0.3107419  0.3284317 -0.5693107

Exercise 3

Nested loops to generate similarity matrices

Task 3.1: Create a sample list populated with character vectors of different lengths

# Build 20 character vectors of lengths 11..30, each drawn from the lowercase
# letters with replacement, label them S1..S20, and show the first six.
setlist <- lapply(11:30, function(n) sample(letters, size = n, replace = TRUE))
names(setlist) <- paste0("S", seq_along(setlist))
setlist[seq_len(6)]

## $S1
##  [1] "x" "r" "j" "n" "l" "z" "b" "o" "v" "j" "i"
## 
## $S2
##  [1] "k" "b" "p" "c" "z" "f" "v" "u" "e" "d" "c" "f"
## 
## $S3
##  [1] "l" "e" "p" "j" "i" "k" "y" "i" "w" "l" "w" "x" "p"
## 
## $S4
##  [1] "d" "e" "v" "o" "h" "q" "i" "e" "d" "y" "o" "m" "q" "y"
## 
## $S5
##  [1] "s" "q" "r" "j" "o" "z" "q" "g" "s" "v" "w" "j" "l" "r" "d"
## 
## $S6
##  [1] "c" "l" "h" "v" "e" "a" "i" "u" "g" "h" "s" "f" "u" "b" "e" "y"

Task 3.2: Compute the length of all pairwise intersections of the vectors stored in setlist. The size of an intersection can be determined with the %in% operator like this: sum(setlist[[1]] %in% setlist[[2]])

# Remove duplicate letters within each set. lapply() always returns a list;
# sapply() would collapse the result into a matrix whenever all de-duplicated
# vectors happen to have the same length, breaking the [[ ]] lookups below.
setlist <- lapply(setlist, unique)

# Pairwise overlap counts: the entry in row y, column x is the number of
# elements of set x that also occur in set y. vapply() fixes the result to
# one integer per pair and one integer column per set.
olMA <- vapply(
  names(setlist),
  function(x) {
    vapply(
      names(setlist),
      function(y) sum(setlist[[x]] %in% setlist[[y]]),
      FUN.VALUE = integer(1)
    )
  },
  FUN.VALUE = integer(length(setlist))
)
olMA[1:12, ]

##     S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20
## S1  10  3  4  3  6  4  6  6  7   8   7   2   5   8   8   8   8   7   8   7
## S2   3 10  3  3  3  6  7  4  7   2   5   6   4   8   7   6   8   6   7   7
## S3   4  3  9  3  3  4  4  5  4   5   5   3   4   5   7   7   8   5   5   5
## S4   3  3  3  9  4  5  3  4  5   5   5   7   7   5   8   7   7   5   6   5
## S5   6  3  3  4 11  4  3  5  6   6   7   7   8  10   9   8   8   7   8   8
## S6   4  6  4  5  4 13  4  5  7   5   7   8   6   9   9   8   9   7   7   9
## S7   6  7  4  3  3  4 12  6  7   5   9   4   6   9   9   8  10   8  10   8
## S8   6  4  5  4  5  5  6 13  9   9  10   7   7   9  12   9  11   9   9   7
## S9   7  7  4  5  6  7  7  9 14   7   8   7   7  11  11  10  11   8   9  11
## S10  8  2  5  5  6  5  5  9  7  13   8   6   6   6  10  11   9   9   8   9
## S11  7  5  5  5  7  7  9 10  8   8  15   7   9  11  11  10  12  11  12  10
## S12  2  6  3  7  7  8  4  7  7   6   7  14   8  10  11   9  11   9   9   8

Task 3.3: Plot the resulting intersect matrix as a heat map. The image function or the heatmap.2 function from the gplots library can be used for this.

# Draw the intersect-count matrix olMA as a color-coded grid.
image(olMA)

Exercise 4

Build your own R package

Task 4.1: Save one or more of your functions to a file called script.R and build the package with the package.skeleton function.

# Generate the source skeleton for "mypackage" from the functions saved in
# script.R (the file named in the task). The former `namespace` argument is
# not passed: current package.skeleton() has no such parameter, so supplying
# it fails with an unused-argument error.
package.skeleton(name = "mypackage", code_files = "script.R")

Task 4.2: Build the tarball of the package

# Run `R CMD build` on the mypackage directory through the system shell.
system("R CMD build mypackage")

Task 4.3: Install and use package

# Install the built source tarball from the local file (no repository lookup),
# attach the package, and open the help page of one of its functions.
install.packages(
  pkgs = "mypackage_1.0.tar.gz",
  repos = NULL,
  type = "source"
)
library("mypackage")
help("myMAcomp") # Opens help for function defined by mypackage

Jump to: