Miscellaneous

Optimazation

Say we want to maximaize the following one parameter optimazation problem. f(k) = \binom{12}{k} * theta^k * (1-theta)^{12-k}.

# binomial
k = 9:12
binom <- function(theta){
	     sum(choose(12,k)* theta^k * (1 - theta)^(12-k))
	 }

optimize(binom, c(0, 0.5), maximum=TRUE)
$maximum
[1] 0.4999542

$objective
[1] 0.07295374

In the two paramter optimazation problem: f = \lambda^5 * (\mu + \lambda)^4 * exp(\mu + 2*\lambda)

x <- as.numeric()
> x[1] = "lam"
> x[2] = "mu"

model2 <- function(x){
             x[1]^5 * (x[1] + x[2])^4 * exp(-(x[2] + 2*x[1]))
          }

optim(c(1,1), model2, method="L-BFGS-B", lower=c(0,0), control=list(fnscale=-1))
$par
[1] 4.5 0.0

$value
[1] 93.38181

$counts
function gradient 
      12       12 

$convergence
[1] 0

$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Create empty vector and list

a <- array()
b <- vector()
c <- list()
d <- vector(mode = "list")
'''

### Keyword search

Find a function using a keyword. 

```r
??transpose
apropos("time")
 [1] ".difftime"              "[.difftime"             "*.difftime"            
 [4] "/.difftime"             "as.data.frame.difftime" "as.difftime"           
 [7] "as.double.difftime"     "difftime"               "format.difftime"       
[10] "gc.time"                "is.numeric.difftime"    "ISOdatetime"           
[13] "Math.difftime"          "mean.difftime"          "Ops.difftime"          
[16] "print.difftime"         "print.proc_time"        "proc.time"             
[19] "setSessionTimeLimit"    "setTimeLimit"           "strftime"              
[22] "strptime"               "Summary.difftime"       "Sys.time"              
[25] "Sys.timezone"           "system.time"            "time"                  
[28] "timestamp"              "units.difftime"         "units<-.difftime"      
[31] "unix.time"              "xtfrm.difftime"

Remove variables

# all = TRUE removes variables starting with '.' too.
rm(list = ls( all = TRUE))

# remove variables except some
# keep only 'a' and 'b'
ls()
[1] "a" "b" "c" "d" "e" "f" "g"
rm(list = setdiff(ls(), c('a', 'b')))
ls()
[1] "a" "b"

Combine vectors in an alternating way

x <- c(1, 3, 5, 7)
y <- c(2, 4, 6, 8)
c(rbind(x, y))
[1] 1 2 3 4 5 6 7 8
c(matrix(c(x, y), 2, byrow = T))
[1] 1 2 3 4 5 6 7 8

Testing for missing values

x <- c(1,7,1,3,NA, 10)
!all(complete.cases(x))
[1] TRUE
any(is.na(x))
[1] TRUE

Vector comparison

Given two vectors a and b, for each element in a, find the max element in b which is smaller than it.

a <- c(5, 10, 13, 19, 23)
b <- c(1, 4, 7, 9, 15)
c <- outer(a, b, ">") 
b[rowSums(c)]
[1]  4  9  9 15 15

Debugging

browser()
debug()
trace()
traceback()
recover()
options(error = recover)
options(warn=2)

Comment out multiple lines

if (0){
   code ...
   code ...
+ }

Check whether a value returned is integer(0)

x <- 1:5
which(x==6)
integer(0)
length(which(x==6)) == 0
[1] TRUE

Get size of an object

object.size(sleep)
1776 bytes

Testing for floating point numbers

isTRUE(all.equal(1/3, 0.333))
[1] FALSE
isTRUE(all.equal(1/3, 0.3333333))
[1] FALSE
isTRUE(all.equal(1/3, 0.33333333)
[1] TRUE

all.equal(1/3, 0.3)
[1] "Mean relative difference: 0.1"
all.equal(1/3, 0.33)
[1] "Mean relative difference: 0.01"
all.equal(1/3, 0.333)
[1] "Mean relative difference: 0.001"

Difference between & and && operators

# & is elementwise comparison, && only evaluates the first elements 
c(1,1,0,0) & c(0,1,0,1)
[1] FALSE  TRUE FALSE FALSE
c(1,1,0,0) && c(0,1,0,1)
[1] FALSE

Update own functions

hoge <- function(x , y){
	     sum <- x  + y
             multi <-  x*y 
             out <-  list(sum = sum, multi=multi, call=sys.call() )
             return(out)
         } 
fit <- hoge(x=2,y=3)
fit
$sum
[1] 5

$multi
[1] 6

$call
hoge(x = 2, y = 3)

# update 
update(fit, y=4)
$sum
[1] 6

$multi
[1] 8

$call
hoge(x = 2, y = 4)

Frequency table including zero frequencies

x <- c(sample(c(1:10), 5, rep=TRUE))
table(x)
x
2 3 7 9 
1 2 1 1 

table(factor(x, levels = 0:10))

 0  1  2  3  4  5  6  7  8  9 10 
 0  0  1  2  0  0  0  1  0  1  0 
tabulate(x)
[1] 0 1 2 0 0 0 1 0 1

Extract p-values from aov objects

utils::data(npk, package="MASS")
x <- aov(yield ~ block + N * P + K, npk)
summary(x)
            Df Sum Sq Mean Sq F value Pr(>F)
block        5    343    68.7    4.39 0.0130
N            1    189   189.3   12.11 0.0037
P            1      8     8.4    0.54 0.4756
K            1     95    95.2    6.09 0.0271
N:P          1     21    21.3    1.36 0.2628
Residuals   14    219    15.6               
summary(x)[[1]][[5]]
[1] 0.0129536 0.0036837 0.4756370 0.0271138 0.2628406        NA

Turn off scientific notation

# ex 1
options()$scipen
[1] 0

# ex 2
x <- 10e-6
x
[1] 1e-05
format(x, sci=FALSE)
[1] "0.00001"

Capture output

str(iris)
'data.frame':150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

capture.output(str(iris))
[1] "'data.frame':\t150 obs. of  5 variables:"                                                 
[2] " $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ..."                            
[3] " $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ..."                          
[4] " $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ..."                        
[5] " $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ..."                        
[6] " $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ..."
 
capture.output(str(iris))[1]
[1] "'data.frame':\t150 obs. of  5 variables:"

Root finding

# unit root
f <- function(x) 0.0016^2 - 0.001^2*x^2 - 0.002^2*(1-x)^2 -
+ 2*0.7*0.001*0.002*x*(1-x)
uniroot(f, c(-1,1))
$root
[1] 0.32034

$f.root
[1] 2.1463e-11

$iter
[1] 7

$estim.prec
[1] 6.1035e-05

# polynomial
polyroot(c(1, 2, 1))
[1] -1-0i -1+0i

Find row indices in a matrix

x <- data.frame(A=c(1, 2, 2), B=c(4, 5, 5))
x
  A B
1 1 4
2 2 5
3 2 5
r <- c(2,5)
which(apply(t(x) == r, 2, all))
[1] 2 3

Pass an operator to a function

hoge<- function(a, op, b) {
	   op(a,b)
        }
# use back quotes to pass a operator
hoge(1, `<`, 2)
[1] TRUE
hoge(2, `==`, 3)
[1] FALSE

Find arguements of a function

names(formals(sweep))
[1] "x"            "MARGIN"       "STATS"        "FUN"          "check.margin"
[6] "..."         
args(sweep)
function (x, MARGIN, STATS, FUN = "-", check.margin = TRUE, ...) 
NULL

Rolling sum

require(zoo)
a <- c(1,5,3,7,8,4,2,5)
a <- zoo(a)
rollapply(a, 2, sum)
 1  2  3  4  5  6  7 
 6  8 10 15 12  6  7 
rollapply(a, 3, sum)
 2  3  4  5  6  7 
 9 15 18 19 14 11

Find duplicated elements

x <- c(1,1,2,3,3,4)
duplicated(x)
[1] FALSE  TRUE FALSE FALSE  TRUE FALSE
ave(x, x, FUN = length) > 1
[1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE
x %in% x[duplicated(x)]
[1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE

Split a string containing spaces

output <- c("a     b   3 4 cc        5555")
unlist(strsplit(output, split=" +"))
[1] "a"    "b"    "3"    "4"    "cc"   "5555"

Return the ordering index for a matrix

M <- matrix(c(rnorm(16)), ncol=4)
M
         [,1]     [,2]     [,3]     [,4]
[1,] -1.29592 -0.38361  0.14752 -0.69644
[2,]  0.39755  0.38478 -0.30034 -0.27125
[3,]  0.47507 -0.35334 -1.81646 -1.35324
[4,] -0.27713 -0.43579  0.73404  0.67316

arrayInd(order(M, decreasing=TRUE), dim(M))
      [,1] [,2]
 [1,]    4    3
 [2,]    4    4
 [3,]    3    1
 [4,]    2    1
 [5,]    2    2
 [6,]    1    3
 [7,]    2    4
 [8,]    4    1
 [9,]    2    3
[10,]    3    2
[11,]    1    2
[12,]    4    2
[13,]    1    4
[14,]    1    1
[15,]    3    4
[16,]    3    3

Find indices where 10 contiguous 1 occur

x <- c(0,1,1,0,0,1,1,1,0,1,1,1,1,1,1)

rle(x)
Run Length Encoding
  lengths: int [1:6] 1 2 2 3 1 6
  values : num [1:6] 0 1 0 1 0 1

filter(x, rep(1/5, 5), sides=1)
Time Series:
Start = 1 
End = 15 
Frequency = 1 
 [1]  NA  NA  NA  NA 0.4 0.6 0.6 0.6 0.6 0.8 0.8 0.8 0.8 1.0 1.0

which(filter(x, rep(1/5, 5), sides=1)==1)
[1] 14 15

For each `a[i]`, find the proportion of `a` < `b`

n = 20
a <- rnorm(n)
b <- rnorm(n)
findInterval(a, sort(b)) / n
 [1] 0.70 0.55 0.30 0.30 0.85 0.00 0.50 0.00 0.90 0.55 0.65 0.70 0.90 0.05 0.25
[16] 0.30 0.95 0.25 0.25 0.15

Table function with `exclude` argument

table(c(1,2,3,4,NA), exclude=2, useNA='ifany') 
   1    3    4  
   1    1    1    2

table(as.factor(c(1,2,3,4,NA)), exclude=2, useNA='ifany') 
   1    3    4  
   1    1    1    1

Send outputs to a file

sink(file="myoutput.txt", type="output")
YOUR CODE COMES HERE
sink()

writeLines(as.character(body((glm.fit))), con = "glm.fit.R")

Rank of a matrix

X <- matrix(c(1,0,0,0,1,0,0,0,1,1,1,1), ncol=4)
X
     [,1] [,2] [,3] [,4]
[1,]    1    0    0    1
[2,]    0    1    0    1
[3,]    0    0    1    1

qr(X)$rank
[1] 3

require(Matrix)
rankMatrix(X)[1]
[1] 3

Miscellaneous

Optimazation

Create empty vector and list

Remove variables

Combine vectors in an alternating way

Testing for missing values

Vector comparison

Debugging

Comment out multiple lines

Check whether a value returned is integer(0)

Get size of an object

Testing for floating point numbers

Difference between & and && operators

Update own functions

Frequency table including zero frequencies

Extract p-values from aov objects

Turn off scientific notation

Capture output

Root finding

Find row indices in a matrix

Pass an operator to a function

Find arguements of a function

Rolling sum

Find duplicated elements

Split a string containing spaces

Return the ordering index for a matrix

Find indices where 10 contiguous 1 occur

For each a[i], find the proportion of a < b

Table function with exclude argument

Send outputs to a file

Rank of a matrix

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally

For each `a[i]`, find the proportion of `a` < `b`

Table function with `exclude` argument