Skip to content

Miscellaneous

Gota Morota edited this page Feb 14, 2014 · 10 revisions

Optimazation

Say we want to maximaize the following one parameter optimazation problem. f(k) = \binom{12}{k} * theta^k * (1-theta)^{12-k}.

# binomial
k = 9:12
binom <- function(theta){
	     sum(choose(12,k)* theta^k * (1 - theta)^(12-k))
	 }

optimize(binom, c(0, 0.5), maximum=TRUE)
$maximum
[1] 0.4999542

$objective
[1] 0.07295374

In the two paramter optimazation problem: f = \lambda^5 * (\mu + \lambda)^4 * exp(\mu + 2*\lambda)

x <- as.numeric()
> x[1] = "lam"
> x[2] = "mu"

model2 <- function(x){
             x[1]^5 * (x[1] + x[2])^4 * exp(-(x[2] + 2*x[1]))
          }

optim(c(1,1), model2, method="L-BFGS-B", lower=c(0,0), control=list(fnscale=-1))
$par
[1] 4.5 0.0

$value
[1] 93.38181

$counts
function gradient 
      12       12 

$convergence
[1] 0

$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Create empty vector and list

a <- array()
b <- vector()
c <- list()
d <- vector(mode = "list")
'''

### Keyword search

Find a function using a keyword. 

```r
??transpose
apropos("time")
 [1] ".difftime"              "[.difftime"             "*.difftime"            
 [4] "/.difftime"             "as.data.frame.difftime" "as.difftime"           
 [7] "as.double.difftime"     "difftime"               "format.difftime"       
[10] "gc.time"                "is.numeric.difftime"    "ISOdatetime"           
[13] "Math.difftime"          "mean.difftime"          "Ops.difftime"          
[16] "print.difftime"         "print.proc_time"        "proc.time"             
[19] "setSessionTimeLimit"    "setTimeLimit"           "strftime"              
[22] "strptime"               "Summary.difftime"       "Sys.time"              
[25] "Sys.timezone"           "system.time"            "time"                  
[28] "timestamp"              "units.difftime"         "units<-.difftime"      
[31] "unix.time"              "xtfrm.difftime"

Remove variables

# all = TRUE removes variables starting with '.' too.
rm(list = ls( all = TRUE))

# remove variables except some
# keep only 'a' and 'b'
ls()
[1] "a" "b" "c" "d" "e" "f" "g"
rm(list = setdiff(ls(), c('a', 'b')))
ls()
[1] "a" "b"

Combine vectors in an alternating way

x <- c(1, 3, 5, 7)
y <- c(2, 4, 6, 8)
c(rbind(x, y))
[1] 1 2 3 4 5 6 7 8
c(matrix(c(x, y), 2, byrow = T))
[1] 1 2 3 4 5 6 7 8

Testing for missing values

x <- c(1,7,1,3,NA, 10)
!all(complete.cases(x))
[1] TRUE
any(is.na(x))
[1] TRUE

Vector comparison

Given two vectors a and b, for each element in a, find the max element in b which is smaller than it.

a <- c(5, 10, 13, 19, 23)
b <- c(1, 4, 7, 9, 15)
c <- outer(a, b, ">") 
b[rowSums(c)]
[1]  4  9  9 15 15

Debugging

browser()
debug()
trace()
traceback()
recover()
options(error = recover)
options(warn=2)

Comment out multiple lines

if (0){
   code ...
   code ...
+ }

Check whether a value returned is integer(0)

x <- 1:5
which(x==6)
integer(0)
length(which(x==6)) == 0
[1] TRUE

Get size of an object

object.size(sleep)
1776 bytes

Testing for floating point numbers

isTRUE(all.equal(1/3, 0.333))
[1] FALSE
isTRUE(all.equal(1/3, 0.3333333))
[1] FALSE
isTRUE(all.equal(1/3, 0.33333333)
[1] TRUE

all.equal(1/3, 0.3)
[1] "Mean relative difference: 0.1"
all.equal(1/3, 0.33)
[1] "Mean relative difference: 0.01"
all.equal(1/3, 0.333)
[1] "Mean relative difference: 0.001"

Difference between & and && operators

# & is elementwise comparison, && only evaluates the first elements 
c(1,1,0,0) & c(0,1,0,1)
[1] FALSE  TRUE FALSE FALSE
c(1,1,0,0) && c(0,1,0,1)
[1] FALSE

Update own functions

hoge <- function(x , y){
	     sum <- x  + y
             multi <-  x*y 
             out <-  list(sum = sum, multi=multi, call=sys.call() )
             return(out)
         } 
fit <- hoge(x=2,y=3)
fit
$sum
[1] 5

$multi
[1] 6

$call
hoge(x = 2, y = 3)

# update 
update(fit, y=4)
$sum
[1] 6

$multi
[1] 8

$call
hoge(x = 2, y = 4)

Frequency table including zero frequencies

x <- c(sample(c(1:10), 5, rep=TRUE))
table(x)
x
2 3 7 9 
1 2 1 1 

table(factor(x, levels = 0:10))

 0  1  2  3  4  5  6  7  8  9 10 
 0  0  1  2  0  0  0  1  0  1  0 
tabulate(x)
[1] 0 1 2 0 0 0 1 0 1

Extract p-values from aov objects

utils::data(npk, package="MASS")
x <- aov(yield ~ block + N * P + K, npk)
summary(x)
            Df Sum Sq Mean Sq F value Pr(>F)
block        5    343    68.7    4.39 0.0130
N            1    189   189.3   12.11 0.0037
P            1      8     8.4    0.54 0.4756
K            1     95    95.2    6.09 0.0271
N:P          1     21    21.3    1.36 0.2628
Residuals   14    219    15.6               
summary(x)[[1]][[5]]
[1] 0.0129536 0.0036837 0.4756370 0.0271138 0.2628406        NA

Turn off scientific notation

# ex 1
options()$scipen
[1] 0

# ex 2
x <- 10e-6
x
[1] 1e-05
format(x, sci=FALSE)
[1] "0.00001"

Capture output

str(iris)
'data.frame':150 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

capture.output(str(iris))
[1] "'data.frame':\t150 obs. of  5 variables:"                                                 
[2] " $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ..."                            
[3] " $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ..."                          
[4] " $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ..."                        
[5] " $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ..."                        
[6] " $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ..."
 
capture.output(str(iris))[1]
[1] "'data.frame':\t150 obs. of  5 variables:"

Root finding

# unit root
f <- function(x) 0.0016^2 - 0.001^2*x^2 - 0.002^2*(1-x)^2 -
+ 2*0.7*0.001*0.002*x*(1-x)
uniroot(f, c(-1,1))
$root
[1] 0.32034

$f.root
[1] 2.1463e-11

$iter
[1] 7

$estim.prec
[1] 6.1035e-05

# polynomial
polyroot(c(1, 2, 1))
[1] -1-0i -1+0i

Find row indices in a matrix

x <- data.frame(A=c(1, 2, 2), B=c(4, 5, 5))
x
  A B
1 1 4
2 2 5
3 2 5
r <- c(2,5)
which(apply(t(x) == r, 2, all))
[1] 2 3

Pass an operator to a function

hoge<- function(a, op, b) {
	   op(a,b)
        }
# use back quotes to pass a operator
hoge(1, `<`, 2)
[1] TRUE
hoge(2, `==`, 3)
[1] FALSE

Find arguements of a function

names(formals(sweep))
[1] "x"            "MARGIN"       "STATS"        "FUN"          "check.margin"
[6] "..."         
args(sweep)
function (x, MARGIN, STATS, FUN = "-", check.margin = TRUE, ...) 
NULL

Rolling sum

require(zoo)
a <- c(1,5,3,7,8,4,2,5)
a <- zoo(a)
rollapply(a, 2, sum)
 1  2  3  4  5  6  7 
 6  8 10 15 12  6  7 
rollapply(a, 3, sum)
 2  3  4  5  6  7 
 9 15 18 19 14 11 

Find duplicated elements

x <- c(1,1,2,3,3,4)
duplicated(x)
[1] FALSE  TRUE FALSE FALSE  TRUE FALSE
ave(x, x, FUN = length) > 1
[1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE
x %in% x[duplicated(x)]
[1]  TRUE  TRUE FALSE  TRUE  TRUE FALSE

Split a string containing spaces

output <- c("a     b   3 4 cc        5555")
unlist(strsplit(output, split=" +"))
[1] "a"    "b"    "3"    "4"    "cc"   "5555"

Return the ordering index for a matrix

M <- matrix(c(rnorm(16)), ncol=4)
M
         [,1]     [,2]     [,3]     [,4]
[1,] -1.29592 -0.38361  0.14752 -0.69644
[2,]  0.39755  0.38478 -0.30034 -0.27125
[3,]  0.47507 -0.35334 -1.81646 -1.35324
[4,] -0.27713 -0.43579  0.73404  0.67316

arrayInd(order(M, decreasing=TRUE), dim(M))
      [,1] [,2]
 [1,]    4    3
 [2,]    4    4
 [3,]    3    1
 [4,]    2    1
 [5,]    2    2
 [6,]    1    3
 [7,]    2    4
 [8,]    4    1
 [9,]    2    3
[10,]    3    2
[11,]    1    2
[12,]    4    2
[13,]    1    4
[14,]    1    1
[15,]    3    4
[16,]    3    3

Find indices where 10 contiguous 1 occur

x <- c(0,1,1,0,0,1,1,1,0,1,1,1,1,1,1)

rle(x)
Run Length Encoding
  lengths: int [1:6] 1 2 2 3 1 6
  values : num [1:6] 0 1 0 1 0 1

filter(x, rep(1/5, 5), sides=1)
Time Series:
Start = 1 
End = 15 
Frequency = 1 
 [1]  NA  NA  NA  NA 0.4 0.6 0.6 0.6 0.6 0.8 0.8 0.8 0.8 1.0 1.0

which(filter(x, rep(1/5, 5), sides=1)==1)
[1] 14 15

For each a[i], find the proportion of a < b

n = 20
a <- rnorm(n)
b <- rnorm(n)
findInterval(a, sort(b)) / n
 [1] 0.70 0.55 0.30 0.30 0.85 0.00 0.50 0.00 0.90 0.55 0.65 0.70 0.90 0.05 0.25
[16] 0.30 0.95 0.25 0.25 0.15

Table function with exclude argument

table(c(1,2,3,4,NA), exclude=2, useNA='ifany') 
   1    3    4  
   1    1    1    2

table(as.factor(c(1,2,3,4,NA)), exclude=2, useNA='ifany') 
   1    3    4  
   1    1    1    1

Send outputs to a file

sink(file="myoutput.txt", type="output")
YOUR CODE COMES HERE
sink()

writeLines(as.character(body((glm.fit))), con = "glm.fit.R")

Rank of a matrix

X <- matrix(c(1,0,0,0,1,0,0,0,1,1,1,1), ncol=4)
X
     [,1] [,2] [,3] [,4]
[1,]    1    0    0    1
[2,]    0    1    0    1
[3,]    0    0    1    1

qr(X)$rank
[1] 3

require(Matrix)
rankMatrix(X)[1]
[1] 3

Clone this wiki locally