This page contains all the code snippets from the book for easy copy/paste ppossibilities. # Chapter 1
library(MESS) # Load the package that contains the data
data(bees) # Load the data
head(bees) # Show the first couple of lines of data
load("penny.rda") # Load data
load("penny.rda", verbose=TRUE) # Load data and show objects
x <- c(1, 7, 3, 2)
df <- data.frame(v1=c("A", "B", "C", "D"), v2=1:4)
save(x, df, file="mydata.rda") # Save two objects
indata <- read.table("mydata.txt", header=TRUE)
indata
indata <- read.table("mydata.txt", header=TRUE,
na.strings=c("NA", ""))
indata
indata <- read.table("mydata.txt", header=TRUE,
na.strings=c("NA", "", "."))
indata
data(trees)
trees$Girth[2] <- NA # Set 2nd obs of Girth to missing
write.table(trees, file="savedata.txt")
write.table(trees, file="mydata2.txt", row.names=FALSE,
dec=",", eol="\r\n", na=".", quote=FALSE)
indata <- read.csv("mydata.csv")
head(indata)
data(trees)
write.csv(trees, file = "savedata.csv")
data(trees)
write.csv(trees, file = "savedata2.csv", row.names=FALSE, quote=FALSE)
mydata
library(readxl)
goesforth <- read_excel("data/cunningplan.xlsx", sheet=1)
goesforth
library(xlsx)
data(trees)
write.xlsx(trees, "mydata.xlsx")
write.xlsx(trees, "mydata.xlsx", row.names=FALSE) # No row numbers
library(haven)
indata <- read_sas("hip.sas7bdat")
head(indata)
library(foreign)
indata <- read.xport("somefile.xpt")
head(indata)
library(foreign)
data(trees)
write.foreign(trees, datafile = "toSAS.dat",
codefile="toSAS.sas", package="SAS")
library(haven)
indata <- read_sav("cancer.sav")
head(indata)
data(trees)
write_sav(trees, "myspssfile.sav")
library(haven)
indata <- read_dta("statafile.dta")
head(indata)
data(trees)
write.dta(trees, file="mydata.dta")
library(foreign)
indata <- read.systat("Play4.syd")
head(indata)
library(jsonlite)
indata <- fromJSON("exampledata.json")
class(indata) # Note a list is returned
indata
df <- as.data.frame(indata)
class(df)
df
toJSON(1:5) # JSON string for a numeric vector
outdata <- toJSON(df, pretty=TRUE)
class(outdata)
outdata # Default is row-wise
outdata <- toJSON(df, dataframe="column", pretty=TRUE)
outdata
toJSON(df, dataframe="column", pretty=TRUE, na="null")
cat(outdata, file="output.json")
library(XML)
url <- "http://www.rprimer.dk/download/mydata.xml"
indata <- xmlToDataFrame(url)
head(indata)
library(XML)
# Location of the example XML file
url <- "http://www.rprimer.dk/download/bank.xml"
# Parse the tree
doc <- xmlTreeParse(url, useInternalNodes=TRUE)
top <- xmlRoot(doc) # Identify the root node
xmlName(top) # Show node name of root node
names(top) # Name of root node children
xmlValue(top[[1]]) # Access first element
xmlValue(top[["author"]]) # First element with named index
names(top[[3]]) # Children of node 3
xmlAttrs(top[[3]][[1]]) # Extract tags from a Date node
top[["rates"]][[1]][[1]] # Tree from first bank and date
xmlValue(top[[3]][[1]][[1]][[1]]) # Bank name is node value
xmlAttrs(top[[3]][[1]][[1]][[1]]) # but has no tags
xmlAttrs(top[[3]][[1]][[1]][[2]]) # The <exch> node has tags
xmlValue(top[[3]][[1]][[1]][[2]]) # but no value
# Search tree for all source nodes and return their value
xpathSApply(doc, "//source", xmlValue)
# Search full tree for all exch nodes where currency is "DKK"
xpathApply(doc, "//exch[@currency='DKK']", xmlAttrs)
res <- xpathApply(doc, "//exch",
function(ex) {
c(xmlAttrs(ex),
bank=xmlValue(xmlParent(ex)[["source"]]),
date=xmlGetAttr(xmlParent(xmlParent(ex)), "time"))
})
result <- do.call(rbind, res)
result
library(MESS)
data(bees)
write.xml(bees, file="bees.xml")
library(htmltab)
url <- paste0("http://en.wikipedia.org/wiki/",
"List_of_national_capitals_by_population")
result <- htmltab(doc = url, which=2,
rm_nodata_cols=FALSE) # Select table 2
head(result)
library(rvest)
webaddress <- "http://www.imdb.com/chart/top?ref_=nv_mv_250_6"
htmlpage <- read_html(webaddress)
titlenodes <- html_nodes(htmlpage, ".titleColumn")
ratingnodes <- html_nodes(htmlpage, ".imdbRating")
head(ratingnodes,3)
library(RMySQL)
mydb <- dbConnect(MySQL(), user="rprimer", password="PASSword",
dbname="rprimer", host="192.168.1.151")
indata <- dbGetQuery(mydb, "select * from students")
indata
data(trees)
dbWriteTable(mydb, name="newTableName", value=trees, overwrite=TRUE)
dbListTables(mydb)
dbDisconnect(mydb)
vec1 <- 4 # Numeric vector of length 1
vec1
vec2 <- c(1, 2, 3.4, 5.6, 7) # Numeric vector of length 5
vec2
# Create a character vector
vec3 <- c("Spare", "a talent", "for an", "old", "ex-leper")
vec3
# And a vector of logical values
vec4 <- c(TRUE, TRUE, FALSE, TRUE, FALSE)
vec4
f <- factor(vec3) # Make factor based on vec3
f
x <- 1 + 2i # Enter a complex number
x
sqrt(-1) # R does not see a complex number
sqrt(-1 + 0i) # but it does here
m <- matrix(1:6, ncol=2) # Create a matrix
m
m2 <- matrix(1:6, ncol=2, byrow=TRUE) # Fill the matrix row-wise
m2
list(vec2, comp=x, m) # Combine 3 different objects, one named
data.frame(vec2, f, vec4) # Make a data frame
5/2 + 2*(5.1 - 2.3) # Add, subtract, multiply and divide
2**8 # 2 to the power 8
1.61^5 # 1.61 to the power 5
10 %% 3 # 10 modulus 3 has remainder 1
10 %/% 3 # integer division
abs(-3.1) # absolute value of -3.1
sqrt(5) # square root of 5
ceiling(4.3) # smallest integer larger than 4.3
floor(4.3) # largest integer smaller than 4.3
trunc(4.3) # remove decimals
round(4.5)
round(4.51)
round(4.51, digits=1)
# Angles for trigonometric functions use radians - not degrees
cos(pi/2) # cosine of pi/2.
sin(pi/4) # sine of pi/4
tan(pi/6) # tangent of (pi/6)
log(5) # natural logarithm of 5
log(5, base=2) # binary logarithm of 5
log10(5) # common logarithm of 5
exp(log(5) + log(3)) # exponential function
# Create two matrices
x <- matrix(1:6,ncol=3)
y <- matrix(c(1, 1, 0, 0, 0, 1), ncol=2)
x
y
x %*% y # Matrix multiplication
y %*% x # Matrix multiplication
seq(5) # vector with sequence from 1 to 5
seq(4, 6) # from 4 to 6 with step size 1
4:6 # Alternative way to make sequence with step 1
seq(6, 4) # from 6 to 4 with step size 1
seq(0, 3, .5) # from 0 to 3 with steps 0.5
x <- c(6, 8, 1:4, 9, 7, 5, 3)
x
length(x) # length of x
head(x) # Show the start of x
tail(x) # and the bottom
rev(x) # reverse the order of elements in x
class(1:3) # class of a sequence (only has integers)
class(x) # class of x (numeric because of c())
class(c(1, 2.5)) # and now numeric variables
class(c(TRUE, TRUE, FALSE))
x
which.min(x) # The 3rd element is smallest (value 1)
which.max(x) # The 2nd element is largest (value 8)
y <- c(2, 4, 6, 2, 4, 9, 2)
which.min(y) # Return the first element if there are ties
which.min(c(FALSE, TRUE, FALSE)) # FALSE = 0, TRUE = 1
which.max(c(FALSE, TRUE, FALSE))
unique(y)
duplicated(y)
z1 <- c(TRUE, FALSE, FALSE)
any(z1)
all(z1)
z2 <- c(TRUE, TRUE, TRUE)
any(z2)
all(z2)
z3 <- c(FALSE, FALSE, FALSE)
any(z3)
all(z3)
which(z1)
which(z3)
x
is.na(x)
x[c(2,4)] <- NA # redefine x
x
is.na(x) # identify missing elements
is.numeric(x) # Check if x is a numeric vector
is.character(x) # Check if x is a character vector
x <- c("Suki", "walk", "to", "the", "well", NA)
toupper(x) # convert to upper case
tolower(x) # convert to lower case
nchar(x) # no. of characters in each element of x
grep("a", x) # elements that contain letter "a"
grep("o", x) # elements that contain letter "o"
substr(x, 2, 3) # extract 2nd to 3rd characters
substr(x, 2, 3) <- "X" # replace 2nd char with X
x
substr(x, 2, 3) <- "XXXX"
x
paste(x, "y") # paste "y" to end of each element
paste(x, "y", sep="-")
paste(x, "y", sep="-", collapse=" ")
indata <- read.table("DKdata.txt", header=TRUE)
indata
x <- as.character(indata$name)
x
Encoding(x) # Read the encoding
Encoding(x) <- "latin1" # Set it to latin1
x # Correct answer
x <- as.character(indata$name)
x
iconv(x, from="latin1", to="")
x <- c(2, 4, 1, 5, 3, 4)
x
sort(x)
sort(x, decreasing=TRUE)
y <- c("b", "a", "A", "b", "B", "a")
y
sort(y)
o <- order(x)
o
x[o[1]] # Same as min(x)
x[o] # Same as sort(x)
o1 <- order(y)
o1
o2 <- order(y, x)
o2
df <- data.frame(day=c(1, 2, 1, 2, 3),
time=c("late", "late", "early", "late", "early"))
df
o <- order(df$day, df$time) # Order by day and then time
df[o,]
data(airquality)
names(airquality)
celsius <- (airquality$Temp-32)/1.8 # New variable
airquality$Celsius <- (airquality$Temp-32)/1.8 # Add to data frame
head(airquality$Temp)
head(airquality$Celsius)
newdata <- transform(airquality, logOzone = log(Ozone))
head(newdata)
x <- seq(0, pi/2, .2)
x
y <- sin(2*x)
y
x[which.max(y)]
x <- c(1, 2, 3, 1, NA, 4)
y <- c(1, 2, 5, NA)
x %in% y
y %in% x
! x %in% y # Invert result: find objects NOT present
z <- matrix(2:7, ncol=2)
z
x %in% z
# c(1,2) is found in both
data.frame(a=c(1,2), b=c(3,4)) %in% data.frame(f=c(5,6),
d=c(1,2))
list(1, 2, 3) %in% list(a=c(1,2), b=3)
data(airquality)
tapply(airquality$Temp, airquality$Month, mean)
when <- cut(airquality$Day, 3, labels=c("Early", "Mid", "Late"))
tapply(airquality$Temp, list(airquality$Month, when), mean)
library(MESS)
id <- c(1, NA, NA, NA, 2, NA, NA, NA, 3, NA, NA, NA)
date <- c("2016-02-05", NA, NA, NA,
NA, NA, NA, NA,
"2016-02-06", NA, NA, NA)
newid <- filldown(id)
newid
tapply(date, newid, filldown) # Returns a list
unlist(tapply(date, newid, filldown))
x <- c("1,2", "1,5", "1,8", "1.9", "2") # Example data
as.numeric(sub(",", ".", x)) # Substitute , with . and convert
f <- factor(x) # It also works on factors
as.numeric(sub(",", ".", f)) # Substitute , with . and convert
as.numeric(x) # Does not work - need to substitute first
library(data.table)
x <- 1:8
shift(x)
shift(x, n=3)
shift(x, n=2, fill=0)
shift(LETTERS[1:5], type="lead")
df <- data.frame(1:5, LETTERS[1:5])
shift(df, n=1, type="lag")
shift(x, n=c(1,2))
library(MESS)
x <- 1:5
y <- c(1, 0, 1, 1, 5)
auc(x, y)
x2 <- c(4, 5, 1, 3, 2) # Reorder the same data
y2 <- c(1, 5, 1, 1, 0)
auc(x2, y2) # Reordering is fine. Same result
auc(x, y, from=0) # AUC from 0 to max(x)
auc(x, y, from=0, rule=2) # Allow extrapolation
auc(x, y, from=0, rule=2, yleft=0) # Use value 0 to the left
auc(x, y, from=0, rule=2, yleft=.5) # Use 1/2 to the left
data(ChickWeight)
result <- by(ChickWeight, ChickWeight$Chick,
function(df) { auc(df$Time, df$weight) } )
head(result)
result2 <- by(ChickWeight, ChickWeight$Chick, function(df) {
auc(df$Time, df$weight, from=0, to=21, rule=2)})
head(result2)
m <- matrix(c(NA, 2, 3, 4, 5, 6), ncol=2)
m
apply(m, 1, sum) # Compute the sum for each row
apply(m, 2, mean) # Compute the mean for each column
apply(m, 2, mean, na.rm=TRUE) # Compute the mean for each column
m <- matrix(c(1, 1, 2, 2, 7, 7), 2)
m
prop.table(m)
prop.table(m, margin=1)
prop.table(m, 2)
m <- matrix(1:6, ncol=2) # Create matrix
m
t(m) # Transpose
mpdat <- data.frame(source=c("KU", "Spain", "Secret", "Attic"),
rats=c(97, 119, 210, 358))
mpdat
t(mpdat)
t(mpdat[,2])
x <- rbinom(20, size=4, p=.5)
x
table(x)
sex <- sample(c("Male", "Female"), 20, replace=TRUE)
sex
table(sex, x)
treatment <- rep(c("Placebo", "Active"), 10)
treatment
table(sex, x, treatment)
data(Titanic)
ftable(Titanic)
# Only look at males to keep output short
maletable <- as.table(HairEyeColor[,,1])
maletable
freq <- as.data.frame(maletable)
freq
library(epitools)
fulldata <- expand.table(HairEyeColor)
head(fulldata)
summary(fulldata)
library(lubridate)
string1 <- "1971-08-20"
string2 <- "11-04-01" # Year with only two digits
date1 <- ymd(string1)
date2 <- ymd(string2) # 11 becomes 2011
date1
date2
string3 <- c("02/28/00", "Feb 29th, 2000", "March 01 00", "3-31-00")
date3 <- mdy(string3) # Super flexible
date3
date2 - date1 # Difference between dates
date1 <- ymd(string1, tz="Europe/Copenhagen")
date1
ymd(date2, tz="UTC") - date1 # Difference changed a bit
time1 <- hms(c("08:34:12", "15-32-41"))
time1
time2 <- hm(c("12:11", "5,30"))
time2
ydm_hms("71-20-08 09:05:21") # Combine date and time
ydm_hm("71-20-08 09:05") # Combine date and time no seconds
ydm_hm("71-20-08 09:05 PM") # AM/PM specification allowed
ydm_hm("Trial started on 71-20-08 at 09:05 PM and was successful")
date4 <- mdy(c("Mar 31 2011", "Apr 1 2011"))
date4 + 30 # Add 30 seconds
date4 + days(30) # Add 30 days
date4 + months(1) # Add 1 month
library(lubridate)
string <- c("02/28/00", "Feb 29th, 2000", "March 01 00", "3-31-00")
D <- mdy(string) # Parse as dates
D
stamp("March 1, 1999")(D) # Make and apply stamp like "March 1, 1999"
stamp("March 1, 1999", orders="dy")(D) # Only expect day and year
D2 <- ydm_hm(c("71-20-08 09:05"), c("71-18-08 09:05"),
c("00-28-02 09:15"), c("01-20-12 13:10"))
mystamp <- stamp("Date of birth: Sunday, Jan 1, 1999 3:34 pm",
quiet=TRUE)
mystamp(D2)
mystamp <- stamp("Date of birth: Sunday, Jan 1, 1999 3:34",
orders="mdyHM")
mystamp(D2)
f <- factor(c(1:4, "A", 8, NA, "B"))
f
vect <- as.numeric(levels(f))[f]
vect
as.numeric(f) # Not correct
f <- factor(c(1:4, "A", 8, NA, "B"))
f
vect <- as.character(f)
vect
f <- factor(c(1, 2, 3, 2, 3, 1, 3, 1, 2, 1))
f
f[c(1,2)] <- "A" # Not working
f
f <- factor(f, levels=c(levels(f), "A"))
f[c(1,2)] <- "A" # Can now assign the value "new level"
f
f <- factor(c(1, 1, 2, 3, 2, 1, "A", 3, 3, 3, "A"))
levels(f) # Print existing levels
levels(f) <- c("13", "2", "13", "A")
f
f <- factor(c(1, 1, 2, 3, 2, 1, "A", 3, 3, 3, "A"))
f
levels(f)[c(1,3)] <- "1&3"
f
f <- factor(c(1, 1, 2, 3, 2, 1, "A", 3, 3, 3, "A"))
f
levels(f) <- list("1+3" = c(1, 3), "2"=2, "A"="A")
f
f <- factor(c(1, 2, 3, 2, 3, 2, 1), levels=c(0, 1, 2, 3))
f
ff <- droplevels(f) # Drop unused levels from f
ff
df <- data.frame(f1=f, f2=f)
newdf <- droplevels(df, except=2)
newdf$f1
newdf$f2
f <- factor(LETTERS[1:5])
f
f <- relevel(f, ref="D") # Set level D as reference
f
x <- (1:15)**2
x
cut(x, 3) # Cut the range of x into 3 interval of same length
# Cut the range into 3 intervals of roughly the same size
cut(x, quantile(x, probs=seq(0, 1, length=4)),
include.lowest=TRUE)
library(MESS)
data(earthquakes)
subset(earthquakes, mag>7.5)
subset(earthquakes, type=="mining explosion" & mag>3.6)
subset(earthquakes, mag>7 & depth>100,
select=c("time", "place"))
data(airquality)
head(airquality)
complete <- complete.cases(airquality)
head(complete, 20)
ccdata <- airquality[complete,]
head(ccdata)
complete2 <- complete.cases(airquality$Ozone, airquality$Temp)
oztemp <- airquality[complete2,]
head(oztemp)
mydat <- data.frame(x=1:5, y=factor(c("A", "B", "A", "B", NA)))
mydat
mydat$y <- NULL # Remove the y variable from mydat
mydat
library(MESS)
data(earthquakes)
lapply(earthquakes, FUN=class) # Apply the class function
sapply(earthquakes, class) # Simplify output
sapply(earthquakes, mean) # Compute the mean of each variable
sapply(earthquakes, mean, trim=.1)
newlist <- lapply(earthquakes, function(i) {
if (is.factor(i)) { # Check if it is a factor
as.character(i) # Convert
} else { # Otherwise do nothing
i
}
})
sapply(newlist, class) # New classes
data(airquality)
by(airquality, airquality$Month, colMeans, na.rm=TRUE)
df1 <- data.frame(delta=seq(0.7, 1, .1),
n=seq(20, 50, 10))
do.call("power.t.test", df1)
df2 <- data.frame(delta=seq(0.7, 1, .1),
n=seq(20, 50, 10),
extra=1:4)
df2
do.call("power.t.test", df2)
df3 <- data.frame(delta=seq(0.7, 1, .1),
power=c(0.58, 0.86, 0.98, 0.998))
res <- mapply(power.t.test, delta=df3$delta, power=df3$power)
res
unlist(res[1,])
df1 <- data.frame(id=1:3, x=11:13, y=factor(c("A", "B", "C")))
df1
df2 <- data.frame(id=6:10, y=factor(1:5), x=1:5)
df2
combined <- rbind(df1, df2)
combined
df1 <- data.frame(id=1:3, x=11:13, y=factor(c("A", "B", "C")),
z=1:3)
df2 <- data.frame(id=6:10, y=factor(1:5), x=1:5)
df1$z <- NULL # Remove the z variable from data frame df1
rbind(df1,df2) # Now we can join the data frames
df1 <- data.frame(id=1:3, x=11:13, y=factor(c("A", "B", "C")),
z=1:3)
df2 <- data.frame(id=6:10, y=factor(1:5), x=1:5)
df2$z <- NA # Add a vector z of NAs to data frame df2
rbind(df1,df2) # Now we can join the data frames
dfx <- data.frame(id=factor(1:6), var1=11:16,
fac1=factor(c("A", "B", "C", "D", "E", "F")))
dfy <- data.frame(id=factor(c(4,5,6,7,8,9)), var2=15:10)
dfx
dfy
merge(dfx, dfy, by="id")
merge(dfx, dfy, by="id", all=TRUE)
merge(dfx, dfy, by="id", all.x=TRUE)
merge(dfx, dfy, by="id", all.y=TRUE)
df1 <- data.frame(id=1:3, x=11:13, y=factor(c("A", "B", "C")))
df1
newobs <- list(4, 10, "B") # The new observation to add
df2 <- rbind(df1, newobs)
df2
newobs2 <- list(4, "B", 10) # Order different from data frame
df3 <- rbind(df1, newobs2) # Add the new observation
df3
df3$x # The x variable is converted to character
library(reshape2)
df1 <- data.frame(time1=c(1, 2, 3, 4),
time2=c(3, NA, 6, 7))
df1
melt(df1)
df1$id <- c("Reed", "Sue", "Ben", "Johnny")
df1
melt(df1)
melt(df1, id.vars=c("time1"))
library(reshape2)
indata <- read.table("wide.txt", header=TRUE)
indata
long <- melt(indata, id.vars=c("person", "age", "gender"))
long
dcast(long, person ~ variable)
library(MESS)
data(bdstat)
head(bdstat)
wide <- dcast(bdstat, year ~ month, value.var="births")
head(wide)
indata <- read.table("widedata.txt", header=TRUE)
indata
long <- reshape(indata,
varying=list(c("y1", "y2", "y3", "y4"),
c("t1", "t2", "t3", "t4")),
v.names=c("response", "day"),
direction="long")
long
reshape(long, idvar="person", timevar="time",
v.names=c("day", "response"),
direction="wide")
reshape(long, idvar="person", timevar="time",
v.names=c("day", "response"),
varying=list(c("y1", "y2", "y3", "y4"),
c("t1", "t2", "t3", "t4")),
direction="wide")
df <- data.frame(a=rep(1,5), b=rep(2,5), c=rep(3,5))
df
vector <- as.vector(as.matrix(df)) # Convert to vector
vector
df <- data.frame(a=rep(1,5),
b=factor(c("A", "A", "A", "B", "B")))
df # Show data frame with both numeric and factor
as.vector(as.matrix(df)) # Result is vector of strings
f1 <- factor(c("A", "B", "C", "B", "A", "C"))
f2 <- factor(c(1, 2, 1, 2, 1, 2))
interaction(f1,f2)
interaction(f1,f2, drop=TRUE) # Drop unused factor levels
lm(weight ~ offset(2.7*Time)) # Fix slope at 2.7
x <- c(6, 8, 1:4)
x
mean(x) # mean of x
median(x) # median
sd(x) # standard deviation
IQR(x) # inter-quartile range
y <- (1:6)**2 # New vector
y
cor(x,y) # Pearson correlation
cor(x, y, method="spearman") # Spearman correlation
cor(x, y, method="kendall") # Kendall correlation
sum(x) # sum of elements in x
cumsum(x) # cumulative sum of all elements of x
min(x) # minimum value
max(x) # maximum value
range(x) # range. Same as the values above
quantile(x) # get quartiles
quantile(x, probs=c(.15, .25, .99)) # specific quantiles
x[c(2,4)] <- NA # redefine x
x # vector with missing values
sum(x) # sum returns missing
sum(x, na.rm=TRUE) # unless we remove the NA's
mean(x) # mean also returns missing
mean(x, na.rm=TRUE) # unless we remove the NA's
library(isdals) # Get the data
data(fev)
library(Publish)
univariateTable(Smoke ~ Age + Ht + FEV + Gender, data=fev)
univariateTable(Smoke ~ Age + Q(Ht) + FEV + Gender,
showTotals=FALSE, data=fev)
data(trees)
result <- lm(Volume ~ Height, data=trees)
result
summary(result)
data(trees)
result <- lm(Volume ~ Height + Girth, data=trees)
result
summary(result)
year <- seq(1981, 1999)
deaths <- c(339, 1201, 3153, 6368, 12044, 19404, 29105,
36126, 43499, 49546, 60573, 79657, 79879,
73086, 69984, 61124, 49379, 43225, 41356)
newyear <- year - 1980
model <- lm(deaths ~ newyear + I(newyear^2) + I(newyear^3))
summary(model)
library(isdals)
data(antibio)
head(antibio)
model <- lm(org ~ type, data=antibio)
model
summary(model)
drop1(model, test="F")
library(isdals)
data(antibio)
model <- lm(org ~ type, data=antibio)
drop1(model, test="F")
result <- TukeyHSD(aov(model))
result
plot(result)
library(isdals)
data(fev)
# Convert variables to factors and get meaningful labels
fev$Gender <- factor(fev$Gender, labels=c("Female", "Male"))
fev$Smoke <- factor(fev$Smoke, labels=c("No", "Yes"))
interaction.plot(fev$Gender, fev$Smoke, fev$FEV,
lwd=3, col=c("black", "blue"))
model <- lm(FEV ~ Gender + Smoke + Gender*Smoke, data=fev)
model
drop1(model, test="F")
model2 <- lm(FEV ~ Gender + Smoke, data=fev)
drop1(model2, test="F")
summary(model2)
library(isdals)
data(fev)
# Convert variables to factors and get meaningful labels
fev$Gender <- factor(fev$Gender, labels=c("Female", "Male"))
fev$Smoke <- factor(fev$Smoke, labels=c("No", "Yes"))
summary(fev)
plot(FEV ~ Ht, pch=20, col=c("Blue", "Black")[Smoke],
xlab="Height (inches)", ylab="FEV (liters)", data=fev)
model <- lm(FEV ~ Ht + I(Ht^2) + Smoke*Gender + Smoke*Age,
data=fev)
drop1(model, test="F")
model2 <- lm(FEV ~ Ht + I(Ht^2) + Gender + Smoke*Age,
data=fev)
drop1(model2, test="F")
# Remove the insignificant interaction and refit
model3 <- lm(FEV ~ Ht + I(Ht^2) + Gender + Smoke + Age,
data=fev)
drop1(model3, test="F")
summary(model3)
head(coef(result, s=exp(-1)))
cvres$lambda.min
cvres$lambda.1se
head(coef(cvres, s = "lambda.min"))
head(predict(cvres, newx = x[1:3,], s = "lambda.min"))
library(MASS)
data(birthwt)
model <- glm(low ~ factor(race) + smoke + lwt, family=binomial,
data=birthwt)
summary(model)
exp(1.060 + c(-1,1)*1.96*0.3783) # 95% CI for smoking
drop1(model, test="Chisq")
exp(-0.10922 + 1.06001 + 120*-0.01326) /
(1 + exp(-0.10922 + 1.06001 + 120*-0.01326)) # Logit
pnorm(-0.072490 + 0.652215 + 120*-0.008067) # Probit
library(pROC)
pred <- predict(model, type=c("response"))
roccurve <- roc(low ~ pred, data=birthwt)
plot(roccurve)
library(survival)
library(MESS)
data(matched)
model <- clogit(iscase=="Case" ~ vaccine + lung + strata(id),
data=matched)
summary(model)
drop1(model, test="Chisq")
library(ordinal)
library(MESS)
data(icecreamads)
resp <- ordered(icecreamads$Consumption)
model <- clm(Consumption ~ Temperature + Price + Advertise,
data=icecreamads)
summary(model)
exp(10*0.1473 + 10*qnorm(.975)*c(-1,1)*0.04001)
exp(3.56)/(1 + exp(3.56))
exp(6.941-(0.147*50-6.11*0.28))/(1+exp(6.941-(0.147*50-6.11*0.28))) -
exp(3.560-(0.147*50-6.11*0.28))/(1+exp(3.560-(0.147*50-6.11*0.28)))
head(predict(model, type="prob")$fit)
drop1(model, test="Chisq")
library(nnet)
multi <- multinom(Consumption ~ Price + Advertise + Temperature,
data=icecreamads)
1-pchisq(-2*as.numeric(logLik(model)) - deviance(multi),
df=multi$edf - model$edf)
library(nnet)
library(isdals)
data(alligator)
head(alligator)
model <- multinom(food ~ length, data=alligator)
summary(model)
head(predict(model, response="probs"))
len <- seq(0.3, 4, .1)
matplot(len, predict(model, newdata=data.frame(length=len),
type="probs"),
type="l", xlab="Alligator length", ylab="Probability")
null <- multinom(food ~ 1, data=alligator)
anova(null, model) # Compare the two models
library(MESS)
data(bees)
beedata <- subset(bees, Time=="july1") # Look at one day only
head(beedata)
model <- glm(Number ~ Locality + Type*Color,
family=poisson, data=beedata)
summary(model)
exp(2.5649 + 1.8718 - 2.1342)
drop1(model, test="Chisq")
model2 <- glm(Number ~ Locality + Type*Color,
family=quasipoisson, data=beedata)
drop1(model2, test="Chisq")
summary(model2)
exp(-0.4745 + c(-1,1)*1.96*0.2881) # 95% CI Kragevig/Havreholm
library(lme4)
data(ChickWeight)
head(ChickWeight)
model <- lmer(log(weight) ~ Time + Diet + Time*Diet + (1|Chick),
data=ChickWeight)
summary(model)
model <- lmer(log(weight) ~ Time + Diet + Time*Diet + (1|Chick),
data=ChickWeight, REML=FALSE)
noint <- lmer(log(weight) ~ Time + Diet + (1|Chick),
data=ChickWeight, REML=FALSE)
anova(model, noint)
library(nlme)
data(ChickWeight)
head(ChickWeight)
model <- lme(log(weight) ~ Diet + Time + Time*Diet,
random= ~ 1|Chick,
correlation=corGaus(form= ~ Time|Chick,
nugget=TRUE),
data=ChickWeight)
model
model2 <- lme(log(weight) ~ Diet + Time + Time*Diet,
random= ~ 1|Chick,
correlation=corGaus(form= ~Time|Chick,
nugget=TRUE),
method="ML", data=ChickWeight)
model3 <- lme(log(weight) ~ Diet + Time, random= ~ 1|Chick,
correlation=corGaus(form= ~Time|Chick,
nugget=TRUE),
method="ML", data=ChickWeight)
anova(model3, model2)
summary(model)
model4 <- lme(log(weight) ~ Diet + Time + Time*Diet,
random= ~ 1|Chick,
correlation=corGaus(form= ~ Time|Chick),
data=ChickWeight)
anova(model4, model)
model5 <- lme(log(weight) ~ Diet + Time + Time*Diet,
random= ~ 1|Chick,
correlation=corExp(form= ~ Time|Chick,
nugget=TRUE),
data=ChickWeight)
AIC(model)
AIC(model5)
library(lme4)
data(cbpp)
model <- glmer(cbind(incidence, size-incidence) ~
period + (1|herd), family=binomial, data=cbpp)
summary(model)
exp(-0.9923)
model2 <- glmer(cbind(incidence, size-incidence) ~ 1 +
(1|herd), family=binomial, data=cbpp)
anova(model2, model)
cbpp$obs <- 1:nrow(cbpp) # Construct vector for overdispersion
model3 <- glmer(cbind(incidence, size - incidence) ~ period +
(1|herd) + (1|obs), family=binomial, data=cbpp)
summary(model3)
library(geepack)
library(MESS)
data(ohio)
ordered.clusters(ohio$id) # Check that clusters are ordered
fage <- factor(ohio$age+9) # Add 9 to get the right label
model <- geeglm(resp ~ fage + smoke + fage:smoke, id=id, data = ohio,
family = binomial, corstr="exchangeable")
summary(model)
drop1(model) # Generalized Wald test
drop1(model, test="score") # Score test
model2 <- geeglm(resp ~ fage + smoke, id=id, data = ohio,
family = binomial, corstr="exchangeable")
summary(model2)
sqrt(diag(model$geese$vbeta.naiv))
model3 <- geeglm(resp ~ fage + smoke, id=id, data = ohio,
family = binomial, corstr="ar1")
summary(model3)
library(MESS)
data(greenland)
temp <- ts(greenland$airtemp, start=1960)
temp
plot(temp)
acf(temp, na.action=na.pass)
acf(diff(temp), na.action=na.pass)
pacf(temp, na.action=na.pass)
library(forecast)
model <- Arima(temp, order=c(1,1,1), include.drift=TRUE)
model
forecast(model, h=15)
plot(forecast(model, h=15))
auto.arima(temp)
data(AirPassengers)
model <- stl(log(AirPassengers), s.window="periodic")
plot(model)
summary(model)
acf(model$time.series[,3]) # Plot autocorrelation of residuals
library(MASS)
data(cats)
head(cats)
t.test(Bwt ~ Sex, data=cats)
t.test(Bwt ~ Sex, data=cats, var.equal=TRUE)
x <- c(1.3, 5.5, -2.1, 0.9, -0.4, 1.1)
t.test(x, mu=0)
library(MESS)
data(qpcr)
# Use data from just one of the 14 runs
run1 <- subset(qpcr, transcript==1 & line=="wt")
model <- nls(flour ~ fmax/(1+exp(-(cycle-c)/b))+fb,
start=list(c=25, b=1, fmax=100, fb=0),
data=run1)
model
plot(run1$cycle, run1$flour,
xlab="Cycle", ylab="Fluorescence")
lines(run1$cycle, predict(model)) # Add the fitted line
summary(model)
confint(model)
library(AER)
library(MESS)
data(nh4)
model <- tobit(log(nh4) ~ factor(year), left=log(0.00999), data=nh4)
summary(model)
drop1(model, test="Chisq")
library(pscl)
library(AER)
data(Affairs)
model <- zeroinfl(affairs ~ age + gender + children,
dist="poisson", data=Affairs)
summary(model)
m2 <- zeroinfl(affairs ~ age + gender + children | 1,
dist="poisson", data=Affairs)
summary(m2)
drop1(model, test="Chisq")
library(MESS)
data(lifeexpect)
matplot(lifeexpect$myear, cbind(lifeexpect$male, lifeexpect$female),
xlab="Year", ylab="Life expectancy",
pch=20, col=c("black", "blue"))
lo <- loess(male ~ myear, span=.25, data=lifeexpect)
lo2 <- loess(male ~ myear, span=1, data=lifeexpect)
lines(lifeexpect$myear, predict(lo))
lines(lifeexpect$myear, predict(lo2), lty=2)
library(mgcv)
library(isdals)
data(fev)
model <- gam(FEV ~ Gender + Smoke + s(Age) + s(Ht), data=fev)
summary(model)
plot(model)
linmodel <- gam(FEV ~ Gender + Smoke + Age + s(Ht), data=fev)
anova(linmodel, model, test="Chisq")
predict(model, newdata=data.frame(Age=14, Ht=60, Smoke=1, Gender=0))
model2 <- gam(FEV ~ Gender + Smoke + s(Age, Ht), data=fev)
summary(model2)
plot(model2, pers=TRUE)
library(metafor)
yi <- c(0.095, 0.277, 0.367, 0.664, 0.462, 0.185)
sei <- c(0.033, 0.031, 0.050, 0.011, 0.043, 0.023)
res <- rma(yi, sei=sei)
res
res2 <- rma(yi, sei=sei, method="FE")
res2
data("dat.bcg")
dat.bcg
rma(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=dat.bcg)
url <- "http://www.biostatistics.dk/data/winequality-red.csv"
indata <- read.csv(url, sep=";", header=TRUE)
indata$taste <- cut(indata$quality, breaks=c(0, 4, 6, 10),
labels=c("Bad", "Normal", "Good"))
head(indata)
pick <- sample(nrow(indata), 0.63 * nrow(indata))
traindata <- indata[pick,] # Select a random sample for training
testdata <- indata[-pick,] # and put the rest in test data
traindata$quality <- NULL # Remove the quality variable
testdata$quality <- NULL # from training and test data
table(traindata$taste)
library(caret)
fit <- train(taste ~ . , data=traindata, method="rf",
trControl=trainControl(method="cv",number=5))
print(fit)
mtry <- expand.grid(mtry = 5)
train(taste ~ . , data=traindata, method="rf", tuneGrid=mtry,
trControl=trainControl(method="cv",number=5))
confusionMatrix(fit)
varImp(fit, scale=FALSE)
pred <- predict(fit, newdata=testdata)
confusionMatrix(pred, testdata$taste) # Compare
library(quantreg)
library(isdals)
data(fev)
res <- rq(FEV ~ Age + Gender + Smoke, data=fev)
summary(res)
summary(res, se="nid")
summary(lm(FEV ~ Age + Gender + Smoke, data=fev))
res <- rq(FEV ~ Age + Gender + Smoke, data=fev,
tau=c(0.1, 0.25, 0.5, 0.75, 0.9))
plot(summary(res), lcol="blue", level=0.95)
anova(res, joint=FALSE)
start <- rq(FEV ~ Age + Gender + Smoke, data=fev, tau=0.5)
nested <- rq(FEV ~ Gender + Smoke, data=fev, tau=0.5)
anova(start, nested)
library(nortest)
x <- rnorm(100, mean=5, sd=2) # Normal distribution
y <- (rnorm(100, mean=5, sd=1.5))**2 # Right-skewed distribution
shapiro.test(x)
ad.test(x)
cvm.test(x)
lillie.test(x)
shapiro.test(y)
ad.test(y)
cvm.test(y)
lillie.test(y)
data(ToothGrowth)
bartlett.test(len ~ factor(dose), data=ToothGrowth)
library(car)
leveneTest(len ~ factor(dose), data=ToothGrowth)
fligner.test(len ~ factor(dose), data=ToothGrowth)
bartlett.test(len ~ interaction(supp, dose), data=ToothGrowth)
cr2 <- cumres(model2) # Save the cumulated residuals
plot(cr2) # and make the plots
m <- matrix(c(23, 20, 12, 5, 6, 9), ncol=3) # Input data
m
chisq.test(m) # Chi-square test
fisher.test(m) # Fisher's exact test
m2 <- matrix(c(35, 25, 6, 9), ncol=2) # Input data
m2
chisq.test(m2)
chisq.test(m2, correct=FALSE) # No continuity correction
fisher.test(m2, alternative="greater") # One-sided alternative
library(MESS)
data(smokehealth)
smokehealth
m <- smokehealth
m[,3] <- m[,3]+ m[,4] # Collapse last two columns
m[4,] <- m[4,] + m[5,] # Collapse last two rows
m <- m[1:4,1:3] # Extract result
m
gkgamma(m)
chisq.test(m)
m <- matrix(c(29, 17, 5, 35), 2,
dimnames = list("Baseline"=c("Low PI", "High PI"),
"4 weeks"=c("Low PI", "High PI")))
m
mcnemar.test(m)
binom.test(17, (5+17))
library(MASS)
data(survey)
mydata <- as.data.frame(xtabs(~ Sex + Clap + Exer,
data=survey))
head(mydata)
full <- glm(Freq ~ Sex*Clap*Exer, family=poisson, data=mydata)
indep <- glm(Freq ~ Sex+Clap+Exer, family=poisson, data=mydata)
anova(indep, full, test="Chisq")
block1 <- glm(Freq ~ Sex + Clap*Exer, family=poisson,
data=mydata)
anova(block1, full, test="Chisq")
cond1 <- glm(Freq ~ Sex*Exer+Clap*Exer, family=poisson,
data=mydata)
anova(cond1, full, test="Chisq")
library(MethComp)
library(MESS)
data(rainman)
head(rainman)
mydata <- Meth(item=1, y=3:5, data=rainman)
result <- BA.est(mydata, Transform="log") # Use log transform
result
result2 <- BA.est(mydata, random.raters=TRUE, Transform="log")
result2
result2$LoA
library(irr)
data(anxiety)
head(anxiety)
ftable(anxiety[,1:2]) # Print the concordance table
kappa2(anxiety[,1:2]) # Compute kappa for raters 1 and 2
library(MESS)
m <- matrix(c(10, 4, 1, 5, 9, 4, 0, 3, 13), 3)
m
kappa2(expand_table(m))
kappa2(anxiety[,1:2], weight="squared")
kappam.fleiss(anxiety)
library(lubridate)
data(airquality)
dayno <- yday(ymd(paste("1973",
airquality$Month,
airquality$Day, sep="-")))
head(dayno)
model <- lm(cbind(Ozone, Temp, Solar.R) ~ dayno + I(dayno**2),
data=airquality)
model
anova(model)
library(cluster)
data(agriculture)
fit2 <- kmeans(agriculture, centers=2) # 2 cluster solution
fit2
# Calculate the total within sum of squares for different
# number of clusters (here 2-6)
sapply(2:6, function(i) {
sum(kmeans(agriculture, centers=i)$withinss)})
scale.agriculture <- scale(agriculture)
fit2s <- kmeans(scale.agriculture, centers=2)
fit2s
sapply(2:6, function(i) {
sum(kmeans(scale.agriculture, centers=i)$withinss)})
distances <- dist(agriculture)
hierclust <- hclust(distances)
hierclust
hierclust.single <- hclust(distances, method="single")
plot(hierclust)
plot(hierclust.single)
cutree(hierclust, k=2) # Cut tree to get 2 groups
cutree(hierclust.single, h=5) # Cut at height 5
library(MASS)
data(biopsy)
names(biopsy)
predictors <- biopsy[complete.cases(biopsy),2:10]
fit <- prcomp(predictors, scale.=TRUE)
summary(fit)
plot(fit)
biplot(fit, col=c("gray", "blue"))
fit
head(predict(fit))
fit <- prcomp(~ V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9,
data=biopsy, scale.=TRUE)
library(MASS)
data(biopsy)
names(biopsy)
fit <- prcomp(~ V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9,
data=biopsy, scale.=TRUE)
summary(fit)
pc <- predict(fit)[,1:4] # Select first 4 principal comp.
head(pc)
y <- biopsy$class[complete.cases(biopsy)] # Complete outcome cases
model <- glm(y ~ pc, family=binomial) # Fit the pcr model
summary(model)
library(MASS)
data(biopsy)
fit <- lda(class ~ V1 + V2 + V3, data=biopsy)
fit
plot(fit, col="blue")
lapply(predict(fit), head, 4) # List the first 4
result <- table(biopsy$class, predict(fit)$class)
result
sum(diag(result)) / sum(result)
# Split the data up into random training and test samples
train <- sample(1:nrow(biopsy), ceiling(nrow(biopsy)/3))
testdata <- biopsy[-train,]
trainfit <- lda(class ~ V1 + V2 + V3, data=biopsy, subset=train)
trainfit
result2 <- table(testdata$class,
predict(trainfit, testdata)$class)
result2
sum(diag(result2)) / sum(result2)
library(pls)
data(gasoline)
gasTrain <- gasoline[1:50,] # Training dataset with 50 samples
gasTest <- gasoline[51:60,] # Test dataset with 10 samples
gas1 <- plsr(octane ~ NIR, ncomp=5,
data=gasTrain, validation="LOO")
summary(gas1)
plot(gas1, ncomp=3, line=TRUE)
plot(gas1, "loadings", comps=1:3, col=c("black", "blue", "black"))
RMSEP(gas1, newdata=gasTest)
library(boot)
set.seed(1) # Set seed to reproduce simulations
mydata <- rnorm(150, mean=rep(rnorm(30), 5))
mydata <- matrix(mydata,ncol=5)
head(mydata)
heritability <- function(data, indices=1:nrow(data)) {
r <- ncol(data)
df <- stack(as.data.frame(t(data[indices,])))
result <- anova(lm(values ~ ind, data=df))
mssire <- result[1,3]
mswithin <- result[2,3]
heritab <- 4*(mssire - mswithin) / ((r+1)*mssire - mswithin)
return(heritab)
}
heritability(mydata)
result <- boot(mydata, heritability, R=10000)
result
plot(result)
boot.ci(result)
library(boot)
data(trees)
fit1 <- glm(Volume ~ Girth + Height, data=trees)
cv.err.loo <- cv.glm(trees, fit1)
cv.err.loo$delta
cv.err.5 <- cv.glm(trees, fit1, K=5) # 5 fold cross validation
cv.err.5$delta
res <- sapply(1:200, function(i) {cv.glm(trees, fit1, K=5)$delta[1]})
mean(res)
fit2 <- glm(log(Volume) ~ log(Girth) + log(Height), data=trees)
mycost <- function(responses, expected) {
mean((exp(responses) - exp(expected))**2) }
cv2.err.loo <- cv.glm(trees, fit2, mycost)
cv2.err.loo$delta
library(bootstrap) # For the crossval function
library(nnet) # For the nnet function
library(MASS) # For the survey data frame
data(survey)
cc <- complete.cases(survey$Exer, survey$Sex,
survey$Smoke, survey$Age)
full <- survey[cc,] # Keep only the complete cases
fit <- nnet(Exer ~ Sex + Smoke + Age, data=full,
size=10, rang=.05, maxiter=200, trace=FALSE)
fit
table(full$Exer, predict(fit, type="class"))
# Calculate the number of misclassifications
sum(predict(fit, type="class") != full$Exer)
index <- 1:nrow(full) # Work-around to identify obs.
theta.fit <- function(x,y) {
nnet(Exer ~ Sex + Smoke + Age, data=full, size=10,
subset=x, rang=.05, maxiter=200, trace=FALSE) }
theta.predict <- function(fit, x) {
predict(fit, full[x,], type="class") }
o <- crossval(index, full$Exer, theta.fit, theta.predict,
ngroup=5)
table(full$Exer, o$cv.fit)
sum(o$cv.fit != full$Exer, na.rm=TRUE)
power.t.test(delta=2, sd=3.6, power=.8, sig.level=0.05)
power.t.test(n=20, delta=.70, sd=1, sig.level=0.05,
type="one.sample")
power.prop.test(p1=0.43, p2=0.75, power=.8, sig.level=0.05)
groupmeans <- c(4.5, 8, 6.2, 7.0)
power.anova.test(groups = length(groupmeans),
between.var=var(groupmeans),
within.var=5.8, power=.80, sig.level=0.05)
n <- seq(20, 50) # Consider sample sizes from 20 to 50
plot(n, power.t.test(n, delta=.7)$power, type="l",
ylim=c(0.55,1), ylab="Power",
xlab="Number of observations per group")
lines(n, power.t.test(n, delta=.8)$power, lty=2)
lines(n, power.t.test(n, delta=1)$power, lty=3)
delta <- seq(0, 1.6, .1) # Consider effect sizes from 0 to 1.6
plot(delta, power.t.test(n=25, delta=delta)$power,
type="l", ylim=c(0,1), xlab="Effect size",
ylab="Power")
lines(delta, power.t.test(n=40, delta=delta)$power, lty=2)
lines(delta, power.t.test(n=60, delta=delta)$power, lty=3)
x <- rbinom(50, size=1, prob=.6) # Generate random data with 60% 1s
x
wilcox.test(x, mu=0.5) # Test median equal to 0.5
x2 <- x - 0.5 # Deduct 0.5 from all observations
wilcox.test(x2) # Make the same test with median 0
sample1 <- rbinom(24, size=4, p=0.5)
sample1
sample2 <- sample1 + rbinom(24, size=3, p=0.5) - 1
sample2
wilcox.test(sample1, sample2, paired=TRUE)
library(coin)
mu <- rep(.5, length(x))
wilcoxsign_test(x ~ mu, distribution=exact())
wilcoxsign_test(sample1 ~ sample2, distribution=exact())
data(ToothGrowth)
wilcox.test(len ~ supp, data=ToothGrowth)
library(coin)
wilcox_test(len ~ supp, data=ToothGrowth, distribution=exact())
data(ToothGrowth)
# Use only data on ascorbic acid
teeth <- subset(ToothGrowth, supp=="VC")
kruskal.test(len ~ factor(dose), data=teeth)
library(pgirmess)
kruskalmc(len ~ factor(dose), data=teeth)
library(coin)
kruskal_test(len ~ factor(dose), data=teeth,
distribution=approximate(B=9999))
library(isdals)
data(cabbage)
friedman.test(yield ~ method | field, data=cabbage)
library(coin)
friedman_test(yield ~ method | factor(field),
data=cabbage, distribution=approximate(B=9999))
library(survival)
data(ovarian)
head(ovarian)
# Fit Kaplan-Meier curves for each treatment
model <- survfit(Surv(futime, fustat) ~ rx, data=ovarian)
summary(model)
plot(model, lty=2:3, xlab="Days", ylab="Survival") # Plot them
survdiff(Surv(futime, fustat) ~ rx, data=ovarian)
library(coin)
logrank_test(Surv(futime, fustat) ~ factor(rx),
data=ovarian, distribution="exact")
validate <- cox.zph(model)
validate
model4 <- coxph(Surv(futime, fustat) ~ rx + strata(resid.ds),
data=ovarian)
summary(model4)
library(survival)
data(heart)
head(jasa1)
model <- coxph(Surv(start, stop, event) ~
transplant + year + year*transplant,
data=jasa1)
summary(model)
cox.zph(model)
nd <- data.frame(start=c(0, 50, 0), stop=c(50, 250, 300), event=1,
transplant=c(0, 1, 0), year=1, id=c(1, 1, 2))
nd
plot(survfit(model, newdata=nd, id=id),
xlab="Time since admission (days)", ylab="Survival",
col=c("black", "blue"), conf.int=TRUE)
head(sort(pval), 8) # Show smallest p-values
head(p.adjust(sort(pval)), 8) # Holm correction
head(p.adjust(sort(pval), method="fdr"), 8) # FDR
library(MASS)
data(trees)
result <- boxcox(Volume ~ log(Height) + log(Girth), data=trees,
lambda=seq(-0.5, 0.6, length=13))
result$x[which.max(result$y)]
y <- rnorm(100) + 5 # Generate data
y2 <- y**2 # Square the results
result <- boxcox(y2 ~ 1, plotit=FALSE)
result$x[which.max(result$y)]
x <- 1:5 # Generate data
y <- x^2 # Generate data
plot(x, y, pch=3:7) # Use symbols 3-7 for the points
plot(x, y, pch=c("a", "b")) # Use symbols a b a b a
plot(x, y, col="red") # Each symbol is red
plot(x, y, pch=c("a", "b"), # Symbols are a b a b a in colors
col=c("red", "blue", "green")) # red blue green red blue
# Plot circles with a thick red border and blue fill color
plot(x, y, pch=21, lwd=3, col="red", bg="blue")
plot(x, y, cex=seq(1,2,.25)) # Symbols with increasing size
plot(x, y, type="l", lty=2) # Line with line type 2
plot(x, y, type="l", lwd=3) # Line with line width 3
par(lwd=3, pch=16) # Line width 3 and symbol 16
par(cex.lab=2, cex.main=3) # Large labels and larger title
plot(x, y, xlim=c(0, 10)) # x axis goes from 0 to 10
plot(x, y, log="y") # Logarithmic y axis
plot(x, y, las=1) # Rotate numbers on y axis
colors()
plot(1:10, col="cyan") # Plot using cyan color
plot(1:10, col="#cc0076") # Plot using purplish color
rgb(10, 20, 30, maxColorValue=255)
rgb(10, 20, 30, maxColorValue=100)
mycolor <- rgb(10, 20, 30, alpha=50, maxColorValue=100)
plot(1:10, col=mycolor) # Use the new colour
palette() # Show the current palette
palette(c("red", "#1A334D", "black", rgb(.1, .8, 0)))
palette()
palette("default") # Use the default palette
library(RColorBrewer)
head(brewer.pal.info, n=10)
brewer.pal(n=5, name="Spectral") # 5 colors from Spectral
brewer.pal(n=8, name="Accent") # 8 colors from Accent
# Set colors locally for a single plotting call
plot(1:8, col=brewer.pal(n=8, name="Reds"), pch=16, cex=4)
palette(brewer.pal(n=8, name="Greens")) # Set global palette
plot(1:8, col=1:8, pch=16, cex=4) # Plot using colors
library(MESS)
data(happiness)
with(happiness, symbols(tax, happy, circles=sqrt(population)/8,
inches=FALSE, bg="lightgray",
xlab="Tax (% of GDP)", ylab="Happiness"))
data(LakeHuron)
hist(LakeHuron, main=NULL) # Create histogram with no title
boxplot(ToothGrowth$len, horizontal=TRUE, range=0)
barplot(c(2,5,6,3), col="blue",
names.arg=c("Four", "shades", "of", "blue"))
m <- matrix(c(2,5,6,3),2)
m
barplot(m, col=c("lightgray", "blue"),
names.arg=c("Group 1", "Group 2"))
m <- matrix(c(23, 20, 12, 5, 6, 9), ncol=3) # Input data
relfrq <- prop.table(m, margin=1) # Calc rel. freqs
relfrq
# Make juxtaposed barplot
barplot(relfrq, beside=TRUE,
legend.text=c("Hadsten", "Hammel"),
names=c("<0.01", "0.01-0.10", ">0.10"))
# Stacked relative barplot with labels added
barplot(t(relfrq), names=c("Hadsten", "Hammel"), horiz=TRUE)
airquality$Month <- factor(airquality$Month)
model <- lm(Temp ~ Month - 1, data=airquality)
mean.values <- coef(summary(model))[,1]
mean.values
sem <- coef(summary(model))[,2]
scale <- qt(0.975, df=summary(model)$df[2])
barplot2(mean.values, col="blue",
xlab="Month", ylab="Temperature (F)",
plot.ci=TRUE,
ci.u = mean.values+scale*sem,
ci.l = mean.values)
library(gplots)
data(OrchardSprays)
fit <- lm(decrease ~ treatment - 1, data=OrchardSprays)
summary(fit)
plotCI(coef(summary(fit))[,1],
uiw=qt(.975, df=56)*coef(summary(fit))[,2],
ylab="Estimates and 95% confidence intervals", xaxt="n")
axis(1, 1:length(rownames(coef(summary(fit)))),
rownames(coef(summary(fit))))
library(plotrix)
men <- c(334700, 357834, 328545, 367201, 411689,
359363, 336841, 178422, 72296, 9552, 139, 0)
women <- c(318662, 340357, 320462, 365180, 401521,
357405, 345884, 208057, 117843, 27914, 761, 0)
groups <- paste(seq(0, 110, 10), "-", seq(9, 119, 10), sep="")
groups # Show group labels
men <- 100*men/sum(men) # Normalize men
women <- 100*women/sum(women) # and women
pyramid.plot(men, women, labels=groups,
lxcol="lightgray", rxcol="lightgray")
library(MASS)
data(survey)
# Tabulate exercise and smoking status by gender
result <- by(survey, survey$Sex,
function(x) {table(x$Smoke, x$Exer)})
result
x1 <- 1:6
y1 <- x1
x2 <- c(1.5, 2.5, 3.5, 4.5, NA, NA)
y2 <- sqrt(x2)
y3 <- 7-x1
y3[4] <- NA
y3
matplot(cbind(x1, x2, x1), cbind(y1, y2, y3),
type=c("p", "p", "l"), lwd=3)
x <- seq(0, 3, .2) # Set grid points for x
y <- seq(0, 4, .1) # Set grid points for y
# Compute function at all grid points
z <- outer(x, y, FUN=function(xx,yy) { sin(xx*yy) })
contour(x, y, z) # Make contour plot
contour(x, y, z, levels=c(-1, -.3, 0, .1, .2, .3, .8, 1))
filled.contour(x, y, z, color.palette=grey.colors)
library(RColorBrewer)
blue.colors <- brewer.pal(9, "Blues")
image(x, y, z, col=blue.colors)
x <- seq(0, 3, .2) # Set grid for x dimension
y <- seq(0, 4, .1) # Set grid for y dimension
z <- outer(x, y, FUN=function(xx,yy) { sin(xx*yy) })
persp(x, y, z)
library(scatterplot3d)
data(trees)
scatterplot3d(trees$Girth, trees$Height, trees$Volume,
xlab="Girth", ylab="Height", zlab="Volume")
scatterplot3d(trees$Girth, trees$Height, trees$Volume,
type="h", color=grey(31:1/40),
xlab="Girth", ylab="Height", zlab="Volume")
o <- order(trees$Volume)
scatterplot3d(trees$Girth[o], trees$Height[o], trees$Volume[o],
type="l", scale.y=3, angle=75,
xlab="Girth", ylab="Height", zlab="Volume")
myplot <- scatterplot3d(trees$Girth, trees$Height, trees$Volume,
type="h", angle=60,
xlab="Girth", ylab="Height", zlab="Volume")
model <- lm(Volume ~ Girth + Height, data=trees) # Fit a model
myplot$plane3d(model, col="blue") # Add fitted plane
data(eurodist)
x <- as.matrix(eurodist)
library(RColorBrewer)
blue.colors <- brewer.pal(9, "Blues")
heatmap(x, Rowv=NA, Colv=NA, col=blue.colors)
colnames(x)
# Add a color bar to the heatmap
areas <- c("Greece", "Iberia", "Belgium", "France", "France",
"Germany", "Scandinavia", "Central Europe", "Iberia",
"Germany", "Holland", "Iberia", "France", "Iberia",
"France", "Italy", "Germany", "France", "Italy",
"Scandinavia", "Central Europe")
f.areas <- factor(areas)
area.colors <- gray.colors(nlevels(f.areas))
heatmap(x, col=blue.colors, ColSideColors = area.colors[f.areas],
RowSideColors = area.colors[f.areas])
library(gplots) # Load the gplots package
list1 <- c("a", "b", "c", "d", "e", "f", "g", "h")
list2 <- c("a", "b", "c", "i", "j", "k", "l", "m", "p")
list3 <- c("a", "b", "d", "e", "i", "j", "k", "n", "o")
venn(list(Group1=list1, list2=list2, "Set 3"=list3))
list4 <- c("a", "b", "d", "i", "p") # Add extra group
lets <- tolower(LETTERS)[1:17] # Set possible names
# Create a data frame
df <- data.frame(g1=lets %in% list1, g2=lets %in% list2,
g3=lets %in% list3, g4=lets %in% list4)
head(df, 10)
venn(df)
library(plotrix)
year <- 2001:2005
prevalence <- c(10, 15, 12, 60, 65)
prevalence[prevalence>40] <- prevalence[prevalence>40] - 40
prevalence
design <- matrix(c(1,1,0,2), 2, 2, byrow=TRUE)
design # Show the layout matrix
layout1 <- layout(design)
layout.show(layout1)
x <- exp(rnorm(100)) # Simulate data
y <- (rnorm(100)+3)**2 # Simulate more data
plot(x, y) # Make a scatter plot
hist(x, main="") # and a histogram
data(CO2)
matplot(matrix(CO2$conc, ncol=12), matrix(CO2$uptake, ncol=12),
type="l", lwd=2, lty=rep(c(1,2), c(6,6)), xlim=c(80, 1100),
ylim=c(0, 45), xlab="Concentration", ylab="Uptake",
col=rep(c("black", "blue", "black", "blue"), times=rep(3,4)))
legend(600, 9, legend=c("Quebec - nonchilled", "Quebec - chilled",
"Mississippi - nonchilled", "Mississippi - chilled"),
lty=c(1, 1, 2, 2), col=rep(c("black", "blue"), 2))
library(plotrix)
cats <- matrix(c(53, 115, 17, 502, 410, 14),ncol=2)
rownames(cats) <- c("Yes", "No", "No info")
colnames(cats) <- c("Problems", "No problems")
cats
origintreat <- interaction(CO2$Treatment, CO2$Type)
head(origintreat)
levels(origintreat)
plotcode <- as.numeric(origintreat)
plot(CO2$conc, CO2$uptake, cex=2.6,
pch=c(21, 22, 21, 22)[plotcode],
bg=c("black", "blue", "white", "lightblue")[plotcode],
xlab="Concentration", ylab="Uptake")
library(rgl)
data(trees)
plot3d(trees$Height, trees$Girth, trees$Volume,
type="s", size=2) # Plot spheres
plot3d(trees$Height, trees$Girth, trees$Volume,
type="h") # Vertic. lines
data(volcano)
z <- volcano # Height
x <- 10 * (1:nrow(z)) # 10 meter spacing (S to N)
y <- 10 * (1:ncol(z)) # 10 meter spacing (E to W)
persp3d(x, y, z, smooth=TRUE, col="lightgray",
xlab="NS axis", ylab="EW axis", zlab="Height")
persp3d(x, y, z, col="darkgray", front="line", back="cull",
xlab="NS axis", ylab="EW axis", zlab="Height")
dev.print(file="pngfile.png", device=png, width=480)
dev.print(file="jpgfile.jpeg", device=jpeg,
width=480, height=240)
names(pdfFonts())
sd
boxplot
methods(boxplot)
boxplot.matrix # See source for non-hidden method
boxplot.formula # Try to see source for hidden method
getAnywhere("boxplot.formula")
library(lme4)
refit # Try to show the source for the refit function
showMethods("refit") # Show available methods
# List the source code for refit for class type numeric
findMethods("refit", classes="numeric")
library(microbenchmark)
m1 <- matrix(rnorm(120000), ncol=12) # Simulate data
m2 <- rep(1, 10000) # Vector of 1s for multiplication
microbenchmark(apply(m1, 2, sum), # Compare apply,
m2 %*% m1, # matrix multiplication,
colSums(m1)) # and colSums methods
# Example of .Rprofile to have the foreign package loaded
# automatically when R launches
local({
options(defaultPackages=c(getOption("defaultPackages"),
"foreign"))
})
x <- 1:5
myfactor <- factor(c("a", "b", "b", "b", "c", "c"))
ls() # List local objects
x # Print x
rm(x) # Remove object x
x # Try to print x
ls()
ls() # List the current objects
x <- 1:5 # Create a new vector
ls() # List the current objects
save.image(".RData") # Save the workspace to .RData
ls() # List the current objects
load(".RData") # Load the saved image
ls() # List the current objects
x
source("myanalysis.txt")
ls()
y
when <- system("date", intern=TRUE)
when
session_info("MESS")
Created by Claus Ekstrøm 2017, page last updated 24 October, 2020