Working with datasets
Stack columns with melt
# Usage template: id.vars and measure.vars are placeholders to replace with
# the identifier columns and the columns to stack.
stacked <- reshape2::melt(data,
id.vars, # columns kept as identifiers
measure.vars, # columns stacked into rows
variable.name = "variable", # column that stores the stacked column names
na.rm = FALSE,
value.name = "value", # column that stores the stacked values
factorsAsStrings = TRUE)
Note: equivalent to `gather()` in tidyr (superseded there by `pivot_longer()`).
Add subtitles or units to column headers
# Set the label (subtitle) and the units of one column of dat.
# NOTE(review): Hmisc documents these replacement functions as applied to the
# variable itself, e.g. label(dat$column) <- ...; the (dat, column) form below
# looks like shorthand -- confirm before running.
Hmisc::label(dat, column) <- 'Some subtitle'
Hmisc::units(dat, column) <- 'mL/h'
Sort by list of columns
# Order the rows of data by ID, then TIME, then MDV.
# do.call() hands each selected column to order() as a separate sort key.
data[do.call('order', as.list(data[, c('ID', 'TIME', 'MDV')])), ]
Note: equivalent to `arrange()` in dplyr.
Vectors
# Combine A and B into one flat atomic vector, descending into any lists
c(A, B, recursive = TRUE)
# Keep the dimensions of the subsetted object: drop = FALSE stops a one-row
# matrix or one-column data frame result from collapsing to a plain vector
data[data[, 1] > 4, , drop = FALSE]
Create unique factor based on the combination of 2 columns.
# Levels are built for every combination of the two inputs' levels, including
# combinations that never occur in the data (pass drop = TRUE to omit those)
interaction(c('yellow', 'Blue'), 1:2)
## [1] yellow.1 Blue.2
## Levels: Blue.1 yellow.1 Blue.2 yellow.2
Remove duplicated elements
# Based on one column: keep the first row seen for each ID
data[!duplicated(data[, 'ID']), ]
# Based on multiple columns: keep the first row seen for each ID/TIME pair
data[!duplicated(data[, c('ID', 'TIME')]), ]
Note: equivalent to `distinct()` in dplyr (with `.keep_all = TRUE` to retain the other columns).
Compare elements
# Given
A <- 1:10
B <- 6:13
# Are A and B exactly the same (same type, length, values and order)?
identical(A, B)
## [1] FALSE
# All elements found in A or in B (duplicates removed)
union(A, B)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13
# Elements of A not in B
setdiff(A, B)
## [1] 1 2 3 4 5
# Common elements in A and B
intersect(A, B)
## [1] 6 7 8 9 10
# Do A and B hold the same set of values, ignoring order and duplicates?
setequal(A,B)
## [1] FALSE
Modulus
# Modulus (remainder of the integer division) of 230 by 12
230 %% 12
## [1] 2
# Generate on/off infusion dosing for multiple dosing
Mod <- Time %% Cycle # Modulus of time by treatment cycle
Dose <- ifelse(Mod < 14, 0, 50) # Dose is 0 while Mod < 14, 50 otherwise
# Select odd-numbered rows by position. rownames() returns character strings,
# which %% cannot handle, so the row positions are generated explicitly.
data[seq_len(nrow(data)) %% 2 == 1, ]
Create a dataset skeleton
Creates all combinations of `ID` and `TIME` and adds `MDV` at the end.
# Constant value stored in the MDV column of every generated row
MDV <- 0
# One row per ID/TIME combination (ID varies fastest). The argument name is
# what names the resulting column, so it must be MDV, not include.
expand.grid(ID = 1:4, TIME = 0:10, MDV = MDV)
Match
# Common use for match(): %in% tests which elements of A are present in B
A %in% B
# Replace var in A with the value of var from the row of B that has the same ID
# (NA where the ID is absent from B; the first match wins if B$ID has duplicates)
A$var <- B$var[match(A$ID, B$ID)]
Note: see `fmatch()` in package fastmatch for a faster equivalent.
`for` loops
# Misbehaves if x is empty: 1:length(x) is then c(1, 0), so the body runs twice
for (i in 1:length(x)) { print(x[i]) }
# Handles empty x: seq_along(x) is then empty and the body never runs
for (i in seq_along(x)) { print(x[i]) }
Replicate rows
# Replicate each row 2 times (the whole set of rows is repeated: 1..n, then 1..n)
data[rep(row.names(data), times = 2), ]
# Replicate each row as many times as the value in a column (copies stay adjacent)
data[rep(row.names(data), times = data$n_rep), ]
Remove `NULL` elements from a list
# Keep the non-NULL elements. A logical mask is used on purpose: with a
# negative which() index, a list without any NULL gives x[-integer(0)],
# which returns an empty list instead of x unchanged.
x <- x[!vapply(x, is.null, logical(1))]
Aggregate
# Using formula: mean of DV for each value of TIME
aggregate(DV ~ TIME, data = data, FUN = mean)
# Using by: values and grouping variables are passed separately (by must be a list)
aggregate(x = data[, 'DV'], by = list(data[, 'TIME']), FUN = mean)
Note: see also `summarise()` in dplyr.
Read in big data
See packages readr (`read_table()`, `read_csv()`) and data.table (`fread()`).
Get the mean between points
Get the midpoint between each pair of consecutive elements in a vectorized manner.
# Each element (except the last) plus half the step to the next element:
# one midpoint per pair of consecutive values
x[-length(x)] + (diff(x) / 2)