20.3 More basic plots
20.3.1 How to plot box-plots side-by-side on one graph
# Side-by-side box-plots of weight, one box per sex.
colors <- c("blue", "red") # border colours, one per box
ylab <- "weight, kg"
# Name the columns in the formula and hand the data frame over via `data =`
# instead of writing `data$...` inside the formula: this is the idiomatic
# formula interface, and the automatic x-axis label (R >= 4.0) then reads
# "sex" rather than "data$sex".
# outline = FALSE suppresses drawing of the outlier points.
boxplot(weight ~ sex, data = data, outline = FALSE, border = colors, ylab = ylab)
20.3.2 Exercise: Make a similar boxplot using ggplot()
# install.packages("ggplot2")
library(ggplot2)

# Quick version with all ggplot2 defaults.
ggplot(data = data, aes(x = sex, y = weight)) +
  geom_boxplot()

# Customised version, built up step by step.
# outlier.shape = NA hides the outlier points.
p <- ggplot(data = data, aes(x = sex, y = weight, colour = sex)) +
  geom_boxplot(outlier.shape = NA)
# Zoom the y axis with coord_cartesian() rather than with
# scale_y_continuous(limits = ...): limits set on the scale discard every
# observation outside them BEFORE the box statistics are computed, so the
# boxes would be drawn from truncated data (and ggplot2 warns about removed
# rows). coord_cartesian() only changes the visible window.
# NOTE: the window is the pooled 10%-90% quantile range of weight, so whiskers
# reaching beyond it are cut off at the panel edge.
p <- p + coord_cartesian(ylim = unname(quantile(data$weight, c(0.1, 0.9))))
p <- p + theme_bw()
p <- p + ylab(label = ylab) + xlab(label = "") # y label reused from above, no x label
p <- p + scale_color_manual(values = colors)   # same border colours as the base plot
p <- p + theme(legend.position = "none")       # the x axis already names the groups
p
20.3.3 How to plot together data from two or more vectors of different lengths
# install.packages("reshape2")
# reshape2 provides melt(), which turns a wide data frame into a long one
library(reshape2)

# Step 1: build two weight vectors of (possibly) different lengths.
# Rows holding the maximum weight are left out first.
d <- data[!(data$weight == max(data$weight)), ]
male <- d[d$sex == "M", ]
female <- d[d$sex == "F", ]
m <- male$weight
f <- female$weight
length(m)
length(f)

# Step 2: stack both vectors into one long-format data frame, with the
# measurement in `value` and the group label in `variable`.
x <- data.frame(
  value = m,
  variable = rep("Male", times = length(m))
)
y <- data.frame(
  value = f,
  variable = rep("Female", times = length(f))
)
df <- rbind(x, y)

# Step 3: one box per group label.
boxplot(value ~ variable, data = df)
20.3.4 How to make a box-plot more informative and customized
# Settings shared by the plotting calls below
y_limits <- c(30, 140)
colors <- c("blue", "red")
ylab <- "Weight, kg"
title <- "Distribution of weight by sex"

# Box-plot without outlier points; the raw observations are overlaid afterwards.
# (An alternative way to set the box width would be pars = list(boxwex = .4).)
boxplot(
  value ~ variable,
  data = df,
  main = title,       # the title of the graph
  ylab = ylab,
  ylim = y_limits,
  border = colors,    # colour of the box borders
  outline = FALSE,
  frame.plot = FALSE, # no upper and right borders around the plot area
  boxwex = 0.6,
  staplewex = 0.4,
  outwex = 0.5,
  cex.main = 1.5,     # multiplier for the title font size
  cex.lab = 1.5,      # multiplier for the axis-label font size
  cex.axis = 1.2      # multiplier for the axis-annotation font size
)

# Overlay the individual observations as jittered points on the existing plot.
stripchart(
  value ~ variable,
  data = df,
  add = TRUE,
  vertical = TRUE,
  method = "jitter",
  jitter = 0.2,
  pch = c(16, 15),    # a different point symbol can be used per group
  cex = c(1.0, 1.0),  # ... and a different point size
  col = colors
)

# Bracket between the two boxes carrying the (yet unknown) p-value
text <- "p-value = ..."
y <- 130    # y position of the horizontal line
offset <- 5 # length of the vertical segments
x <- 1
# One vectorised call draws, in order: the horizontal bar, the left tick and
# the right tick.
segments(
  x0 = c(x, x, x + 1),
  y0 = c(y, y - offset, y - offset),
  x1 = c(x + 1, x, x + 1),
  y1 = c(y, y, y)
)
# cex sets the font size of the label
text(x = x + 0.5, y = y + offset, labels = text, cex = 1)
20.3.6 Control for the size of bins
# Histogram of weight with explicitly chosen, equally wide bins.
bin_size <- 5 # width of one bin
start <- 30   # left edge of the first bin
end <- 120    # right edge of the last bin
# Break points every `bin_size` units from `start` to `end`; hist() needs the
# breaks to span every plotted value.
bins <- seq(from = start, to = end, by = bin_size)
hist(x = d$weight, col = "blue", breaks = bins)
20.3.7 Two overlaying histograms on one graph
# Overlay the weight histograms of females and males on one set of axes.
male <- subset(d, sex == "M") # we will use data with outliers removed
female <- subset(d, sex == "F")
m <- male$weight
f <- female$weight

bin_size <- 5
# Common break points for both groups, padded by three bins on either side
start <- min(m, f) - 3 * bin_size
end <- max(m, f) + 3 * bin_size
bins <- seq(start, end, by = bin_size)
xlim <- c(start, end)

# Height of the tallest bar across BOTH groups. Without it the y axis is sized
# for the histogram drawn first, and the one added on top gets clipped whenever
# it has a taller bar.
count_max <- max(
  hist(f, breaks = bins, plot = FALSE)$counts,
  hist(m, breaks = bins, plot = FALSE)$counts
)

# the last number in rgb() is for transparency
colors <- c(rgb(1, 0, 1, 0.7), rgb(0, 0, 1, 0.5))
hist(female$weight, breaks = bins, col = colors[1], xlim = xlim,
     ylim = c(0, count_max), xlab = "Weight, kg")
# add = TRUE draws onto the axes set up by the call above
hist(male$weight, breaks = bins, add = TRUE, col = colors[2], xlim = xlim)
legend("topright", legend = c("Females", "Males"), fill = colors, bty = "n", border = NA)
20.3.8 Scatterplot
# Reported versus measured weight, coloured by sex.
# as.factor() makes the colouring work whether `sex` is stored as a factor or
# as a character column (the read.csv() default since R 4.0): a bare character
# vector would be read as colour names and fail. A column that already is a
# factor passes through unchanged. The factor codes pick colours 1, 2, ... from
# the current palette().
plot(d$weight, d$repwt, pch = 20, col = as.factor(d$sex))
# Assumes level 1 is "F" and level 2 is "M" (alphabetical order) - confirm
# against levels(as.factor(d$sex)).
legend("topleft", legend = c("Females", "Males"), col = 1:2, pch = 20)
20.3.9 Function palette()
Colors are taken, in order, from the palette currently set with palette().
The palette's colors can be changed and later reset to the defaults with palette("default").
# Replace the current palette: colour 1 becomes a semi-transparent magenta,
# colour 2 becomes blue. The new palette stays active for later plots until it
# is changed again or reset with palette("default").
palette(c(rgb(1, 0, 1, 0.7), "blue")) # changing palette()
# as.factor() makes the colouring work whether `sex` is stored as a factor or
# as a character column; a column that already is a factor passes through
# unchanged. The factor codes index into the palette set above.
plot(d$weight, d$repwt,
     col = as.factor(d$sex), pch = 20,
     xlab = "Measured weight, kg", ylab = "Reported weight, kg",
     main = "Reported versus measured weights by sex")
# Assumes level 1 is "F" and level 2 is "M" (alphabetical order) - confirm
# against levels(as.factor(d$sex)).
legend("bottomright", legend = c("Females", "Males"), col = 1:2, pch = 20, bty = "n")