# Comparing two means: R code for Chapter 12 examples # Download the R code on this page as a single file here. # ------------------------------------------------------------ # Example 12.2. Red-winged blackbirds # Confidence interval for mean difference and the paired t-test, comparing immunocompetence of red-winged blackbirds before and after testosterone implants. # Read the data into a data frame. The data are in "wide" format (one row per individual). blackbird <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter12/chap12e2BlackbirdTestosterone.csv")) blackbird # Calculate and plot differences (Figure 12.2-2). We add a variable called d to the data frame with the After minus Before difference. blackbird$d <- blackbird$logAfterImplant - blackbird$logBeforeImplant head(blackbird) hist(blackbird$d, right = FALSE, col = "firebrick") # To see how to produce the strip chart with lines (Figure 12.2-1) click here. # It helps to obtain a version of the data in "long" format. blackbird2 = reshape(blackbird, varying = 4:5, direction = "long", idvar = "blackbird", v.names = "logAntibody", times = factor(c("before","after"), levels = c("before","after"))) head(blackbird2) par(bty="l") stripchart(logAntibody ~ time, data = blackbird2, vertical = TRUE, xlab = "Implant treatment", ylab="Antibody production rate (ln[mOD/min])", xlim = c(0.6,2.4), las = 1, pch = 16, col = "firebrick") segments(1, blackbird$logBeforeImplant, 2, blackbird$logAfterImplant) # 95% confidence interval for the mean difference. 95% confidence intervals are part of the output of the t.test function, viewed in isolation by adding $conf.int to the command. t.test(blackbird$d)$conf.int # or using the paired = TRUE argument of t.test and specifying the paired variables. t.test(blackbird$logAfterImplant, blackbird$logBeforeImplant, paired = TRUE)$conf.int # Paired t-test. A paired t-test can be done either on differences you have already calculated (d here) or by using the paired=TRUE argument with the measurements from the two groups. t.test(blackbird$d) # or t.test(blackbird$logAfterImplant, blackbird$logBeforeImplant, paired = TRUE) # ------------------------------------------------------------ # Example 12.3. Horned lizards # Confidence interval for the difference between two means, and the two-sample t-test, comparing horn length of live and dead (spiked) horned lizards. lizard <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter12/chap12e3HornedLizards.csv")) lizard # Note that there is one missing value for the variable "squamosalHornLength". Everything is easier if we eliminate the row with missing data. lizard2 <- na.omit(lizard) lizard2 # Multiple histograms using the lattice package. library(lattice) histogram( ~ squamosalHornLength | Survival, data = lizard2, layout = c(1,2), col = "firebrick", breaks = seq(12, 32, by = 2), xlab = "Horn length (mm)") # Click here for code to make multiple histograms using hist in base R instead. oldpar = par(no.readonly = TRUE) # make backup of default graph settings par(mfrow = c(2,1), oma = c(4, 6, 2, 6), mar = c(3, 4, 3, 2)) hist(lizard2$squamosalHornLength[lizard2$Survival == "living"], breaks = seq(12,32,by=2), col = "firebrick", las = 1, main = "living", ylab = "Frequency") hist(lizard2$squamosalHornLength[lizard2$Survival == "killed"], breaks = seq(12,32,by=2), col = "firebrick", las = 1, main = "killed", ylab = "Frequency") mtext("Horn length (mm)", side = 1, outer = TRUE, padj = 0) par(oldpar) # revert to default graph settings # 95% confidence interval for the difference between means # The output of t.test includes the 95% confidence interval for the difference between means. Add $confint after calling the function to get R to report only the confidence interval. The formula in the following command tells R to compare squamosalHornLength between the two groups indicated by Survival. t.test(squamosalHornLength ~ Survival, data = lizard, var.equal = TRUE)$conf.int # A two-sample t-test of the difference between two means can be carried out with t.test by using a formula, asking if squamosalHornLength is predicted by Survival, and specifying that the variables are in the data frame lizard. t.test(squamosalHornLength ~ Survival, data = lizard, var.equal = TRUE) # ------------------------------------------------------------ # Example 12.4. Salmon survival with brook trout # Welch's two-sample t-test for unequal variances, comparing chinook salmon survival in the presents and absence of brook trout. Below, we use this same example to demonstrate the 95% confidence interval for the ratio of two variances, and the F-test of equal variances. # Read the data. chinook <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter12/chap12e4ChinookWithBrookTrout.csv")) # Set the preferred order of categories in tables and graphs chinook$troutTreatment <- factor(chinook$troutTreatment, levels = c("present", "absent")) # Strip chart of the data (bare bones version of Figure 12.4-1) stripchart(proportionSurvived ~ troutTreatment, data = chinook, method = "jitter", vertical = TRUE) # Adding the means and confidence intervals to the plot is trickier. Code for a fancier plot is given here. # Calculate means and confidence intervals for the means. meanProportion = tapply(chinook$proportionSurvived, chinook$troutTreatment, mean) ciPresence = t.test(chinook$proportionSurvived[chinook$troutTreatment == "present"])$conf.int ciAbsence = t.test(chinook$proportionSurvived[chinook$troutTreatment == "absent"])$conf.int lower = c(ciPresence[1], ciAbsence[1]) upper = c(ciPresence[2], ciAbsence[2]) # Stripchart with options adjustAmount = 0.2 par(bty = "l") stripchart(proportionSurvived ~ troutTreatment, data = chinook, vertical = TRUE, method = "jitter", jitter = 0.1, pch = 1, col = "firebrick", cex = 1.5, las = 1, ylim = c(0, max(chinook$proportionSurvived)), lwd = 1.5, xlab = "Trout treatment", ylab = "Proportion surviving") segments( c(1,2) + adjustAmount, lower, c(1,2) + adjustAmount, upper) points(meanProportion ~ c( c(1,2) + adjustAmount ), pch = 16, cex = 1.2) # Calculating summary statistics by group (as found in Table 12.4-3) # Use tapply to calculate statistics by group. It has three required arguments. The first is the numeric variable of interest (a vector). The second argument is a categorical variable (a vector of the same length) identifying the groups that individuals belong to. The third argument is the name of the R function that you want to apply to the variable by group. meanProportion <- tapply(chinook$proportionSurvived, chinook$troutTreatment, mean) sdProportion <- tapply(chinook$proportionSurvived, chinook$troutTreatment, sd) nProportion <- tapply(chinook$proportionSurvived, chinook$troutTreatment, length) data.frame(mean = meanProportion, std.dev = sdProportion, n = nProportion) # Welch's two-sample t-test of means for unequal variances can also be done with t.test, when the var.equal argument is set to FALSE (as it is by default): t.test(proportionSurvived ~ troutTreatment, data = chinook, var.equal = FALSE) #
# Here, we demonstrate the 95% confidence interval for the ratio of two variances, and F-test of equal variances, using the chinook salmon experiment. # 95% confidence interval for variance ratio. (Warning: remember that the method is not robust to departures from assumption of normality.) var.test(proportionSurvived ~ troutTreatment, data = chinook)$conf.int # F-test of equal variances (Warning: Remember that the F-test is not robust to departures from assumption of normality.) var.test(proportionSurvived ~ troutTreatment, data = chinook) # Levene's test of equal variances. This function is in the car package, which must first be installed (see here for instructions) and loaded with the library function before use. # Install the car package. This only need to be done once per computer. The following command # downloads the package from the internet and configures it in your R folder. # install.packages("car", dependencies = TRUE) library(car) leveneTest(chinook$proportionSurvived, group = chinook$troutTreatment, center = mean)