# Figure 17.5-4 (right). Cockroach neurons # Make a residual plot to check assumptions. The data are temperature and the firing rate of neurons in cockroach. # Read and inspect the data. roach <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17f5_4CockroachNeurons.csv")) head(roach) # Scatter plot with regression line. plot(rate ~ temperature, data = roach) roachRegression <- lm(rate ~ temperature, data = roach) abline(roachRegression) # Residual plot. plot(residuals(roachRegression) ~ temperature, data = roach) abline(c(0,0)) # Commands for a more elaborate residual plot are here. plot(residuals(roachRegression) ~ temperature, data = roach, pch = 16, col = "firebrick", las = 1, cex = 1.5, bty = "l", xlab = "Temperature", ylab = "Residuals") abline(c(0,0)) # ------------------------------------------------------------ # Figure 17.6-3. Iris pollination # Use a square root transformation to improve the fit to assumptions of linear regression. The data are number of pollen grains received and floral tube length of an iris species. # Read and inspect data. iris <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17f6_3IrisPollination.csv")) head(iris) # Scatter plot with regression line. plot(grainsDeposited ~ tubeLengthMm, data = iris) irisRegression <- lm(grainsDeposited ~ tubeLengthMm, data = iris) abline(irisRegression) # Residual plot. plot(residuals(irisRegression) ~ tubeLengthMm, data = iris) abline(c(0,0)) # Commands for a residual plot with more options are here. plot(residuals(irisRegression) ~ tubeLengthMm, data = iris, pch = 16, col = "firebrick", las = 1, cex = 1.5, bty = "l", xlab = "Floral tube length (mm)", ylab = "Residuals") abline(c(0,0)) # Square root transformation. iris$sqRootGrains <- sqrt(iris$grainsDeposited + 1/2) # Scatter plot using transformed data, with new regression line added. plot(sqRootGrains ~ tubeLengthMm, data = iris) irisRegressionSqrt <- lm(sqRootGrains ~ tubeLengthMm, data = iris) abline(irisRegressionSqrt) # Residual plot based on the transformed data. plot(residuals(irisRegressionSqrt) ~ tubeLengthMm, data = iris) abline(c(0,0)) # Instructions for a residual plot with more options are here. plot(residuals(irisRegressionSqrt) ~ tubeLengthMm, data = iris, pch= 16, col = "firebrick", las = 1, cex=1.5, bty = "l", xlab = "Floral tube length (mm)", ylab = "Residuals") abline(c(0,0)) # ------------------------------------------------------------ # Figure 17.8-1. Iron and phytoplankton growth # Fit a nonlinear regression curve having an asymptote (Michaelis-Menten curve). The data are the relationship between population growth rate of a phytoplankton in culture and the concentration of iron in the medium. # Read and examine data. phytoplankton <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17f8_1IronAndPhytoplanktonGrowth.csv")) head(phytoplankton) # Scatter plot. plot(phytoGrowthRate ~ ironConcentration, data = phytoplankton) # Instructions for a scatter plot with more options are here. plot( phytoGrowthRate ~ ironConcentration, data = phytoplankton, pch = 16, col = "firebrick", las = 1, cex = 1.5, bty = "l", ylab = "Growth rate (no./day)", xlab = expression(paste("Iron concentration (", mu, "mol)")) ) # # Fit a Michaelis-Menten curve to the phytoplankton data using the nls (nonlinear least squares). To fit the curve, provide a formula that also includes symbols (letters) for the parameters to be estimated. In the following function, we use "a" and "b" to indicate the parameters of the Michaelis-Menten curve we want to estimate. The function includes an argument where we must provide an initial guess of parameter values. The value of the initial guess is not so important -- here we choose a=1 and b=1 as initial guesses. The first function below carried out the model fit and save the results in an R object named phytoCurve. phytoCurve <- nls(phytoGrowthRate ~ a*ironConcentration / ( b+ironConcentration), data = phytoplankton, list(a = 1, b = 1)) # Obtain the parameter estimates using the summary command to , including standard errors and t-tests of null hypotheses that parameter values are zero. summary(phytoCurve) # Add the nonlinear regression curve to scatter plot. If necessary, redraw the scatter plot before issuing the following commands. xpts <- seq(min(phytoplankton$ironConcentration), max(phytoplankton$ironConcentration), length.out = 100) ypts <- predict(phytoCurve, new = data.frame(ironConcentration = xpts)) lines(xpts, ypts) # Many of the functions that can be used to extract results from a saved lm object work in the same way when applied to an nls object, such as predict, residuals, and coef. # ------------------------------------------------------------ # Figure 17.8-2. Pond plants and productivity # Fit a quadratic curve to the relationship between the number of plant species present in ponds and pond productivity. # Read and examine data. pondProductivity <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17f8_2PondPlantsAndProductivity.csv")) head(pondProductivity) # Scatter plot. plot(species ~ productivity, data = pondProductivity) # Commands for a fancier scatter plot are here. plot(species ~ productivity, data = pondProductivity, pch = 16, col = "firebrick", las = 1, cex = 1.5, bty = "l", ylab = "Number of species", xlab = "Productivity (g/15 days)" ) # # Fit a quadratic curve to the data. Here, the single variable productivity in the data frame is included in the formula both as itself and as the squared term, productivity2. To make the squared term work, we need to wrap the term with I(). The results of the model fit are saved in an R object productivityCurve. productivityCurve <- lm(species ~ productivity + I(productivity^2), data = pondProductivity) # Show estimates of the parameters of the quadratic curve (regression coefficients) are obtained as follows, along with standard errors and t-tests. summary(productivityCurve) # Add quadratic regression curve to scatter plot. If necessary, redraw the previous scatter plot before issuing the following commands. xpts <- seq(min(pondProductivity$productivity), max(pondProductivity$productivity), length.out = 100) ypts <- predict(productivityCurve, new = data.frame(productivity = xpts)) lines(xpts, ypts) # ------------------------------------------------------------ # Example 17.8. Incredible shrinking seal # Fit a formula-free curve (cubic spline) to the relationship between body length and age for female fur seals. # Read and inspect data shrink <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17e8ShrinkingSeals.csv")) head(shrink) # Scatter plot. plot(length ~ ageInDays, data = shrink, pch = ".") # Commands for a fancier scatter plot with more options are here. plot(jitter(length, factor = 2) ~ ageInDays, data = shrink, pch = ".", col = "firebrick", las = 1, bty = "l", ylab = "Body length (cm)", xlab = "Female age (days)") # # Fit a cubic spline. The argument df stands for effective degrees of freedom, which allows you to control how complicated the curve should be. The simplest possible curve is a straight line, which has df=2 (one for slope and another for intercept). More complex curves require more df. Here we fit a very complicated curve. shrinkCurve <- smooth.spline(shrink$ageInDays, shrink$length, df = 40) # Add curve to scatter plot. If needed, replot the scatter plot before issuing the commands below. xpts <- seq(min(shrink$ageInDays), max(shrink$ageInDays), length.out = 1000) ypts <- predict(shrinkCurve, xpts)$y lines(xpts, ypts) # ------------------------------------------------------------ # Figure 17.9-1. Guppy cold death # Fit a logistic regression to the relationship between guppy mortality and duration of exposure to a temperature of 5 degrees C. # Read and inspect the data. guppy <- read.csv(url("http://www.zoology.ubc.ca/~schluter/WhitlockSchluter/wp-content/data/chapter17/chap17f9_1GuppyColdDeath.csv")) head(guppy) # Draw a frequency table of mortality at different exposure times. table(guppy$mortality, guppy$exposureDurationMin, dnn = c("Mortality","Exposure (min)")) # Scatter plot of the data. plot(mortality ~ jitter(exposureDurationMin), data = guppy) # Commands for a fancier scatter plot, with more options, are here. plot(jitter(mortality, factor = 0.1) ~ jitter(exposureDurationMin, factor = 1), data = guppy, col = "firebrick", las = 1, bty = "l", ylab = "Mortality", xlab = "Duration of exposure (min)") # # Fit a logistic regression. guppyGlm <- glm(mortality ~ exposureDurationMin, data = guppy, family = binomial(link = logit)) # Add logistic regression curve to scatter plot. xpts <- seq(min(guppy$exposureDurationMin), max(guppy$exposureDurationMin), length.out = 100) ypts <- predict(guppyGlm, newdata = data.frame(exposureDurationMin = xpts), type = "response") lines(xpts, ypts) # Table of regression coefficients, with parameter estimates (Table 17.9-2). summary(guppyGlm) # 95% confidence intervals for parameter estimates. It is necessary to load the MASS package first. library(MASS) confint(guppyGlm) # Predict probability of mortality (mean mortality) for a given x-value, 10 min duration, including standard error of prediction. predict(guppyGlm, newdata = data.frame(exposureDurationMin = 10), type = "response", se.fit = TRUE) # Estimate the LD50, the dose at which half the individuals are predicted to die from exposure. library(MASS) dose.p(guppyGlm, p = 0.50) # Analysis of deviance table, with a of the null hypothesis of zero slope (Table 17.9-3). anova(guppyGlm, test = "Chi")