data("quine", package="MASS")
library(car)       # for Anova()
library(AER)       # dispersiontest
library(lmtest)    # coeftest()
library(effects)   # effect plots

Fixup the data for ease of use

Age should be treated as an ordered factor; use better labels for Lrn

quine$Age <- ordered(quine$Age)
levels(quine$Lrn) <- c("Average", "Slow")

examine sample sizes in table of predictors

quine.tab <- xtabs(~ Lrn + Age + Sex + Eth, data=quine)
ftable(Age + Sex ~ Lrn + Eth, data=quine.tab)
##             Age F0    F1    F2    F3   
##             Sex  F  M  F  M  F  M  F  M
## Lrn     Eth                            
## Average A        4  5  5  2  1  7  9  7
##         N        4  6  6  2  1  7 10  7
## Slow    A        1  3 10  3  8  4  0  0
##         N        1  3 11  7  9  3  0  0

Fit model with all predictors

Age should be treated as an ordered factor

quine$Age <- ordered(quine$Age)
quine.mod1 <- glm(Days ~ ., data=quine, family=poisson)
summary(quine.mod1)
## 
## Call:
## glm(formula = Days ~ ., family = poisson, data = quine)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -6.81   -3.06   -1.12    1.82    9.91  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.8033     0.0434   64.55  < 2e-16 ***
## EthN         -0.5336     0.0419  -12.74  < 2e-16 ***
## SexM          0.1616     0.0425    3.80  0.00015 ***
## Age.L         0.4192     0.0471    8.90  < 2e-16 ***
## Age.Q         0.2519     0.0497    5.06  4.1e-07 ***
## Age.C        -0.3013     0.0410   -7.34  2.1e-13 ***
## LrnSlow       0.3489     0.0520    6.70  2.0e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2073.5  on 145  degrees of freedom
## Residual deviance: 1696.7  on 139  degrees of freedom
## AIC: 2299
## 
## Number of Fisher Scoring iterations: 5
Anova(quine.mod1)
## Analysis of Deviance Table (Type II tests)
## 
## Response: Days
##     LR Chisq Df Pr(>Chisq)    
## Eth    166.8  1    < 2e-16 ***
## Sex     14.4  1    0.00015 ***
## Age    168.3  3    < 2e-16 ***
## Lrn     45.8  1    1.3e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot effects

library(effects)
plot(allEffects(quine.mod1), ci.style='bands')

Test for overdispersion

dispersiontest(quine.mod1)
## 
##  Overdispersion test
## 
## data:  quine.mod1
## z = 5.47, p-value = 2.3e-08
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##      12.53

Re-fit as quasi-poisson

quine.mod1q <- glm(Days ~ ., data=quine, family=quasipoisson)
coeftest(quine.mod1q)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    2.803      0.158   17.79  < 2e-16 ***
## EthN          -0.534      0.152   -3.51  0.00045 ***
## SexM           0.162      0.154    1.05  0.29510    
## Age.L          0.419      0.171    2.45  0.01418 *  
## Age.Q          0.252      0.181    1.40  0.16288    
## Age.C         -0.301      0.149   -2.02  0.04297 *  
## LrnSlow        0.349      0.189    1.85  0.06463 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(quine.mod1q)
## Analysis of Deviance Table (Type II tests)
## 
## Response: Days
##     LR Chisq Df Pr(>Chisq)    
## Eth    12.67  1    0.00037 ***
## Sex     1.09  1    0.29560    
## Age    12.78  3    0.00513 ** 
## Lrn     3.48  1    0.06218 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Visualize effects

plot(allEffects(quine.mod1q), ci.style="bands")

further analyses: test for interactions

quine.mod2q <- glm(Days ~ .^2, data=quine, family=quasipoisson)

summary(quine.mod2q)
## 
## Call:
## glm(formula = Days ~ .^2, family = quasipoisson, data = quine)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
##  -7.65   -2.78   -0.53    1.57    8.20  
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.86198    0.19938   14.35   <2e-16 ***
## EthN          -0.76782    0.27360   -2.81   0.0058 ** 
## SexM          -0.16370    0.29275   -0.56   0.5770    
## Age.L         -0.11232    0.27711   -0.41   0.6859    
## Age.Q         -0.01194    0.33992   -0.04   0.9720    
## Age.C         -0.00492    0.36984   -0.01   0.9894    
## LrnSlow        0.65458    0.60858    1.08   0.2841    
## EthN:SexM      0.43902    0.30152    1.46   0.1478    
## EthN:Age.L    -0.16600    0.31157   -0.53   0.5951    
## EthN:Age.Q     1.07522    0.35210    3.05   0.0027 ** 
## EthN:Age.C     0.24660    0.29802    0.83   0.4095    
## EthN:LrnSlow   0.26415    0.37260    0.71   0.4796    
## SexM:Age.L     1.03609    0.31698    3.27   0.0014 ** 
## SexM:Age.Q     0.05786    0.42656    0.14   0.8923    
## SexM:Age.C    -0.51558    0.33414   -1.54   0.1253    
## SexM:LrnSlow   0.04143    0.44920    0.09   0.9267    
## Age.L:LrnSlow  1.12606    1.17225    0.96   0.3386    
## Age.Q:LrnSlow  0.63378    0.92199    0.69   0.4931    
## Age.C:LrnSlow       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 10.723)
## 
##     Null deviance: 2073.5  on 145  degrees of freedom
## Residual deviance: 1368.7  on 128  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
car::Anova(quine.mod2q)
## Analysis of Deviance Table (Type II tests)
## 
## Response: Days
##         LR Chisq Df Pr(>Chisq)    
## Eth        13.69  1    0.00022 ***
## Sex         1.76  1    0.18418    
## Age        15.52  3    0.00142 ** 
## Lrn         6.50  1    0.01078 *  
## Eth:Sex     2.14  1    0.14377    
## Eth:Age    12.01  3    0.00735 ** 
## Eth:Lrn     0.51  1    0.47553    
## Sex:Age    13.95  3    0.00298 ** 
## Sex:Lrn     0.01  1    0.92655    
## Age:Lrn     1.08  2    0.58192    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Try adding interactions

quine.mod3 <- update(quine.mod1q, . ~ . + Eth:Age)
plot(allEffects(quine.mod3))

quine.mod4 <- update(quine.mod3, . ~ . + Sex:Age)
plot(allEffects(quine.mod4))

Compare models

anova(quine.mod1q, quine.mod3, quine.mod4, quine.mod2q, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Days ~ Eth + Sex + Age + Lrn
## Model 2: Days ~ Eth + Sex + Age + Lrn + Eth:Age
## Model 3: Days ~ Eth + Sex + Age + Lrn + Eth:Age + Sex:Age
## Model 4: Days ~ (Eth + Sex + Age + Lrn)^2
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1       139       1697                        
## 2       136       1543  3    153.9   0.0025 **
## 3       133       1410  3    132.8   0.0062 **
## 4       128       1369  5     41.4   0.5698   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1