Polytomous Data: nested dichotomies

library(car)   # for data and Anova()
data(Womenlf)
some(Womenlf)

##       partic hincome children   region
## 29  not.work      17  present  Prairie
## 33  not.work      15  present  Ontario
## 36  not.work      19   absent  Ontario
## 51  parttime      10  present  Prairie
## 58  not.work      23  present Atlantic
## 104 not.work      15   absent       BC
## 128 parttime      13  present  Prairie
## 143 not.work      11  present  Prairie
## 208 fulltime      11   absent   Quebec
## 235 not.work       7  present   Quebec

create dichotomies

Womenlf <- within(Womenlf,{
  working <- recode(partic, " 'not.work' = 'no'; else = 'yes' ")
  fulltime <- recode (partic, 
    " 'fulltime' = 'yes'; 'parttime' = 'no'; 'not.work' = NA")})
some(Womenlf)

##       partic hincome children   region fulltime working
## 31  not.work      13  present  Ontario     <NA>      no
## 69  not.work      19  present  Ontario     <NA>      no
## 82  parttime      15  present  Ontario       no     yes
## 122 not.work      23  present Atlantic     <NA>      no
## 128 parttime      13  present  Prairie       no     yes
## 137 parttime      14  present  Ontario       no     yes
## 143 not.work      11  present  Prairie     <NA>      no
## 204 not.work      28  present   Quebec     <NA>      no
## 207 not.work      11  present   Quebec     <NA>      no
## 216 parttime      17  present   Quebec       no     yes

fit models for each dichotomy

Womenlf <- within(Womenlf, contrasts(children)<- 'contr.treatment')
mod.working <- glm(working ~ hincome + children, family=binomial, data=Womenlf)
summary(mod.working)

## 
## Call:
## glm(formula = working ~ hincome + children, family = binomial, 
##     data = Womenlf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6767  -0.8652  -0.7768   0.9292   1.9970  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      1.33583    0.38376   3.481   0.0005 ***
## hincome         -0.04231    0.01978  -2.139   0.0324 *  
## childrenpresent -1.57565    0.29226  -5.391    7e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 356.15  on 262  degrees of freedom
## Residual deviance: 319.73  on 260  degrees of freedom
## AIC: 325.73
## 
## Number of Fisher Scoring iterations: 4

mod.fulltime <- glm(fulltime ~ hincome + children, family=binomial, data=Womenlf)
summary(mod.fulltime)

## 
## Call:
## glm(formula = fulltime ~ hincome + children, family = binomial, 
##     data = Womenlf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4047  -0.8678   0.3949   0.6213   1.7641  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.47777    0.76711   4.534 5.80e-06 ***
## hincome         -0.10727    0.03915  -2.740  0.00615 ** 
## childrenpresent -2.65146    0.54108  -4.900 9.57e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.34  on 107  degrees of freedom
## Residual deviance: 104.49  on 105  degrees of freedom
##   (155 observations deleted due to missingness)
## AIC: 110.49
## 
## Number of Fisher Scoring iterations: 5

Anova(mod.working)

## Analysis of Deviance Table (Type II tests)
## 
## Response: working
##          LR Chisq Df Pr(>Chisq)    
## hincome    4.8264  1    0.02803 *  
## children  31.3229  1  2.185e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova(mod.fulltime)

## Analysis of Deviance Table (Type II tests)
## 
## Response: fulltime
##          LR Chisq Df Pr(>Chisq)    
## hincome     8.981  1   0.002728 ** 
## children   32.136  1  1.437e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Prepare for plotting

attach(Womenlf)
# get fitted values for both sub-models
predictors <- expand.grid(hincome=1:45, children=c('absent', 'present'))
p.work <- predict(mod.working, predictors, type='response')
p.fulltime <- predict(mod.fulltime, predictors, type='response')

# calculate unconditional probs for the three response categories
p.full <- p.work * p.fulltime
p.part <- p.work * (1 - p.fulltime)
p.not <- 1 - p.work

## NB: the plot looks best if the plot window is made wider than tall
op <- par(mfrow=c(1,2))
# children absent panel
plot(c(1,45), c(0,1), 
    type='n', xlab="Husband's Income", ylab='Fitted Probability',
    main="Children absent")
lines(1:45, p.not[1:45], lty=1, lwd=3, col="black")
lines(1:45, p.part[1:45], lty=2, lwd=3, col="blue")
lines(1:45, p.full[1:45], lty=3, lwd=3, col="red")

legend(5, 0.97, lty=1:3, lwd=3, col=c("black", "blue", "red"),
    legend=c('not working', 'part-time', 'full-time'))  

# children present panel
plot(c(1,45), c(0,1), 
    type='n', xlab="Husband's Income", ylab='Fitted Probability',
    main="Children present")
lines(1:45, p.not[46:90], lty=1, lwd=3, col="black")
lines(1:45, p.part[46:90], lty=2, lwd=3, col="blue")
lines(1:45, p.full[46:90], lty=3, lwd=3, col="red")

par(op)

a more general way to make the plot

op <- par(mfrow=c(1,2))
plotdata <- data.frame(predictors, p.full, p.part, p.not)
Hinc <- 1:max(hincome)
for ( kids in c("absent", "present") ) {
    data <- subset(plotdata, children==kids)
    plot( range(hincome), c(0,1), type="n",
        xlab="Husband's Income", ylab='Fitted Probability',
        main = paste("Children", kids))
    lines(Hinc, data$p.not,  lwd=3, col="black", lty=1)
    lines(Hinc, data$p.part, lwd=3, col="blue",  lty=2)
    lines(Hinc, data$p.full, lwd=3, col="red",   lty=3)
  if (kids=="absent") {
  legend(5, 0.97, lty=1:3, lwd=3, col=c("black", "blue", "red"),
    legend=c('not working', 'part-time', 'full-time'))
    }
}

par(op)


detach(Womenlf)

Polytomous Data: nested dichotomies

Michael Friendly

21 Jan 2015

create dichotomies

fit models for each dichotomy

a more general way to make the plot