Title 'Vocabulary Growth Study: Repeated measures analysis';
data vocab;
*------------Longitudinal vocabulary study ------------------
| Data from the Laboratory School of Univ.  of Chicago.  They
| consist of scores from a cohort of pupils in grades 8-11 on
| the vocabulary section of the Cooperative Reading Test.  The
| scores are scaled to a common, but arbitrary origin and unit
| of measurement, so as to be comparable over the four grades.
|
| Since these data cover an age range in which physical growth
| is beginning to decelerate, it is of interest whether a
| similar effect occurs in the acquisition of new vocabulary.
|
| Source of data: R.D.  Bock, "Multivariate statistical
| methods", 1975, p. 453.  See also Keesling, J.W., Bock, R.D.
| et al., "The Laboratory School study of vocabulary growth",
| University of Chicago, 1975.
|
| Layout of each data record (list input, blank separated):
|   subject  grade8  grade9  grade10  grade11
| NOTE: keep every quotation mark in this comment paired.  A
| statement-style comment must not contain an unmatched quote.
*-------------------------------------------------------------;
    input subject grade8-grade11;
    label   grade8  = 'Grade 8 vocabulary score'
            grade9  = 'Grade 9 vocabulary score'
            grade10 = 'Grade 10 vocabulary score'
            grade11 = 'Grade 11 vocabulary score';
datalines;
1       1.75    2.6     3.76    3.68
2        .9     2.47    2.44    3.43
3        .8      .93     .4     2.27
4       2.42    4.15    4.56    4.21
5       -1.31   -1.31   -.66    -2.22
6       -1.56   1.67     .18    2.33
7       1.09    1.50     .52    2.33
8       -1.92   1.03     .50    3.04
9       -1.61    .29     .73    3.24
10      2.47    3.64    2.87    5.38
11      -.95     .41     .21    1.82
12      1.66    2.74    2.40    2.17
13      2.07    4.92    4.46    4.71
14      3.30    6.10    7.19    7.46
15      2.75    2.53    4.28    5.93
16      2.25    3.38    5.79    4.40
17      2.08    1.74    4.12    3.62
18       .14     .01    1.48    2.78
19       .13    3.19     .60    3.14
20      2.19    2.65    3.27    2.73
21      -.64    -1.31   -.37    4.09
22      2.02    3.45    5.32    6.01
23      2.05    1.80    3.91    2.49
24      1.48     .47    3.63    3.88
25      1.97    2.54    3.26    5.62
26      1.35    4.63    3.54    5.24
27      -.56    -.36    1.14    1.34
28       .26     .08    1.17    2.15
29      1.22    1.41    4.66    2.47
30      -1.43    .80    -.03    1.04
31      -1.17   1.66    2.11    1.42
32      1.68    1.71    4.07    3.30
33      -.47     .93    1.30     .76
34      2.18    6.42    4.64    4.82
35      4.21    7.08    6.00    5.65
36      8.28    9.55    10.24   10.58
37      1.24    4.90    2.42    2.54
38      5.94    6.56    9.36    7.72
39       .87    3.36    2.58    1.73
40      -.09    2.29    3.08    3.35
41      3.24    4.78    3.52    4.84
42      1.03    2.10    3.88    2.81
43      3.58    4.67    3.83    5.19
44      1.41    1.75    3.70    3.77
45      -.65    -.11    2.40    3.5
46      1.52    3.04    2.74    2.63
47       .57    2.71    1.90    2.41
48      2.18    2.96    4.78    3.34
49      1.10    2.65    1.72    2.96
50       .15    2.69    2.69    3.50
51      -1.27   1.26     .71    2.68
52      2.81    5.19    6.33    5.93
53      2.62    3.54    4.86    5.80
54       .11    2.25    1.56    3.92
55       .61    1.14    1.35     .53
56      -2.19   -.42    1.54    1.16
57      1.55    2.42    1.11    2.18
58      -.04     .50    2.60    2.61
59      3.10    2.00    3.92    3.91
60      -.29    2.62    1.60    1.86
61      2.28    3.39    4.91    3.89
62      2.57    5.78    5.12    4.98
63      -2.19    .71    1.56    2.31
64      -.04    2.44    1.79    2.64
;
*----------------------------------------------------------*
| The REPEATED statement makes the analysis of repeated    |
| measures simple and efficient.  Note that since there is |
| no between-S design, there is no independent variable    |
| specified on the MODEL statement.                        |
*----------------------------------------------------------;
proc glm data=vocab;
     title2 'Multivariate Repeated Measures Analysis';
     * Four responses, intercept-only model.  NOUNI skips the
       separate univariate analysis of each grade score;
     model grade8 grade9 grade10 grade11 = / nouni;
     * GRADE is the within-subject factor with 4 levels spaced at
       8, 9, 10, 11 and is analysed through polynomial contrasts;
     repeated grade 4 (8 9 10 11) polynomial
              / printe printh short summary;
run;
*-------------------------------------------------------------*
| The older approach required putting the data in a univariate
| form, strung out so each measure was a separate observation
| in a single variable.  The rest of the analysis below is for
| comparison only.
*-------------------------------------------------------------;
data vravel;
     set vocab;
     * Address the four score columns by grade number (8 to 11);
     array score{8:11} grade8-grade11;
     * Write one observation per subject and grade (long format);
     do grade = 8 to 11;
        vocab = score{grade};
        output;
     end;
     keep subject grade vocab;
run;
	 
*--------------------------------------------------------------*
| Univariate mixed model analysis.
|
| In the mixed model (Subjects random, Grade fixed), the error
| term for the Grade effect is the GRADE*SUBJECT interaction.
| Since this term is not listed in the MODEL statement, it
| goes into the residual.  The mixed model, however, requires
| the assumption of compound symmetry (which IS satisfied
| here).  This form of analysis is HORRIBLY INEFFICIENT if
| you have large N.  CONTRAST statements are used to pull out
| the trend components of the GRADE effect.
|
| The CONTRAST and ESTIMATE coefficients are the unnormalized
| orthogonal polynomial weights for 4 equally spaced levels.
| The Slope estimate is therefore on the contrast scale, which
| is 10 times the least-squares change per grade.  Add
| / DIVISOR=10 to that ESTIMATE to report it per grade.
|
| Only one TITLE2 is set for this step, since a second TITLE2
| issued before RUN would replace the first before any output
| is printed.
*---------------------------------------------------------------;
title2 'Mixed Model Analysis with Trends';
proc glm data=vravel;
    class subject grade;
    model vocab = grade subject;
    contrast 'Linear'       grade  -3 -1  1  3;
    contrast 'Quadratic'    grade   1 -1 -1  1;
    contrast 'Cubic'        grade  -1  3 -3  1;
    estimate 'Slope'        grade  -3 -1  1  3;
    estimate 'Curvature'    grade   1 -1 -1  1;
run;
*--------------------------------------------------------------
| The first GLM with the REPEATED ....  POLYNOMIAL statement
| transforms the data to scores on orthogonal polynomials.  The
| steps below do the same analysis by explicitly calculating
| these scores and then performing the multivariate tests on
| the transformed scores.
*---------------------------------------------------------------;
data trends;
   set vocab;
   * Per-subject average of the four scores;
   mean   = mean(of grade8-grade11);
   * Unnormalized orthogonal polynomial scores over the four grades;
   linear = -3*grade8 - grade9 + grade10 + 3*grade11;
   quad   =    grade8 - grade9 - grade10 +   grade11;
   cubic  =   -grade8 + 3*grade9 - 3*grade10 + grade11;
run;

title2 'Transformation to orthogonal polynomial scores';

* List the first 40 subjects with their transformed scores;
proc print data=trends(obs=40);
   id subject;
run;

* Average every original and transformed score over subjects;
proc summary data=trends;
   var grade8-grade11 mean linear quad cubic;
   output out  = means
          mean = grade8-grade11 mean linear quad cubic;
run;

proc print data=means;
   var grade8-grade11 mean linear quad cubic;
run;
 
*------------------------------------------------------------
| If there were a between-S factor, it would be tested based
| on the MEAN over the repeated measures, e.g., with a model
| MEAN=GROUP.  Here, the model contains only the constant
| (intercept), so we just test whether the overall mean is
| zero, as in a 1-sample t-test.
*------------------------------------------------------------;
proc glm data=trends;
     * Title is set inside the step so that it cannot affect the
       output of the step that precedes this one;
     title2 'Test of mean';
     * Intercept-only model for the per-subject mean score;
     model mean = ;
run;
 
*------------------------------------------------------------*
| Next, test the trend components using a multivariate test.
| Again, if there were between-S factors, they would be
| included in the model.  The multivariate test of the
| intercept gives the within-S effects.  The test of GROUP
| (if present) would give the interactions of within-S with
| between-S effects.
*------------------------------------------------------------;
proc glm data=trends;
     * Title is set inside the step so that it cannot affect the
       output of the step that precedes this one;
     title2 'Multivariate Analysis of Trend Scores';
     * Three trend scores as joint responses, intercept-only model;
     model linear quad cubic = ;
     * Multivariate test that the intercepts of all trend scores are
       zero, with the E and H matrices printed;
     manova h=intercept / short printe printh;
run;
* PROC GLM is interactive and stays active after RUN.  QUIT ends it
  explicitly so the last step does not remain open;
quit;