/*---------------------------------------------------------------------
  Outlier detection with a chi-square probability plot.

  Steps:
    1. Build data set OUTLIER2: 100 random rows (x1-x3) rescaled so
       each row sums to 1, followed by two hand-entered rows whose
       values sum to 0.3 (the planted outliers).
    2. PROC PRINCOMP with STD writes standardized principal component
       scores prin1-prin3 to data set PRIN.
    3. DSQ = prin1**2 + prin2**2 + prin3**2 is computed per row, the
       rows are sorted by DSQ, and each DSQ is paired with the
       chi-square (3 df) quantile for its plotting position.
    4. The upper part of the sorted table is printed and DSQ is
       plotted against the expected quantile with a reference line.
 ---------------------------------------------------------------------*/
options ls=78 nodate;
title 'Outlier detection: Simple ChiSquare Probability plot';

data outlier2;
   drop i sum;

   /* 100 rows of three uniform variates, each row rescaled to sum to 1 */
   do i = 1 to 100;
      x1 = uniform(54321);
      x2 = uniform(54321);
      x3 = uniform(54321);
      sum = sum(of x1-x3);
      x1 = x1 / sum;      /* dividing by the row total introduces */
      x2 = x2 / sum;      /* negative correlations among x1-x3    */
      x3 = x3 / sum;
      output;
   end;

   /* two planted outliers: unlike the rows above they do not sum to 1 */
   x1 = .1;  x2 = .1;  x3 = .1;  output;
   x1 = .15; x2 = .05; x3 = .1;  output;
run;

options ps=60;

/* Standardized principal component scores                */
/* (just a rotation of the data in 3-space, then rescaled) */
proc princomp data=outlier2 STD noprint out=prin;
   var x1-x3;
run;

/* Squared distance of each row = uncorrected sum of squares of the scores */
data chisq;
   set prin;
   dsq = uss(of prin1-prin3);
run;

proc sort data=chisq out=chisq;
   by dsq;
run;

/* Chi-square probability plot coordinates */
data chisq;
   /* NOBS= supplies the actual number of rows, so the plotting position
      no longer depends on a hard-coded count (the original divided by
      103 although the data set holds 100 + 2 = 102 rows).  The NOBS=
      variable is temporary and is not written to the output data set. */
   set chisq nobs=n;
   drop x1-x3 prin1-prin3;

   p     = ( _n_ - .5 ) / n;        /* plotting position of this dsq     */
   chisq = cinv( p, 3 );            /* chi-square quantile for 3 df      */
   prob  = 1 - probchi( dsq, 3 );   /* upper-tail p-value of dsq         */

   /* plotting symbol: '#' marks rows with p-value below .01 */
   if prob < .01 then char = '#';
   else               char = '*';

   label dsq   = 'Squared Distance from Centroid'
         chisq = 'Expected Chi Squared';
run;

/* list the rows from observation 51 onward (the larger distances) */
proc print data=chisq(firstobs=51);
run;

/* observed vs. expected distances, overlaid with the line dsq = chisq */
proc plot data=chisq;
   plot dsq   * chisq = char
        chisq * chisq = '-' / overlay;
run;
quit;

title2;   /* cancel any second-level (and lower) title lines */