/*
_______________________________________________________________________________
| |
| SAS MACRO FOR LOCAL INFLUENCE ANALYSIS IN LINEAR MIXED MODELS |
|_______________________________________________________________________________|
Prepared by : Franz Torres Barbosa
Center for Molecular Immunology, Habana, Cuba
E-mail : franz@ict.cim.sld.cu
Geert Verbeke
Biostatistical Centre for Clinical Trials, K.U.Leuven, Belgium
E-mail : geert.verbeke@biostat.be
References : Verbeke, G. and Molenberghs, G. (1997)
`Linear Mixed Models in Practice : A SAS-Oriented Approach,'
Lecture Notes in Statistics 126, New-York : Springer-Verlag.
Lesaffre, E. and Verbeke, G. (1997)
`Local Influence in Linear Mixed Models,'
Biometrics, to appear.
Purpose of the macro : The macro performs a local influence analysis for the
detection of influential subjects in linear mixed models. We refer to Verbeke and
Molenberghs (Section 3.13.2) for a description of the procedure, and to Lesaffre
and Verbeke for the technical details.
Three examples of how to use the macro are given at the bottom of this file
*/
/*
_______________________________________________________________________________
| |
| THE MACRO CODE |
|_______________________________________________________________________________|
*/
%macro
LocInfl(data=, idno =, varesp=, fixedef=, randomef=, outdata=);/* Initialization */
%if
%bquote(&data)= %then %let data=&syslast;/* To calculate the number of repeated measures per subject and the time points*/
proc freq data=&data noprint;
tables &idno /out=nfrec;
run;
proc sort data=&data;
by &idno;
run;
/* Fitting the model in Proc mixed */
proc mixed data=&data method=ml asycov;
class &idno;
model &varesp = &fixedef /noint covb s;
random &randomef /type=un sub=&idno g;
ods output solutionf= fixedsol;
ods output covparms= covpar;
ods output G= Dmatr;
ods output asycov = varcov;
ods output covb= covfixed;
run;
proc iml;
reset noprint;
/* Matrix with the solution of the fixed effects */
use fixedsol;
read all into fixedsol;
fixedsol= fixedsol[,
1];/* Matrix with the solution for the var-cov of the random. eff. parameters */
use covpar;
read all into covpar;
n_covp = nrow(covpar);
/* Var-Cov matrix for the fixed effects */
use covfixed;
read all into fixedvar;
n_fix = nrow(fixedvar);
fixedvar = fixedvar[,
2:1+n_fix];fixedvar= -inv(fixedvar);
/* Var-Cov matrix for the random components */
use varcov;
read all into varcov;
n_vc = nrow(varcov);
varcov = varcov[,
1:n_vc];varcov= -inv(varcov);
/* Matrix with the data to obtain the design matrices for the fixed and random effects */
use &data;
labelx = {&fixedef};
labelz = {&randomef};
labely = {&varesp};
read all var labelx into fixed;
read all var labelz into random;
read all var labely into resp;
/* Matrix with the frequencies for each of the subjects */
use nfrec;
read all into nfrec;
nfrec = nfrec[,
2];/* G=D matrix */
use Dmatr;
read all into D;
n_D = nrow(D);
D = D[,
3:2+n_D];/* Procedure to calculate the first derivative */
start Deriv(fixed, random, res, D, sigma2);
/*** With respect to alpha ***/
ni = nrow(fixed);
p = ncol(fixed);
q = ncol(random);
V = random*D*random` + sigma2*I(ni);
W = inv(V);
zero = {
0};help1=repeat(zero,p,
1);help2 =-
2#fixed`*W*res;first = help1 + help2;
/*** With respect to D ***/
j =
1;do while (j <= q);
i =
1;do while (i <= j);
if i=j then mul=
1;else mul=
0;hulp1 = (
2-(mul))#random[,i]`*W*random[,j];hulp2 = -(
2-(mul))#random[,i]`*W*res*res`*W*random[,j]; /* Check this derivatives */first = first//(hulp1 + hulp2);
i = i +
1;end;
j = j +
1;end;
/*** With respect to sigma2 ***/
hulp1 = trace(W);
hulp2 = -res`*W*W*res;
first3 = hulp1 + hulp2;
first = first//first3;
first = -first/
2;return(first);
finish;
/* Procedure to calculate the second derivative */
start SecDer(fixed, random, res, D, sigma2);
/* 2nd derivative alpha and D */
ni = nrow(fixed);
p = ncol(fixed);
q = ncol(random);
V = random*D*random` + sigma2*I(ni);
W = inv(V);
zero = {
0};j=
1;do while (j<=q);
i=
1;do while (i<=j);
help1 =
0;help2 =
2#fixed`*W*random[,i]*random[,j]`*W*res;if i=j then mul=
1;else mul=
0;help2 = help2 +
2#(1-mul)#fixed`*W*random[,j]*random[,i]`*W*res;ret = help1` + help2`;
/* ver como arreglar esto */fila = fila//ret;
i = i+
1;end;
j= j+
1;end;
/* the 2nd derivative alpha and sigma2 */
help1 =
0;help2 =
2#fixed`*W*W*res;ret = help1` + help2`;
fila = fila//ret;
return(fila);
finish;
/* Initializations for the whole loop and calculations of the derivatives */
q = ncol(random);
p = ncol(fixed);
uno = {
1};begin =
1;sum=repeat(
0,(q*(q+1)/2)+1,p);n=nrow(nfrec);
sigma2 = covpar[n_covp,];
s=
1;do s=
1 to n ;ni = nfrec[s,];
end = begin -
1 + ni;fixedi = fixed[begin:end,];
randomi = random[begin:end,];
respi = resp[begin:end];
res = respi-fixedi*fixedsol;
begin = end +
1;first = Deriv(fixedi, randomi, res, D, sigma2);
firstT = firstT||first;
second = SecDer(fixedi, randomi, res, D, sigma2);
sum = sum - (second/
2);end;
L = (fixedvar//sum)||(sum`//varcov);
vect = repeat(sqrt(
2)-1,q,1);help = diag(vect)+
1;help = symsqr(help);
help = repeat(uno,p,
1)//help//1;firstT = firstT#help;
L = L#(help*help`);
Linv = inv(-L);
b22 =repeat(
0,p+(q*(q+1)/2)+1,p+(q*(q+1)/2)+1);b11 = b22;
help = L[(p+
1):(nrow(L)), (p+1):(ncol(L))];help = -inv(-help);
b22[(p+
1):(nrow(L)), (p+1):(ncol(L))] = help;b22 = -inv(-L) - b22;
help = L[
1:p,1:p];help = -inv(-help);
b11[
1:p, 1:p] = help;b11 = -inv(-L) -b11;
/* Part to calculate the influence measures */
zero = {
0};uno = {
1};begin =
1;n=nrow(nfrec);
do s=
1 to n ;ni = nfrec[s,];
end = begin -
1 + ni;fixedi = fixed[begin:end,];
randomi = random[begin:end,];
respi = resp[begin:end];
res = respi-fixedi*fixedsol;
begin = end +
1;V = randomi*D*randomi` + sigma2*I(ni);
W = inv(V);
Ci =
2#firstT[,s]`*Linv*firstT[,s];Cai = -(
2#firstT[,s]`*b22*firstT[,s]);Cbi = -(
2#firstT[,s]`*b11*firstT[,s]);onei = fixedi`*W*res;
onei = onei`*onei;
twoi = randomi`*W*randomi - randomi`*W*res*res`*W*randomi;
twoi = trace(twoi*twoi`);
threei =trace(W) - res`*W*W*res;
threei = (threei##
2);call eigen(egval,egvect,W);
WW = egvect*diag(sqrt(egval))*egvect`;
rri = WW*res;
xxi = WW*fixedi;
zzi = WW*randomi;
oneai = xxi*xxi`;
oneai = sqrt(trace(oneai*oneai`));
onebi = rri*rri`;
onebi = sqrt(trace(onebi*onebi`));
twoai = zzi*zzi`;
twoai = trace(twoai*twoai`);
twobi = I(ni) - rri*rri`;
twobi = trace(twobi*twobi`);
threeai = trace(W*W`);
C = C//Ci;
Ca = Ca//Cai;
Cb = Cb//Cbi;
one = one//onei;
two = two//twoi;
three = three//threei;
onea = onea//oneai;
oneb = oneb//onebi;
twoa = twoa//twoai;
twob = twob//twobi;
threea = threea//threeai;
end;
/*Setting the output dataset with the diagnostic measures */
out=C||Ca||Cb||onea||oneb||twoa||twob||threea;
/* onea-> norm(xixi'2);
oneb-> norm(R2);
twoa-> norm(zizi'2);
twob-> norm(I-riri'2);
threea-> norm(Vi2) */
varnames = {
'C_i' 'C_i_F' 'C_i_V' 'F1' 'F2' 'V1' 'V2' 'V3'};create &outdata from out [colname= varnames];
append from out;
close covfixed;
close nfrec;
close &data;
close varcov;
close covpar;
close fixedsol;
quit;
/* End of the IML procedure */
data temp;
set nfrec;
keep &idno count;
run;
data &outdata;
merge temp &outdata;
run;
/* Printing the results */
proc print data=&outdata;
run;
/* Plotting the results */
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Total local influence';proc gplot data = &outdata;
plot C_i*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Local influence for fixed effects';proc gplot data = &outdata;
plot C_i_F*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Local influence for variance components';proc gplot data = &outdata;
plot C_i_V*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Covariates in mean structure';proc gplot data = &outdata;
plot F1*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Residuals for mean structure';proc gplot data = &outdata;
plot F2*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Covariates in covariance structure';proc gplot data = &outdata;
plot V1*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Residuals for covariance structure';proc gplot data = &outdata;
plot V2*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Measure of variability';proc gplot data = &outdata;
plot V3*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;
symbol v=dot i=join;
title f=swissb h=
3 'Number of repeated measures per subject';proc gplot data = &outdata;
plot count*&idno / haxis=axis1 vaxis=axis2;
axis1 label=(f=swissb h=
2 'Identification number') w=5;axis2 label=none w=
5 ;run;
title
' ';%mend
;/* End of the macro */
/*
_______________________________________________________________________________
| |
| REMARKS |
|_______________________________________________________________________________|
The macro is called by specifying
%LocInfl(data=, idno =, varesp=, fixedef=, randomef=, outdata=);
in which the parameters have the following meaning :
(a) data : the name of a SAS data set
(b) idno : the name of the variable in the data set which contains an identification
number for the subjects in the data set. All records with the same value
for `idno' are assumed to be from the same subject
(c) varesp : the response variable
(d) fixedef : the fixed effects in the model. Note that, in contrast to most regression
procedures in SAS, no intercept is specified by default. Hence, if an
intercept is to be included it should be specified explicitly as one of
the effects in the `fixedef=' statement.
(e) randomef : the random effects in the model. Note that, as is the case in the SAS
procedure MIXED, no intercept is specified by default. Hence, if an
intercept is to be included it should be specified explicitly as one of
the effects in the `randomef=' statement.
(f) outdata : name of the SAS data set where the results will be saved.
The results will be saved in the SAS data set which has been specified in the
`outdata=' statement. The file has one record for each subject in the original
data base, and has the following variables :
(a) idno : the identification variable in the original data set
(b) count : the number of repeated measures for the different subjects
(c) C_i : the total local influence
(d) C_i_F : the local influence for the fixed effects in the model
(e) C_i_V : the local influence for the variance components in the model
(f) F1 : the contribution of the covariates in the mean structure to the local
influence for the fixed effects
(g) F2 : the contribution of the residuals for the mean structure to the local
influence for the fixed effects
(h) V1 : the contribution of the covariates in the covariance structure to the
local influence for the variance components
(i) V2 : the contribution of the residuals for the covariance structure to the
local influence for the variance components
(j) V3 : the contribution of the measure of variability to the local influence
for the variance components
When the macro is executed, the OUTPUT screen shows the following output :
(a) Output from fitting the specified model with the procedure MIXED. Note that
estimation is done via the method of maximum likelihood.
(b) A printed version of the output data set.
In the GRAPH screen, 9 indexplots are prepared :
(a) the total local influence
(b) the local influence for the fixed effects in the model
(c) the local influence for the variance components in the model
(d) the contribution of the covariates in the mean structure to the local
influence for the fixed effects
(e) the contribution of the residuals for the mean structure to the local
influence for the fixed effects
(f) the contribution of the covariates in the covariance structure to the
local influence for the variance components
(g) the contribution of the residuals for the covariance structure to the
local influence for the variance components
(h) the contribution of the measure of variability to the local influence
for the variance components
(i) the number of repeated measures for the different subjects
_______________________________________________________________________________
| |
| EXAMPLES |
|_______________________________________________________________________________|
(1) Example 1 : The prostate data
_________________________________
We apply the above SAS macro to repeat the influence analysis for the prostate cancer
data, as is described in Verbeke and Molenberghs (Section 3.13.2).
The SAS code is given by :
data prostate;
set prostate;
int = 1;
atime = agediag*time;
ctime = control*time;
btime = bph*time;
ltime = loccanc*time;
mtime = metcanc*time;
atime2 = agediag*time2;
ctime2 = control*time2;
btime2 = bph*time2;
ltime2 = loccanc*time2;
mtime2 = metcanc*time2;
run;
%LocInfl(data=prostate,
idno= id,
varesp=lnpsa,
fixedef= agediag control bph loccanc metcanc
atime ctime btime ltime mtime
atime2 ctime2 btime2 ltime2 mtime2,
randomef= int time time2,
outdata = out);
(2) Example 2 : Growth curves
_____________________________
As a second example, we apply the local influence approach to the growth curves discussed
by Verbeke and Molenberghs in Section 4.2.
The model we use is a linear mixed model with a linear average trend for each of
the three subgroups separately, and with random intercepts and random slopes for age.
The SAS code is given by :
data growth;
input height child age group;
cards;
111.0 1 6 1
116.4 1 7 1
121.7 1 8 1
126.3 1 9 1
130.5 1 10 1
110.0 2 6 1
115.8 2 7 1
121.5 2 8 1
126.6 2 9 1
131.4 2 10 1
113.7 3 6 1
119.7 3 7 1
125.3 3 8 1
130.1 3 9 1
136.0 3 10 1
114.0 4 6 1
118.9 4 7 1
124.6 4 8 1
129.1 4 9 1
134.0 4 10 1
114.5 5 6 1
122.0 5 7 1
126.4 5 8 1
131.2 5 9 1
135.0 5 10 1
112.0 6 6 1
117.3 6 7 1
124.4 6 8 1
129.2 6 9 1
135.2 6 10 1
116.0 7 6 2
122.0 7 7 2
126.6 7 8 2
132.6 7 9 2
137.6 7 10 2
117.6 8 6 2
123.2 8 7 2
129.3 8 8 2
134.5 8 9 2
138.9 8 10 2
121.0 9 6 2
127.3 9 7 2
134.5 9 8 2
139.9 9 9 2
145.4 9 10 2
114.5 10 6 2
119.0 10 7 2
124.0 10 8 2
130.0 10 9 2
135.1 10 10 2
117.4 11 6 2
123.2 11 7 2
129.5 11 8 2
134.5 11 9 2
140.0 11 10 2
113.7 12 6 2
119.7 12 7 2
125.3 12 8 2
130.1 12 9 2
135.9 12 10 2
113.6 13 6 2
119.1 13 7 2
124.8 13 8 2
130.8 13 9 2
136.3 13 10 2
120.4 14 6 3
125.0 14 7 3
132.0 14 8 3
136.6 14 9 3
140.7 14 10 3
120.2 15 6 3
128.5 15 7 3
134.6 15 8 3
141.0 15 9 3
146.5 15 10 3
118.9 16 6 3
125.6 16 7 3
132.1 16 8 3
139.1 16 9 3
144.0 16 10 3
120.7 17 6 3
126.7 17 7 3
133.8 17 8 3
140.7 17 9 3
146.0 17 10 3
121.0 18 6 3
128.1 18 7 3
134.3 18 8 3
140.3 18 9 3
144.0 18 10 3
115.9 19 6 3
121.3 19 7 3
127.4 19 8 3
135.1 19 9 3
141.1 19 10 3
125.1 20 6 3
131.8 20 7 3
141.3 20 8 3
146.8 20 9 3
152.3 20 10 3
;
run;
data growth;
set growth;
age = age-6;
/* such that intercepts are the values at the start
of the observation period */
int =
1;small = (group=
1);medium = (group=
2);tall = (group=
3);sage = small*age;
mage = medium*age;
tage = tall*age;
run;
%LocInfl(data=growth,
idno= child,
varesp=height,
fixedef= small medium tall sage mage tage,
randomef= int age,
outdata = out);
(
3) Example 3 : Growth data___________________________
For the last example, we apply the local influence approach to the growth data discussed
by Verbeke
and Molenberghs in Section 4.4.The model we use is a linear mixed model with a linear average trend for each of
the three subgroups separately,
and with random intercepts and random slopes for age.This is Model
6 described in Section 4.4.7.The SAS code is given by :
data growth;
input idnr age sex measure;
cards;
1 8 2 21.0
1 10 2 20.0
1 12 2 21.5
1 14 2 23.0
2 8 2 21.0
2 10 2 21.5
2 12 2 24.0
2 14 2 25.5
3 8 2 20.5
3 10 2 24.0
3 12 2 24.5
3 14 2 26.0
4 8 2 23.5
4 10 2 24.5
4 12 2 25.0
4 14 2 26.5
5 8 2 21.5
5 10 2 23.0
5 12 2 22.5
5 14 2 23.5
6 8 2 20.0
6 10 2 21.0
6 12 2 21.0
6 14 2 22.5
7 8 2 21.5
7 10 2 22.5
7 12 2 23.0
7 14 2 25.0
8 8 2 23.0
8 10 2 23.0
8 12 2 23.5
8 14 2 24.0
9 8 2 20.0
9 10 2 21.0
9 12 2 22.0
9 14 2 21.5
10 8 2 16.5
10 10 2 19.0
10 12 2 19.0
10 14 2 19.5
11 8 2 24.5
11 10 2 25.0
11 12 2 28.0
11 14 2 28.0
12 8 1 26.0
12 10 1 25.0
12 12 1 29.0
12 14 1 31.0
13 8 1 21.5
13 10 1 22.5
13 12 1 23.0
13 14 1 26.5
14 8 1 23.0
14 10 1 22.5
14 12 1 24.0
14 14 1 27.5
15 8 1 25.5
15 10 1 27.5
15 12 1 26.5
15 14 1 27.0
16 8 1 20.0
16 10 1 23.5
16 12 1 22.5
16 14 1 26.0
17 8 1 24.5
17 10 1 25.5
17 12 1 27.0
17 14 1 28.5
18 8 1 22.0
18 10 1 22.0
18 12 1 24.5
18 14 1 26.5
19 8 1 24.0
19 10 1 21.5
19 12 1 24.5
19 14 1 25.5
20 8 1 23.0
20 10 1 20.5
20 12 1 31.0
20 14 1 26.0
21 8 1 27.5
21 10 1 28.0
21 12 1 31.0
21 14 1 31.5
22 8 1 23.0
22 10 1 23.0
22 12 1 23.5
22 14 1 25.0
23 8 1 21.5
23 10 1 23.5
23 12 1 24.0
23 14 1 28.0
24 8 1 17.0
24 10 1 24.5
24 12 1 26.0
24 14 1 29.5
25 8 1 22.5
25 10 1 25.5
25 12 1 25.5
25 14 1 26.0
26 8 1 23.0
26 10 1 24.5
26 12 1 26.0
26 14 1 30.0
27 8 1 22.0
27 10 1 21.5
27 12 1 23.5
27 14 1 25.0
;
data growth;
set growth;
int=
1;girls = (sex=
2);boys = (sex=
1);gage = girls*age;
bage = boys*age;
run;
%LocInfl(data=growth,
idno= idnr,
varesp=measure,
fixedef= girls boys gage bage,
randomef= int age,
outdata = out);
run;
*/