/*

_______________________________________________________________________________

| |

| SAS MACRO FOR LOCAL INFLUENCE ANALYSIS IN LINEAR MIXED MODELS |

|_______________________________________________________________________________|

 

Prepared by : Franz Torres Barbosa

Center for Molecular Immunology, Habana, Cuba

E-mail : franz@ict.cim.sld.cu

Geert Verbeke

Biostatistical Centre for Clinical Trials, K.U.Leuven, Belgium

E-mail : geert.verbeke@biostat.be

References : Verbeke, G. and Molenberghs, G. (1997)

`Linear Mixed Models in Practice : A SAS-Oriented Approach,'

Lecture Notes in Statistics 126, New-York : Springer-Verlag.

Lesaffre, E. and Verbeke, G. (1997)

`Local Influence in Linear Mixed Models,'

Biometrics, to appear.

Purpose of the macro : The macro performs a local influence analysis for the

detection of influential subjects in linear mixed models. We refer to Verbeke and

Molenberghs (Section 3.13.2) for a description of the procedure, and to Lesaffre

and Verbeke for the technical details.

Three examples of how to use the macro are given at the bottom of this file

*/

 

 

 

 

 

 

/*

_______________________________________________________________________________

| |

| THE MACRO CODE |

|_______________________________________________________________________________|

*/

%macro LocInfl(data=, idno =, varesp=, fixedef=, randomef=, outdata=);

/* Initialization */

%if %bquote(&data)= %then %let data=&syslast;

/* To calculate the number of repeated measures per subject and the time points*/

proc freq data=&data noprint;

tables &idno /out=nfrec;

run;

proc sort data=&data;

by &idno;

run;

/* Fitting the model in Proc mixed */

proc mixed data=&data method=ml asycov;

class &idno;

model &varesp = &fixedef /noint covb s;

random &randomef /type=un sub=&idno g;

ods output solutionf= fixedsol;

ods output covparms= covpar;

ods output G= Dmatr;

ods output asycov = varcov;

ods output covb= covfixed;

run;

 

proc iml;

reset noprint;

/* Matrix with the solution of the fixed effects */

use fixedsol;

read all into fixedsol;

fixedsol= fixedsol[,1];

/* Matrix with the solution for the var-cov of the random. eff. parameters */

use covpar;

read all into covpar;

n_covp = nrow(covpar);

/* Var-Cov matrix for the fixed effects */

use covfixed;

read all into fixedvar;

n_fix = nrow(fixedvar);

fixedvar = fixedvar[,2:1+n_fix];

fixedvar= -inv(fixedvar);

/* Var-Cov matrix for the random components */

use varcov;

read all into varcov;

n_vc = nrow(varcov);

varcov = varcov[,1:n_vc];

varcov= -inv(varcov);

/* Matrix with the data to obtain the design matrices for the fixed and random effects */

use &data;

labelx = {&fixedef};

labelz = {&randomef};

labely = {&varesp};

read all var labelx into fixed;

read all var labelz into random;

read all var labely into resp;

/* Matrix with the frequencies for each of the subjects */

use nfrec;

read all into nfrec;

nfrec = nfrec[,2];

/* G=D matrix */

use Dmatr;

read all into D;

n_D = nrow(D);

D = D[,3:2+n_D];

/* Procedure to calculate the first derivative */

start Deriv(fixed, random, res, D, sigma2);

/*** With respect to alpha ***/

ni = nrow(fixed);

p = ncol(fixed);

q = ncol(random);

V = random*D*random` + sigma2*I(ni);

W = inv(V);

zero = {0};

help1=repeat(zero,p,1);

help2 =-2#fixed`*W*res;

first = help1 + help2;

/*** With respect to D ***/

j = 1;

do while (j <= q);

i = 1;

do while (i <= j);

if i=j then mul=1;

else mul=0;

hulp1 = (2-(mul))#random[,i]`*W*random[,j];

hulp2 = -(2-(mul))#random[,i]`*W*res*res`*W*random[,j]; /* Check this derivatives */

first = first//(hulp1 + hulp2);

i = i + 1;

end;

j = j + 1;

end;

/*** With respect to sigma2 ***/

hulp1 = trace(W);

hulp2 = -res`*W*W*res;

first3 = hulp1 + hulp2;

first = first//first3;

first = -first/2;

return(first);

finish;

/* Procedure to calculate the second derivative */

start SecDer(fixed, random, res, D, sigma2);

/* 2nd derivative alpha and D */

ni = nrow(fixed);

p = ncol(fixed);

q = ncol(random);

V = random*D*random` + sigma2*I(ni);

W = inv(V);

zero = {0};

j=1;

do while (j<=q);

i=1;

do while (i<=j);

help1 = 0;

help2 = 2#fixed`*W*random[,i]*random[,j]`*W*res;

if i=j then mul=1;

else mul=0;

help2 = help2 + 2#(1-mul)#fixed`*W*random[,j]*random[,i]`*W*res;

ret = help1` + help2`; /* ver como arreglar esto */

fila = fila//ret;

i = i+1;

end;

j= j+1;

end;

/* the 2nd derivative alpha and sigma2 */

help1 = 0;

help2 = 2#fixed`*W*W*res;

ret = help1` + help2`;

fila = fila//ret;

return(fila);

finish;

/* Initializations for the whole loop and calculations of the derivatives */

q = ncol(random);

p = ncol(fixed);

uno = {1};

begin = 1;

sum=repeat(0,(q*(q+1)/2)+1,p);

n=nrow(nfrec);

sigma2 = covpar[n_covp,];

s=1;

do s=1 to n ;

ni = nfrec[s,];

end = begin -1 + ni;

fixedi = fixed[begin:end,];

randomi = random[begin:end,];

respi = resp[begin:end];

res = respi-fixedi*fixedsol;

begin = end +1;

first = Deriv(fixedi, randomi, res, D, sigma2);

firstT = firstT||first;

second = SecDer(fixedi, randomi, res, D, sigma2);

sum = sum - (second/2);

end;

L = (fixedvar//sum)||(sum`//varcov);

vect = repeat(sqrt(2)-1,q,1);

help = diag(vect)+1;

help = symsqr(help);

help = repeat(uno,p,1)//help//1;

firstT = firstT#help;

L = L#(help*help`);

Linv = inv(-L);

b22 =repeat(0,p+(q*(q+1)/2)+1,p+(q*(q+1)/2)+1);

b11 = b22;

help = L[(p+1):(nrow(L)), (p+1):(ncol(L))];

help = -inv(-help);

b22[(p+1):(nrow(L)), (p+1):(ncol(L))] = help;

b22 = -inv(-L) - b22;

help = L[1:p,1:p];

help = -inv(-help);

b11[1:p, 1:p] = help;

b11 = -inv(-L) -b11;

/* Part to calculate the influence measures */

zero = {0};

uno = {1};

begin = 1;

n=nrow(nfrec);

do s=1 to n ;

ni = nfrec[s,];

end = begin -1 + ni;

fixedi = fixed[begin:end,];

randomi = random[begin:end,];

respi = resp[begin:end];

res = respi-fixedi*fixedsol;

begin = end +1;

V = randomi*D*randomi` + sigma2*I(ni);

W = inv(V);

Ci = 2#firstT[,s]`*Linv*firstT[,s];

Cai = -(2#firstT[,s]`*b22*firstT[,s]);

Cbi = -(2#firstT[,s]`*b11*firstT[,s]);

onei = fixedi`*W*res;

onei = onei`*onei;

twoi = randomi`*W*randomi - randomi`*W*res*res`*W*randomi;

twoi = trace(twoi*twoi`);

threei =trace(W) - res`*W*W*res;

threei = (threei##2);

call eigen(egval,egvect,W);

WW = egvect*diag(sqrt(egval))*egvect`;

rri = WW*res;

xxi = WW*fixedi;

zzi = WW*randomi;

oneai = xxi*xxi`;

oneai = sqrt(trace(oneai*oneai`));

onebi = rri*rri`;

onebi = sqrt(trace(onebi*onebi`));

twoai = zzi*zzi`;

twoai = trace(twoai*twoai`);

twobi = I(ni) - rri*rri`;

twobi = trace(twobi*twobi`);

threeai = trace(W*W`);

C = C//Ci;

Ca = Ca//Cai;

Cb = Cb//Cbi;

one = one//onei;

two = two//twoi;

three = three//threei;

onea = onea//oneai;

oneb = oneb//onebi;

twoa = twoa//twoai;

twob = twob//twobi;

threea = threea//threeai;

end;

/*Setting the output dataset with the diagnostic measures */

out=C||Ca||Cb||onea||oneb||twoa||twob||threea;

/* onea-> norm(xixi'2);

oneb-> norm(R2);

twoa-> norm(zizi'2);

twob-> norm(I-riri'2);

threea-> norm(Vi2) */

varnames = {'C_i' 'C_i_F' 'C_i_V' 'F1' 'F2' 'V1' 'V2' 'V3'};

create &outdata from out [colname= varnames];

append from out;

close covfixed;

close nfrec;

close &data;

close varcov;

close covpar;

close fixedsol;

quit;

/* End of the IML procedure */

data temp;

set nfrec;

keep &idno count;

run;

data &outdata;

merge temp &outdata;

run;

/* Printing the results */

proc print data=&outdata;

run;

/* Plotting the results */

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Total local influence';

proc gplot data = &outdata;

plot C_i*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Local influence for fixed effects';

proc gplot data = &outdata;

plot C_i_F*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Local influence for variance components';

proc gplot data = &outdata;

plot C_i_V*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Covariates in mean structure';

proc gplot data = &outdata;

plot F1*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Residuals for mean structure';

proc gplot data = &outdata;

plot F2*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Covariates in covariance structure';

proc gplot data = &outdata;

plot V1*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Residuals for covariance structure';

proc gplot data = &outdata;

plot V2*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Measure of variability';

proc gplot data = &outdata;

plot V3*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

goptions reset=goptions device=win targetdevice=winprtm rotate=landscape;

symbol v=dot i=join;

title f=swissb h=3 'Number of repeated measures per subject';

proc gplot data = &outdata;

plot count*&idno / haxis=axis1 vaxis=axis2;

axis1 label=(f=swissb h=2 'Identification number') w=5;

axis2 label=none w=5 ;

run;

title ' ';

%mend;

/* End of the macro */

 

 

 

 

 

 

 

 

/*

_______________________________________________________________________________

| |

| REMARKS |

|_______________________________________________________________________________|

The macro is called by specifying

%LocInfl(data=, idno =, varesp=, fixedef=, randomef=, outdata=);

in which the parameters have the following meaning :

(a) data : the name of a SAS data set

(b) idno : the name of the variable in the data set which contains an identification

number for the subjects in the data set. All records with the same value

for `idno' are assumed to be from the same subject

(c) varesp : the response variable

(d) fixedef : the fixed effects in the model. Note that, in contrast to most regression

procedures in SAS, no intercept is specified by default. Hence, if an

intercept is to be included it should be specified explicitly as one of

the effects in the `fixedef=' statement.

(e) randomef : the random effects in the model. Note that, as is the case in the SAS

procedure MIXED, no intercept is specified by default. Hence, if an

intercept is to be included it should be specified explicitly as one of

the effects in the `randomef=' statement.

(f) outdata : name of the SAS data set where the results will be saved.

The results will be saved in the SAS data set which has been specified in the

`outdata=' statement. The file has one record for each subject in the original

data base, and has the following variables :

(a) idno : the identification variable in the original data set

(b) count : the number of repeated measures for the different subjects

(c) C_i : the total local influence

(d) C_i_F : the local influence for the fixed effects in the model

(e) C_i_V : the local influence for the variance components in the model

(f) F1 : the contribution of the covariates in the mean structure to the local

influence for the fixed effects

(g) F2 : the contribution of the residuals for the mean structure to the local

influence for the fixed effects

(h) V1 : the contribution of the covariates in the covariance structure to the

local influence for the variance components

(i) V2 : the contribution of the residuals for the covariance structure to the

local influence for the variance components

(j) V3 : the contribution of the measure of variability to the local influence

for the variance components

When the macro is executed, the OUTPUT screen shows the following output :

(a) Output from fitting the specified model with the procedure MIXED. Note that

estimation is done via the method of maximum likelihood.

(b) A printed version of the output data set.

In the GRAPH screen, 9 indexplots are prepared :

(a) the total local influence

(b) the local influence for the fixed effects in the model

(c) the local influence for the variance components in the model

(d) the contribution of the covariates in the mean structure to the local

influence for the fixed effects

(e) the contribution of the residuals for the mean structure to the local

influence for the fixed effects

(f) the contribution of the covariates in the covariance structure to the

local influence for the variance components

(g) the contribution of the residuals for the covariance structure to the

local influence for the variance components

(h) the contribution of the measure of variability to the local influence

for the variance components

(i) the number of repeated measures for the different subjects

 

 

 

 

 

 

 

_______________________________________________________________________________

| |

| EXAMPLES |

|_______________________________________________________________________________|

 

(1) Example 1 : The prostate data

_________________________________

We apply the above SAS macro to repeat the influence analysis for the prostate cancer

data, as is described in Verbeke and Molenberghs (Section 3.13.2).

The SAS code is given by :

data prostate;

set prostate;

int = 1;

atime = agediag*time;

ctime = control*time;

btime = bph*time;

ltime = loccanc*time;

mtime = metcanc*time;

atime2 = agediag*time2;

ctime2 = control*time2;

btime2 = bph*time2;

ltime2 = loccanc*time2;

mtime2 = metcanc*time2;

run;

%LocInfl(data=prostate,

idno= id,

varesp=lnpsa,

fixedef= agediag control bph loccanc metcanc

atime ctime btime ltime mtime

atime2 ctime2 btime2 ltime2 mtime2,

randomef= int time time2,

outdata = out);

 

 

(2) Example 2 : Growth curves

_____________________________

As a second example, we apply the local influence approach to the growth curves discussed

by Verbeke and Molenberghs in Section 4.2.

The model we use is a linear mixed model with a linear average trend for each of

the three subgroups separately, and with random intercepts and random slopes for age.

The SAS code is given by :

data growth;

input height child age group;

cards;

111.0 1 6 1

116.4 1 7 1

121.7 1 8 1

126.3 1 9 1

130.5 1 10 1

110.0 2 6 1

115.8 2 7 1

121.5 2 8 1

126.6 2 9 1

131.4 2 10 1

113.7 3 6 1

119.7 3 7 1

125.3 3 8 1

130.1 3 9 1

136.0 3 10 1

114.0 4 6 1

118.9 4 7 1

124.6 4 8 1

129.1 4 9 1

134.0 4 10 1

114.5 5 6 1

122.0 5 7 1

126.4 5 8 1

131.2 5 9 1

135.0 5 10 1

112.0 6 6 1

117.3 6 7 1

124.4 6 8 1

129.2 6 9 1

135.2 6 10 1

116.0 7 6 2

122.0 7 7 2

126.6 7 8 2

132.6 7 9 2

137.6 7 10 2

117.6 8 6 2

123.2 8 7 2

129.3 8 8 2

134.5 8 9 2

138.9 8 10 2

121.0 9 6 2

127.3 9 7 2

134.5 9 8 2

139.9 9 9 2

145.4 9 10 2

114.5 10 6 2

119.0 10 7 2

124.0 10 8 2

130.0 10 9 2

135.1 10 10 2

117.4 11 6 2

123.2 11 7 2

129.5 11 8 2

134.5 11 9 2

140.0 11 10 2

113.7 12 6 2

119.7 12 7 2

125.3 12 8 2

130.1 12 9 2

135.9 12 10 2

113.6 13 6 2

119.1 13 7 2

124.8 13 8 2

130.8 13 9 2

136.3 13 10 2

120.4 14 6 3

125.0 14 7 3

132.0 14 8 3

136.6 14 9 3

140.7 14 10 3

120.2 15 6 3

128.5 15 7 3

134.6 15 8 3

141.0 15 9 3

146.5 15 10 3

118.9 16 6 3

125.6 16 7 3

132.1 16 8 3

139.1 16 9 3

144.0 16 10 3

120.7 17 6 3

126.7 17 7 3

133.8 17 8 3

140.7 17 9 3

146.0 17 10 3

121.0 18 6 3

128.1 18 7 3

134.3 18 8 3

140.3 18 9 3

144.0 18 10 3

115.9 19 6 3

121.3 19 7 3

127.4 19 8 3

135.1 19 9 3

141.1 19 10 3

125.1 20 6 3

131.8 20 7 3

141.3 20 8 3

146.8 20 9 3

152.3 20 10 3

;

run;

data growth;

set growth;

age = age-6;

/* such that intercepts are the values at the start

of the observation period */

int =1;

small = (group=1);

medium = (group=2);

tall = (group=3);

sage = small*age;

mage = medium*age;

tage = tall*age;

run;

%LocInfl(data=growth,

idno= child,

varesp=height,

fixedef= small medium tall sage mage tage,

randomef= int age,

outdata = out);

 

 

(3) Example 3 : Growth data

___________________________

For the last example, we apply the local influence approach to the growth data discussed

by Verbeke and Molenberghs in Section 4.4.

The model we use is a linear mixed model with a linear average trend for each of

the three subgroups separately, and with random intercepts and random slopes for age.

This is Model 6 described in Section 4.4.7.

The SAS code is given by :

data growth;

input idnr age sex measure;

cards;

1 8 2 21.0

1 10 2 20.0

1 12 2 21.5

1 14 2 23.0

2 8 2 21.0

2 10 2 21.5

2 12 2 24.0

2 14 2 25.5

3 8 2 20.5

3 10 2 24.0

3 12 2 24.5

3 14 2 26.0

4 8 2 23.5

4 10 2 24.5

4 12 2 25.0

4 14 2 26.5

5 8 2 21.5

5 10 2 23.0

5 12 2 22.5

5 14 2 23.5

6 8 2 20.0

6 10 2 21.0

6 12 2 21.0

6 14 2 22.5

7 8 2 21.5

7 10 2 22.5

7 12 2 23.0

7 14 2 25.0

8 8 2 23.0

8 10 2 23.0

8 12 2 23.5

8 14 2 24.0

9 8 2 20.0

9 10 2 21.0

9 12 2 22.0

9 14 2 21.5

10 8 2 16.5

10 10 2 19.0

10 12 2 19.0

10 14 2 19.5

11 8 2 24.5

11 10 2 25.0

11 12 2 28.0

11 14 2 28.0

12 8 1 26.0

12 10 1 25.0

12 12 1 29.0

12 14 1 31.0

13 8 1 21.5

13 10 1 22.5

13 12 1 23.0

13 14 1 26.5

14 8 1 23.0

14 10 1 22.5

14 12 1 24.0

14 14 1 27.5

15 8 1 25.5

15 10 1 27.5

15 12 1 26.5

15 14 1 27.0

16 8 1 20.0

16 10 1 23.5

16 12 1 22.5

16 14 1 26.0

17 8 1 24.5

17 10 1 25.5

17 12 1 27.0

17 14 1 28.5

18 8 1 22.0

18 10 1 22.0

18 12 1 24.5

18 14 1 26.5

19 8 1 24.0

19 10 1 21.5

19 12 1 24.5

19 14 1 25.5

20 8 1 23.0

20 10 1 20.5

20 12 1 31.0

20 14 1 26.0

21 8 1 27.5

21 10 1 28.0

21 12 1 31.0

21 14 1 31.5

22 8 1 23.0

22 10 1 23.0

22 12 1 23.5

22 14 1 25.0

23 8 1 21.5

23 10 1 23.5

23 12 1 24.0

23 14 1 28.0

24 8 1 17.0

24 10 1 24.5

24 12 1 26.0

24 14 1 29.5

25 8 1 22.5

25 10 1 25.5

25 12 1 25.5

25 14 1 26.0

26 8 1 23.0

26 10 1 24.5

26 12 1 26.0

26 14 1 30.0

27 8 1 22.0

27 10 1 21.5

27 12 1 23.5

27 14 1 25.0

;

data growth;

set growth;

int=1;

girls = (sex=2);

boys = (sex=1);

gage = girls*age;

bage = boys*age;

run;

%LocInfl(data=growth,

idno= idnr,

varesp=measure,

fixedef= girls boys gage bage,

randomef= int age,

outdata = out);

run;

*/