/* Macro for the informal check whether or not observed longitudinal profiles can be well
described by a specific linear regression function
Written by : Geert Verbeke, Biostatistical Centre, K.U.Leuven
Date : December 1, 1999
Changed : November 2, 2000 : Allow subjects with constant response. Their R^2 measure
is set equal to 1
Input : INDATA : input data set, which does not contain any missing values in the
variables to be used in this macro
OUTDATA : output data set, which contains 3 variables. The first variable contains
the identification number of each subject. The second variable
is the coefficient of multiple determination for each subject.
The third variable contains the number of
repeated measurements on which the calculation was based
Y : response variable
ID : numeric identification variable for the subjects in the input data set
X1 : covariates needed to model each profile. Note that no intercept term will
be included by default. If an intercept term is required, it should be
explicitly included via a variable which contains only ones
X2 : a set of variables not included in X1, for which it will be tested using
an overall F-test whether or not these variables are needed for describing
the observed longitudinal profiles
Output : (1) an overall F-test is performed to test whether the covariates in X2 are
needed to describe the observed longitudinal profiles, in addition to the
covariates already in X1
(2) an overall coefficient of multiple determination is calculated
(3) the output data set OUTDATA is created */
%macro gof(INDATA = ,
OUTDATA = ,
Y = ,
ID = ,
X1 = ,
X2 = );
proc sort data = &indata;
by &id;
run;
proc freq data = &indata;
tables &id / out=uit noprint ;
run;
proc iml;
use uit;
read all var {count} into aantal;
close uit;
use &indata;
labelx1 = {&X1};
labelx2 = {&X2};
labely = {&Y};
labelid = {&id};
read all var labelid into id;
read all var labely into y;
read all var labelx1 into x1;
read all var labelx2 into x2;
close &indata;
p=ncol(x1)+ncol(x2);
ftel1 = 0;
fnoem1 = 0;
dftel1 = 0;
dfnoem1 = 0;
begin = 1;
do i = 1 to nrow(aantal);
ni = aantal[i,];
einde = begin + ni - 1;
if ni >= p then do;
x1i = x1[(begin:einde),];
x2i = x2[(begin:einde),];
yi = y[(begin:einde),];
xi = x1i||x2i;
ri = yi-xi*(inv(xi`*xi))*xi`*yi;
rHi = yi - x1i*(inv(x1i`*x1i))*x1i`*yi;
rssi = ri`*ri;
rssHi = rHi`*rHi;
ftel1 = ftel1 + (rssHi - rssi);
fnoem1 = fnoem1 + rssi;
dftel1 = dftel1 + ncol(x2);
dfnoem1 = dfnoem1 + (ni-p);
end;
begin = einde + 1;
end;
f = (ftel1/dftel1)/(fnoem1/dfnoem1);
c = {"F" "ndf" "ddf" "p-value"};
F_test = (F||dftel1||dfnoem1||(1-probf(f,dftel1,dfnoem1)));
print F_test[colname=c format=10.4];
p=ncol(x1);
R = 0||0||0;
ssto = 0;
ssr = 0;
begin = 1;
do i = 1 to nrow(aantal);
ni = aantal[i,];
einde = begin + ni - 1;
if ni >= p then do;
idi = id[begin,];
x1i = x1[(begin:einde),];
yi = y[(begin:einde),];
ri = yi - x1i*(inv(x1i`*x1i))*x1i`*yi;
ssei = ri`*ri;
sstoi = (yi-(j(1,ni)*yi/ni))`*(yi-(j(1,ni)*yi/ni));
ssri = sstoi-ssei;
ssto = ssto+sstoi;
ssr = ssr+ssri;
if sstoi > 0 then do;
r = r//(idi||(ssri/sstoi)||ni);
end;
else do;
r = r//(idi||1||ni);
end;
end;
else do;
idi = id[begin,];
yi = y[(begin:einde),];
sstoi = (yi-(j(1,ni)*yi/ni))`*(yi-(j(1,ni)*yi/ni));
ssto = ssto+sstoi;
ssr = ssr+sstoi;
r = r//(idi||1||ni);
end;
begin = einde + 1;
end;
Tot_R2=ssr/ssto;
c = {"R2"};
print Tot_R2[colname=c format=10.4];
r=r[2:nrow(r),];
naam = labelid||"R2"||"ni";
create &outdata var naam;
append from r;
quit;
run;
%mend;