Notice: On April 23, 2014, Statalist moved from an email list to a forum, based at statalist.org.
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
st: SAS macro to do.file
From
[email protected]
To
"[email protected]" <[email protected]>
Subject
st: SAS macro to do.file
Date
Wed, 07 Aug 2013 13:12:47 +0200
Dear All,
I use Stata 13 IC.
I need help for "translate" a SAS macro for NRI for Cox regression in
a do.file.
Is anyone interest?
The SAS macro is available at:
http://saswiki.org/images/5/53/16._KSFE_2012_M%C3%BChlenbruch_Makro_reclassification_phreg.sas
I also enclose copy of the macro (text)
************************************************************
Progname: reclassification_phreg.sas
Purpose: Creating a macro for the estimation of reclassification
statistics (NRI, IDI)
for the added predictive value of an new parameter
%nriidi macro by Lars Berglund is extended for cox-regression!!!
Author: Kristin Mühlenbruch [email protected]
(with the help of W. Bernigau) [email protected]
Date: 29.03..2011
Last Modification: 25.05.2011
Path: "____"
Output: ROC AUC values and comparisons
ROC Curves
Cox regression estimates
Risk Tables
NRI, continuous NRI
IDI
************************************************************;
/* Description of macro components
********************************************************************
***dataset - input dataset which includes all variables for analysis
***
***id - identification variable for each observation
***
***timevar - variable that contains the follow-up time of the
study participants ***
***outcome - dependant binary variable for the logistic and cox
regression model (Y variable) ***
***model1 - Model1 with all covariates included as independant
variables (sparser model) ***
***nvar1 - Number of variables included in model1 ***
***roc_title1 - Title for the ROC curve of model 1 ***
***model2 - Model2 with all covariates included as independant
variables from model1
+ added variables (larger model) ***
***nvar2 - Number of variables included in model2 ***
***roc_title2 - Title for the ROC curve of model 2 ***
***nriskcat - Number of categories for risk stratification ***
***riskcutoffs - Cut-off values for separating the risk categories ***
***risk_year - Time risk which will be estimated (5-year risk, 10-year
risk, etc.) ***
***base_muster - First Baseline pattern of covariates for determining
the survivor function
values for the variables are listed with blanks as delimiter ***
***printtable - option for the risk table output by event status
(Y=Yes/N=No) ***
******************************************************************************************************/
%macro reclassification_phreg(dataset, id, outcome,timevar,
model1,nvar1,roc_title1, model2, nvar2,roc_title2, nriskcat,
riskcutoffs, risk_year, base_muster, printtable);
/*Determining the ROC-AUC value for each of the models and Comparison
of those (Plots included)*/
ods graphics on;
proc logistic data=&dataset plots=roc descending;
model &outcome= &model2 ;
roc &roc_title1 &model1;
roc &roc_title2 &model2;
roccontrast reference(&roc_title1) / estimate e;
ods output ROCAssociation=rocass ROCContrastEstimate=rocdiff;
run;
%let Anz1=1;
%let Word=%scan(&model1,%eval(&Anz1+1));
%do %while (&Word ne);
%let Anz1=%eval(&Anz1+1);
%let Word=%scan(&model1,%eval(&Anz1+1));
%end;
%do i=1 %to &Anz1;
%let Mod1&i=%scan(&model1,&i);
%end;
/*Estimation of model 1 by cox regression*/
proc phreg data=&dataset;
model &timevar*&outcome(0)= &model1 ;
ods output ParameterEstimates=out1_1;
run;
proc transpose data=out1_1(keep=estimate) out=out1_2;
run;
data Inrisks1;
%do i=1 %to &Anz1;
%let base1_wert&i=%scan(&base_muster,&i);
%end;
%do i=1 %to &Anz1; &&mod1&i=&&base1_wert&i;
%end;
output;
run;
*Determining the individual risks out of the cox model parameters
from model 1;
proc phreg data=&dataset nosummary;
model &timevar*&outcome(0)=&model1 / rl;
baseline covariates=Inrisks1 out=riskscore1 survival=s lower=S_lower
upper=S_upper/nomean;
run;
data riskscore1_1;
set riskscore1(where=(&timevar>(&risk_year-0.01) and
&timevar<(&risk_year+0.01)));
absrisk=1-s;
LCL=1-S_upper;
UCL=1-S_lower;
run;
data out_1;
if _N_=1 then set out1_2;
set &dataset;
run;
data pred_1;
array col col1-col&Anz1.;
if _N_=1 then set riskscore1_1(keep=absrisk);
set out_1;
pred1=.;
exponent1=(col1*&mod11 %do i=2 %to &Anz1; +col&i*&&Mod1&i %end;);
pred1=1-(1-absrisk)**exp(exponent1);
run;
%let Anz2=1;
%let Word2=%scan(&model2,%eval(&Anz2+1));
%do %while (&Word2 ne);
%let Anz2=%eval(&Anz2+1);
%let Word2=%scan(&model2,%eval(&Anz2+1));
%end;
%do i=1 %to &Anz2;
%let Mod2&i=%scan(&model2,&i);
%end;
/*Estimation of model 2 by cox regression*/
proc phreg data=&dataset;
model &timevar*&outcome(0)=&model2;
ods output ParameterEstimates=out2_1;
run;
proc transpose data=out2_1(keep=estimate) out=out2_2;
run;
data Inrisks2;
%do i=1 %to &Anz2;
%let base2_wert&i=%scan(&base_muster,&i);
%end;
%do i=1 %to &Anz2; &&mod2&i=&&base2_wert&i;
%end;
output;
run;
*Determining the individual risks out of the cox model parameters
from model 2;
proc phreg data=&dataset nosummary;
model &timevar*&outcome(0)=&model2/ rl;
baseline covariates=Inrisks2 out=riskscore2 survival=s lower=S_lower
upper=S_upper/nomean;
run;
data riskscore2_1;
set riskscore2(where=(&timevar>(&risk_year-0.01) and
&timevar<(&risk_year+0.01)));
absrisk=1-s;
LCL=1-S_upper;
UCL=1-S_lower;
run;
data out_2;
if _n_=1 then set out2_2;
set &dataset;
run;
data pred_2;
array col col1-col&Anz2.;
if _N_=1 then set riskscore2_1(keep=absrisk);
set out_2;
exponent2=(col1*&mod21 %do i=2 %to &Anz2; +col&i*&&Mod2&i %end;);
pred2=1-(1-absrisk)**exp(exponent2);
run;
proc sort data=pred_1;
by &id;
run;
proc sort data=pred_2;
by &id;
run;
/*From here onwards using the %nriidi macro by Lars Berglund*/
data out;
merge pred_1(drop=absrisk col1--col&Anz1.) pred_2(drop=absrisk
col1--col&Anz2.);
by &id;
predcat1=.;
predcat2=.;
predcat1c=' ';
predcat2c=' ';
label predcat1c='Established risk factors';
label predcat2c='Established risk factors + new risk factors';
label &outcome='Cases(1)/Controls(0)=';
riskcoh=.;
riskcol=.;
riskcohc=' ';
riskcolc=' ';
ic=' ';
colon=':';
to='-';
one='1';
lt='<';
gt='>=';
delim='()';
modif='oq';
length riskco $ 400;
riskco=symget('riskcutoffs');
do i=1 to &nriskcat;
j=i-1;
if i=1 then do;
riskcoh=scan(riskco,i,delim,modif);
riskcohc=riskcoh;
if pred1 ne . and pred1 lt riskcoh then call
cats(predcat1c,one,colon,lt,riskcohc);
if pred1 ne . and pred1 lt riskcoh then predcat1=1;
if pred2 ne . and pred2 lt riskcoh then call
cats(predcat2c,one,colon,lt,riskcohc);
if pred2 ne . and pred2 lt riskcoh then predcat2=1;
end;
if i > 1 and i < &nriskcat then do;
riskcoh=scan(riskco,i,delim,modif);
riskcohc=riskcoh;
riskcol=scan(riskco,j,delim,modif);
riskcolc=riskcol;
ic=i;
if pred1 ne . and pred1 ge riskcol and pred1 lt riskcoh
then call cats(predcat1c,ic,colon,riskcolc,to,riskcohc);
if pred1 ne . and pred1 ge riskcol and pred1 lt riskcoh then predcat1=i;
if pred2 ne . and pred2 ge riskcol and pred2 lt riskcoh then call
cats(predcat2c,ic,colon,riskcolc,to,riskcohc);
if pred2 ne . and pred2 ge riskcol and pred2 lt riskcoh
then predcat2=i;
end;
if i = &nriskcat then do;
riskcoh=.;
riskcohc=' ';
riskcol=scan(riskco,j,delim,modif);
riskcolc=riskcol;
ic=i;
if pred1 ne . and pred1 ge riskcol then predcat1c=ic ||
colon || gt || left(riskcolc);
if pred1 ne . and pred1 ge riskcol then predcat1=i;
if pred2 ne . and pred2 ge riskcol then predcat2c=ic || colon ||
gt || left(riskcolc);
if pred2 ne . and pred2 ge riskcol then predcat2=i;
end;
output;
end;
run;
data out;
set out(where=(predcat1 ne . and predcat2 ne . and predcat1=i or
predcat1 ne . and predcat2 ne . and predcat2=i)); ***Modified;
diff_pred=pred2-pred1;
run;
data temp2;
do &outcome=0 to 1;
length &outcome 4;***Modified;
do predcat1=1 to &nriskcat;
do predcat2=1 to &nriskcat;
output;
end;
end;
end;
run;
proc sort data=temp2;
by &outcome predcat1 predcat2;
run;
***Adding continuous NRI and prior calculations for that;
data temp3;
set out;
n_up=.;
n_down=.;
if diff_pred>0 then up=1;
else up=0;
if diff_pred<0 then down=1;
else down=0;
run;
proc freq data=temp3 noprint;
tables &outcome*up*down/nocol nopercent norow out=outfr_3 sparse;
run;
data outfr_3;
set outfr_3(where=((up=1 and down=0) or (up=0 and
down=1))keep=&outcome up down count);
run;
proc transpose data=outfr_3 out=outfr_4;
run;
data outfr_4;
set outfr_4(rename=(col1=n_down_ne col2=n_up_ne col3=n_down_e
col4=n_up_e));
if _name_='COUNT';
run;
proc freq data=out noprint;
tables &outcome*predcat1*predcat2/nocol nopercent norow out=outfr sparse;
run;
%if %upcase(&printtable) = Y %then %do;
proc sort data=out ;
by descending &outcome predcat1 predcat2;
run;
proc tabulate data=out format=12.0 order=data;
class &outcome predcat1c predcat2c;
table &outcome,predcat1c all='Total',predcat2c all='Total' ;
run;
%end;
proc sort data=outfr;
by &outcome predcat1 predcat2;
run;
data outfr;
merge outfr temp2;
by &outcome predcat1 predcat2;
run;
data outfr;
set outfr;
drop &outcome percent;
run;
proc transpose data=outfr out=outfr2;
run;
data outfr2;
if _N_=1 then set outfr_4;
array col col1-col10000;
set outfr2;
if _name_='COUNT';
nrc=symget('nriskcat');
istop=2*nrc**2;
do i=1 to istop;
if col{i}=. then col{i}=0;
end;
n_nonevents=0;
n_events=0;
n_up_nonevents=0;
n_down_nonevents=0;
n_up_events=0;
n_down_events=0;
do i=1 to istop;
if i le istop/2 then n_nonevents=n_nonevents+col{i};
if i gt istop/2 then n_events=n_events+col{i};
end;
counter=0;
do i=1 to nrc;
do j= 1 to nrc;
counter=counter+1;
if j gt i then n_up_nonevents= n_up_nonevents+col{counter};
if j lt i then n_down_nonevents= n_down_nonevents+col{counter};
end;
end;
counter=0;
do i=1 to nrc;
do j= 1 to nrc;
counter=counter+1;
if j gt i then n_up_events= n_up_events+col{istop/2+counter};
if j lt i then n_down_events= n_down_events+col{istop/2+counter};
end;
end;
p_up_nonevents= n_up_nonevents/n_nonevents;
p_down_nonevents= n_down_nonevents/n_nonevents;
p_up_events= n_up_events/n_events;
p_down_events= n_down_events/n_events;
nri = ( p_up_events - p_down_events ) - ( p_up_nonevents -
p_down_nonevents ) ;
nri_case=( p_up_events - p_down_events );
nri_noncase=(p_down_nonevents - p_up_nonevents);
nri_c= ( (n_up_e/n_events)-(n_down_e/n_events))+(
(n_down_ne/n_nonevents)-(n_up_ne/n_nonevents));
*Under hypothesis;
/*h_se_nri_case= (( p_up_events + p_down_events ) / n_events)**0.5;
h_se_nri_noncase= (( p_up_nonevents + p_down_nonevents ) /
n_nonevents)**0.5;
h_se_nri = ( (( p_up_events + p_down_events ) / n_events) + ((
p_up_nonevents + p_down_nonevents ) / n_nonevents )) **.5 ;
h_z_nri_case= nri_case / h_se_nri_case;
h_z_nri_noncase= nri_noncase / h_se_nri_noncase;
h_z_nri = nri / h_se_nri ;
p_h_nri= 2 * ( 1 - probnorm ( abs ( h_z_nri ) ) ) ;*/
*General;
se_nri_case= (( p_up_events + p_down_events -(p_up_events -
p_down_events)**2) / n_events)**0.5;
se_nri_noncase= (( p_up_nonevents + p_down_nonevents
-(p_up_nonevents - p_down_nonevents)**2) / n_nonevents)**0.5;
se_nri = (se_nri_case**2 + se_nri_noncase**2)**.5 ;
z_nri_case= nri_case / se_nri_case;
z_nri_noncase= nri_noncase / se_nri_noncase;
z_nri = nri / se_nri ;
p_nri= 2 * ( 1 - probnorm ( abs ( z_nri ) ) ) ;
p_nri_case= ( 1 - probnorm ( abs ( z_nri_case ) ) ) ;
p_nri_noncase= ( 1 - probnorm ( abs ( z_nri_noncase ) ) ) ;
nri_ucl= nri+(1.96*se_nri);
nri_lcl= nri-(1.96*se_nri);
nri_case_ucl= nri_case+(1.96*se_nri_case);
nri_case_lcl= nri_case-(1.96*se_nri_case);
nri_noncase_ucl= nri_noncase+(1.96*se_nri_noncase);
nri_noncase_lcl= nri_noncase-(1.96*se_nri_noncase);
i=1;
run;
proc means data=out noprint;
class &outcome;
var pred1 pred2 diff_pred;
output out=idi n=n n2 n3 mean=m_pred1 m_pred2 m_diff_pred
stderr=a b se_diff_pred;
run;
data idi;
set idi;
if &outcome ne .;
label n="n" n2="n2" n3="n3" m_pred1="mean_pred1"
m_pred2="mean_pred2" a="a" b="b";
run;
proc sort data=idi;
by &outcome;
run;
data idi2;
do i=1 to 2;
set idi;
if &outcome=0 then do;
m_pred1_nonevents=m_pred1;
m_pred2_nonevents=m_pred2;
se_diff_nonevents=se_diff_pred;
n0=n;
end;
if &outcome=1 then do;
m_pred1_events=m_pred1;
m_pred2_events=m_pred2;
se_diff_events=se_diff_pred;
n1=n;
end;
end;
run;
data idi2;
set idi2;
idi=(m_pred2_events-m_pred1_events)-(m_pred2_nonevents-m_pred1_nonevents);
rel_idi=((m_pred2_events-m_pred2_nonevents)/(m_pred1_events-m_pred1_nonevents))-1;
se_idi= ( se_diff_nonevents**2+se_diff_events**2) ** .5;
z_idi=idi/se_idi;
p_idi= 2 * ( 1 - probnorm ( abs ( z_idi) ) ) ;
i=1;
run;
data nriidi;
merge outfr2(keep=i nri nri_case nri_noncase nri_c se_nri_case
se_nri_noncase z_nri_case z_nri_noncase z_nri p_nri p_nri_case
p_nri_noncase
nri_ucl nri_lcl nri_case_ucl nri_case_lcl nri_noncase_ucl
nri_noncase_lcl) idi2(keep=n0 n1 i idi rel_idi se_idi p_idi);
by i;
drop i;
n=n0+n1;
label n='n';
run;
proc print data=nriidi noobs label double;
where nri ne .;
var n nri nri_case nri_noncase nri_c z_nri_case z_nri_noncase
z_nri p_nri p_nri_case p_nri_noncase
nri_ucl nri_lcl nri_case_ucl nri_case_lcl nri_noncase_ucl
nri_noncase_lcl idi rel_idi se_idi p_idi;
run;
%mend;
/*Beispielaufruf
%reclassification_phreg(dataset=gdrs, id=ident, outcome=fall,
timevar=fup_zeit,
model1=alter_basis groesse taille hypertonie
, nvar1=4,roc_title1='Model 2',
model2=alter_basis groesse taille hypertonie alkohol_3 aktiv_2
rauch_2 rauch_4 wgrainbread coffee redmeat, nvar2=11,roc_title2='Model
2 plus Modifiable Risk Factors',
nriskcat=5, riskcutoffs=(0.0088)(0.0237)(0.0630)(0.1621),
risk_year=5, base_muster=0 0 0 0 0 0 0 0 0 0 0 0 , printtable=Y);*/
--
Mario Petretta
Department of Translational Medical Sciences
Naples University Federico II
Italy
*
* For searches and help try:
* http://www.stata.com/help.cgi?search
* http://www.stata.com/support/faqs/resources/statalist-faq/
* http://www.ats.ucla.edu/stat/stata/