/*************************************************************** *Simulated potential survival outcomes ***************************************************************/ %let n = 100000; data population; do i = 1 to &n.; call streaminit(10); age= rand('Normal',50, 10); zage=(age-50) /10; *standardized age; sex= rand('bern',0.6); race= rand('bern',0.3); income=rand('Normal',20, 3); zincome=(income-20) /3; *standardized income; *create the treatment assignment mechanism; linear_term = -0.4+ log(1.5)*zage*(age<75) + log(0.8)*sex + log(1.5)*race + log(.50)*zincome; pA = exp(linear_term)/(1+exp(linear_term)); A = rand('bern',pA); overlap = (A*(1-pA) + (1-A)*pA); lambdat = 1*365/0.09; *baseline hazard; linear_term = exp( - ( log(3)*zage + log(0.8)*sex + log(1.5)*race + log(.70)*zincome )); t_0 = rand("WEIBULL", 1, lambdat * linear_term); linear_term = exp( - ( log(3)*zage + log(0.8)*sex + log(1.5)*race + log(.70)*zincome+ log(0.70))); t_1 = rand("WEIBULL", 1, lambdat * linear_term); *Observed outcome is effected by treatment; T = t_0*(1-A) + t_1*A ; C = 500+ 500*(rand('UNIFORM')); * time of censoring is totally random; *C = 10000; time = min(T, C); * which came first?; Y = (T lt C); output; end; run; *add normalized ow; proc sql ; create table population2 as select *, overlap*A/sum(overlap*A)+overlap*(1-A)/sum(overlap*(1-A)) as zoverlap from population; quit; proc summary data=population; var age zage income zincome pA sex race T C Y; output out=class; run; * Plot the relationship between zage, zincome and pA by treatment; proc gplot data =population; plot pA*zage pA*zincome; run; quit; * Plot the relationship between SBP and pY; proc gplot data =population; plot pA*zage pA*zincome/ overlay; run; quit; *In typical practice we would only have a small sample and more sampling variability; *Just switch our "Sample" to apply methods to a more realistic sample; data sample; set population; where i le 5000; run; ***************************************************************************; * PRIMARY ANALYSIS: Use Overlap weighting to address confounding by the four covariates; ***************************************************************************; proc logistic data = sample descending; class sex(ref="0") race (ref="0") /param = ref; effect age_spl=spline(zage / naturalcubic details knotmethod=percentiles(4)); effect income_spl=spline(zincome / naturalcubic details knotmethod=percentiles(4)); model A = age_spl income_spl sex race; output out=propmodel prob=p1; *p1 contains P(A=1|age_spl, income_spl, sex race); run; data propmodel ; set propmodel; *estimated ow based on the propensity score; ow_weight = (A)*(1-p1) + (1-A)*p1; *definition of the overlap weights; run; *normalize ow_weight; proc sql ; create table propmodel2 as select *, ow_weight*A/sum(ow_weight*A)+ow_weight*(1-A)/sum(ow_weight*(1-A)) as zow_weight from propmodel; quit; *SAS automatically normalizes weights, *so there is no difference between using ow_weight or zow_weight, but this is not the case in R *Remember that ow needs to be normalized in practice; *****************************************************************************************************; **********Check the distribution of propensity scores **************; *****************************************************************************************************; proc means data = propmodel min p1 q1 median q3 p99 max maxdec=2; class A; var p1; run; *Unweighted distribution of estimated propensity scores; proc sgplot data=propmodel; title "Propensity distribution"; density p1 / type=kernel group=A; keylegend / location=inside position=topright; run; *Weighted distribution of propensity scores; proc sgplot data=propmodel; title "Propensity distribution"; density p1 / type=kernel group=A weight = ow_weight; keylegend / location=inside position=topright; run; *****************************************************************************************************; ***************** Weighted baseline characteristics table to show comparability ***************************; ***************** This is the same as showing a baseline characteristics table after matching *******; *****************************************************************************************************; *If you want details; proc means data = propmodel min p1 q1 median q3 p99 max maxdec=1; class A; var zage zincome sex race ; weight ow_weight; run; *Simpler version looking only at the means for nice table; proc means data = propmodel noprint; class A; var zage zincome sex race ; weight ow_weight; output out=balance ; run; data balance; set balance; where A ne . and _STAT_ = "MEAN"; drop _TYPE_ _FREQ_; run; *****************************************************************************************************; ***************** Analysis of Outcome ***************************************************************; *****************************************************************************************************; *******************************; * Since we simulated data we can derive the true causal risk difference with potential outcomes; * True Causal effect in the overlap population; * Determine the target parameter for the average effect of treatment for the overlapped population (ATO) on the hazard scale ; *******************************; *true parameter; data truth; set population2; A_p = 1; T = t_1; output; A_p = 0; T = t_0; output; run; data truth; set truth; time = min(T, C); * which came first?; Y_p = (T lt C); run; *true marginal parameter with CENSORING; proc phreg data = truth; model time*Y_p(0) = A_p /rl; weight zoverlap; *using the true probability from data generating process, same if using overlap; run; *HR = 0.734; data cov ; A=1 ; output; A=0 ; output; run; *Estimation in real data with observable quantities using a Cox PH model; proc phreg data = propmodel2 covs(aggregate); *<<<<<<<< NOTE: covs(aggregate) specifies the robust variance; model time*Y(0) = A / rl ; weight zow_weight; * same if using ow_weight; baseline out=outpred covariates=cov survival=survival stderr=stderr upper=uclm lower=lclm; id i; run; * HR 0.715 (0.630, 0.812) <<<<<<<<<<<< Contains the truth; *getting out causal risk difference by OW at 1 year; proc sort data = outpred out=outpred2; by A time; where time<365; run; data eventrate; set outpred2; by A time; event=1-survival; euclm = 1-lclm; elclm = 1-uclm; if last.A then output; run; proc print data = eventrate; var A elclm event euclm stderr; run; ****************************; * 1-S(365) = 0.16 (0.14, 0.17) with StdErr=0.007 when A=0; * 1-S(365) = 0.11 (0.10, 0.13) with StdErr=0.006 when A=1; ****************************; *Add weighting for survival curves; *semi-parametric from Cox proportional hazard model; symbol color=blue value=plus ; symbol2 color=pink value=dot ; * graphical display of adjusted survival probabilies; proc gplot data=outpred; plot survival*time=A/vaxis=axis1; title Adjusted Cox survival probabilities from the Cox PH model; run; quit; *non-parametric from Nelson-Aalen estimator, using the strata statement without specifying treatment in the model statement; proc phreg data = propmodel2; strata A; model time*Y(0) = ; weight zow_weight; * same if using ow_weight; baseline out=outpred covariates=cov survival=survival ; run; symbol color=blue value=plus ; symbol2 color=pink value=dot ; * graphical display of adjusted survival probabilies; proc gplot data=outpred; plot survival*time=A/vaxis=axis1; title Adjusted Cox survival probabilities from the Nelson-Aalen estimator; run; quit; ****************************; * The two plots look the same here; * That is because proportional hazards holds; * In real data there is very little chance that these * plots would look the same; * In general you should use the model syntax if you are * illustrating a proportional hazards model; * You should use the strata syntax if you are checking proportional hazards, * or avoiding the assumption of proportional hazards; ****************************;