/***************************************************** * Filename: g2_parition_macro.sas * Creator: Jimmy A. Doi * Date: Jan 27, 2009 * Purpose: Show exact partition of G^2 * * The data set DSET must be entered as a * Jx2 array as shown at the bottom of this code * * Macro %g2(DSET) will construct the overall * LRT G^2 test statistic for the data in DSET * * Macro %partition(DSET) will create independent * G^2 statistics based on the partition described * in Agresti's test. Each statistic will be saved * in a separate dataset. * * Macro %gather(NUM) will collect all datasets into one * * Macro %partition(DSET) uses all macros. It is the * only macro that needs to be invoked as in: * * %partition(table) * *********************************************************/ %macro g2(dset); data prep; set &dset; row = x+y; c1+x; c2+y; call symput("col1",c1); call symput("col2",c2); call symput("gt",c1+c2); run; data g2; set prep; sum+x*log(x/(row*&col1./>)); sum+y*log(y/(row*&col2./>)); if _N_=&obs then G2=2*sum; if _N_=&obs then output; run; proc print data=g2; var g2; title "Overall LRT G^2 Statistic"; run; %mend; %macro gather(num); data all; set %do j = 1 %to &num-1; table_&j %end; ; run; %mend; ************************************************************; *START MACRO HERE; %macro partition(dset); title; proc contents data=&dset out=myout noprint; data temp; set myout; call symput("obs",trim(left(nobs))); run; *****************************************; * THIS STAYS FIXED ... FIRST RUN; data tab_1 table_1; set &dset; retain n11 n12; if _N_=1 then do; n11 = x; n12 = y; end; if _N_=2 then do; n21 = x; n22 = y; r1 = n11+n12; r2 = n21+n22; c1 = n11+n21; c2 = n12+n22; gt = c1 + c2; G2 = 2*(n11*log(n11/(r1*c1/gt))+ n12*log(n12/(r1*c2/gt))+ n21*log(n21/(r2*c1/gt))+ n22*log(n22/(r2*c2/gt)) ); pvalue = 1-probchi(g2,1); output table_1; end; run; * THIS STAYS FIXED ... FIRST RUN; *****************************************; %do i = 2 %to &obs-1; *****************************************; * THIS WILL BE LOOPED FOR SUBSEQUENT RUNS; data tab_&i. table_&i.; *USE INDEX VAR; set &dset; retain n11 n12; if _N_<=&i then do; *THIS WORKS IN GENERAL; n11 + x; n12 + y; end; bound = &i + 1; if _N_=bound then do; *ADD ONE TO THE INDEX VAR; n21 = x; n22 = y; r1 = n11+n12; r2 = n21+n22; c1 = n11+n21; c2 = n12+n22; gt = c1 + c2; G2 = 2*(n11*log(n11/(r1*c1/gt))+ n12*log(n12/(r1*c2/gt))+ n21*log(n21/(r2*c1/gt))+ n22*log(n22/(r2*c2/gt)) ); pvalue = 1-probchi(g2,1); output table_&i.; *USE INDEX VAR; end; run; * THIS WILL BE LOOPED FOR SUBSEQUENT RUNS; *****************************************; %end; %g2(&dset); ** Reveal the overall G2 statistic; %gather(&obs); ** Combine all into on dataset; proc print data=all; var n11 n12 n21 n22 r1 r2 c1 c2 gt G2 pvalue; sum G2; ** Use sum to show the exact partition; title "Partitioned G2 values"; run; %mend; ***************************************************; data table; input x y; cards; 762 484 327 239 468 477 554 223 242 120 100 200 300 400 ; %partition(table);