/*****************************************************/
/* STAT 330, Fall 2011                               */
/* Homework 7                                        */
/*****************************************************/

options nocenter nodate nonumber pageno=1 pagesize=55 linesize=90;

/* Drive letter used to build the three input file paths of part 7.1.1. */
%let drive=C;

/* 7.1.1 */
filename demogFL "&drive:\Classes\STAT 330\2011-4\Homework\demographics.txt";
filename wideFL "&drive:\Classes\STAT 330\2011-4\Homework\measuresWide.txt";
filename longFL "&drive:\Classes\STAT 330\2011-4\Homework\measuresLong.txt";

/* Read the demographics file with column input, skipping the first line: */
/* id from columns 1-4, gender from column 12, age from columns 14-17.    */
data demographics;
    infile demogFL firstobs=2 truncover;
    input id $ 1-4
          gender $ 12
          age 14-17;
run;

/* Summary statistics of age within each gender. */
proc means data=demographics;
    class gender;
    var age;
run;

/* Read the wide measures file (id followed by meas1-meas13, first line   */
/* skipped) and recode the value -9 to missing in every measure.          */
/* NOTE(review): unlike the other two infile statements this one has no   */
/* TRUNCOVER, so with list input a short record makes SAS keep reading on */
/* the next line. Left as is; confirm every record holds all 14 fields.   */
data wideMeas;
    infile wideFL firstobs=2;
    informat id $4.;
    input id meas1 - meas13;

    /* Explicit array replaces the undocumented implicit-array DO OVER.   */
    array allMeas{*} meas1 - meas13;
    do i = 1 to dim(allMeas);
        if allMeas{i} = -9 then allMeas{i} = .;
    end;
    drop i;   /* loop index is not part of the data */
run;

proc means data=wideMeas maxdec=2;
    var meas1-meas13;
run;

/* Read the long measures file: one (id, time, measure) triple per record, */
/* first line skipped.                                                     */
data longMeas;
    infile longFL firstobs=2 truncover;
    informat id $4.;
    input id time measure;
run;

/* Printed summary of measure within each time. */
proc means data=longMeas;
    class time;
    var measure;
run;

/* Same grouping, written to statsByTime as mean and sd. NWAY keeps only  */
/* the per-time rows (no overall row).                                    */
proc means data=longMeas nway;
    class time;
    var measure;
    output out=statsByTime mean=mean stddev=sd;
run;

proc sort data=longMeas;
    by time;
run;

/* Attach the per-time mean and sd to every measurement and standardize. */
data zscores;
    merge longMeas statsByTime;
    by time;
    zscore = (measure - mean)/sd;
    drop _type_ _freq_;
run;

/* Keep the measurements whose z-score exceeds 2 in absolute value. */
data outliers;
    set zscores;
    if abs(zscore) > 2;
    keep id time measure zscore;
run;

/* One row per id that has at least one outlying measurement. */
proc sort data=outliers out=outlierIDs nodupkey;
    by id;
run;

proc sort data=longMeas;
    by id time;
run;

/* All measurements (outlying or not) of the ids found in outlierIDs. */
data outlierData;
    merge longMeas
          outlierIDs (in=outlier keep=id);
    by id;
    if outlier;
run;

proc print data=outlierData;
run;

/* Long to wide: one row per id with columns measure1, measure2, ...      */
/* numbered in the order the rows occur within the id.                    */
proc transpose data=outlierData
               out=transpose1 (drop=_name_)
               prefix=measure;
    by id;
    var measure;
run;

/* Widen the listing so the transposed row prints with less wrapping.     */
/* This setting stays in effect for the rest of the program.              */
options ls=200;

proc print data=transpose1;
run;

proc sort data=wideMeas;
    by id;
run;

/* Wide to long: one row per id and measure column, value in measure.     */
/* _name_ is dropped, so a row no longer records which of meas1-meas13    */
/* it came from.                                                          */
proc transpose data=wideMeas
               out=transpose2 (rename=(col1=measure) drop=_name_);
    by id;
    var meas1-meas13;
run;

/* The merge below is BY id, so both inputs must be in id order.          */
/* transpose2 inherits the order of the sorted wideMeas; demographics is  */
/* sorted here because nothing earlier guarantees its order.              */
proc sort data=demographics;
    by id;
run;

/* Demographics joined to the long-form measures; rows with a missing     */
/* measure are discarded.                                                 */
data combine2;
    merge demographics transpose2;
    by id;
    if measure ^= .;
run;

proc print data=combine2;
run;

/* 7.2.1 */

/* NOTE(review): the paths in this part are hard-coded to drive D, while  */
/* part 7.1.1 builds its paths from the drive macro variable (C). Left    */
/* unchanged; confirm which drive is intended.                            */

/* The comment block below is disabled code kept for reference: a DDE     */
/* import from an Excel workbook that writes saveLoc.colleges. The live   */
/* code reads dataLoc.colleges, and no libref saveLoc is assigned in this */
/* program.                                                               */
/*
filename readXLS dde "Excel|D:\Classes\STAT 330\2011-4\Homework\[College Rankings 2011.xlsx]National Universities!R2C1:R259C16";

data saveLoc.colleges;
    infile readXLS dlm='09'x notab dsd missover;
    informat school $70.
             percPell predGradRate actGradRate facAwards facNatAcads fedFunds percent4.
             resExpends comma6.;
    input rank school score percPell predGradRate actGradRate resExpends BA2PhD
          sciEngPhDs facAwards facNatAcads pcRank rotcRank fedFunds commServRank
          servFinAidRank;
run;
*/

/* Everything printed or plotted from here to ODS PDF CLOSE goes to hw7.pdf. */
ods pdf file="D:\Classes\STAT 330\2011-4\Homework\hw7.pdf";

libname dataLoc "D:\Classes\STAT 330\2011-4\Homework\";

/* Derive two variables from the school string:                           */
/*   state  - second-to-last token of school when split on ( ) and *      */
/*   public - 1 when the last non-blank character of school is *, else 0  */
/* NOTE(review): the delimiter list contains no blank, so the -2 position */
/* appears to rely on the trailing blank padding of school counting as    */
/* the last token. Confirm against the data before changing it.           */
data recode;
    set dataLoc.colleges;
    state = scan(school, -2, "()*");
    public = substr(school, length(school), 1) = "*";
run;

/* Frequency of each state, saved to outStates. */
proc freq data=recode;
    tables state / out=outStates;
run;

proc sort data=outStates;
    by descending count;
run;

/* numSchools is the row number after the descending sort, that is the    */
/* rank of the state by count (1 = most rows), not the count itself.      */
data outStates;
    set outStates;
    numSchools = _n_;
run;

/* Permanent copy of the ranked state table. */
data dataLoc.outStates;
    set outStates;
run;

proc sort data=recode;
    by state;
run;

proc sort data=outStates;
    by state;
run;

/* Attach the state-level columns of outStates to every school. */
data use;
    merge recode outStates;
    by state;
run;

/* Permanent copy of the analysis data set. */
data dataLoc.use;
    set use;
run;

/* Bar chart of the 0/1 public indicator, unformatted. */
proc sgplot data=use;
    hbar public;
run;

proc format;
    value yesno 0='Private'
                1='Public';
run;

/* Bars per state split by public/private, restricted to the ten states   */
/* ranked highest by numSchools.                                          */
title1 "Distribution of Public/Private Colleges by State";
title2 "Top 10 States";
proc sgplot data=use;
    where numSchools <= 10;
    hbar state / group=public;
    format public yesno.;
    label public="School Funding"
          state="State";
run;
title;

/* Mean score for public versus private schools. The title literal is     */
/* kept on a single line so its text does not depend on how a quoted      */
/* string that spans source lines is padded.                              */
title1 "Mean College Score by School Type: Public vs. Private";
proc sgplot data=use;
    vbar public / stat=mean response=score barwidth=0.20;
    format public yesno.;
    label public="School Funding"
          score="Mean College Score";
    yaxis values=(0 to 50 by 5);
run;
title;

/* Untitled scatter plot of predGradRate against score. */
proc sgplot data=use;
    scatter x=score y=predGradRate;
run;

/* Same two variables with a loess fit; smooth= matches the value quoted  */
/* in title2.                                                             */
title1 "Predicted Graduation Rate by College Score";
title2 "LOESS Curve with Smoothing Parameter=0.75";
proc sgplot data=use;
    loess x=score y=predGradRate / markerattrs=(symbol=circleFilled)
                                   smooth=0.75
                                   nolegfit;
    label score="College Score"
          predGradRate="Graduation Rate";
run;
title;

ods pdf close;