/*****************************************************/
/* STAT 330, Fall 2011                               */
/* Homework 8                                        */
/*                                                   */
/* Reads two Excel ranges over DDE, then produces    */
/* summary statistics, plots and a ranked listing.   */
/* NOTE: DDE needs Excel running with the referenced */
/* workbooks already open.                           */
/*****************************************************/

options nocenter nodate nonumber pageno=1 pagesize=55 linesize=90;

x "cd C:\temp\";

/* Drive letter shared by the libname and both DDE filerefs below. */
%let drive=C;
libname saveLoc "&drive.:\Classes\STAT 330\2011-4\Homework\";


/* ------------------------------ 8.1.1 ------------------------------ */

/* Rows 4-95, columns 1-3 of the Data sheet: year plus two income-share series. */
filename readXLS dde
    "Excel|&drive.:\Classes\STAT 330\2011-4\Homework\[Top 10 Perc Income Share.xlsx]Data!R4C1:R95C3";

data top10;
    /* NOTAB + DLM='09'x: split on the tabs DDE puts between cells.          */
    /* DSD: consecutive tabs (empty cells) are read as missing values.       */
    /* TRUNCOVER: a short row yields missing values instead of letting INPUT */
    /* flow over onto the next row and merge two records.                    */
    infile readXLS notab dsd dlm='09'x truncover;
    input year exclude include;
run;

/*
data saveLoc.top10;
    set top10;
run;
*/

title "Figure 1: The Top Decile Income Share, 1917-2008";
proc sgplot data=top10;
    series x=year y=exclude / markers markerattrs=(symbol=diamond);
    series x=year y=include / markers markerattrs=(symbol=triangleFilled);
    /* The tick list stops at 2007 but the series is titled through 2008.  */
    /* VALUESHINT keeps these ticks while letting the data set the axis    */
    /* range, so the final year is not cut off by the tick list.           */
    xaxis label="Year" values=(1917 to 2007 by 5) valueshint grid;
    yaxis label="Top 10% Income Share" values=(0.25 to 0.50 by 0.05) grid;
    format exclude include percent5.;
    label exclude="Excluding capital gains"
          include="Including capital gains";
    keylegend / down=2 location=inside position=bottomleft;
run;
title;


/* ------------------------------ 8.1.2 ------------------------------ */

/* Rows 2-433, columns 1-7 of the data sheet; the fileref is reused. */
filename readXLS dde
    "Excel|&drive.:\Classes\STAT 330\2011-4\Homework\[Redev_Agencies.xls]data!R2C1:R433C7";

data redevelop;
    /* See the first DATA step for why TRUNCOVER is specified. */
    infile readXLS notab dsd dlm='09'x truncover;
    informat agency city county $40. zip $10.;
    input agency totalCost popln pctPoor city county zip;

    /* LOG of a zero, negative or missing value writes an "Invalid argument" */
    /* note and sets _ERROR_; guarding leaves the result missing, quietly.   */
    if totalCost > 0 then logCost  = log(totalCost);
    if popln     > 0 then logPopln = log(popln);

    /* Proper-cased names for display; fall back to the agency name when */
    /* the city cell is blank.                                           */
    propCity   = propCase(city);
    propCounty = propcase(county);
    if propCity="" then propCity=propCase(agency);
run;

/*
data saveLoc.redevelop;
    set redevelop;
run;
*/

proc means data=redevelop;
    var totalCost popln pctPoor;
run;

/* Raw scale, log-log scale, and poverty rate against log population. */
proc sgplot data=redevelop;
    scatter x=popln y=totalCost;
run;

proc sgplot data=redevelop;
    scatter x=logPopln y=logCost;
run;

proc sgplot data=redevelop;
    scatter x=logPopln y=pctPoor;
run;

ods graphics on;
proc univariate data=redevelop;
    var pctPoor;
    /* NOTE(review): n=397 is hard-coded in the title while the DDE range */
    /* spans 432 rows; confirm it matches the non-missing pctPoor count.  */
    title "Percentage Poor Among California Cities (n=397)";
    histogram pctPoor / endpoints;
    label pctPoor="Percent Poor";
run;
title;

proc freq data=redevelop;
    tables county;
run;

/* Subsetting IF: keep only the three counties compared in the box plot. */
data boxData;
    set redevelop;
    if propCounty in ("Los Angeles", "Riverside", "San Bernardino");
run;

title "Percent Poor in Los Angeles, Riverside, San Bernardino";
proc sgplot data=boxData;
    vbox pctPoor / category=propCounty;
    label propCounty="County"
          pctPoor="Percent Poor";
run;
title;

/* 75th percentiles of population and percent poor, written to outStat. */
proc means data=redevelop;
    var popln pctPoor;
    output out=outStat p75(popln pctPoor)=p75Pop p75Poor;
run;

data bigcities poorcities;
    /* Read the outStat row once; variables read by SET are retained, so  */
    /* the cutoffs stay available while every redevelop row is processed. */
    if _n_=1 then set outStat;
    set redevelop;
    if popln   > p75Pop  then output bigCities;
    if pctPoor > p75Poor then output poorCities;
run;

/*
data saveLoc.bigcities;
    set bigCities;
run;

data saveLoc.poorCities;
    set poorCities;
run;
*/

/* Sort poorest first so the observation number works as the rank column. */
proc sort data=poorCities;
    by descending pctPoor;
run;

title "Top 25% Poorest Cities in California";
proc print data=poorCities label obs="Rank" width=uniform;
    var pctPoor propCity propCounty popln;
    label pctPoor="Percent Poor (%)"
          propCity="City"
          propCounty="County"
          popln="Population";
    format popln comma8.;
run;
title;