/*****************************************************/
/* STAT 330, Fall 2011                               */
/* Homework 9                                        */
/*                                                   */
/* 9.1.1  Correlation, regression, ANOVA and t-test  */
/*        on the saved "colleges" data set.          */
/* 9.2.1  Read the Coachella line-up worksheets via  */
/*        DDE and report bands appearing 3+ times.   */
/*                                                   */
/* NOTE: analysis notes are written as block         */
/* comments because an asterisk-style comment ends   */
/* at the first semicolon; any text after it would   */
/* be submitted as a (invalid) statement.            */
/*****************************************************/

options nocenter nodate nonumber pageno=1 pagesize=55 linesize=90;

x "cd C:\temp\";

/* Single definition of the homework folder so that the libname, the DDE
   filerefs and the macro calls all follow the same drive letter. */
%let drive=C;
%let hwDir=&drive:\Classes\STAT 330\2011-4\Homework;

libname saveLoc "&hwDir\";


/* 9.1.1 */

/* Flag schools whose name ends in "*": public is 1 when the last
   character of school is an asterisk, otherwise 0. */
data colleges;
  set saveLoc.colleges;
  public = substr(school, length(school), 1) = "*";
run;

/* Correlation of every numeric variable with score. */
proc corr data=colleges;
  var _numeric_;
  with score;
run;

ods graphics on;

/* Regress score on the school characteristics; keep residuals and
   predicted values in regOut for the outlier search below. */
proc reg data=colleges plots=(diagnostics);
  model score = percPell predGradRate actGradRate facAwards facNatAcads
                fedFunds resExpends BA2PhD sciEngPhDs;
  output out=regOut r=residual p=pred;
run;
quit;

/* Low F p-value means that the regression model can explain a significant
   amount of variability in the college scores. */
/* High R-squared also indicates that a large percentage of variability in
   the college scores is being explained by the regression model using the
   covariates included. */
/* Diagnostic plots look pretty good. We can be comfortable with the
   inference from this model. */

/* Keep only observations whose residual exceeds 20. */
data findOutlier;
  set regOut;
  if residual > 20;
run;

/* Portland St. Univ is the large outlier. */
/* PSU has a score of 39, but a predicted score of only 12, therefore the
   huge residual. They would be pleased about this. For some reason their
   score is higher than it would be expected to be, based on their school
   characteristics. */

/* One-way comparison of score between the two levels of public. */
proc anova data=colleges;
  class public;
  model score = public;
  means public;
run;
quit;

proc ttest data=colleges;
  class public;
  var score;
run;

/* t-stat = diff/se = 7.1179 / 2.2432 = 3.1731, p-value=0.0017
   t-stat squared = 10.0686 */
/* ANOVA stat = MSA/MSE = 3027.32712 / 300.65736 = 10.0690, p-value=0.0017 */
/* p-values are about the same; F-stat is the square of the t-stat. */
/* The p-values for the ANOVA and t-test indicate that there is a significant
   difference in mean scores, between public and private universities.
   Private universities have a significantly higher mean score than public
   universities. */

/* Re-run both procedures, this time capturing their result tables as
   data sets through ODS OUTPUT. */
*ods trace on;
proc anova data=colleges;
  class public;
  model score = public;
  ods output overallanova=anovaOut;
run;
quit;

proc ttest data=colleges;
  class public;
  var score;
  ods output ttests=ttestOut;
run;
*ods trace off;

/* Put the first row of each captured table side by side and compare.
   NOTE(review): the first rows are presumably the ANOVA "Model" line and
   the pooled t-test -- confirm against the ODS tables. */
data checkStats;
  set anovaOut (in=inANOVA keep=fvalue probF);
  set ttestOut (in=inTtest keep=tvalue probT);
  if _n_=1;
  diffPvalue = probF - probT;       /* 2.84x10-17 */
  diffStats  = fvalue - tvalue**2;  /* -4.44x10-14 */
run;

ods graphics off;


/* 9.2.1 */

/* Stand-alone read of the 1999 worksheet (column 1) through DDE.
   NOTE(review): DDE normally needs the workbook open in Excel -- confirm.
   The INPUT statement reads bandNo from one record and bandName from the
   record that follows it. This data set is replaced by the macro call for
   1999 below, which also adds the year variable. */
filename readXLS dde "Excel|&hwDir\[Coachella Lineups.xlsx]1999!C1";

data coach1999;
  infile readXLS notab dlm='09'x dsd missover firstobs=8 obs=179;
  informat bandName $40.;
  input bandNo / bandName;
run;

/*---------------------------------------------------------------------
  readLineUp: read one worksheet of "Coachella Lineups.xlsx" via DDE.

  Parameters
    fileLoc  folder that holds the workbook (no trailing backslash)
    year     worksheet name; also stored in the variable year and used
             as the suffix of the output data set name
    start    first record to read (FIRSTOBS=)
    end      last record to read (OBS=)

  Creates data set coach&year with bandNo, bandName and year.
---------------------------------------------------------------------*/
%macro readLineUp(fileLoc, year, start, end);
  filename readXLS dde "Excel|&fileLoc\[Coachella Lineups.xlsx]&year!C1";

  data coach&year;
    infile readXLS notab dlm='09'x dsd missover firstobs=&start obs=&end;
    informat bandName $43.;
    input bandNo / bandName;
    year = &year;
  run;
%mend readLineUp;

%readLineUp(&hwDir, 1999, 8, 179);
%readLineUp(&hwDir, 2001, 8, 99);
%readLineUp(&hwDir, 2002, 8, 127);
%readLineUp(&hwDir, 2003, 8, 183);
%readLineUp(&hwDir, 2004, 8, 187);
%readLineUp(&hwDir, 2005, 8, 185);
%readLineUp(&hwDir, 2006, 8, 197);
%readLineUp(&hwDir, 2007, 8, 269);
%readLineUp(&hwDir, 2008, 8, 293);
%readLineUp(&hwDir, 2009, 8, 399);
%readLineUp(&hwDir, 2010, 8, 323);
%readLineUp(&hwDir, 2011, 8, 451);

/* Stack every year's line-up into one data set. */
data CoachAllYrs;
  set coach1999 coach2001 - coach2011;
run;

/* Count how many rows each band name has across all years. */
proc freq data=CoachAllYrs;
  tables bandName / out=outCount;
run;

/* Keep just the band name and its count, renamed to appears. */
data appearances;
  set outCount;
  appears = count;
  keep bandName appears;
run;

proc print data=appearances width=uniform;
run;

proc sort data=appearances out=temp;
  by descending appears;
run;

/* Write the bands with three or more appearances to an RTF report. */
title "Coachella Bands Appearing At Least 3 Times: 1999 - 2011";
ods rtf file="coachBands.rtf";

proc print data=temp noobs label;
  var bandName appears;
  where appears >= 3;
  label bandName="Band Name"
        appears="# of Appearances";
run;

ods rtf close;