/* Base name used for the HTML page (&name..htm) and for the graph names */
%let name = cdc_smoking_2014;

/* Fileref for the ODS HTML output path: the current directory */
filename odsout ".";

/*
Imitating graph from:
http://flowingdata.com/2016/06/20/who-still-smokes/

Using data from:
http://www.cdc.gov/brfss/annual_data/annual_2014.html
*/

/*
* I edited the "filename datain" and "libname dataout" in this cdc-supplied sas job;
%include 'd:\Public\CDC\brfss\sasout14_llcp.sas';
*/

/*
Annotate data set used as a text legend by every chart below (anno=anno_legend).
Three labels are placed at y=100 in the xsys/ysys='1' coordinate system,
at x=.5, x=50 and x=99.5, one per stacked-bar segment.
*/
data anno_legend;
length text $50;
/* Values shared by all three labels */
retain xsys '1' ysys '1' hsys '3' when 'a' function 'label' y 100;
text='Smokers';     x=.5;   position='3'; output;
text='Non-Smokers'; x=50;   position='2'; output;
text='No Response'; x=99.5; position='1'; output;
run;

/* Display text for the three values of stack_order (the bar segments) */
proc format;
  value stk_fmt
    1 = 'Smokers'
    2 = 'Non-Smokers'
    3 = 'No Response';
run;


/* Set up the cdc's user-defined formats (the included job is just a 'proc format') */
%include 'd:\Public\CDC\brfss\format14.sas';

libname dataout 'd:\Public\CDC\brfss\';

/*
Working copy of the survey data with one extra classification variable:
stack_order = 1 when _rfsmok3 formats to "Yes", 2 when it formats to "No",
and 3 for anything else.  stack_order is displayed through stk_fmt.
*/
data my_data;
  set dataout.sasdata;

  /* Assign the formats to the variables */
  %include 'd:\Public\CDC\brfss\formas14.sas';

  smoking_text = put(_rfsmok3, _3RFSMOK.);

  format stack_order stk_fmt.;
  select (smoking_text);
    when ("Yes") stack_order = 1;
    when ("No")  stack_order = 2;
    otherwise    stack_order = 3;
  end;
run;



/* Send output to an HTML page (written via the odsout fileref) instead of the listing */
ods listing close;
ods html
  path=odsout
  body="&name..htm" (title="Smoking Prevalence - US CDC data")
  style=htmlblue;

/* Global graphics options: percent units, fonts, text sizes and text color */
goptions
  gunit=pct
  ftitle='albany amt' ftext='albany amt'
  htitle=15pt htext=10pt
  ctext=gray33;

/* Footnote text is a link to the CDC data page */
footnote
  link="http://www.cdc.gov/brfss/annual_data/annual_2014.html"
  c=gray
  "Data source: CDC Behavioral Risk Factor Surveillance System - 2014 data";

/* Solid fills for the three bar segments, in stack_order sequence */
pattern1 value=s color=cxfb9a99;
pattern2 value=s color=cxb2df8a;
pattern3 value=s color=grayee;

/* Response axis shared by all charts: 0 to 1 in steps of .2, no label, no offsets */
axis2
  label=none
  style=0
  order=(0 to 1 by .2)
  minor=none
  offset=(0,0);

/* ---------- Chart: smoking prevalence by gender ---------- */

/* Bar variable, its display format, and the chart height in pixels */
%let var=sex;
%let fmt=sex.;
%let ypix=280;

/*
One row per (&var, stack_order) cell with:
  count       = number of rows of my_data in the cell
  group_count = total of count over all cells that share the same &var value
The cell counts come from an inline view, so plot_data is created in a single
statement.  The earlier form built plot_data and then re-created it from
itself, which makes PROC SQL log a "recursively references the target table"
data-integrity warning.
NOTE(review): count(*) is a raw respondent count; no survey weight is applied.
Confirm that unweighted percentages are what is wanted here.
*/
proc sql noprint;

create table plot_data as
select *, sum(count) as group_count
from (select &var, stack_order, count(*) as count
      from my_data
      group by &var, stack_order)
group by &var
order by &var, stack_order;

quit;

/* Add each segment's share of its bar, and the hover text shown for it */
data plot_data; set plot_data;
format subgroup_percent percent7.0;
subgroup_percent=count/group_count;
length my_html $300;
/* Hover text is built as: title="<bar label>: <percent> <segment label>" */
my_html='title='||quote(
 trim(left(put(&var,&fmt)))||': '||
 trim(left(put(subgroup_percent,percent7.1)))||' '||trim(left(put(stack_order,stk_fmt.)))
 );
run;

/* Midpoint (category) axis: no axis label, default bar order */
axis1 label=none;

title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Gender";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic' 
 "A higher percentage of males smoke than females.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic' 
 "The difference between the two increased during 1994-2014.";
/* Blank title drawn at a 90 degree angle, 18pct high (presumably a left-margin spacer) */
title4 a=90 h=18pct ' ';

goptions ypixels=&ypix xpixels=700;
ods html anchor='gender';

/* One horizontal bar per &var value, stacked by stack_order; each bar sums the shares */
proc gchart data=plot_data anno=anno_legend;
format &var &fmt;
hbar &var / discrete type=sum sumvar=subgroup_percent nostats
 subgroup=stack_order nolegend
 maxis=axis1 raxis=axis2 noframe 
 space=0 coutline=white
 html=my_html
 des='' name="&name._&var";
run;


/* ---------- Chart: smoking prevalence by education level ---------- */

/* The official text was a bit long/wordy, so I use the shortened text they used on FlowingData */
proc format;
value edu_fmt
1='None'
2='Elementary'
3='Some High School'
4='High School Grad'
5='Some College'
6='College Grad'
;
run;

/* Bar variable, its display format, and the chart height in pixels */
%let var=educa;
/* %let fmt=educa.; */
%let fmt=edu_fmt.;
%let ypix=450;

/*
One row per (&var, stack_order) cell with:
  count       = number of rows of my_data in the cell
  group_count = total of count over all cells that share the same &var value
The cell counts come from an inline view, so plot_data is created in a single
statement.  The earlier form built plot_data and then re-created it from
itself, which makes PROC SQL log a "recursively references the target table"
data-integrity warning.
NOTE(review): count(*) is a raw respondent count; no survey weight is applied.
Confirm that unweighted percentages are what is wanted here.
*/
proc sql noprint;

create table plot_data as
select *, sum(count) as group_count
from (select &var, stack_order, count(*) as count
      from my_data
      group by &var, stack_order)
group by &var
order by &var, stack_order;

quit;

/* I am leaving out category #9, where people did not respond with what their education level was */
/* The filter is on &var only, so every kept bar still has all of its segments */
data plot_data; set plot_data (where=(&var in (1 2 3 4 5 6)));
format subgroup_percent percent7.0;
subgroup_percent=count/group_count;
length my_html $300;
/* Hover text is built as: title="<bar label>: <percent> <segment label>" */
my_html='title='||quote(
 trim(left(put(&var,&fmt)))||': '||
 trim(left(put(subgroup_percent,percent7.1)))||' '||trim(left(put(stack_order,stk_fmt.)))
 );
run;

/* Midpoint (category) axis: no axis label, categories listed from 6 down to 1 */
axis1 label=none order=(6 to 1 by -1);

/*
No title4 is defined for this chart.  A new title1 cancels all higher-numbered
titles, so a title4 left over from an earlier chart is not carried into this one.
*/
title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Education Level";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic' 
 "Smokers with a college education decreased by almost half during 1994-2014.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic' 
 "Those with only some high school decreased by only a few percentage points.";

goptions ypixels=&ypix xpixels=700;
ods html anchor='education';

/* One horizontal bar per &var value, stacked by stack_order; each bar sums the shares */
proc gchart data=plot_data anno=anno_legend;
format &var &fmt;
/*
format &var comma8.0;
*/
hbar &var / discrete type=sum sumvar=subgroup_percent nostats
 subgroup=stack_order nolegend
 maxis=axis1 raxis=axis2 noframe
 space=0 coutline=white
 html=my_html
 des='' name="&name._&var";
run;




/* ---------- Chart: smoking prevalence by income level ---------- */

/* The official text was a bit long/wordy, so I shortened it */
proc format;
value inc_fmt
1='Less than $10k'
2='$10-$15k'
3='$15-$20k'
4='$20-$25k'
5='$25-$35k'
6='$35-$50k'
7='$50-$75k'
8='Over $75k'
;
run;

/* Bar variable, its display format, and the chart height in pixels */
%let var=income2;
/* %let fmt=in2come.; */
%let fmt=inc_fmt.;
%let ypix=500;

/*
One row per (&var, stack_order) cell with:
  count       = number of rows of my_data in the cell
  group_count = total of count over all cells that share the same &var value
The cell counts come from an inline view, so plot_data is created in a single
statement.  The earlier form built plot_data and then re-created it from
itself, which makes PROC SQL log a "recursively references the target table"
data-integrity warning.
NOTE(review): count(*) is a raw respondent count; no survey weight is applied.
Confirm that unweighted percentages are what is wanted here.
*/
proc sql noprint;

create table plot_data as
select *, sum(count) as group_count
from (select &var, stack_order, count(*) as count
      from my_data
      group by &var, stack_order)
group by &var
order by &var, stack_order;

quit;

/* Leaving out 77 and 99, where people did not provide income level */
/* The filter is on &var only, so every kept bar still has all of its segments */
data plot_data; set plot_data (where=(&var in (1 2 3 4 5 6 7 8)));
format subgroup_percent percent7.0;
subgroup_percent=count/group_count;
length my_html $300;
/* Hover text is built as: title="<bar label>: <percent> <segment label>" */
my_html='title='||quote(
 trim(left(put(&var,&fmt)))||': '||
 trim(left(put(subgroup_percent,percent7.1)))||' '||trim(left(put(stack_order,stk_fmt.)))
 );
run;

/* Midpoint (category) axis: no axis label, categories listed from 8 down to 1 */
axis1 label=none order=(8 to 1 by -1);

title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Income Level";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
 "Lower household income is related to higher smoker rates.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
 "The trend is more evident in recent years.";
/* Blank title drawn at a 90 degree angle, 3pct high (presumably a left-margin spacer) */
title4 a=90 h=3pct ' ';

goptions ypixels=&ypix xpixels=700;
ods html anchor='income';

/* One horizontal bar per &var value, stacked by stack_order; each bar sums the shares */
proc gchart data=plot_data anno=anno_legend;
format &var &fmt;
/*
format &var comma8.0;
*/
hbar &var / discrete type=sum sumvar=subgroup_percent nostats
 subgroup=stack_order nolegend
 maxis=axis1 raxis=axis2 noframe
 space=0 coutline=white
 html=my_html
 des='' name="&name._&var";
run;



/*
Many race variables to choose from...
_RACE
RRCLASS
_IMPCRAC
_IMPRACE
_P1RACE
*/


/* ---------- Chart: smoking prevalence by race and origin ---------- */

/* The official text was a bit long/wordy, so I shortened it */
proc format;
value race_fmt
1='White'
2='Black'
3='American Indian'
4='Asian'
8='Hispanic'
;
run;

/* Bar variable, its display format, and the chart height in pixels */
%let var=_race;
%let fmt=race_fmt.;

%let ypix=400;

/*
One row per (&var, stack_order) cell with:
  count       = number of rows of my_data in the cell
  group_count = total of count over all cells that share the same &var value
The cell counts come from an inline view, so plot_data is created in a single
statement.  The earlier form built plot_data and then re-created it from
itself, which makes PROC SQL log a "recursively references the target table"
data-integrity warning.
NOTE(review): count(*) is a raw respondent count; no survey weight is applied.
Confirm that unweighted percentages are what is wanted here.
*/
proc sql noprint;

create table plot_data as
select *, sum(count) as group_count
from (select &var, stack_order, count(*) as count
      from my_data
      group by &var, stack_order)
group by &var
order by &var, stack_order;

quit;

/* Leaving out #5, which is multi-race */
/* White, Black, American Indian, Asian, Hispanic */
/* The filter is on &var only, so every kept bar still has all of its segments */
data plot_data; set plot_data (where=(&var in (1 2 3 4 8)));
format subgroup_percent percent7.0;
subgroup_percent=count/group_count;
length my_html $300;
/* Hover text is built as: title="<bar label>: <percent> <segment label>" */
my_html='title='||quote(
 trim(left(put(&var,&fmt)))||': '||
 trim(left(put(subgroup_percent,percent7.1)))||' '||trim(left(put(stack_order,stk_fmt.)))
 );
run;

/* Midpoint (category) axis: no axis label, default bar order */
axis1 label=none;

title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Race and Origin";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
 "American Indian is the only race group with increased smoking prevalence";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
 "between 1994 and 2014.";
/* Blank title drawn at a 90 degree angle, 2.5pct high (presumably a left-margin spacer) */
title4 a=90 h=2.5pct ' ';

goptions ypixels=&ypix xpixels=700;
ods html anchor='race';

/* One horizontal bar per &var value, stacked by stack_order; each bar sums the shares */
proc gchart data=plot_data anno=anno_legend;
format &var &fmt;
/*
format &var comma8.0;
*/
hbar &var / discrete type=sum sumvar=subgroup_percent nostats
 subgroup=stack_order nolegend
 maxis=axis1 raxis=axis2 noframe
 space=0 coutline=white
 html=my_html
 des='' name="&name._&var";
run;

/* End the procedure that is still active, then close the HTML
   destination and turn the listing destination back on */
quit;
ods html close;
ods listing;
