/*-------------------------------------------------------- Went to http://www.fueleconomy.gov/feg/download.shtml Clicked the 2007 zip file. Saved out the 2007_FE_guide_ALL_no_sales_May_01_2007.xlsm (sas v9.2 can't import xlsm) Brought up in Excel. Saved as a 2005 file. 2007_FE_guide_ALL_no_sales_May_01_2007.xls (sas 9.2 import wizard doesn't work with it - combination of sheet name, and sas' required sheet name string-stuff exceeded 32 chars?) Had to rename sheet from '2007_FE_guide_ALL_with_5_cycle_' to 'Sheet1' According to the "Gas Guzzler" faq on this page, the 'unadj' mpg is only used for the gas guzzler tax. I create the final dataset with "compress=yes" to save a lot of disk space (only used 2Mb instead of 7Mb). But sas jobs that use the data will run a little slower. ----------------------------------------------------------*/ /* proc import datafile= "backup\2012 FEGuide-for DOE-rel dates before 9-13-2011-no-sales-09-1-2011public.xlsx" */ proc import datafile= "2012 FEGuide for DOE-all rel before 1-23-2012-no-sales-1-18-2012public.xlsx" dbms=EXCEL out=mpg2012 replace; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2012; rename model_yr = year; rename mfr_name = Manufacturer; /* rename division = */ rename carline = Model; rename City_FE__Guide____Conventional_F = MPG_City; rename Hwy_FE__Guide____Conventional_Fu = MPG_Hwy; rename Comb_FE__Guide____Conventional_F = MPG_Cmb; rename City_Unadj_FE___Conventional_Fue = MPG_City_Unadj; rename Hwy_Unadj_FE___Conventional_Fuel = MPG_Hwy_Unadj; rename Comb_Unadj_FE___Conventional_Fue = MPG_Cmb_Unadj; rename __Cyl = Cylinders; rename Eng_Displ = Engine_liters; rename trans = Transmission; rename Fuel_Usage____Conventional_Fuel = fuel_type; rename Carline_Class_Desc = class; run; data mpg2012; set mpg2012; length Extra_info $50; if Manufacturer='GM' then Manufacturer=propcase(Division); if Manufacturer='Chrysler Group LLC' then Manufacturer=propcase(Division); if Manufacturer='FOMOCO' then Manufacturer=propcase(scan(Division,1,' ')); Extra_info=''; if substr(fuel_type,1,1) eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if Air_Aspir_Method eq 'SC' then Extra_info=trim(left(Extra_info))||' (SC)'; if Air_Aspir_Method eq 'TC' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if Guzzler__ eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; Extra_info=trim(left(Extra_info)); manufacturer=upcase(manufacturer); model=upcase(model); class=upcase(class); if index(class,'SUV 2WD')^=0 then class='S.U.V. 2WD'; if index(class,'SUV 4WD')^=0 then class='S.U.V. 4WD'; run; data mpg2012 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2012; run; /* proc import datafile= "2011FEguide-for_DOE-rel-dates_before_9-9-10-no-sales-9-2-10public.xls" proc import datafile= "2011FEguide-for_DOE-rel-dates before10-19-10-no-sales-10-06-10public.xls" proc import datafile= "2011FEGuide-for DOE rel-dates before 1-23-2011-no-sales-01-10-2011public.xlsx" */ proc import datafile= "2011FEGuide-for DOE rel-dates before 1-23-2011-no-sales-01-10-2011_All_public.xlsx" dbms=EXCEL out=mpg2011 replace; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2011; rename model_yr = year; rename mfr_name = Manufacturer; /* rename division = */ rename carline = Model; rename City_FE__Guide____Conventional_F = MPG_City; rename Hwy_FE__Guide____Conventional_Fu = MPG_Hwy; rename Comb_FE__Guide____Conventional_F = MPG_Cmb; rename City_Unadj_FE___Conventional_Fue = MPG_City_Unadj; rename Hwy_Unadj_FE___Conventional_Fuel = MPG_Hwy_Unadj; rename Comb_Unadj_FE___Conventional_Fue = MPG_Cmb_Unadj; rename __Cyl = Cylinders; rename Eng_Displ = Engine_liters; rename trans = Transmission; rename Fuel_Usage____Conventional_Fuel = fuel_type; rename Carline_Class_Desc = class; run; data mpg2011; set mpg2011; length Extra_info $50; if Manufacturer='GM' then Manufacturer=propcase(Division); if Manufacturer='Chrysler Group LLC' then Manufacturer=propcase(Division); if Manufacturer='FOMOCO' then Manufacturer=propcase(scan(Division,1,' ')); Extra_info=''; if substr(fuel_type,1,1) eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if Air_Aspir_Method eq 'SC' then Extra_info=trim(left(Extra_info))||' (SC)'; if Air_Aspir_Method eq 'TC' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if Guzzler__ eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; Extra_info=trim(left(Extra_info)); manufacturer=upcase(manufacturer); model=upcase(model); class=upcase(class); if index(class,'SUV 2WD')^=0 then class='S.U.V. 2WD'; if index(class,'SUV 4WD')^=0 then class='S.U.V. 4WD'; run; data mpg2011 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2011; run; proc import datafile= "2010_FEguide_for_DOE_rel_dates_before_10-16-09_no_sales_10-8-09_public.xls" dbms=EXCEL out=mpg2010 replace; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2010; rename model_yr = year; rename mfr_name = Manufacturer; /* rename division = */ rename carline = Model; rename City_FE__Guide____Conventional_F = MPG_City; rename Hwy_FE__Guide____Conventional_Fu = MPG_Hwy; rename Comb_FE__Guide____Conventional_F = MPG_Cmb; rename City_Unadj_FE___Conventional_Fue = MPG_City_Unadj; rename Hwy_Unadj_FE___Conventional_Fuel = MPG_Hwy_Unadj; rename Comb_Unadj_FE___Conventional_Fue = MPG_Cmb_Unadj; rename __Cyl = Cylinders; rename Eng_Displ = Engine_liters; rename trans = Transmission; rename Fuel_Usage____Conventional_Fuel = fuel_type; rename Carline_Class_Desc = class; run; data mpg2010; set mpg2010; length Extra_info $50; if Manufacturer='GM' then Manufacturer=propcase(Division); if Manufacturer='Chrysler Group LLC' then Manufacturer=propcase(Division); if Manufacturer='FOMOCO' then Manufacturer=propcase(scan(Division,1,' ')); Extra_info=''; if substr(fuel_type,1,1) eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if Air_Aspir_Method eq 'SC' then Extra_info=trim(left(Extra_info))||' (SC)'; if Air_Aspir_Method eq 'TC' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if Guzzler__ eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; Extra_info=trim(left(Extra_info)); manufacturer=upcase(manufacturer); model=upcase(model); class=upcase(class); if index(class,'SUV 2WD')^=0 then class='S.U.V. 2WD'; if index(class,'SUV 4WD')^=0 then class='S.U.V. 4WD'; run; data mpg2010 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2010; run; proc import datafile= "2009_FE_guide_for_DOE_ALL-rel_dates-no-sales-8-28-08download.xls" dbms=EXCEL out=mpg2009 replace; range='A1:AE1183'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2009; rename mfr = Manufacturer; rename car_line = Model; rename city_mpg__guide_ = MPG_City; rename hwy_mpg__guide_ = MPG_Hwy; rename comb_mpg__guide_ = MPG_Cmb; rename unrnd_city__epa_ = MPG_City_Unadj; rename unrnd_hwy__epa_ = MPG_Hwy_Unadj; rename unrnd_comp__epa_ = MPG_Cmb_Unadj; rename numb_cyl = Cylinders; rename displacement = Engine_liters; run; data mpg2009; set mpg2009; Year=2009; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fuel_type eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if spchgr eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if turbo eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if guzlr eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_blk_txt ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_blk_txt))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2009 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2009; run; /* Hmm ... for 2008, the spreadsheet had numbers at the top of the columns, rather than A, B, C, ect. SAS didn't seem to be able to read in the range with numbers (such as range='R1C1:R1153C31'), so I copy-n-pasted the data into a regular spreadsheet. */ proc import datafile= "2008_FE_guide_ALL-8-30-07.xls" dbms=EXCEL out=mpg2008 replace; sheet="'Sheet1$'"; range='A1:AE1153'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2008; rename mfr = Manufacturer; rename car_line = Model; rename city_mpg__guide_ = MPG_City; rename hwy_mpg__guide_ = MPG_Hwy; rename comb_mpg__guide_ = MPG_Cmb; rename unrnd_city__epa_ = MPG_City_Unadj; rename unrnd_hwy__epa_ = MPG_Hwy_Unadj; rename unrnd_comp__epa_ = MPG_Cmb_Unadj; rename numb_cyl = Cylinders; rename displacement = Engine_liters; run; data mpg2008; set mpg2008; Year=2008; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fuel_type eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if spchgr eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if turbo eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if guzlr eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_blk_txt ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_blk_txt))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2008 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2008; run; proc import datafile= "2007_FE_guide_ALL_no_sales_May_01_2007.xls" dbms=EXCEL out=mpg2007 replace; sheet="'Sheet1$'"; range='A1:AC1185'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2007; rename mfr = Manufacturer; rename car_line = Model; rename city_mpg__guide_ = MPG_City; rename hwy_mpg__guide_ = MPG_Hwy; rename comb_mpg__guide_ = MPG_Cmb; rename unrnd_city__epa_ = MPG_City_Unadj; rename unrnd_hwy__epa_ = MPG_Hwy_Unadj; rename unrnd_comp__epa_ = MPG_Cmb_Unadj; rename numb_cyl = Cylinders; rename displacement = Engine_liters; run; data mpg2007; set mpg2007; Year=2007; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fuel_type eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if spchgr eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if turbo eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if guzlr eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_blk_txt ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_blk_txt))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2007 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2007; run; proc import datafile= "2006_FE_Guide_14-Nov-2005_download.xls" dbms=EXCEL out=mpg2006 replace; sheet="'Sheet1$'"; range='A1:AC1077'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2006; rename mfr = Manufacturer; rename car_line = Model; rename city_mpg__guide_ = MPG_City; rename hwy_mpg__guide_ = MPG_Hwy; rename comb_mpg__guide_ = MPG_Cmb; rename unrnd_city__epa_ = MPG_City_Unadj; rename unrnd_hwy__epa_ = MPG_Hwy_Unadj; rename unrnd_comp__epa_ = MPG_Cmb_Unadj; rename numb_cyl = Cylinders; rename displacement = Engine_liters; run; data mpg2006; set mpg2006; Year=2006; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fuel_type eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if spchgr eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if turbo eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if guzlr eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_blk_txt ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_blk_txt))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2006 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2006; run; proc import datafile= "2005_FE_guide_2004oct15.xls" dbms=EXCEL out=mpg2005 replace; sheet="'Sheet1$'"; range='A1:AC1122'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2005; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg2005; set mpg2005; Year=2005; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2005 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2005; run; proc import datafile= "2004_gd04-Feb1804-RelDtFeb20.xls" dbms=EXCEL out=mpg2004 replace; sheet="'Sheet1$'"; range='A1:AC1146'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2004; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg2004; set mpg2004; Year=2004; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2004 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2004; run; proc import datafile= "2003_guide_2003_feb04-03b.xls" dbms=EXCEL out=mpg2003 replace; sheet="'Sheet1$'"; range='A1:AC1146'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2003; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg2003; set mpg2003; Year=2003; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; /* if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; */ if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2003 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2003; run; proc import datafile= "2002_guide_jan28.xls" dbms=EXCEL out=mpg2002 replace; sheet="'guide_jan28$'"; range='A1:AC945'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2002; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg2002; set mpg2002; Year=2002; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; /* if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; */ if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2002 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2002; run; proc import datafile= "2001_guide0918.xls" dbms=EXCEL out=mpg2001 replace; sheet="'2001_guide0918$'"; range='A1:AB853'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2001; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename drv = Drive_sys; run; data mpg2001; set mpg2001; Year=2001; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; /* if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; */ if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); Engine_liters=0; Engine_liters=displ; run; data mpg2001 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2001; run; proc import datafile= "2000_G6080900.xls" dbms=EXCEL out=mpg2000 replace; range='A1:AB846'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg2000; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg2000; set mpg2000; Year=2000; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; /* if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; */ if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg2000 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg2000; run; proc import datafile= "1999_guide6.xls" dbms=EXCEL out=mpg1999 replace; range='A1:AA801'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg1999; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg1999; set mpg1999; Year=1999; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg1999 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg1999; run; proc import datafile= "1998_guide6.xls" dbms=EXCEL out=mpg1998 replace; range='A1:Z808'; getnames=YES; mixed=YES; scantext=NO; run; proc datasets; modify mpg1998; rename carline_name = Model; rename cty = MPG_City; rename hwy = MPG_Hwy; rename cmb = MPG_Cmb; rename ucty = MPG_City_Unadj; rename uhwy = MPG_Hwy_Unadj; rename ucmb = MPG_Cmb_Unadj; rename cyl = Cylinders; rename displ = Engine_liters; rename drv = Drive_sys; run; data mpg1998; set mpg1998; Year=1998; Transmission=substr(trans,index(trans,'(')+1,2); length Extra_info $50; Extra_info=''; if fl eq 'D' then Extra_info=trim(left(Extra_info))||' (DIESEL)'; if s eq 'S' then Extra_info=trim(left(Extra_info))||' (SC)'; if t eq 'T' then Extra_info=trim(left(Extra_info))||' (TRBO)'; if g eq 'G' then Extra_info=trim(left(Extra_info))||' (GUZZLER)'; if eng_dscr ne '' then Extra_info=trim(left(Extra_info))||' ('||trim(left(eng_dscr))||')'; Extra_info=trim(left(Extra_info)); run; data mpg1998 (keep = Year Class Manufacturer Model MPG_City MPG_Hwy MPG_Cmb MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj Cylinders Engine_liters Extra_info Drive_sys Transmission); length Class Manufacturer Model $50; length Drive_sys Transmission $3; format Class Manufacturer Model $50.; format Drive_sys Transmission $3.; set mpg1998; run; filename textfile "1997_MFGUI.txt"; data mpg1997 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1996_MFGUI.txt"; data mpg1996 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; * column positions changed, just for 1995; filename textfile "1995_MFGUI.txt"; data mpg1995 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,12,4); Manufacturer=substr(longline,95,33); Model=substr(longline,22,28); MPG_City=0; MPG_City=substr(longline,129,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,134,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,139,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,144,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,149,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,154,2); Transmission=substr(longline,56,2); Cylinders=0; Cylinders=substr(longline,62,2); Engine_liters=0; Engine_liters=substr(longline,165,4); Extra_info=substr(longline,172,30); Drive_sys=substr(longline,237,1); run; filename textfile "1994_MFGUI.txt"; data mpg1994 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1993_MFGUI.txt"; data mpg1993 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1992_MFGUI.txt"; data mpg1992 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1991_MFGUI.txt"; data mpg1991 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1990_MFGUI.txt"; data mpg1990 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1989_MFGUI.txt"; data mpg1989 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1988_MFGUI.txt"; data mpg1988 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1987_MFGUI.txt"; data mpg1987 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1986_MFGUI.txt"; data mpg1986 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); Drive_sys=substr(longline,226,1); run; filename textfile "1985_MFGUI.txt"; data mpg1985 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City=0; MPG_City=substr(longline,118,2); MPG_Hwy= 0; MPG_Hwy= substr(longline,123,2); MPG_Cmb= 0; MPG_Cmb= substr(longline,128,2); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); if Engine_liters > 1000 then Engine_liters=Engine_liters/1000; Drive_sys=substr(longline,226,1); run; * Unadjusted mpg is usually higher than the adjusted mpg; * The unadjusted number is the (old) standard used for gas-guzzler tax; filename textfile "1984_MFGUI.txt"; data mpg1984 (drop = longline); infile textfile lrecl=500 pad; input longline $ 1-500; Year=0; Year=substr(longline,1,4); Manufacturer=substr(longline,84,33); Model=substr(longline,11,28); MPG_City_Unadj=0; MPG_City_Unadj=substr(longline,133,2); MPG_Hwy_Unadj= 0; MPG_Hwy_Unadj= substr(longline,138,2); MPG_Cmb_Unadj= 0; MPG_Cmb_Unadj= substr(longline,143,2); Transmission=substr(longline,45,2); Cylinders=0; Cylinders=substr(longline,51,2); Engine_liters=0; Engine_liters=substr(longline,154,4); Extra_info=substr(longline,161,30); if Engine_liters > 1000 then Engine_liters=Engine_liters/1000; run; data mpg_data; /* Bring in all the individual-year datasets */ set mpg2012 mpg2011 mpg2010 mpg2009 mpg2008 mpg2007 mpg2006 mpg2005 mpg2004 mpg2003 mpg2002 mpg2001 mpg2000 mpg1999 mpg1998 mpg1997 mpg1996 mpg1995 mpg1994 mpg1993 mpg1992 mpg1991 mpg1990 mpg1989 mpg1988 mpg1987 mpg1986 mpg1985 mpg1984 ; Drive_sys=trim(left(Drive_sys))||'WD'; if index(Class,'SPEC PURP VEH -') ne 0 then Class=substr(Class,17); if index(Class,'SPECIAL PURPOSE VEHICLE') ne 0 then Class='S.U.V. - '||substr(Class,25); Model=trim(left(Model)); Class=trim(left(Class)); Manufacturer=trim(left(Manufacturer)); /* Some manufacturers had a stray '5' in them - parse it out */ if index(Manufacturer,' 5') ne 0 then Manufacturer=scan(Manufacturer,1,'5'); if Manufacturer eq 'FORD MOTOR COMPANY' then Manufacturer='FORD'; if Manufacturer eq 'Geo' then Manufacturer='GEO'; if Manufacturer eq 'GM' then Manufacturer='GMC'; if Manufacturer eq 'GENERAL MOTORS' then Manufacturer='GM'; if Manufacturer eq 'HYUNDAI MOTOR COMPANY' then Manufacturer='HYUNDAI'; if Manufacturer eq 'ISUZU MOTORS LIMITED' then Manufacturer='ISUZU'; if Manufacturer eq 'JAGUAR CARS INC.' then Manufacturer='JAGUAR'; if Manufacturer eq 'JAGUAR CARS' then Manufacturer='JAGUAR'; if Manufacturer eq 'JEEP CORPORATION' then Manufacturer='JEEP'; if Manufacturer eq 'KIA MOTORS CORPORATION' then Manufacturer='KIA'; if Manufacturer eq 'LAFORZA AUTOMIBILES INC' then Manufacturer='LAFORZA AUTOMIBILES'; if Manufacturer eq 'LAMBORGHINI EAST' then Manufacturer='LAMBORGHINI'; if Manufacturer eq 'LAMBORGHINI OF NORTH AMERICA' then Manufacturer='LAMBORGHINI'; if Manufacturer eq 'LAND ROVER LTD' then Manufacturer='LAND ROVER'; if Manufacturer eq 'AMERICAN MOTORS CORPORATION' then Manufacturer='AMC'; if Manufacturer eq 'ASC INCORPORATED' then Manufacturer='ASC INC.'; if Manufacturer eq 'ASTON MARTIN LAGONDA LTD' then Manufacturer='ASTON MARTIN'; if Manufacturer eq 'CXAUTOMOTIVE' then Manufacturer='CX AUTOMOTIVE'; if Manufacturer eq 'NISSAN MOTOR CO., LTD.' then Manufacturer='NISSAN'; if Manufacturer eq 'NISSAN MOTOR COMPANY, LTD.' then Manufacturer='NISSAN'; if Manufacturer eq 'PAS,INC' then Manufacturer='PAS INC'; if Manufacturer eq 'ROUSH' then Manufacturer='ROUSH PERFORMANCE'; if Manufacturer eq 'VW' then Manufacturer='VOLKSWAGEN'; if index(Manufacturer,'AUSTIN ROVER') ne 0 then Manufacturer='AUSTIN ROVER'; if index(Manufacturer,'BILL DOVELL') ne 0 then Manufacturer='BILL DOVELL'; if index(Manufacturer,'CALLAWAY') ne 0 then Manufacturer='CALLAWAY'; if index(Manufacturer,'CONSULIER') ne 0 then Manufacturer='CONSULIER'; if index(Manufacturer,'CX AUTOMOTIVE') ne 0 then Manufacturer='CX AUTOMOTIVE'; if index(Manufacturer,'DAIHATSU') ne 0 then Manufacturer='DAIHATSU'; if index(Manufacturer,'LAFORZA') ne 0 then Manufacturer='LAFORZA'; if index(Manufacturer,'MAZDA') ne 0 then Manufacturer='MAZDA'; if index(Manufacturer,'MITSUBISHI') ne 0 then Manufacturer='MITSUBISHI'; if index(Manufacturer,'RED SHIFT') ne 0 then Manufacturer='RED SHIFT'; if index(Manufacturer,'ROLLS-ROYCE') ne 0 then Manufacturer='ROLLS-ROYCE'; if index(Manufacturer,'ROVER GROUP') ne 0 then Manufacturer='ROVER GROUP'; if index(Manufacturer,'SALEEN') ne 0 then Manufacturer='SALEEN'; if index(Manufacturer,'SHELBY') ne 0 then Manufacturer='SHELBY'; if index(Manufacturer,'SUZUKI') ne 0 then Manufacturer='SUZUKI'; if index(Manufacturer,'VECTOR') ne 0 then Manufacturer='VECTOR'; if index(Manufacturer,'VOLVO') ne 0 then Manufacturer='VOLVO'; if index(Manufacturer,'YUGO') ne 0 then Manufacturer='YUGO'; if index(Manufacturer,'PAS INC') ne 0 then Manufacturer='PAS INC'; if index(Manufacturer,'Wallace Environmental') ne 0 then Manufacturer='WALLACE ENVIRONMENTAL'; if (Manufacturer ne '') and (Model ne '') then output; run; /* Try to fill-in the 'Class' for the older years that didn't have Class variable */ proc sql; create table foo as select unique Year, Class, Manufacturer, Model from mpg_data where (Class ne '') and (Manufacturer ne '') and (Model ne '') order by manufacturer, model, class; create table foo as select Year, min(year) as minyear, Class, Manufacturer, Model from foo group by manufacturer, model; create table foo as select unique Class, Manufacturer, Model from foo where year=minyear; create table mpg_data as select mpg_data.*, foo.Class as Class_guess from mpg_data left join foo on (trim(left(mpg_data.Manufacturer))=trim(left(foo.Manufacturer))) and (trim(left(mpg_data.Model))=trim(left(foo.Model))); quit; run; data mpg_data (drop=Class_guess); set mpg_data; if trim(left(Class)) eq '' then Class=Class_guess; run; /* Now fill-in 'Class' for some that are obvious */ data mpg_data (drop=Class_guess); set mpg_data; length Class_guess $50; if Class eq '' then do; if index(Manufacturer,'JEEP')^=0 and index(Model,'CHEROKEE')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'JEEP')^=0 and index(Model,'WAGONEER')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'JEEP')^=0 and index(Model,'WRANGLER')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'JEEP')^=0 and index(Model,'CJ-7')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'JEEP')^=0 and index(Model,'CJ-8')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'GMC')^=0 and index(Model,'SUBURBAN')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'GMC')^=0 and index(Model,'JIMMY')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'FORD')^=0 and index(Model,'BRONCO')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'BLAZER')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'SUBURBAN')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'ISUZU')^=0 and index(Model,'TROOPER')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'TOYOTA')^=0 and index(Model,'4-RUNNER')^=0 then Class_guess='S.U.V.'; if index(Manufacturer,'NISSAN')^=0 and index(Model,'PATHFINDER')^=0 and index(Model,'VAN')=0 then Class_guess='S.U.V.'; if index(Model,'RANGE ROVER')^=0 then Class_guess='S.U.V.'; if index(Model,'LAND ROVER')^=0 then Class_guess='S.U.V.'; if Class_guess ne '' and index(Model,'4WD')^=0 then Class_guess=trim(left(Class_guess))||' - 4WD'; if Class_guess ne '' and index(Model,'2WD')^=0 then Class_guess=trim(left(Class_guess))||' - 2WD'; if Class_guess ne '' then Class=Class_guess; end; Class_guess=''; if Class eq '' then do; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'C10 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'C20 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'K10 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'K20 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'CHEVROLET')^=0 and index(Model,'K2500 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'D100')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'D150')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'D250 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'W250 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'FORD')^=0 and index(Model,'F250 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'GMC')^=0 and index(Model,'C15 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'GMC')^=0 and index(Model,'K15 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'GMC')^=0 and index(Model,'C25 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'GMC')^=0 and index(Model,'K25 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'JEEP')^=0 and index(Model,'J-10 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'JEEP')^=0 and index(Model,'J-20 PICKUP')^=0 then Class_guess='STANDARD PICKUP TRUCKS'; if index(Manufacturer,'JEEP')^=0 and index(Model,'COMANCHE')^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'ISUZU')^=0 and index(Model,"P'UP")^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'RAM50')^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'DODGE')^=0 and index(Model,'DAKOTA')^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'MAZDA')^=0 and index(Model,'B2200')^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'MAZDA')^=0 and index(Model,'B2600')^=0 then Class_guess='PICKUP TRUCKS'; if index(Manufacturer,'SUBARU')^=0 and index(Model,'BRAT')^=0 then Class_guess='PICKUP TRUCKS'; if Class_guess ne '' and index(Model,'4WD')^=0 then Class_guess=trim(left(Class_guess))||' 4WD'; if Class_guess ne '' and index(Model,'2WD')^=0 then Class_guess=trim(left(Class_guess))||' 2WD'; if Class_guess ne '' then Class=Class_guess; end; Class_guess=''; if Class eq '' then do; if index(Model,'TRUCK')^=0 then Class_guess='PICKUP TRUCKS'; if index(Model,'PICKUP')^=0 then Class_guess='PICKUP TRUCKS'; if index(Model,'P/U')^=0 then Class_guess='PICKUP TRUCKS'; if Class_guess ne '' and index(Model,'4WD')^=0 then Class_guess=trim(left(Class_guess))||' 4WD'; if Class_guess ne '' and index(Model,'2WD')^=0 then Class_guess=trim(left(Class_guess))||' 2WD'; if Class_guess ne '' then Class=Class_guess; end; Class_guess=''; if Class eq '' then do; if index(Model,'VANDURA')^=0 then Class_guess='VANS'; if index(Model,'CLUB WAGON')^=0 then Class_guess='VANS'; if index(Model,'CARAVAN')^=0 then Class_guess='VANS'; if index(Model,'SPORTVAN')^=0 then Class_guess='VANS'; if index(Model,'VANAGON')^=0 then Class_guess='VANS'; if index(Model,' VAN ')^=0 then Class_guess='VANS'; if substr(Model,1,4) eq 'VAN ' then Class_guess='VANS'; if Class_guess ne '' and index(Model,'PASSENGER')^=0 then Class_guess=trim(left(Class_guess))||', PASSENGER'; if Class_guess ne '' and index(Model,'CARGO')^=0 then Class_guess=trim(left(Class_guess))||', CARGO'; if Class_guess ne '' then Class=Class_guess; end; /* Note that at this point, we've already caught the Wagoneers and Club Wagon vans */ if Class eq '' and index(Model,'WAGON')^=0 and index(Model,'COACHBUILDER')=0 then Class='STATION WAGON'; if Class eq '' and Manufacturer='PONTIAC' and Model='FIERO' then Class='TWO SEATERS'; if class='S.U.V. - 2WD' then class='S.U.V. 2WD'; run; proc sort data=mpg_data out=mpg_data; by descending Year Manufacturer Model Drive_sys Transmission Engine_liters; run; /* Order the variables, and make the labels & formats the way I want them */ data mpg_data; label Year='Year'; label Manufacturer='Manufacturer'; label Model='Model'; label Drive_sys='Drive System'; length Drive_sys $3; label Transmission='Transmission'; label Cylinders='Cylinders'; label Engine_liters='Engine Liters'; format Engine_liters comma4.1; label Class='Class'; label Class_guess='Class_guess'; label MPG_City='MPG City'; label MPG_Hwy='MPG Highway'; label MPG_Cmb='MPG Combined'; label MPG_City_Unadj='MPG City (guzzler)'; label MPG_Hwy_Unadj='MPG Highway (guzzler)'; label MPG_Cmb_Unadj='MPG Combined (guzzler)'; format MPG_City_Unadj MPG_Hwy_Unadj MPG_Cmb_Unadj comma5.1; label Extra_info='Extra Info'; set mpg_data; run; libname mylib '.'; data mylib.mpg_data (compress=yes); set mpg_data; run; /* proc contents; run; options ls=200 ps=1000 nocenter; proc print; run; */