Wednesday, July 8, 2015

Working with MicroData, Creating Pumas

*upload your .csv file that you generated as an ipums extract;
*in ipums you can select for a state- this code assumes that you have done just that;
*this code works to make big table of all pumas for whatever csv you start with;
*check your leading zeros in .xls;
*remember to include statefip in your download;

PROC IMPORT OUT= WORK.file_name1
            DATAFILE= "filepath.csv"
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2;
RUN;

*make pumas 5 digits- keep leading zeros;
DATA file_name2;
SET file_name1;
PUMA = PUT(US00A_PUMA, z5.);
RUN;

*make statefip 2 digits (keep a leading zero);
options obs=max;
DATA file_name2;
SET file_name2;
FIP = PUT(US00A_STATEFIP, z2.);
RUN;

*now i want to turn my NIU's (9999999) and (9999998) into nulls;
*this is specific to my data cet, but shows how to make a value a null;
options obs=max;
data file_name2;
set file_name2;
if inctot = 9999999 then inctot =.;
if inctot = 9999999 then inctot =.;
run;

*rename puma to puma1 to stayclean;
*rename a variable;
options obs=max;
data file_name2;
set file_name2;
rename PUMA=PUMA1;run;

*concatenate fip+puma1 to make puma;
options obs=max;
data file_name2;
set file_name2;
PUMA=catt(fip,puma1);run;

*make sure puma is 7 digit;
*drop puma1;
*it doesn't seem to like z7 here, think it's a char$ v.s #(number) issue);
*also dropping some variables here;
options obs=max;
DATA file_name2;
SET file_name2;
PUMA = PUT(PUMA, z7.);run;
options obs=max;
DATA file_name2;
SET file_name2;
drop puma1 geolev1;
RUN;

*I want to create a table of means;
*I want it based on a puma level;
*first I want to format so that my gender is clear, my edattan is clear, my race is clear;

proc format;
value rac
10='White'
20='Black'
21='Black African'
22='Black Caribbean'
23='Afro-Ecuadorian'
24='Other Black'
30='Indigenous'
31='American Indian'
40='Asian'
41='Chinese'
42='Japanese'
43='Korean'
44='Vietnamese'
45='Filipino'
46='Indian'
47='Pakistani'
48='Bangladeshi'
49='Other Asian'
50='Mixed race'
51='Brown (Brazil)'
52='Mestizo (Indigenous and White)'
53='Mulatto (Black and White) (Ecuador)'
54='Coloured (South Africa)'
55='Two or more races'
60='Other'
61='Montubio (Ecuador)'
99='Unknown';


value educ
0='NIU (not in universe)'
1='Less than primary completed'
2='Primary completed'
3='Secondary completed'
4='University completed'
9='Unknown';


value sx
9='Unknown'
2='Female'
1='Male';run;


proc format;
value fips
01='Alabama'
02='Alaska'
04='Arizona'
05='Arkansas'
06='California'
08='Colorado'
09='Connecticut'
10='Delaware'
11='District of Columbia'
12='Florida'
13='Georgia'
15='Hawaii'
16='Idaho'
17='Illinois'
18='Indiana'
19='Iowa'
20='Kansas'
21='Kentucky'
22='Louisiana'
23='Maine'
24='Maryland'
25='Massachusetts'
26='Michigan'
27='Minnesota'
28='Mississippi'
29='Missouri'
30='Montana'
31='Nebraska'
32='Nevada'
33='New Hampshire'
34='New Jersey'
35='New Mexico'
36='New York'
37='North Carolina'
38='North Dakota'
39='Ohio'
40='Oklahoma'
41='Oregon'
42='Pennsylvania'
44='Rhode island'
45='South Carolina'
46='South Dakota'
47='Tennessee'
48='Texas'
49='Utah'
50='Vermont'
51='Virginia'
53='Washington'
54='West Virginia'
55='Wisconsin'
56='Wyoming';

*recode fip to numeric;
data file_name2;
set file_name2;
newfip = input(fip,2.);
run;

*this will include code to export;
*it also includes the creation of a nifty table;
*that will be exported;
*you can skip this if you just want to make a;
*csv for export;

ods tagsets.excelxp
file="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\export_name.xls"
style = minimal
options (orientation = 'landscape'
fittopage = 'yes'
pages_fitwidth = '1'
pages_fitheight = '100');


*now the proc tab code;
options obs = 100;
proc tabulate data = file_name2;
class   newfip puma edattan raceus sex;
var inctot;
table newfip = 'state' * puma = 'PUMA area'  , edattan='Edattan' * (n rowpctn) raceus='race' sex ='sex' inctot = 'income' * (mean);
format newfip fips. sex sx. edattan educ. raceus rac. inctot;
run;

*lose some unneccessary variables;
*us;
data file_name2;
set work.file_name2;
drop wtper sample cntry resident marstd edattand us00A_puma;
run;


*to make a data set for BERKELEY 2000 ONLY;
*you make ;
data BERKDAT00;
set work.file_name2;
drop US00A_STATEFIP newfip;
if puma = 0602401;
run;

*write the 2000 data for BERKELEY to a csv;
proc export data=work.BERKDAT00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\berkeley_puma_ca_00.csv"
   dbms=csv
   replace;run;


*make a data set for MARIN 2000 ONLY;
data MARINDAT00;
set work.file_name2;
drop US00A_STATEFIP newfip;
if puma = 0601201 or  puma = 0601202;
run;

*make a data set for OTOWN 2000 ONLY;
data OAKLANDDAT00;
set work.file_name2;
if puma = 0602402 or  puma = 0602403 or puma = 0602404;
drop US00A_STATEFIP newfip;
run;

*make a data set for part of tulare counte;
data tulare_part00;
set work.file_name2;
if puma = 0603503;
drop US00A_STATEFIP newfip;
run;

*make a data set for all of tulare counte;
data tulare_all00;
set work.file_name2;
if puma = 0603503 or puma= 0603501 or puma=0603502;
drop US00A_STATEFIP newfip;
run;

*write the 2000 data for BERKELEY to a csv;
proc export data=work.BERKDAT00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\berkeley_puma_ca_00.csv"
   dbms=csv
   replace;run;

*write the 2000 data for tulare to a csv;
proc export data=work.tulare_part00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\tulare_part_00.csv"
   dbms=csv
   replace;run;

*write the 2000 data for tulare_all to a csv;
proc export data=work.tulare_all00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\tulare_all_00.csv"
   dbms=csv
   replace;run;

*write the 2000 data for OAKLAND to a csv;
proc export data=work.OAKLANDDAT00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\otown_puma_ca_00.csv"
   dbms=csv
   replace;run;

   *write the 2000 data for MARIN to a csv;
proc export data=work.MARINDAT00
   outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\marin_puma_ca_00.csv"
   dbms=csv
   replace;run;


No comments:

Post a Comment