*upload your .csv file that you generated as an ipums extract;
*in ipums you can select for a state- this code assumes that you have done just that;
*this code works to make big table of all pumas for whatever csv you start with;
*check your leading zeros in .xls;
*remember to include statefip in your download;
PROC IMPORT OUT= WORK.file_name1
DATAFILE= "filepath.csv"
DBMS=CSV REPLACE;
GETNAMES=YES;
DATAROW=2;
RUN;
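*a hedged alternative (a sketch, not in the original post)- GUESSINGROWS makes PROC IMPORT scan more rows before guessing each column type, which can help when columns with leading zeros get read as numeric;
PROC IMPORT OUT= WORK.file_name1
DATAFILE= "filepath.csv"
DBMS=CSV REPLACE;
GETNAMES=YES;
DATAROW=2;
GUESSINGROWS=1000;
RUN;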
*make pumas 5 digits- keep leading zeros;
DATA file_name2;
SET file_name1;
PUMA = PUT(US00A_PUMA, z5.);
RUN;
*make statefip 2 digits (keep a leading zero);
options obs=max;
DATA file_name2;
SET file_name2;
FIP = PUT(US00A_STATEFIP, z2.);
RUN;
*now i want to turn my NIU's (9999999) and (9999998) into nulls;
*this is specific to my data set, but shows how to make a value a null;
options obs=max;
data file_name2;
set file_name2;
if inctot = 9999999 then inctot =.;
if inctot = 9999998 then inctot =.;
run;
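*a hedged sketch (not in the original post)- if several variables share the same NIU codes, an array can recode them in one pass. the second variable (incwage) is hypothetical and only shows the pattern;
data file_name2;
set file_name2;
array niu{*} inctot incwage; /* substitute your own list of income variables */
do i = 1 to dim(niu);
if niu{i} in (9999998, 9999999) then niu{i} = .;
end;
drop i;
run;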
*rename puma to puma1 to stay clean;
*rename a variable;
options obs=max;
data file_name2;
set file_name2;
rename PUMA=PUMA1;run;
*concatenate fip+puma1 to make puma;
options obs=max;
data file_name2;
set file_name2;
PUMA=catt(fip,puma1);run;
*make sure puma is 7 digits;
*drop puma1;
*note- PUT(PUMA, z7.) alone fails because PUMA is character after CATT and z7. is a numeric format, so convert to numeric and back to keep the leading zero;
*also dropping some variables here;
options obs=max;
DATA file_name2;
SET file_name2;
PUMA = PUT(INPUT(PUMA, 7.), z7.);run;
options obs=max;
DATA file_name2;
SET file_name2;
drop puma1 geolev1;
RUN;
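*optional check (a sketch, not from the original post)- confirm every PUMA is exactly 7 characters;
proc sql;
select count(*) as bad_pumas
from file_name2
where length(PUMA) ne 7;
quit;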
*I want to create a table of means;
*I want it based on a puma level;
*first I want to set up formats so that sex, edattan, and race are clear;
proc format;
value rac
10='White'
20='Black'
21='Black African'
22='Black Caribbean'
23='Afro-Ecuadorian'
24='Other Black'
30='Indigenous'
31='American Indian'
40='Asian'
41='Chinese'
42='Japanese'
43='Korean'
44='Vietnamese'
45='Filipino'
46='Indian'
47='Pakistani'
48='Bangladeshi'
49='Other Asian'
50='Mixed race'
51='Brown (Brazil)'
52='Mestizo (Indigenous and White)'
53='Mulatto (Black and White) (Ecuador)'
54='Coloured (South Africa)'
55='Two or more races'
60='Other'
61='Montubio (Ecuador)'
99='Unknown';
value educ
0='NIU (not in universe)'
1='Less than primary completed'
2='Primary completed'
3='Secondary completed'
4='University completed'
9='Unknown';
value sx
9='Unknown'
2='Female'
1='Male';run;
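*quick check (a sketch, not in the original post)- apply the formats in a frequency table to confirm the codes map the way you expect;
proc freq data=file_name2;
tables sex raceus edattan;
format sex sx. raceus rac. edattan educ.;
run;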
proc format;
value fips
01='Alabama'
02='Alaska'
04='Arizona'
05='Arkansas'
06='California'
08='Colorado'
09='Connecticut'
10='Delaware'
11='District of Columbia'
12='Florida'
13='Georgia'
15='Hawaii'
16='Idaho'
17='Illinois'
18='Indiana'
19='Iowa'
20='Kansas'
21='Kentucky'
22='Louisiana'
23='Maine'
24='Maryland'
25='Massachusetts'
26='Michigan'
27='Minnesota'
28='Mississippi'
29='Missouri'
30='Montana'
31='Nebraska'
32='Nevada'
33='New Hampshire'
34='New Jersey'
35='New Mexico'
36='New York'
37='North Carolina'
38='North Dakota'
39='Ohio'
40='Oklahoma'
41='Oregon'
42='Pennsylvania'
44='Rhode Island'
45='South Carolina'
46='South Dakota'
47='Tennessee'
48='Texas'
49='Utah'
50='Vermont'
51='Virginia'
53='Washington'
54='West Virginia'
55='Wisconsin'
56='Wyoming';
run;
*recode fip to numeric;
data file_name2;
set file_name2;
newfip = input(fip,2.);
run;
*this will include code to export;
*it also includes the creation of a nifty table;
*that will be exported;
*you can skip this if you just want to make a;
*csv for export;
ods tagsets.excelxp
file="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\export_name.xls"
style = minimal
options (orientation = 'landscape'
fittopage = 'yes'
pages_fitwidth = '1'
pages_fitheight = '100');
*now the proc tab code;
*note- options obs=100 below limits the table to the first 100 observations for a quick check, switch to options obs=max for the full file;
options obs = 100;
proc tabulate data = file_name2;
class newfip puma edattan raceus sex;
var inctot;
table newfip = 'state' * puma = 'PUMA area' , edattan='Edattan' * (n rowpctn) raceus='race' sex ='sex' inctot = 'income' * (mean);
format newfip fips. sex sx. edattan educ. raceus rac.;
run;
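*close the excelxp destination so the .xls file is actually written, and reset the obs limit so the steps below see the full file;
ods tagsets.excelxp close;
options obs=max;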
*lose some unnecessary variables;
data file_name2;
set work.file_name2;
drop wtper sample cntry resident marstd edattand us00A_puma;
run;
*to make a data set for BERKELEY 2000 ONLY;
*you subset to the Berkeley PUMA;
data BERKDAT00;
set work.file_name2;
drop US00A_STATEFIP newfip;
if puma = '0602401';
run;
*write the 2000 data for BERKELEY to a csv;
proc export data=work.BERKDAT00
outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\berkeley_puma_ca_00.csv"
dbms=csv
replace;run;
*make a data set for MARIN 2000 ONLY;
data MARINDAT00;
set work.file_name2;
drop US00A_STATEFIP newfip;
if puma = '0601201' or puma = '0601202';
run;
*make a data set for OTOWN 2000 ONLY;
data OAKLANDDAT00;
set work.file_name2;
if puma = '0602402' or puma = '0602403' or puma = '0602404';
drop US00A_STATEFIP newfip;
run;
*make a data set for part of tulare county;
data tulare_part00;
set work.file_name2;
if puma = '0603503';
drop US00A_STATEFIP newfip;
run;
*make a data set for all of tulare county;
data tulare_all00;
set work.file_name2;
if puma = '0603503' or puma = '0603501' or puma = '0603502';
drop US00A_STATEFIP newfip;
run;
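*equivalent shorthand (a sketch, not in the original post)- the IN operator does the same multi-PUMA subset in one condition;
data tulare_all00;
set work.file_name2;
if puma in ('0603501', '0603502', '0603503');
drop US00A_STATEFIP newfip;
run;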
*write the 2000 data for tulare to a csv;
proc export data=work.tulare_part00
outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\tulare_part_00.csv"
dbms=csv
replace;run;
*write the 2000 data for tulare_all to a csv;
proc export data=work.tulare_all00
outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\tulare_all_00.csv"
dbms=csv
replace;run;
*write the 2000 data for OAKLAND to a csv;
proc export data=work.OAKLANDDAT00
outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\otown_puma_ca_00.csv"
dbms=csv
replace;run;
*write the 2000 data for MARIN to a csv;
proc export data=work.MARINDAT00
outfile="\\tsclient\Y\ipumsCodap\Ipums_and_Pumas\marin_puma_ca_00.csv"
dbms=csv
replace;run;
Wednesday, June 24, 2015
SAS CODE "Simple Random Sample and Export"
*The following piece of SAS imports the really long and wide file;
* terrapop_extract_1016.csv, and randomly draws 5000 households;
* out of it. In this case, a household is determined by a shared serial;
* number -- everyone in the household has the same serial number,;
* so 5000 households exports as approximately 11000 people.;
*seed is part of the randomization process, srs is simple random sample;
PROC IMPORT OUT= WORK.demos
DATAFILE= "filepath.csv"
DBMS=CSV REPLACE;
GETNAMES=YES;
DATAROW=2;
RUN;
proc surveyselect data= demos out=demos1 method=srs sampsize=5000 seed=377183 noprint;
samplingunit serial;
run;
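*quick check (a sketch, not in the original post)- count distinct households and total persons to confirm roughly 5000 households were drawn;
proc sql;
select count(distinct serial) as n_households, count(*) as n_persons
from demos1;
quit;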
*this is my 5000 household random sample;
options obs=max;
proc export data=work.demos1
outfile='Y:\demos1.csv'
dbms=csv
replace;run;
Here is a little snapshot of the data...
44135083 | 201896 | 6 | 840001 | 840001 | 0 | 2 | 999 | 12000 | 20 | 350 | 11 | 1 | 2.65E+08 | 4583402 | 99 | 110 | 1 | 2 | 2 | 100 | 5 | 8407 | 2 | 45 | 0 | 11900 | 2010 |