H O M E
A Brief Biography
Curriculum Vitae
Student Area (Emory)
PODCASTS
Political Music Videos
Data Sets and
Computer Programs
Scholarly
Speculative Nonfiction
The Farsight Institute
Links of Note
Book Reviews
Topics
Schedule
Live Music
Studio Music
Poetry
Peace Corps

C O N T A C T

 

Example SAS code for the Democracy Data Set

by way of Rafal Raciborski

******************************************
******* LINEAR REGRESSION examples *******
******************************************;

* let's focus on just one year, let's say 1999
* the easiest way to do it is to subset the dataset
* keeping only those obs for which year is 1999;

data work.Dem1999;
set tmp1.Democracy;
if year = 1999;
run;

* from now on, we will be working with Dem1999 dataset....;

* proc means summarizes the data;
proc means data = work.Dem1999 maxdec=2;
run;

* another important statement is proc corr which returns correlations
* between all the numerical vars in the dataset;
proc corr;
var DEMSCORE REGIONPOLITY RENT MUSLIM GDPPC;
run;

* you may also plot some vars together, e.g. let's see whether countries with
* predominant Muslim population tend to be less democratic;
* (note SAS is not case-sensitive);
proc gplot;
plot demscore * muslim;
symbol value = square;
run;

* example of a nonlinear association;
* (note the correlation between these two vars!);
proc gplot;
plot demscore * gdppc;
run;

* finally, let's regress demscore on muslim;
* (the p option fetches predicted values);
proc reg;
model demscore = muslim / p;
plot demscore*muslim p.*muslim;
run;

* how do you interpret the coefficient on muslim?
* (remember that the variable muslim varies from 0 to .99);

*effect of outliers;
*let's change America's muslim score to 1.00;
data work.Dem1999c;
set work.Dem1999;
if country = 'United States' then muslim = 1;
run;

proc reg;
model demscore = muslim / p;
plot demscore*muslim p.*muslim;
run;

quit;


********************************
********* LECTURE 2 ************
*** LINEAR REGRESSION cont'd ***
********************************;

data work.Dem1999m;
set tmp1.Democracy;
mTwo = muslim*2;
mHalf = muslim*.5;
dsTwo = demscore*2;
dsHalf = demscore*.5;
if year = 1999;
run;

* correlation does NOT depend on units of measurement;
proc corr data=work.Dem1999m;
var demscore dsTwo dsHalf MUSLIM mTwo mHalf;
run;

proc reg;
model demscore = muslim;
run;

proc reg;
model demscore = mTwo;
run;

proc reg;
model demscore = mHalf;
run;

proc reg;
model dsTwo = muslim;
run;

proc reg;
model dsTwo = mTwo;
run;

* multivariate model, just to interpret t-values;
proc reg;
model demscore = muslim netoil income regionpolity elf85d econfree trade;
* examples of the TEST statement (with labels);
* netoil: test netoil = 0;
* netoil_trade: test netoil = 0, trade = 0; * (equivalently, test netoil, trade);
* econ_trade: test econfree + trade = 1;
run;

quit;


****************************************
********* MULTICOLLINEARITY ************
****************************************;

* note: i am adding a tradeTwice var and a non-sensical var BUMP;
data work.Dem1999;
set tmp1.Democracy;
if year = 1999;
tradeTwice = 2*trade;
bump = ( 1.5*rent + .85*netoil ) / trade;
run;

proc corr;
var _NUMERIC_;
run;

proc corr best = 4;
var _NUMERIC_;
run;

proc corr;
var bump rent trade econfree;
run;

proc reg;
no_collin_trade: model demscore = econfree trade;
bump_not_correlated_with_trade: model demscore = econfree trade bump;
* example of perfect collinearity;
tradeTwice: model demscore = econfree trade tradeTwice;
no_collin_rent: model demscore = econfree trade rent;
* warning sign: you add a var to the model, and some other var drops in significance;
bump_correlated_with_rent: model demscore = econfree trade rent bump;
run;

* similarly;
proc reg;
model demscore = econfree rent income gdppp; *then add GDPPP;
run;

* detecting multicollinearity: look for strong correlations between the explanatory vars;
proc corr best=4; *BEST = n prints n highest correlation coefficients;
var _NUMERIC_;
run;

* detection cont'd: use the VIF and TOL options;
proc reg;
model demscore = econfree trade rent / vif tol;
run;

proc reg;
model demscore = econfree trade rent bump / vif tol;
run;

proc reg;
model demscore = econfree rent income gdppp / vif tol;
run;

proc corr;
var income gdppp;
run;

quit;


*******************************************
************ INTERACTION TERMS ************
************* AND LOOSE ENDS **************
*******************************************;

* substantive effects first (and logarithms)....;

data work.Dem1999;
set tmp1.Democracy;
if year = 1999;
run;

proc reg;
model demscore = econfree regionfh income; * interpret income! ;
run;

proc means data = work.Dem1999 maxdec=2;
var econfree regionfh income incomenolog;
run;

proc gplot;
plot demscore * incomenolog;
plot demscore * income;
run;

* do not run unreal counterfactuals, here we look at adjusting income by region;
proc means data = work.Dem1999 maxdec=2;
var income incomenolog;
class geo;
run;


*******************;
*continue from here;
*******************;

* create some interaction terms;
data work.Dem1999;
set tmp1.Democracy;
if year = 1999;
inc2 = incomenolog**2;
run;

proc reg;
income: model demscore = econfree regionfh income / vif tol; * interpret income! ;
inc2: model demscore = econfree regionfh incomenolog inc2 / vif tol;
run;

* more interaction terms....;

data work.Dem1999;
set tmp1.Democracy;
if year = 1999;
if demscore GE 8 then dem01 = 1;
if demscore LT 8 then dem01 = 0;
* robustness check: recode demscore in a different manner;
dem01elf = dem01*elf85d;
demElf = demscore*elf85d;
run;

proc corr best = 5;
var dem01 elf85d dem01elf;
var demscore elf85d demElf;
run;

*remember econfree is on an inverted scale!!!;
proc reg;
demElf: model econfree = demscore elf85d demElf income / vif tol;
dem01elf: model econfree = dem01 elf85d dem01elf income / vif tol;
noInterTerm: model econfree = dem01 elf85d income / vif tol;
run;

quit;