library(tidyverse)
library(rio) #for importing
Tutorial: Affective Polarization
I wanted to recreate the figures from The Origins and Consequences of Affective Polarization in the United States (Iyengar et al., 2019) and Political Sectarianism in America (Finkel et al., 2020) shown below. They use American National Election Studies data to show how Affective Polarization has changed over time. This tutorial will not reproduce the figures exactly, but it will at least give you the base figure to work from to modify for your own purposes. This code uses tidy principles.
Iyengar et al., (2019)
Finkel et al., (2020)
Overview
The steps are:
- Grab data from ANES
- Import
- Clean
- Mutate data for plotting
- Plot
Grab ANES data
Grab the Time Series Cumulative Data File from: https://electionstudies.org/data-center/anes-time-series-cumulative-data-file. Personally, I like grabbing SPSS files because they include metadata that might be useful. The file is about 82MB.
Import data
=import("anes_timeseries_cdf_spss_20211118.sav") raw
Clean data
Let’s clean the data, first by selecting just the variables of interest. I found the variables using the codebook on the ANES page.
- VCF0004: Year of Study
- VCF0218: Thermometer - Democratic Party
- VCF0224: Thermometer - Republican Party
- VCF0301: Party Identification of Respondent- 7-point Scale
And we might as well drop any participants that haven’t filled out any of these questions,
=raw %>%
dfselect(VCF0004, VCF0218, VCF0224, VCF0301) %>%
filter(!is.na(VCF0004) & !is.na(VCF0218) & !is.na(VCF0224) & !is.na(VCF0301))
%>% head df
VCF0004 VCF0218 VCF0224 VCF0301
1 1978 80 50 1
2 1978 50 50 4
3 1978 40 60 7
4 1978 60 60 3
5 1978 85 60 3
6 1978 50 50 2
Mutate data
We then want to change party identification from a 7-point scale into just Democrats and Republicans. Let’s look at the party ID variable to see the value coding.
$VCF0301 %>% glimpse df
num [1:42940] 1 4 7 3 3 2 3 2 2 1 ...
- attr(*, "label")= chr "Party Identification of Respondent- 7-point Scale"
- attr(*, "format.spss")= chr "F1.0"
- attr(*, "display_width")= int 9
- attr(*, "labels")= Named num [1:8] 0 1 2 3 4 5 6 7
..- attr(*, "names")= chr [1:8] "0. DK; NA; other; refused to answer; no Pre IW" "1. Strong Democrat" "2. Weak Democrat" "3. Independent - Democrat" ...
$VCF0301 %>% attr("labels") #because glimpse cuts off the labels df
0. DK; NA; other; refused to answer; no Pre IW
0
1. Strong Democrat
1
2. Weak Democrat
2
3. Independent - Democrat
3
4. Independent - Independent
4
5. Independent - Republican
5
6. Weak Republican
6
7. Strong Republican
7
We should drop the following: 0, 3, 4, 5. And we also want to collapse Strong and Weak partisans into the same category. We can do both of these at the same time using case_when(). Let’s also drop the NAs.
=df %>%
dfmutate(PID=case_when(
== 1 ~ "Democrat",
VCF0301 == 2 ~ "Democrat",
VCF0301 == 6 ~ "Republican",
VCF0301 == 7 ~ "Republican",
VCF0301 TRUE ~ NA_character_ #functions like ELSE
%>%
)) filter(!is.na(PID)) #drop the NAs
Now we need to make an inparty feeling thermomoter and and outparty feeling thermometer.
=df %>%
dfmutate(
inparty_feeling=case_when(
== "Democrat" ~ VCF0218, #if they are a democrat, then use democrat feeling variable
PID == "Republican" ~ VCF0224 #if they are a repoublican, then use republican feeling variable
PID
),
outparty_feeling=case_when(
== "Democrat" ~ VCF0224, #if they are a democrat, then use republican feeling variable
PID == "Republican" ~ VCF0218 #if they are a repoublican, then use democrat feeling variable
PID
)
)
%>% head() df
VCF0004 VCF0218 VCF0224 VCF0301 PID inparty_feeling outparty_feeling
1 1978 80 50 1 Democrat 80 50
2 1978 40 60 7 Republican 60 40
3 1978 50 50 2 Democrat 50 50
4 1978 60 60 2 Democrat 60 60
5 1978 70 40 2 Democrat 70 40
6 1978 85 85 1 Democrat 85 85
Now we need to make a dataframe for summary stats through time.
=
df_p%>%
df select(VCF0004, PID, inparty_feeling, outparty_feeling) %>%
group_by(VCF0004) %>%
summarise(inparty_feeling= mean(inparty_feeling, na.rm=T),
outparty_feeling=mean(outparty_feeling, na.rm=T)
)
df_p
# A tibble: 17 × 3
VCF0004 inparty_feeling outparty_feeling
<dbl> <dbl> <dbl>
1 1978 73.9 47.0
2 1980 75.0 45.6
3 1982 76.2 43.4
4 1984 76.8 44.9
5 1986 76.4 45.1
6 1988 77.6 44.0
7 1990 73.6 45.6
8 1992 72.5 40.8
9 1994 72.6 40.5
10 1996 74.3 39.5
11 1998 72.9 38.8
12 2000 76.5 39.8
13 2004 77.0 36.4
14 2008 77.0 32.3
15 2012 74.4 25.5
16 2016 70.2 24.4
17 2020 74.6 17.6
Looks good. To plot it, we need to reshape the data to a long format.
=df_p %>%
df_ppivot_longer(-VCF0004, names_to="feeling", values_to="Warmth")
%>% head df_p
# A tibble: 6 × 3
VCF0004 feeling Warmth
<dbl> <chr> <dbl>
1 1978 inparty_feeling 73.9
2 1978 outparty_feeling 47.0
3 1980 inparty_feeling 75.0
4 1980 outparty_feeling 45.6
5 1982 inparty_feeling 76.2
6 1982 outparty_feeling 43.4
Plotting
Now, we can plot it.
Simple plot
%>%
df_p ggplot(aes(x=VCF0004, y=Warmth, color=feeling)) +
geom_point() +
geom_line()
Simple plot 2
Not bad. Here’s the final version I ended up exporting as SVG so I can further play around with aesthetics in PowerPoint.
%>%
df_p mutate(feeling=recode(feeling,
inparty_feeling="Inparty Feeling",
outparty_feeling="Outparty Feeling",
%>%
)) ggplot(aes(x=VCF0004, y=Warmth, color=feeling)) +
geom_ribbon(
data=. %>% group_by(VCF0004) %>% rstatix::get_summary_stats(Warmth),
aes(ymin=min,ymax=max, y=mean, color=NULL),
fill="gray96", show.legend = F)+
geom_hline(yintercept=50, color="gray", linetype="dashed")+
geom_point(size=2.5) +
geom_line(size=1, show.legend = F) +
scale_x_continuous(breaks=seq(1980, 2020, by=4)) +
scale_y_continuous(limits=c(0,100),
labels = paste0(seq(0, 100, by=25), "°")
+
) theme_minimal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle=45, hjust=1),
legend.title = element_blank()) +
labs(x="Election year", y="Feeling thermometer rating",
title="Affective Polarization Over Time Using ANES Data")
Affective Polarization
Let’s add an affective polarization columns. Iyengar et al. (2019) calcualtes this as the difference between the feelings, whereas Finkel et al. (2020) calculates this as the difference between inparty love (inparty feeling - 50) and outparty hate (50 - outparty feeling). I’ll do both.
# this is our earlier code, but i am re-running it because i reshaped it to be long
# and we want wide format to do our calculations
=
df_p%>%
df select(VCF0004, PID, inparty_feeling, outparty_feeling) %>%
group_by(VCF0004) %>%
summarise(inparty_feeling= mean(inparty_feeling, na.rm=T),
outparty_feeling=mean(outparty_feeling, na.rm=T)
%>%
)
#new code
rowwise() %>%
mutate(aff_polar_iyengar=inparty_feeling-outparty_feeling,
aff_polar_finkel = (inparty_feeling-50)-(50-outparty_feeling)
)
%>% head df_p
# A tibble: 6 × 5
# Rowwise:
VCF0004 inparty_feeling outparty_feeling aff_polar_iyengar aff_polar_finkel
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1978 73.9 47.0 26.9 20.8
2 1980 75.0 45.6 29.4 20.6
3 1982 76.2 43.4 32.8 19.6
4 1984 76.8 44.9 31.9 21.7
5 1986 76.4 45.1 31.3 21.5
6 1988 77.6 44.0 33.7 21.6
Let’s see if we can’t reproduce each figure more closely. In my experience the fine details require a lot of extra code, so this will only roughly approximate what the figures show.
Here’s the Iyengar reproduction.
Iyengar et al. (2019)
%>%
df_p
select(-aff_polar_finkel) %>% #we just want iyengar's aff polar
#reshape
pivot_longer(-VCF0004, names_to="feeling", values_to="Warmth") %>%
mutate(feeling=recode(feeling,
inparty_feeling="In-party feeling",
outparty_feeling="Out-party feeling",
aff_polar_iyengar="Affective polarization"
),feeling=factor(feeling, levels=c("In-party feeling", #setting levels just so ordering colors/shapes
"Out-party feeling", #is more intuitive. otherwise, ggplot orders this
"Affective polarization" #alphabetically
))%>%
)
#plot
ggplot(aes(x=VCF0004, y=Warmth, color=feeling)) +
geom_hline(yintercept=50, color="gray", linetype="dashed")+
geom_point(aes(shape=feeling, fill=feeling), size=2) +
geom_line(aes(linetype=feeling)) +
scale_x_continuous(breaks=seq(1980, 2020, by=4)) +
scale_y_continuous(limits=c(0,100),
labels = paste0(seq(0, 100, by=25), "°")
+
) theme_bw() +
theme(panel.grid = element_blank(),
# axis.text.x = element_text(angle=45, hjust=1),
legend.title = element_blank(),
axis.ticks.length=unit(-0.15, "cm") #flip this ticks inside
+
) labs(x="Election year", y="Feeling thermometer rating",
title="Affective Polarization - Iyengar et al. (2019) reproduction") +
#use Iyengar's colors. You can use a color picker to sample colors from images and get the hex code
#I use Color Cop for Windows (free)
scale_color_manual(values=c("#1D9E74", #green
"#AD6FAE", #purple
"#6C6D70" #gray
+
)) scale_fill_manual(values=c("#1D9E74", #green
"#AD6FAE", #purple
"#6C6D70" #gray
+
))
scale_shape_manual(values=c(19, #circle
15, #square
23 #diamond
+
)) scale_linetype_manual(values=c("dotted","dashed","solid"))
#Note: it seems like making a gradient background is not straightforward in ggplot
#I would just export this as SVG and then make a gradient in another program
Finkel et al. (2020)
This figure is interesting because it’s actually two figures: feeling thermometers on top and affective polarization on bottom.
Top figure
%>%
df_p
select(-aff_polar_iyengar, -aff_polar_finkel) %>% #we just want iyengar's aff polar
#reshape
pivot_longer(-VCF0004, names_to="feeling", values_to="Warmth") %>%
mutate(feeling=recode(feeling,
inparty_feeling="In-party feeling",
outparty_feeling="Out-party feeling",
aff_polar_finkel="Affective polarization"
),feeling=factor(feeling, levels=c("In-party feeling", #setting levels just so ordering colors/shapes
"Out-party feeling", #is more intuitive. otherwise, ggplot orders this
"Affective polarization" #alphabetically
))%>%
)
#plot
ggplot(aes(x=VCF0004, y=Warmth, color=feeling)) +
geom_hline(yintercept=50, color="black")+
geom_smooth(aes(group=feeling,color=NULL, linetype=feeling),
method=lm, se=F, color="black", size=.75)+
geom_point(aes(shape=feeling, fill=feeling), size=2) +
# geom_line(aes(linetype=feeling)) +
scale_x_continuous(breaks=seq(1980, 2020, by=4)
+
) scale_y_continuous(limits=c(0,100),
labels = paste0(seq(0, 100, by=25), "°")
+
) theme_classic() +
theme(panel.grid = element_blank(),
legend.title = element_blank(),
panel.border = element_blank()
+
) labs(x="Election year", y="Feeling thermometer ratings",
title="Affective Polarization - Finkel et al. (2020) reproduction") +
#use Iyengar's colors. You can use a color picker to sample colors from images and get the hex code
#I use Color Cop for Windows (free)
scale_color_manual(values=c("#D8631B",
"#00AEAE", #purple
"#6C6D70" #gray
+
)) scale_fill_manual(values=c("#1D9E74", #green
"#AD6FAE", #purple
"#6C6D70" #gray
+
))
scale_shape_manual(values=c(15, #square
17, #circle
15 #diamond
+
)) scale_linetype_manual(values=c("solid","dashed","solid"))
Bottom figure
%>%
df_p
select(VCF0004, aff_polar_finkel) %>% #
pivot_longer(-VCF0004, names_to="feeling", values_to="Warmth") %>% #reshape
mutate(feeling=recode(feeling,
aff_polar_finkel="Affective polarization"
%>%
))
#plot
ggplot(aes(x=VCF0004, y=Warmth)) +
geom_hline(yintercept=0, color="black")+
geom_line()+
geom_point(shape=15, size=2) +
scale_x_continuous(breaks=seq(1980, 2020, by=4)) +
scale_y_continuous(limits=c(-15, 25),
breaks=seq(-15, 25, by=5),
labels = paste0(seq(-15, 25, by=5), "°")
+
) theme_classic() +
theme(panel.grid = element_blank(),
legend.title = element_blank(),
panel.border = element_blank(),
axis.title.x = element_blank()
+
) labs(x="Election year", y="In-party love - Out-party hate",
title="Affective Polarization - Finkel et al. (2020)")