2 Robustness checking and exploration

Input cleaned data

library(pacman)
p_load(tidyverse, data.table, haven, glue, Hmisc, here, fixest, knitr, kableExtra, DT)

# NOTE: the former `rm(list = ls())` call was removed. It only deletes objects
# from the global environment (attached packages and options stay as they
# are), so it never produced a truly clean session, and it silently wiped the
# workspace of anyone sourcing this file interactively. Restart R instead
# when a clean slate is needed.
gc()

          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 1191878 63.7    2357902  126         NA  1754165 93.7
Vcells 2013691 15.4    8388608   64      32768  2818562 21.6

Input cleaned data

# Load the two Stata files shipped in the project's OriginalFiles/ folder.
# `survey` is kept exactly as read_dta() returns it; the replication data are
# wrapped in a data.table (Clara's preferred format).
survey <- read_dta(here::here("OriginalFiles", "Survey.dta"))
dts <- data.table(
  read_dta(here::here("OriginalFiles", "Replication_Dataset.dta"))
)

Code

# Add two helper columns used by the descriptive tables:
#   euro_class - the `class` code shifted down by one
#   diesel     - TRUE where `fuel` equals 1
# NOTE(review): the descriptive table shows a euro_class of 998, which looks
# like a missing/unknown code that was shifted along with the real classes -
# confirm against the codebook and recode to NA if so.
dts <- mutate(
  dts,
  euro_class = class - 1,
  diesel = fuel == 1
)

We proposed a range of robustness tests. We were able to do a subset of these.

2.1 Rates of voting for each car group split: implication for identification

We present a simple comparison of the Lega voting rates, voting changes, and other outcomes by each car-ownership group. The table below captures the ‘main effect’ the authors investigate. Euro 4 diesel fuel owners supported Lega more in the post-policy European elections, relative to Euro 4 gas owners. This also held for Euro 4 diesel fuel owners relative to diesel fuel owners in other classes. However, for these same comparisons in an earlier (2018) election, the rates of voting for Lega were fairly similar, or even went in the opposite direction. The second table below enables sorting by diesel or class, to aid this consideration.

Furthermore, the base rates of Lega voting were of a similar order of magnitude across groups, suggesting that the choice of functional form might not matter much (see discussion below).

Code

# Mean Lega vote in the 2018 and European elections for every
# euro_class x diesel cell, rendered as a static HTML table.
# Cells with fewer than `min_cell_n` respondents are suppressed: their means
# are set to NA and the rows are then dropped.
min_cell_n <- 50

dts %>%
  group_by(euro_class, diesel) %>%
  summarise(
    # NA_real_ (not a bare logical NA) keeps both mean columns numeric no
    # matter which cells end up suppressed.
    mn_lega_18 = if (n() >= min_cell_n) {
      mean(vote_lega_2018, na.rm = TRUE)
    } else {
      NA_real_
    },
    mn_lega_euro = if (n() >= min_cell_n) {
      mean(vote_lega_euro, na.rm = TRUE)
    } else {
      NA_real_
    },
    count = n(), # respondents in the cell, including missing votes
    .groups = "drop" # return an ungrouped result
  ) %>%
  # Drops the suppressed cells; it also drops any cell whose 2018 mean is
  # undefined because every 2018 vote in it is missing.
  filter(!is.na(mn_lega_18)) %>%
  # NOTE(review): "tyle" in the caption is a typo for "type"; left unchanged
  # here so the string matches the already-rendered output.
  kable(
    caption = "Voting for Lega by Euro category and fuel tyle",
    format = "html",
    escape = FALSE,
    digits = 2
  ) %>%
  kable_styling()

Voting for Lega by Euro category and fuel tyle
euro_class	diesel	mn_lega_18	mn_lega_euro	count
4	FALSE	0.12	0.18	122
4	TRUE	0.13	0.24	293
5	FALSE	0.19	0.22	171
5	TRUE	0.14	0.15	120
6	FALSE	0.19	0.24	170
6	TRUE	0.17	0.17	133
998	FALSE	0.49	0.51	55

Code

# Same euro_class x diesel summary as the static table, shown as an
# interactive (sortable) DT widget.
# Cells with fewer than `min_cell_n` respondents are suppressed: their means
# are set to NA and the rows are then dropped.
min_cell_n <- 50

dts %>%
  group_by(euro_class, diesel) %>%
  summarise(
    # NA_real_ (not a bare logical NA) keeps both mean columns numeric no
    # matter which cells end up suppressed.
    mn_lega_18 = if (n() >= min_cell_n) {
      mean(vote_lega_2018, na.rm = TRUE)
    } else {
      NA_real_
    },
    mn_lega_euro = if (n() >= min_cell_n) {
      mean(vote_lega_euro, na.rm = TRUE)
    } else {
      NA_real_
    },
    count = n(), # respondents in the cell, including missing votes
    .groups = "drop" # return an ungrouped result
  ) %>%
  # Drops the suppressed cells; it also drops any cell whose 2018 mean is
  # undefined because every 2018 vote in it is missing.
  filter(!is.na(mn_lega_18)) %>%
  # Round every numeric column to 3 decimals for display.
  mutate(across(where(is.numeric), ~ round(., 3))) %>%
  DT::datatable()

2.2 Main results (table 2, recoded)

Table 2 recoded

# Table 2 ------

# Keep the same choices made by the authors
library(fixest)

# Linear probability models for voting Lega in the European election.
# Every model drops target == 3 and rows with no_answer_euro != 0; all but
# mod3 also drop target == 4. Models 2-6 absorb EDU1-EDU4 and
# profile_gross_personal_eu as fixed effects and use heteroskedasticity-
# robust standard errors.

# (1) Diesel x Euro 4 interaction only, no controls.
# NOTE(review): mod1 is the only model without vcov = "hetero", so etable()
# reports IID standard errors for it - confirm this matches the authors.
mod1 <- feols(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4,
  data = dts[target != 3 & target != 4 & no_answer_euro == 0, ]
)

# (2) Adds age and gender (entered as a factor here, numeric elsewhere).
mod2 <- feols(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + as.factor(female)
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[target != 3 & target != 4 & no_answer_euro == 0, ],
  vcov = "hetero"
)

# (3) Uses the *_ass versions of the car dummies plus dummy_car_unknown, and
# keeps target == 4 respondents in the sample.
mod3 <- feols(
  vote_lega_euro ~ dummy_diesel_ass + dummy_euro_4_ass + diesel_euro4_ass
    + age + female + dummy_car_unknown
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[target != 3 & no_answer_euro == 0, ],
  vcov = "hetero"
)

# (4) Controls for the 2018 Lega vote; also drops no_answer_2018 != 0.
mod4 <- feols(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_2018
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_2018 == 0,
  ],
  vcov = "hetero"
)

# (5) Controls for the regional Lega vote; also drops no_answer_regional != 0.
mod5 <- feols(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_regional
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_regional == 0,
  ],
  vcov = "hetero"
)

# (6) Controls for the municipal Lega vote; also drops
# no_answer_municipal != 0.
mod6 <- feols(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_municipal
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_municipal == 0,
  ],
  vcov = "hetero"
)

# Render the six models side by side as an HTML table, showing only the
# car-related coefficients (nothing is written to disk).
etable(
  mod1, mod2, mod3, mod4, mod5, mod6,
  keep = c("dummy_diesel", "dummy_euro_4", "diesel_euro4"),
  digits = 3
) %>%
  kable(format = "html", escape = FALSE) %>%
  kable_styling()

	mod1	mod2	mod3	mod4	mod5	mod6
Dependent Var.:	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro

dummy_diesel	-0.093 (0.058)	-0.105. (0.057)		-0.024 (0.036)	0.003 (0.040)	-0.0007 (0.049)
dummy_euro_4	-0.048 (0.058)	-0.048 (0.059)		0.007 (0.033)	0.026 (0.036)	-0.028 (0.043)
dummy_diesel x dummy_euro_4	0.119 (0.077)	0.183* (0.079)		0.115* (0.047)	0.094. (0.052)	0.146* (0.060)
dummy_diesel_ass			-0.082 (0.055)
dummy_euro_4_ass			-0.019 (0.059)
diesel_euro4_ass			0.154* (0.078)
Fixed-Effects:	--------------	---------------	--------------	--------------	--------------	---------------
EDU1	No	Yes	Yes	Yes	Yes	Yes
EDU2	No	Yes	Yes	Yes	Yes	Yes
EDU3	No	Yes	Yes	Yes	Yes	Yes
EDU4	No	Yes	Yes	Yes	Yes	Yes
profile_gross_personal_eu	No	Yes	Yes	Yes	Yes	Yes
___________________________	______________	_______________	______________	______________	______________	_______________
S.E. type	IID	Heteroske.-rob.	Heterosk.-rob.	Heterosk.-rob.	Heterosk.-rob.	Heteroske.-rob.
Observations	602	602	665	583	551	533
R2	0.00499	0.13011	0.15288	0.60097	0.57668	0.49357
Within R2	--	0.05339	0.06408	0.56532	0.53752	0.44663

2.3 Alternate control/treatment splits

I don’t think we did this, but we could. It seems to me like an important robustness check.

2.4 Functional form

The main outcome variable is binary 0/1. The authors consider linear models throughout. I (David Reinstein) believe their identification implicitly depends on

each of the unmeasured underlying characteristics (picked up by the proxy control in the ‘difference’) being linearly associated with the voting outcome, measured in percentage points of voting for Lega …

But the treated and control groups might have started from different baseline levels of voting for Lega (however, as we saw above, these were of a similar order of magnitude).

If the true model is nonlinear (e.g., proportional, or with ceiling effects), the results here may be misleading. E.g., suppose the true effect of the policy on voting was the same for each car-ownership group as a percentage of the initial vote. Inappropriately estimating a linear model would find a substantial ‘difference in difference’ between these groups, even though the effect did not interact with car ownership itself.

Chen and Roth (2023), citing Wooldridge (2023):

An alternative identifying assumption is to impose that, in the absence of treatment, the percentage changes in the mean would have been the same for the treated and control group. As in Wooldridge (2023), this can be formalized using a “ratio” version of the parallel trends assumption,

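\[ \frac{E[Y_{it}(0) \mid D_i = 1, Post_t = 1]}{E[Y_{it}(0) \mid D_i = 1, Post_t = 0]} = \frac{E[Y_{it}(0) \mid D_i = 0, Post_t = 1]}{E[Y_{it}(0) \mid D_i = 0, Post_t = 0]} \]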

We probably don’t have time to test for proportional trends (and we have limited data for a powerful test). But, off the cuff, it seems an equally plausible model. Chen and Roth offer an estimation strategy:

Conveniently, this estimate can also be obtained using Poisson QMLE to estimate \(Y_{it} = \exp(\beta_0 + D_i \times Post_t \beta_1 + D_i\beta_2 + Post_t\beta_3)\epsilon_{it}\)

and then computing \(\hat{\theta}_{ATT\%} = \exp(\hat{\beta}_1) - 1 = -0.82\) as shown in column (1) of Table 6.

Estimate this as a logit model? Other specifications, like proportional hazards? Roth recommends quasi-Poisson; see Section 5.2, “A DiD setting: Sequeira (2016)”.

We re-ran the model as a Poisson model. Results were qualitatively similar at a glance.

Code

# Table 2 with Poisson ------

# Poisson counterparts of the six Table 2 models: same formulas, samples and
# fixed effects, estimated with fepois() instead of feols().
# Coefficients are on the log scale; exp(coef) - 1 gives the proportional
# effect.

# (1p) Diesel x Euro 4 interaction only, no controls.
# NOTE(review): mod1p is the only model without vcov = "hetero", so etable()
# reports IID standard errors for it - confirm this is intended.
mod1p <- fepois(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4,
  data = dts[target != 3 & target != 4 & no_answer_euro == 0, ]
)

# (2p) Adds age and gender (entered as a factor here, numeric elsewhere).
mod2p <- fepois(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + as.factor(female)
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[target != 3 & target != 4 & no_answer_euro == 0, ],
  vcov = "hetero"
)

# (3p) Uses the *_ass versions of the car dummies plus dummy_car_unknown, and
# keeps target == 4 respondents in the sample.
mod3p <- fepois(
  vote_lega_euro ~ dummy_diesel_ass + dummy_euro_4_ass + diesel_euro4_ass
    + age + female + dummy_car_unknown
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[target != 3 & no_answer_euro == 0, ],
  vcov = "hetero"
)

# (4p) Controls for the 2018 Lega vote; also drops no_answer_2018 != 0.
mod4p <- fepois(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_2018
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_2018 == 0,
  ],
  vcov = "hetero"
)

# (5p) Controls for the regional Lega vote; also drops
# no_answer_regional != 0.
mod5p <- fepois(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_regional
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_regional == 0,
  ],
  vcov = "hetero"
)

# (6p) Controls for the municipal Lega vote; also drops
# no_answer_municipal != 0.
mod6p <- fepois(
  vote_lega_euro ~ dummy_diesel + dummy_euro_4 + dummy_diesel * dummy_euro_4
    + age + female + vote_lega_municipal
  | EDU1 + EDU2 + EDU3 + EDU4 + profile_gross_personal_eu,
  data = dts[
    target != 3 & target != 4 & no_answer_euro == 0 & no_answer_municipal == 0,
  ],
  vcov = "hetero"
)

# Render the six Poisson models side by side as an HTML table, showing only
# the car-related coefficients (nothing is written to disk).
etable(
  mod1p, mod2p, mod3p, mod4p, mod5p, mod6p,
  keep = c("dummy_diesel", "dummy_euro_4", "diesel_euro4"),
  digits = 3
) %>%
  kable(
    caption = "poisson, raw coefs",
    format = "html",
    escape = FALSE,
    digits = 3
  ) %>%
  kable_styling()

poisson, raw coefs
	mod1p	mod2p	mod3p	mod4p	mod5p	mod6p
Dependent Var.:	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro	vote_lega_euro

dummy_diesel	-0.411 (0.289)	-0.461. (0.271)		-0.008 (0.197)	0.109 (0.218)	0.128 (0.307)
dummy_euro_4	-0.189 (0.271)	-0.224 (0.248)		0.026 (0.191)	0.176 (0.183)	0.002 (0.199)
dummy_diesel x dummy_euro_4	0.519 (0.379)	0.712* (0.348)		0.343 (0.259)	0.245 (0.263)	0.522 (0.322)
dummy_diesel_ass			-0.383 (0.256)
dummy_euro_4_ass			-0.154 (0.253)
diesel_euro4_ass			0.610. (0.344)
Fixed-Effects:	--------------	---------------	--------------	--------------	--------------	--------------
EDU1	No	Yes	Yes	Yes	Yes	Yes
EDU2	No	Yes	Yes	Yes	Yes	Yes
EDU3	No	Yes	Yes	Yes	Yes	Yes
EDU4	No	Yes	Yes	Yes	Yes	Yes
profile_gross_personal_eu	No	Yes	Yes	Yes	Yes	Yes
___________________________	______________	_______________	______________	______________	______________	______________
S.E. type	IID	Heteroske.-rob.	Heterosk.-rob.	Heterosk.-rob.	Heterosk.-rob.	Heterosk.-rob.
Observations	602	602	665	583	551	533
Squared Cor.	0.00499	0.14100	0.16530	0.45263	0.38159	0.43376
Pseudo R2	0.00337	0.08354	0.09005	0.29877	0.30633	0.26270
BIC	731.70	809.31	924.67	640.01	614.81	623.33