= "NY"
state_code = "36"
state_fips = 0.06 heat_cutoff
In [1]:
NY HEAT and the Hudson Valley
In [2]:
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(gt)
library(tigris)
To enable caching of data, set `options(tigris_use_cache = TRUE)`
in your R script or .Rprofile.
library(sf)
library(tidycensus)
options(tigris_use_cache=TRUE, tigris_year=2019)
In [3]:
<- tribble(
hudson_valley ~county, ~region, ~fips,
"Westchester", "Lower Hudson Valley", 36119,
"Rockland", "Lower Hudson Valley", 36087,
"Putnam", "Lower Hudson Valley", 36115,
"Orange", "Mid-Hudson Valley", 36121,
"Dutchess", "Mid-Hudson Valley", 36027,
"Ulster", "Mid-Hudson Valley", 36111,
"Sullivan", "Upper Hudson Valley", 36053,
"Columbia", "Upper Hudson Valley", 36021,
"Greene", "Upper Hudson Valley", 36017,
"Albany", "Upper Hudson Valley", 36001,
"Rensselaer", "Upper Hudson Valley", 36083
)
In [4]:
<- read_csv("/mnt/data/buildings2/geocorr/puma_county_mapping.csv", skip=1) |>
puma_county_mapping ::clean_names() janitor
Rows: 187 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): PUMA (2012), State abbr., County name, PUMA12 name
dbl (5): State code, County code, Total housing units (2020 Census), county-...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
In [5]:
<- c("BLD", "PUMA", "HFL", "TEN", "YRBLT", "HINCP", "NP", "MV", "BDSP", "ELEP", "FULP", "GASP", "MRGP", "RNTP", "RMSP", "ELEFP", "FULFP", "GASFP")
vars
# Get data from the Census API
<- paste0("/mnt/data/pums_1_22_", state_fips, ".Rds")
filename
# Get data from the Census API
if (file.exists(filename)) {
<- readRDS(filename)
raw_data else {
} <- get_pums(variables = vars,
raw_data state = state_fips,
year = 2022,
survey = "acs1",
rep_weights = "housing",
recode = TRUE) |>
distinct(SERIALNO, .keep_all = TRUE) # housing units only, not people
saveRDS(raw_data, filename)
}
In [6]:
<- readxl::read_excel("/mnt/data/buildings2/HUD_2021_income_limits_by_NY_county.xlsx") |>
county_lmi pivot_longer(-County, names_to = "NP") |>
rename(lmi_cutoff = value)
In [7]:
<- raw_data |>
data filter(HINCP != -60000) |> # remove missing for now
mutate(HINCP = HINCP+1,
bill_in_rent = (ELEFP == "1") | (GASFP == "1") | (FULP == "1"),
GASP = case_when(
<= 4 ~ 0,
GASP .default = GASP
),ELEP = case_when(
<= 3 ~ 0,
ELEP .default = ELEP
),FULP = case_when(
<= 3 ~ 0,
FULP .default = FULP
),yearly_energy_bill = (ELEP + GASP) * 12 + FULP,
energy_burden_pct = yearly_energy_bill / (HINCP+1),
monthly_energy_burden = (yearly_energy_bill - (heat_cutoff*HINCP))/12,
is_energy_burdened = energy_burden_pct > heat_cutoff,
# `utility` bill does not include delivered fuel
yearly_utility_bill = (ELEP + GASP) * 12,
utility_burden_pct = yearly_utility_bill / (HINCP+1),
monthly_utility_burden = (yearly_utility_bill - (heat_cutoff*HINCP))/12,
is_utility_burdened = utility_burden_pct > heat_cutoff) |>
inner_join(puma_county_mapping, by = c("PUMA" = "puma_2012"),
relationship = 'many-to-many') |>
inner_join(hudson_valley, by = c('county_code' = 'fips')) |>
mutate(wt = WGTP * puma12_to_county_allocation_factor) |>
mutate(NP = as.character(NP)) |>
inner_join(county_lmi, by = c("county" = "County", "NP")) |>
mutate(is_lmi = HINCP <= lmi_cutoff)
In [8]:
<- data |>
total_rollup filter(!bill_in_rent) |>
summarise(
households_included = sum(wt),
median_income = DescTools::Median(HINCP, weights = wt),
pct_energy_burdened = weighted.mean(is_energy_burdened, w = wt),
pct_utility_burdened = weighted.mean(is_utility_burdened, w =wt),
avg_monthly_bill_of_burdened = weighted.mean(case_when(
~ yearly_energy_bill/12), w = wt,
is_energy_burdened na.rm = TRUE),
utility_burden_of_burdened = mean(case_when(
~ monthly_utility_burden), w = wt,
is_utility_burdened na.rm = TRUE)) |>
mutate(county = "All",
region = "Hudson Valley")
In [9]:
<- data |>
county_rollup filter(!bill_in_rent) |>
summarise(
households_included = sum(wt),
median_income = DescTools::Median(HINCP, weights = wt),
pct_energy_burdened = weighted.mean(is_energy_burdened, w = wt),
pct_utility_burdened = weighted.mean(is_utility_burdened, w =wt),
avg_monthly_bill_of_burdened = weighted.mean(case_when(
~ yearly_energy_bill/12), w = wt,
is_energy_burdened na.rm = TRUE),
utility_burden_of_burdened = mean(case_when(
~ monthly_utility_burden), w = wt,
is_utility_burdened na.rm = TRUE),
.by = county) |>
left_join(hudson_valley, by = "county") |>
select(-fips)
In [10]:
$pct_energy_burdened[[1]] |> scales::percent(1) total_rollup
[1] "24%"
In [11]:
bind_rows(total_rollup, county_rollup) |>
select(region, county, pct_energy_burdened, avg_monthly_bill_of_burdened, utility_burden_of_burdened) |>
group_by(region) |>
gt() |>
row_group_order(c("Hudson Valley", "Upper Hudson Valley", "Mid-Hudson Valley", "Lower Hudson Valley")) |>
fmt_currency(c(avg_monthly_bill_of_burdened,
decimals = 0) |>
utility_burden_of_burdened), fmt_percent(c(pct_energy_burdened), decimals = 0) |>
cols_label(
county = "County",
pct_energy_burdened = "Energy Burdened Households (%)",
avg_monthly_bill_of_burdened = "Avg. Monthly Energy Bills of Burdened Households",
utility_burden_of_burdened = "Avg. Monthly Savings Under NY HEAT for Burdened Households",
|>
) tab_header("NY HEAT in the Hudson Valley", subtitle = "Household energy burdens and savings by county") |>
tab_footnote("Excluding households with utility bills in rent, or missing fuel or income information.\nSource: 2022 ACS. Win Climate, 2024")
NY HEAT in the Hudson Valley | |||
---|---|---|---|
Household energy burdens and savings by county | |||
County | Energy Burdened Households (%) | Avg. Monthly Energy Bills of Burdened Households | Avg. Monthly Savings Under NY HEAT for Burdened Households |
Hudson Valley | |||
All | 24% | $348 | $148 |
Upper Hudson Valley | |||
Albany | 19% | $255 | $117 |
Greene | 35% | $318 | $136 |
Rensselaer | 23% | $287 | $119 |
Columbia | 32% | $319 | $140 |
Sullivan | 28% | $294 | $131 |
Mid-Hudson Valley | |||
Dutchess | 25% | $338 | $138 |
Ulster | 32% | $340 | $144 |
Orange | 27% | $269 | $111 |
Lower Hudson Valley | |||
Rockland | 27% | $389 | $192 |
Putnam | 29% | $288 | $116 |
Westchester | 21% | $404 | $183 |
Excluding households with utility bills in rent, or missing fuel or income information. Source: 2022 ACS. Win Climate, 2024 |
In [12]:
|>
county_rollup summarise(sum(utility_burden_of_burdened * households_included))
# A tibble: 1 × 1
`sum(utility_burden_of_burdened * households_included)`
<dbl>
1 131845967.
In [13]:
|>
data filter(!is.na(is_lmi)) |>
group_by(is_lmi) |>
summarise(pct_energy_burdened = weighted.mean(is_energy_burdened, w = wt))
# A tibble: 2 × 2
is_lmi pct_energy_burdened
<lgl> <dbl>
1 FALSE 0.0507
2 TRUE 0.458
In [14]:
<- counties("NY") county_boundaries
Retrieving data for the year 2019
|>
county_boundaries right_join(county_rollup, by = c("NAME" = "county")) |>
mutate(label = paste0(NAME," (", scales::percent(pct_energy_burdened, 1) ,")")) |>
ggplot(aes(fill=pct_energy_burdened, label=label)) +
geom_sf(color = "black") +
::geom_label_repel(
ggrepelaes(geometry = geometry),
fill = "white",
stat = "sf_coordinates",
min.segment.length = 0
+
) scale_fill_distiller(palette = "Spectral",
name = "Percentage",
limits = c(0, .5),
labels = scales::percent_format(scale=100)) +
theme_void() +
labs(title = "Energy Burden by County",
subtitle = "Win Climate, 2024\n(Source: 2022 ACS)",
pct = "Percent")
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
In [15]:
|>
county_boundaries right_join(county_rollup, by = c("NAME" = "county")) |>
mutate(label = paste0(NAME," (", scales::dollar(utility_burden_of_burdened, 1) ,")")) |>
ggplot(aes(fill=utility_burden_of_burdened, label=label)) +
geom_sf(color = "black") +
::geom_label_repel(
ggrepelaes(geometry = geometry),
fill = "white",
stat = "sf_coordinates",
min.segment.length = 0
+
) scale_fill_distiller(palette = "Greens",
direction = 1,
name = "NY HEAT Savings",
labels = scales::dollar_format(accuracy = 1),
limits = c(0, 250)) +
theme_void() +
labs(title = "Average Monthly NY HEAT Savings by County",
subtitle = "Win Climate, 2024\n(Source: 2022 ACS)")
Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
give correct results for longitude/latitude data
In [16]:
<- pumas("NY", 2019) puma_boundaries
Retrieving data for the year 2019
<- data |>
puma_rollup filter(!bill_in_rent) |>
summarise(
households_included = sum(wt),
median_income = DescTools::Median(HINCP, weights = wt),
pct_energy_burdened = weighted.mean(is_energy_burdened, w = wt),
avg_monthly_bill_of_burdened = weighted.mean(case_when(
~ yearly_energy_bill/12), w = wt,
is_energy_burdened na.rm = TRUE),
utility_burden_of_burdened = mean(case_when(
~ monthly_energy_burden), w = wt,
is_energy_burdened na.rm = TRUE),
.by = c(puma12_name, PUMA)) |>
mutate(GEOID10 = paste0(state_fips, PUMA))
|>
puma_boundaries right_join(puma_rollup, by = c("GEOID10")) |>
#mutate(label = paste0(NAME," (", scales::percent(pct_energy_burdened, 1) ,")")) |>
ggplot(aes(fill=pct_energy_burdened)) +#, label=label)) +
geom_sf(color = "black", alpha=.5) +
#ggrepel::geom_label_repel(
# aes(geometry = geometry),
# fill = "white",
# stat = "sf_coordinates",
# min.segment.length = 0
# ) +
scale_fill_distiller(palette = "Spectral",
name = "Percentage",
limits = c(0, .5),
labels = scales::percent_format(scale=100)) +
theme_void() +
theme(legend.position = "top") +
labs(title = "Energy Burden by County",
subtitle = "Win Climate for Spring Street Climate Fund, 2024\n(Source: 2022 ACS)",
pct = "Percent")