This document lists commonly used functions that could be used in targets workflow.
Inflation-adjusted wages and corrected 2-digit NAICS code table
- This is one of the most commonly used function and it is used almost every data request and program evaluation project.
- The order of input tables must be kept in order to avoid errors.
- The target year could be parameterized as well.
# calculate adjusted wages
# adjust to the target year of the CPI
# but use quarter for the original CPI
# adjusted wage = unadjusted wage * (target cpi / original cpi)
# source of procedure to adjust quarterly wages:
# https://kystats.ky.gov/Content/Reports/2023_MSPSR_Technical_Notes.pdf
make_adjusted_wages <- function(wf_uiwage_tbl, wf_naics_tbl, CPIAUCNS_tbl, target_year=2024L){
# NDOL tables
unadjusted_wages_lt <-
wf_uiwage_tbl |>
select(nswers_id, employer_id, wages, year_and_quarter) |>
mutate(wages = as.numeric(REPLACE(REPLACE(wages, "$", ""), ",", ""))) |>
filter(wages > 0) |>
distinct()
naics_2_lt <-
wf_naics_tbl |>
mutate(naics_2 = substr(naics, 1, 2)) |>
select(employer_id = ui_account, naics_2)
# get CPI adjustment data
cpi_yq_lt <-
CPIAUCNS_tbl |>
rename(cpi_date = DATE, cpi = VALUE) |>
mutate(year = year(cpi_date),
year_and_quarter = as.integer(CONCAT(year(cpi_date), quarter(cpi_date)))) |>
group_by(year) |>
mutate(cpi_yr = mean(cpi, na.rm = TRUE)) |>
group_by(year_and_quarter) |>
mutate(cpi_qtr = mean(cpi, na.rm = TRUE)) |>
distinct(year_and_quarter, year, cpi_yr, cpi_qtr)
# get target CPI
target_CPI <- cpi_yq_lt |> filter(year == target_year) |> pull(cpi_qtr) |> mean()
# get adjusted wages
# get 2-digit naics code
adjusted_wages_lt <-
unadjusted_wages_lt |>
left_join(cpi_yq_lt, by = "year_and_quarter") |>
left_join(naics_2_lt, by = "employer_id") |>
mutate(adjusted_wages = wages * (local(target_CPI) / cpi_qtr)) |>
select(nswers_id, employer_id, naics_2, year_and_quarter, wages, adjusted_wages) |>
distinct()
return(adjusted_wages_lt)
}
# Usage in _targets
list(
load_rdb_tables,
tar_target(cpi_target_year,
2024L),
tar_target(adjusted_wages_lt2,
make_adjusted_wages(wf_uiwage_tbl, wf_naics_tbl, CPIAUCNS_tbl, target_year=cpi_target_year)
)
Postsecondary enrollment table from all sources (two-year partners, four-year partners, & NSC)
- This function combines enrollment tables from partners and NSC.
- The resulting table does not include the following fields:
- first_time_student_flag, full_time_student_flag, and degree_seeking_student_flag.
- The order of input tables must be kept in order to avoid errors.
# combine ps2, ps4, NSC enrollment tables
make_all_ps_enrollment <- function(vw_ps_student_terms_tbl,
crosswalk_nsc_institution_tbl,
nsc_highschool_tbl,
nsc_postsecondary_tbl){
# get partner enrollment table
ps2_ps4_enrollment_lt <-
vw_ps_student_terms_tbl |>
mutate(ps_type=substr(ps_type, 3, 3)) |>
select(nswers_id,
institution_id,
ps_type,
start_date) |>
distinct() |>
mutate(college_state="In-state",
public_private="Public")
# get NSC college code crosswalk
nsc_institution_xwalk_lt <-
crosswalk_nsc_institution_tbl |>
rename_with(tolower) |>
select(college_code=nsc_college_and_branch_code, institution_id=ipeds_unit_id) |>
distinct()
# get NSC tables
nsc_hs_enrollment_lt <-
nsc_highschool_tbl |>
rename_with(tolower) |>
filter(found == "Y",
graduated == "N") |>
mutate(college_type=substr(college_type, 1, 1)) |>
mutate(college_type=ifelse(college_type=="L", "2", college_type)) |>
mutate(college_state=ifelse(college_state=="NE", "In-state", "Out-of-state")) |>
left_join(nsc_institution_xwalk_lt) |>
select(nswers_id,
institution_id,
college_state,
public_private,
ps_type = college_type,
start_date = enrollment_begin_date) |>
distinct()
nsc_ps_enrollment_lt <-
nsc_postsecondary_tbl |>
rename_with(tolower) |>
filter(found == "Y",
graduated == "N") |>
mutate(college_type=substr(college_type, 1, 1)) |>
mutate(college_type=ifelse(college_type=="L", "2", college_type)) |>
mutate(college_state=ifelse(college_state=="NE", "In-state", "Out-of-state")) |>
left_join(nsc_institution_xwalk_lt) |>
select(nswers_id,
institution_id,
college_state,
public_private,
ps_type = college_type,
start_date = enrollment_begin_date) |>
distinct()
# combine tables
all_ps_enrollment_lt <-
ps2_ps4_enrollment_lt |>
union(nsc_hs_enrollment_lt) |>
union(nsc_ps_enrollment_lt) |>
distinct()
return(all_ps_enrollment_lt)
}
# Usage in _targets
list(
load_rdb_tables,
tar_target(all_ps_enrollment_lt2,
make_all_ps_enrollment(vw_ps_student_terms_tbl,
crosswalk_nsc_institution_tbl,
nsc_highschool_tbl,
nsc_postsecondary_tbl))
)
Leave a Reply