Title: | Data tools used at the Economic Policy Institute |
---|---|
Description: | Tools used by the Economic Policy Institute. |
Authors: | Ben Zipperer [aut, cre], Jori Kandra [ctb], Economic Policy Institute [cph, fnd] |
Maintainer: | Ben Zipperer <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.5.0 |
Built: | 2025-02-20 06:16:47 UTC |
Source: | https://github.com/economic/epidatatools |
Calculate the averaged (smoothed) median
averaged_median( x, w = NULL, na.rm = TRUE, quantiles_n = 9L, quantiles_w = c(1:4, 5, 4:1) )
averaged_median( x, w = NULL, na.rm = TRUE, quantiles_n = 9L, quantiles_w = c(1:4, 5, 4:1) )
x |
numeric vector or an R object |
w |
numeric vector of sample weights the same length as x giving the weights to use for elements of x |
na.rm |
logical; if true, any NA or NaN's are removed from x before computation |
quantiles_n |
integer number of quantiles used for averaging; must be odd |
quantiles_w |
weights used for average quantiles; length must equal quantiles_n |
a scalar
averaged_median(x = mtcars$mpg)
averaged_median(x = mtcars$mpg)
Calculate the averaged (smoothed) quantile
averaged_quantile( x, w = NULL, probs = 0.5, na.rm = TRUE, quantiles_n = 9L, quantiles_w = c(1:4, 5, 4:1) )
averaged_quantile( x, w = NULL, probs = 0.5, na.rm = TRUE, quantiles_n = 9L, quantiles_w = c(1:4, 5, 4:1) )
x |
numeric vector or an R object |
w |
numeric vector of sample weights the same length as x giving the weights to use for elements of x |
probs |
numeric; percentiles with values in [0, 1] |
na.rm |
logical; if true, any NA or NaN's are removed from x before computation |
quantiles_n |
integer number of quantiles used for averaging; must be odd |
quantiles_w |
weights used for average quantiles; length must equal quantiles_n |
a numeric vector with the same length as probs
averaged_quantile(x = mtcars$mpg, probs = c(0.25, 0.5, 0.75))
averaged_quantile(x = mtcars$mpg, probs = c(0.25, 0.5, 0.75))
Summarize a data frame as binned interpolated percentiles
binipolate(data, x, probs = 0.5, bin_size, .by = NULL, w = NULL)
binipolate(data, x, probs = 0.5, bin_size, .by = NULL, w = NULL)
data |
data frame |
x |
column to compute |
probs |
numeric vector of percentiles with values in [0, 1] |
bin_size |
size of binning |
.by |
optional, a tidy-selection of columns for single-operation grouping |
w |
numeric vector of weights the same length as x giving the weights to use for elements of x |
a tibble or data frame
binipolate(mtcars, mpg, bin_size = 0.25)
binipolate(mtcars, mpg, probs = c(0.25, 0.5, 0.75), bin_size = 0.25)
binipolate(mtcars, mpg, probs = c(0.25, 0.5, 0.75), bin_size = 0.25, .by = cyl, w = wt)
binipolate(mtcars, mpg, bin_size = 0.25)
binipolate(mtcars, mpg, probs = c(0.25, 0.5, 0.75), bin_size = 0.25)
binipolate(mtcars, mpg, probs = c(0.25, 0.5, 0.75), bin_size = 0.25, .by = cyl, w = wt)
Cross-tabulate one or two variables
crosstab(data, ..., w = NULL, percent = NULL)
crosstab(data, ..., w = NULL, percent = NULL)
data |
a data frame |
... |
one or two variables, for a one- or two-way cross-tabulation |
w |
weight |
percent |
for a two-way cross-tabulation, replace counts with row or column percentages ("row" or "column") |
a tibble
crosstab(mtcars, cyl)
crosstab(mtcars, cyl, gear)
crosstab(mtcars, cyl, gear, w = mpg, percent = "column")
crosstab(mtcars, cyl)
crosstab(mtcars, cyl, gear)
crosstab(mtcars, cyl, gear, w = mpg, percent = "column")
Convenience functions for downloading samples and variables from IPUMS microdata using their API and the ipumsr package.
dl_ipums_micro(extract) dl_ipums_acs1(years = NULL, variables, description = NULL) dl_ipums_asec(years = NULL, variables, description = NULL) dl_ipums_cps(months = NULL, variables, description = NULL)
dl_ipums_micro(extract) dl_ipums_acs1(years = NULL, variables, description = NULL) dl_ipums_asec(years = NULL, variables, description = NULL) dl_ipums_cps(months = NULL, variables, description = NULL)
extract |
an IPUMS microdata extract as defined by ipumsr::define_extract_micro() |
years |
a vector of years |
variables |
a vector of variable names, or a list of detailed variable specifications as created by ipumsr::var_spec() |
description |
description for the extract |
months |
a vector of dates representing months of CPS samples. |
These functions are simply wrappers around ipumsr
and require you to have an IPUMS API key saved in the IPUMS_API_KEY
environment variable.
a tibble of microdata from the IPUMS API
dl_ipums_micro(): base function group
dl_ipums_acs1(): Download IPUMS ACS 1-year files
dl_ipums_asec(): Download IPUMS CPS ASEC
dl_ipums_cps(): Download IPUMS Monthly CPS
# example ASEC download
dl_ipums_asec(2021:2023, c("YEAR", "OFFPOV", "ASECWT"))

# example monthly CPS download
begin_month = lubridate::ym("2022 September")
end_month = lubridate::ym("2024 August")
cps_months = seq(begin_month, end_month, by = "month")
dl_ipums_cps(cps_months, c("EARNWT", "HOURWAGE2"))

# use dl_ipums_micro with a custom extract
extract = ipumsr::define_extract_micro(
  collection = "cps",
  description = "CPS ASEC extract",
  samples = c("cps2021_03s", "cps2022_03s", "cps2023_03s"),
  variables = c("YEAR", "OFFPOV", "ASECWT")
)
dl_ipums_micro(extract)
# example ASEC download
dl_ipums_asec(2021:2023, c("YEAR", "OFFPOV", "ASECWT"))

# example monthly CPS download
begin_month = lubridate::ym("2022 September")
end_month = lubridate::ym("2024 August")
cps_months = seq(begin_month, end_month, by = "month")
dl_ipums_cps(cps_months, c("EARNWT", "HOURWAGE2"))

# use dl_ipums_micro with a custom extract
extract = ipumsr::define_extract_micro(
  collection = "cps",
  description = "CPS ASEC extract",
  samples = c("cps2021_03s", "cps2022_03s", "cps2023_03s"),
  variables = c("YEAR", "OFFPOV", "ASECWT")
)
dl_ipums_micro(extract)
Calculate the binned interpolated median
interpolated_median(x, bin_size, w = NULL, na.rm = TRUE)
interpolated_median(x, bin_size, w = NULL, na.rm = TRUE)
x |
numeric vector or an R object |
bin_size |
size used for binning |
w |
numeric vector of weights the same length as x giving the weights to use for elements of x |
na.rm |
logical; if true, any NA or NaN's are removed from x before computation |
numeric vector
interpolated_median(x = mtcars$mpg, bin_size = 0.50)
interpolated_median(x = mtcars$mpg, bin_size = 0.50)
Calculate the binned interpolated quantile
interpolated_quantile(x, bin_size, probs = 0.5, w = NULL, na.rm = TRUE)
interpolated_quantile(x, bin_size, probs = 0.5, w = NULL, na.rm = TRUE)
x |
numeric vector or an R object |
bin_size |
size used for binning |
probs |
numeric; percentiles with values in [0, 1] |
w |
numeric vector of weights the same length as x giving the weights to use for elements of x |
na.rm |
logical; if true, any NA or NaN's are removed from x before computation |
a numeric vector
interpolated_quantile(x = mtcars$mpg, bin_size = 0.50, probs = c(0.25, 0.5, 0.75))
interpolated_quantile(x = mtcars$mpg, bin_size = 0.50, probs = c(0.25, 0.5, 0.75))
Join data frames and create a merge indicator
merge_status(x, y, ...)

## S3 method for class 'data.frame'
merge_status(x, y, ...)
merge_status(x, y, ...)

## S3 method for class 'data.frame'
merge_status(x, y, ...)
x, y |
data frames |
... |
passed to dplyr::full_join() |
a merged data frame from full_join with an extra column _merge
library(dplyr)
merge_status(band_members, band_instruments, by = "name")
library(dplyr)
merge_status(band_members, band_instruments, by = "name")
Summarize distinct groups
summarize_groups(.data, .groups, ...)
summarize_groups(.data, .groups, ...)
.data |
a data frame |
.groups |
grouping variables as a tidy selection specification of columns, as used in dplyr::select() |
... |
name-value pairs passed to dplyr::summarize() |
a tibble
summarize_groups(mtcars, cyl|gear|carb, median(mpg), mean(hp))
summarize_groups(mtcars, cyl|gear|carb, median(mpg), mean(hp))