This vignette shows a more complex use case of auscensus. Let’s assume we want to extract the percentage of Australian Citizens for all Commonwealth Electoral Divisions, as measured in last 4 Censuses (2006-2021).
An initial exploration shows that this data can be found in table 01 (across all four censuses) - which provided an statistical summary. However, is not published aggregated by electorate across all censuses.
#| eval: true
census_tables <- list_census_tables() |>
filter(Number %in% c("01"))
tables_summary <- census_tables |>
select(-contains("Table")) |>
pivot_longer(-Number, names_to="Year",values_to = "Value") |>
filter(!is.na(Value)) |>
select(-Value)
list_census_geo_tables(year = tables_summary$Year,geo="CED|CED_2007|CED_2004",table_number = tables_summary$Number) |>
mutate(Year=as.character(Year)) |>
right_join(tables_summary, by=c("table_number"="Number","Year"="Year"))
#> # A tibble: 4 × 3
#> Year table_number CED
#> <chr> <chr> <lgl>
#> 1 2011 01 TRUE
#> 2 2016 01 TRUE
#> 3 2021 01 TRUE
#> 4 2006 01 NA
Therefore, we will retrieve the data from the lowest statistical unit. However, SA1 were not available in 2006 - where the smallest area was a “CD”.
#| eval: true
list_census_geo_tables(year = tables_summary$Year,geo=c("SA1|CD"),table_number = tables_summary$Number) |>
mutate(Year=as.character(Year)) |>
right_join(tables_summary, by=c("table_number"="Number","Year"="Year"))
#> # A tibble: 4 × 4
#> Year table_number CD SA1
#> <chr> <chr> <lgl> <lgl>
#> 1 2006 01 TRUE NA
#> 2 2011 01 NA TRUE
#> 3 2016 01 NA TRUE
#> 4 2021 01 NA TRUE
The next step is to figure the attributes for the numbers of Australian citizen and total population, which are presented below:
citizenship_attributes <- tibble()
for(i in 1:nrow(tables_summary)){
table_i <- tables_summary[i,]$Number
year_i <- tables_summary[i,]$Year
attr_i <- list_census_attributes(table_i) |>
pivot_longer(-c(Table,Attribute),
names_to="Year",values_to = "Value") |>
filter(Year==year_i) |>
filter(!is.na(Value)) |>
select(-Value)
citizenship_attributes <- bind_rows(attr_i,citizenship_attributes)
}
citizenship_attributes |>
head()
#> # A tibble: 6 × 3
#> Table Attribute Year
#> <chr> <chr> <chr>
#> 1 01 Total Persons_males 2006
#> 2 01 Total Persons_females 2006
#> 3 01 Total Persons_persons 2006
#> 4 01 Age Groups: 0-4 Years_males 2006
#> 5 01 Age Groups: 0-4 Years_females 2006
#> 6 01 Age Groups: 0-4 Years_persons 2006
citizenship_attributes <- citizenship_attributes |>
distinct(Attribute,Year) |>
filter(str_detect(Attribute,"[Aa]ustralian")) |>
filter(str_detect(Attribute,"[Pp]erson")) |>
mutate(dummy=TRUE) |>
pivot_wider(names_from = Year, values_from = dummy) |>
bind_rows(
citizenship_attributes |>
distinct(Attribute,Year) |>
filter(str_detect(Attribute,"^[Tt]otal")) |>
filter(str_detect(Attribute,"[Pp]ersons$")) |>
mutate(dummy=TRUE) |>
pivot_wider(names_from = Year, values_from = dummy)
)
citizenship_attributes
#> # A tibble: 4 × 5
#> Attribute `2006` `2011` `2016` `2021`
#> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 Australian Citizen_persons TRUE NA NA NA
#> 2 Australian_citizen_persons NA TRUE TRUE TRUE
#> 3 Total Persons_persons TRUE NA NA NA
#> 4 Total_persons_persons NA TRUE TRUE TRUE
Using attribute_tibble_to_list, this data frame can be converted into the required format.
citizenship_attributes <- citizenship_attributes |>
select(Attribute) |>
mutate(AttrNew = case_when(
str_detect(Attribute,"Australian") ~ "Australian Citizens",
str_detect(Attribute,"Total") ~ "Total"
))
levels <- attribute_tibble_to_list(citizenship_attributes)
Now, we can cycle through the four censuses and extract the data. Please note that CDs and SA1s are not equivalent, but they are stored together for convenience:
census_years <- c("2006","2011","2016","2021")
citizenship <- tibble()
for(year in census_years){
if(year=="2006"){
geo_structure_x <- "CD"
} else{
geo_structure_x <- "SA1"
}
citizenship_i <- get_census_summary(census_table = census_tables,
selected_years = year,
geo_structure=geo_structure_x,
attribute = levels)
citizenship <- bind_rows(citizenship,citizenship_i)
}
#> ERROR : [EBUSY] Failed to remove 'C:/Users/carlo/OneDrive/Documents/.auscensus_cache/CD_NSW_B01.csv': resource busy or locked
#> ERROR : [EBUSY] Failed to remove 'C:/Users/carlo/OneDrive/Documents/.auscensus_cache/CD_NSW_B01.csv': resource busy or locked
#> ERROR : [EBUSY] Failed to remove 'C:/Users/carlo/OneDrive/Documents/.auscensus_cache/2011Census_B01_AUST_SA1_short.csv': resource busy or locked
#> ERROR : [EBUSY] Failed to remove 'C:/Users/carlo/OneDrive/Documents/.auscensus_cache/2016Census_G01_AUS_SA1.csv': resource busy or locked
#> ERROR : [EBUSY] Failed to remove 'C:/Users/carlo/OneDrive/Documents/.auscensus_cache/2021Census_G01_AUST_SA1.csv': resource busy or locked
rm(citizenship_i,geo_structure_x,levels,citizenship_attributes)
To aggregate the data, aussiemaps::geo_aggregate() can help using area to apportion on non-overalpping cases. Then, this package’s calculate_percentage() will take the totals from the list and calculate percentages.
citizenship_ced<- tibble()
codes <- c("CD_CODE_2006","SA1_7DIGITCODE_2011","SA1_7DIGITCODE_2016","SA1_CODE_2021")
ceds <- c("CED_NAME_2006","CED_NAME_2011","CED_NAME_2016","CED_NAME_2021")
for(i in 1:length(census_years)){
year <- as.double(census_years[i])
value_i <- citizenship |>
filter(Year==year) |>
select(-Unit) |>
rename(!!codes[i]:="Census_Code") |>
collect() |>
aussiemaps::geo_aggregate(
values_col="Value",
original_geo=codes[i],
new_geo=ceds[i],
grouping_col = c("Year","Attribute"),
year=census_years[i]) |>
rename("Unit"=ceds[i]) |>
filter(!is.na(Unit)) |>
filter(str_detect(Unit,"[Ss]hipping",TRUE))|>
filter(str_detect(Unit,"[Uu]sual",TRUE)) |>
filter(str_detect(Unit,"[Aa]pplicable",TRUE))
value_i <- value_i |>
auscensus::calculate_percentage(key_col = "Attribute",
value_col = "Value",
key_value = "Total",
percentage_scale = 100)
citizenship_ced <- bind_rows(citizenship_ced,value_i)
}
citizenship_ced
#> # A tibble: 595 × 6
#> Unit Year Attribute Value Total Percentage
#> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 Adelaide 2006 Australian Citizens 115908 140151 82.7
#> 2 Aston 2006 Australian Citizens 115010 129463 88.8
#> 3 Ballarat 2006 Australian Citizens 118247 129221 91.5
#> 4 Banks 2006 Australian Citizens 104181 118371 88.0
#> 5 Barker 2006 Australian Citizens 133033 145410 91.5
#> 6 Barton 2006 Australian Citizens 105287 131234 80.2
#> 7 Bass 2006 Australian Citizens 85631 94270 90.8
#> 8 Batman 2006 Australian Citizens 105561 128107 82.4
#> 9 Bendigo 2006 Australian Citizens 121930 131885 92.5
#> 10 Bennelong 2006 Australian Citizens 104473 129340 80.8
#> # … with 585 more rows