## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = FALSE,
  message  = FALSE,
  warning  = FALSE
)
suppressPackageStartupMessages(library(systemfonts))
suppressPackageStartupMessages(library(textshaping))

## ----setup--------------------------------------------------------------------
# library(educabR)
# library(dplyr)
# library(tidyr)
# library(ggplot2)

## ----download-----------------------------------------------------------------
# # Download all schools for 2023
# escolas <- get_censo_escolar(year = 2023)
# 
# # Or filter by state for faster exploration
# escolas_sp <- get_censo_escolar(year = 2023, uf = "SP")

## ----overview-----------------------------------------------------------------
# indicators <- c(
#   "in_internet",
#   "in_banda_larga",
#   "in_biblioteca",
#   "in_laboratorio_informatica",
#   "in_laboratorio_ciencias",
#   "in_quadra_esportes",
#   "in_agua_potavel",
#   "in_esgoto_rede_publica"
# )
# 
# infra_summary <-
#   escolas |>
#   summarise(across(all_of(indicators), ~ mean(. == 1, na.rm = TRUE) * 100)) |>
#   pivot_longer(everything(), names_to = "indicator", values_to = "pct") |>
#   mutate(
#     label = c(
#       "Internet", "Broadband", "Library", "Computer lab",
#       "Science lab", "Sports court", "Drinking water", "Public sewage"
#     )
#   )
# 
# ggplot(infra_summary, aes(x = reorder(label, pct), y = pct)) +
#   geom_col(fill = "steelblue") +
#   coord_flip() +
#   labs(
#     title = "Percentage of Schools with Key Infrastructure (2023)",
#     x     = NULL,
#     y     = "% of schools"
#   ) +
#   theme_minimal()

## ----by-admin-----------------------------------------------------------------
# admin_labels <- c(
#   "1" = "Federal",
#   "2" = "State",
#   "3" = "Municipal",
#   "4" = "Private"
# )
# 
# infra_admin <-
#   escolas |>
#   mutate(admin = admin_labels[as.character(tp_dependencia)]) |>
#   group_by(admin) |>
#   summarise(
#     Internet       = mean(in_internet == 1, na.rm = TRUE) * 100,
#     Library        = mean(in_biblioteca == 1, na.rm = TRUE) * 100,
#     `Computer lab` = mean(in_laboratorio_informatica == 1, na.rm = TRUE) * 100,
#     `Science lab`  = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100,
#     .groups = "drop"
#   ) |>
#   pivot_longer(-admin, names_to = "resource", values_to = "pct")
# 
# ggplot(infra_admin, aes(x = resource, y = pct, fill = admin)) +
#   geom_col(position = "dodge") +
#   labs(
#     title = "School Infrastructure by Administrative Type (2023)",
#     x     = NULL,
#     y     = "% of schools",
#     fill  = "Type"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 20, hjust = 1))

## ----by-region----------------------------------------------------------------
# region_labels <- c(
#   "Norte"        = "North",
#   "Nordeste"     = "Northeast",
#   "Sudeste"      = "Southeast",
#   "Sul"          = "South",
#   "Centro-Oeste" = "Central-West"
# )
# 
# infra_region <-
#   escolas |>
#   mutate(region = region_labels[no_regiao]) |>
#   group_by(region) |>
#   summarise(
#     Internet       = mean(in_internet == 1, na.rm = TRUE) * 100,
#     Library        = mean(in_biblioteca == 1, na.rm = TRUE) * 100,
#     `Science lab`  = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100,
#     `Sports court` = mean(in_quadra_esportes == 1, na.rm = TRUE) * 100,
#     .groups = "drop"
#   ) |>
#   pivot_longer(-region, names_to = "resource", values_to = "pct")
# 
# ggplot(infra_region, aes(x = region, y = pct, fill = resource)) +
#   geom_col(position = "dodge") +
#   labs(
#     title = "School Infrastructure by Region (2023)",
#     x     = NULL,
#     y     = "% of schools",
#     fill  = NULL
#   ) +
#   theme_minimal()

## ----urban-rural--------------------------------------------------------------
# infra_location <-
#   escolas |>
#   mutate(
#     location = ifelse(tp_localizacao == 1, "Urban", "Rural")
#   ) |>
#   group_by(location) |>
#   summarise(
#     Internet       = mean(in_internet == 1, na.rm = TRUE) * 100,
#     Broadband      = mean(in_banda_larga == 1, na.rm = TRUE) * 100,
#     Library        = mean(in_biblioteca == 1, na.rm = TRUE) * 100,
#     `Computer lab` = mean(in_laboratorio_informatica == 1, na.rm = TRUE) * 100,
#     `Science lab`  = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100,
#     `Sports court` = mean(in_quadra_esportes == 1, na.rm = TRUE) * 100,
#     .groups = "drop"
#   ) |>
#   pivot_longer(-location, names_to = "resource", values_to = "pct")
# 
# ggplot(infra_location, aes(x = resource, y = pct, fill = location)) +
#   geom_col(position = "dodge") +
#   scale_fill_manual(values = c("Urban" = "steelblue", "Rural" = "coral")) +
#   labs(
#     title = "School Infrastructure: Urban vs Rural (2023)",
#     x     = NULL,
#     y     = "% of schools",
#     fill  = NULL
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 20, hjust = 1))

## ----accessibility------------------------------------------------------------
# access_cols <- c(
#   "in_acessibilidade_rampas",
#   "in_acessibilidade_corrimao",
#   "in_acessibilidade_elevador",
#   "in_acessibilidade_pisos_tateis",
#   "in_acessibilidade_sinal_sonoro",
#   "in_acessibilidade_sinal_tatil",
#   "in_acessibilidade_sinal_visual",
#   "in_acessibilidade_inexistente"
# )
# 
# access_labels <- c(
#   "Ramps", "Handrails", "Elevator", "Tactile floors",
#   "Sound signals", "Tactile signals", "Visual signals", "None"
# )
# 
# access_summary <-
#   escolas |>
#   summarise(across(all_of(access_cols), ~ mean(. == 1, na.rm = TRUE) * 100)) |>
#   pivot_longer(everything(), names_to = "feature", values_to = "pct") |>
#   mutate(label = access_labels)
# 
# ggplot(access_summary, aes(x = reorder(label, pct), y = pct)) +
#   geom_col(fill = "#2a9d8f") +
#   coord_flip() +
#   labs(
#     title = "School Accessibility Features (2023)",
#     x     = NULL,
#     y     = "% of schools"
#   ) +
#   theme_minimal()

## ----internet-by-state--------------------------------------------------------
# internet_uf <-
#   escolas |>
#   group_by(sg_uf) |>
#   summarise(
#     pct_internet = mean(in_internet == 1, na.rm = TRUE) * 100,
#     .groups = "drop"
#   ) |>
#   arrange(pct_internet)
# 
# ggplot(internet_uf, aes(x = reorder(sg_uf, pct_internet), y = pct_internet)) +
#   geom_col(fill = "steelblue") +
#   coord_flip() +
#   labs(
#     title = "Schools with Internet Access by State (2023)",
#     x     = NULL,
#     y     = "% of schools"
#   ) +
#   theme_minimal()

